1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/buf.h> 45 #include <sys/conf.h> 46 #include <sys/sysent.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/mountctl.h> 50 #include <sys/sysproto.h> 51 #include <sys/filedesc.h> 52 #include <sys/kernel.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/linker.h> 56 #include <sys/stat.h> 57 #include <sys/unistd.h> 58 #include <sys/vnode.h> 59 #include <sys/proc.h> 60 #include <sys/priv.h> 61 #include <sys/jail.h> 62 #include <sys/namei.h> 63 #include <sys/nlookup.h> 64 #include <sys/dirent.h> 65 #include <sys/extattr.h> 66 #include <sys/spinlock.h> 67 #include <sys/kern_syscall.h> 68 #include <sys/objcache.h> 69 #include <sys/sysctl.h> 70 71 #include <sys/buf2.h> 72 #include <sys/file2.h> 73 #include <sys/spinlock2.h> 74 #include <sys/mplock2.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_object.h> 78 #include <vm/vm_page.h> 79 80 #include <machine/limits.h> 81 #include <machine/stdarg.h> 82 83 #include <vfs/union/union.h> 84 85 static void mount_warning(struct mount *mp, const char *ctl, ...); 86 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 87 static int checkvp_chdir (struct vnode *vn, struct thread *td); 88 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 89 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 90 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 91 static int getutimes (const struct timeval *, struct timespec *); 92 static int setfown (struct vnode *, uid_t, gid_t); 93 static int setfmode (struct vnode *, int); 94 static int setfflags (struct vnode *, int); 95 static int setutimes (struct vnode *, struct vattr *, 96 const struct timespec *, int); 97 static int usermount = 0; /* if 1, non-root can mount fs. */ 98 99 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 100 101 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 102 103 /* 104 * Virtual File System System Calls 105 */ 106 107 /* 108 * Mount a file system. 109 * 110 * mount_args(char *type, char *path, int flags, caddr_t data) 111 * 112 * MPALMOSTSAFE 113 */ 114 int 115 sys_mount(struct mount_args *uap) 116 { 117 struct thread *td = curthread; 118 struct vnode *vp; 119 struct nchandle nch; 120 struct mount *mp, *nullmp; 121 struct vfsconf *vfsp; 122 int error, flag = 0, flag2 = 0; 123 int hasmount; 124 struct vattr va; 125 struct nlookupdata nd; 126 char fstypename[MFSNAMELEN]; 127 struct ucred *cred; 128 129 get_mplock(); 130 cred = td->td_ucred; 131 if (jailed(cred)) { 132 error = EPERM; 133 goto done; 134 } 135 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 136 goto done; 137 138 /* 139 * Do not allow NFS export by non-root users. 140 */ 141 if (uap->flags & MNT_EXPORTED) { 142 error = priv_check(td, PRIV_ROOT); 143 if (error) 144 goto done; 145 } 146 /* 147 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 148 */ 149 if (priv_check(td, PRIV_ROOT)) 150 uap->flags |= MNT_NOSUID | MNT_NODEV; 151 152 /* 153 * Lookup the requested path and extract the nch and vnode. 154 */ 155 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 156 if (error == 0) { 157 if ((error = nlookup(&nd)) == 0) { 158 if (nd.nl_nch.ncp->nc_vp == NULL) 159 error = ENOENT; 160 } 161 } 162 if (error) { 163 nlookup_done(&nd); 164 goto done; 165 } 166 167 /* 168 * If the target filesystem is resolved via a nullfs mount, then 169 * nd.nl_nch.mount will be pointing to the nullfs mount structure 170 * instead of the target file system. We need it in case we are 171 * doing an update. 172 */ 173 nullmp = nd.nl_nch.mount; 174 175 /* 176 * Extract the locked+refd ncp and cleanup the nd structure 177 */ 178 nch = nd.nl_nch; 179 cache_zero(&nd.nl_nch); 180 nlookup_done(&nd); 181 182 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch)) 183 hasmount = 1; 184 else 185 hasmount = 0; 186 187 188 /* 189 * now we have the locked ref'd nch and unreferenced vnode. 190 */ 191 vp = nch.ncp->nc_vp; 192 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 193 cache_put(&nch); 194 goto done; 195 } 196 cache_unlock(&nch); 197 198 /* 199 * Extract the file system type. We need to know this early, to take 200 * appropriate actions if we are dealing with a nullfs. 201 */ 202 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 203 cache_drop(&nch); 204 vput(vp); 205 goto done; 206 } 207 208 /* 209 * Now we have an unlocked ref'd nch and a locked ref'd vp 210 */ 211 if (uap->flags & MNT_UPDATE) { 212 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 213 cache_drop(&nch); 214 vput(vp); 215 error = EINVAL; 216 goto done; 217 } 218 219 if (strncmp(fstypename, "null", 5) == 0) { 220 KKASSERT(nullmp); 221 mp = nullmp; 222 } else { 223 mp = vp->v_mount; 224 } 225 226 flag = mp->mnt_flag; 227 flag2 = mp->mnt_kern_flag; 228 /* 229 * We only allow the filesystem to be reloaded if it 230 * is currently mounted read-only. 231 */ 232 if ((uap->flags & MNT_RELOAD) && 233 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 234 cache_drop(&nch); 235 vput(vp); 236 error = EOPNOTSUPP; /* Needs translation */ 237 goto done; 238 } 239 /* 240 * Only root, or the user that did the original mount is 241 * permitted to update it. 242 */ 243 if (mp->mnt_stat.f_owner != cred->cr_uid && 244 (error = priv_check(td, PRIV_ROOT))) { 245 cache_drop(&nch); 246 vput(vp); 247 goto done; 248 } 249 if (vfs_busy(mp, LK_NOWAIT)) { 250 cache_drop(&nch); 251 vput(vp); 252 error = EBUSY; 253 goto done; 254 } 255 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 256 cache_drop(&nch); 257 vfs_unbusy(mp); 258 vput(vp); 259 error = EBUSY; 260 goto done; 261 } 262 vsetflags(vp, VMOUNT); 263 mp->mnt_flag |= 264 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 265 vn_unlock(vp); 266 goto update; 267 } 268 /* 269 * If the user is not root, ensure that they own the directory 270 * onto which we are attempting to mount. 271 */ 272 if ((error = VOP_GETATTR(vp, &va)) || 273 (va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) { 274 cache_drop(&nch); 275 vput(vp); 276 goto done; 277 } 278 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 279 cache_drop(&nch); 280 vput(vp); 281 goto done; 282 } 283 if (vp->v_type != VDIR) { 284 cache_drop(&nch); 285 vput(vp); 286 error = ENOTDIR; 287 goto done; 288 } 289 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 290 cache_drop(&nch); 291 vput(vp); 292 error = EPERM; 293 goto done; 294 } 295 vfsp = vfsconf_find_by_name(fstypename); 296 if (vfsp == NULL) { 297 linker_file_t lf; 298 299 /* Only load modules for root (very important!) */ 300 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 301 cache_drop(&nch); 302 vput(vp); 303 goto done; 304 } 305 error = linker_load_file(fstypename, &lf); 306 if (error || lf == NULL) { 307 cache_drop(&nch); 308 vput(vp); 309 if (lf == NULL) 310 error = ENODEV; 311 goto done; 312 } 313 lf->userrefs++; 314 /* lookup again, see if the VFS was loaded */ 315 vfsp = vfsconf_find_by_name(fstypename); 316 if (vfsp == NULL) { 317 lf->userrefs--; 318 linker_file_unload(lf); 319 cache_drop(&nch); 320 vput(vp); 321 error = ENODEV; 322 goto done; 323 } 324 } 325 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 326 cache_drop(&nch); 327 vput(vp); 328 error = EBUSY; 329 goto done; 330 } 331 vsetflags(vp, VMOUNT); 332 333 /* 334 * Allocate and initialize the filesystem. 335 */ 336 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 337 mount_init(mp); 338 vfs_busy(mp, LK_NOWAIT); 339 mp->mnt_op = vfsp->vfc_vfsops; 340 mp->mnt_vfc = vfsp; 341 vfsp->vfc_refcount++; 342 mp->mnt_stat.f_type = vfsp->vfc_typenum; 343 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 344 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 345 mp->mnt_stat.f_owner = cred->cr_uid; 346 vn_unlock(vp); 347 update: 348 /* 349 * Set the mount level flags. 350 */ 351 if (uap->flags & MNT_RDONLY) 352 mp->mnt_flag |= MNT_RDONLY; 353 else if (mp->mnt_flag & MNT_RDONLY) 354 mp->mnt_kern_flag |= MNTK_WANTRDWR; 355 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 356 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 357 MNT_NOSYMFOLLOW | MNT_IGNORE | 358 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 359 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 360 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 361 MNT_NOSYMFOLLOW | MNT_IGNORE | 362 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 363 /* 364 * Mount the filesystem. 365 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 366 * get. 367 */ 368 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 369 if (mp->mnt_flag & MNT_UPDATE) { 370 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 371 mp->mnt_flag &= ~MNT_RDONLY; 372 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 373 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 374 if (error) { 375 mp->mnt_flag = flag; 376 mp->mnt_kern_flag = flag2; 377 } 378 vfs_unbusy(mp); 379 vclrflags(vp, VMOUNT); 380 vrele(vp); 381 cache_drop(&nch); 382 goto done; 383 } 384 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 385 /* 386 * Put the new filesystem on the mount list after root. The mount 387 * point gets its own mnt_ncmountpt (unless the VFS already set one 388 * up) which represents the root of the mount. The lookup code 389 * detects the mount point going forward and checks the root of 390 * the mount going backwards. 391 * 392 * It is not necessary to invalidate or purge the vnode underneath 393 * because elements under the mount will be given their own glue 394 * namecache record. 395 */ 396 if (!error) { 397 if (mp->mnt_ncmountpt.ncp == NULL) { 398 /* 399 * allocate, then unlock, but leave the ref intact 400 */ 401 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 402 cache_unlock(&mp->mnt_ncmountpt); 403 } 404 mp->mnt_ncmounton = nch; /* inherits ref */ 405 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 406 407 /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */ 408 vclrflags(vp, VMOUNT); 409 mountlist_insert(mp, MNTINS_LAST); 410 vn_unlock(vp); 411 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 412 error = vfs_allocate_syncvnode(mp); 413 vfs_unbusy(mp); 414 error = VFS_START(mp, 0); 415 vrele(vp); 416 } else { 417 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 418 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 419 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 420 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 421 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 422 vclrflags(vp, VMOUNT); 423 mp->mnt_vfc->vfc_refcount--; 424 vfs_unbusy(mp); 425 kfree(mp, M_MOUNT); 426 cache_drop(&nch); 427 vput(vp); 428 } 429 done: 430 rel_mplock(); 431 return (error); 432 } 433 434 /* 435 * Scan all active processes to see if any of them have a current 436 * or root directory onto which the new filesystem has just been 437 * mounted. If so, replace them with the new mount point. 438 * 439 * The passed ncp is ref'd and locked (from the mount code) and 440 * must be associated with the vnode representing the root of the 441 * mount point. 442 */ 443 struct checkdirs_info { 444 struct nchandle old_nch; 445 struct nchandle new_nch; 446 struct vnode *old_vp; 447 struct vnode *new_vp; 448 }; 449 450 static int checkdirs_callback(struct proc *p, void *data); 451 452 static void 453 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 454 { 455 struct checkdirs_info info; 456 struct vnode *olddp; 457 struct vnode *newdp; 458 struct mount *mp; 459 460 /* 461 * If the old mount point's vnode has a usecount of 1, it is not 462 * being held as a descriptor anywhere. 463 */ 464 olddp = old_nch->ncp->nc_vp; 465 if (olddp == NULL || olddp->v_sysref.refcnt == 1) 466 return; 467 468 /* 469 * Force the root vnode of the new mount point to be resolved 470 * so we can update any matching processes. 471 */ 472 mp = new_nch->mount; 473 if (VFS_ROOT(mp, &newdp)) 474 panic("mount: lost mount"); 475 cache_setunresolved(new_nch); 476 cache_setvp(new_nch, newdp); 477 478 /* 479 * Special handling of the root node 480 */ 481 if (rootvnode == olddp) { 482 vref(newdp); 483 vfs_cache_setroot(newdp, cache_hold(new_nch)); 484 } 485 486 /* 487 * Pass newdp separately so the callback does not have to access 488 * it via new_nch->ncp->nc_vp. 489 */ 490 info.old_nch = *old_nch; 491 info.new_nch = *new_nch; 492 info.new_vp = newdp; 493 allproc_scan(checkdirs_callback, &info); 494 vput(newdp); 495 } 496 497 /* 498 * NOTE: callback is not MP safe because the scanned process's filedesc 499 * structure can be ripped out from under us, amoung other things. 500 */ 501 static int 502 checkdirs_callback(struct proc *p, void *data) 503 { 504 struct checkdirs_info *info = data; 505 struct filedesc *fdp; 506 struct nchandle ncdrop1; 507 struct nchandle ncdrop2; 508 struct vnode *vprele1; 509 struct vnode *vprele2; 510 511 if ((fdp = p->p_fd) != NULL) { 512 cache_zero(&ncdrop1); 513 cache_zero(&ncdrop2); 514 vprele1 = NULL; 515 vprele2 = NULL; 516 517 /* 518 * MPUNSAFE - XXX fdp can be pulled out from under a 519 * foreign process. 520 * 521 * A shared filedesc is ok, we don't have to copy it 522 * because we are making this change globally. 523 */ 524 spin_lock_wr(&fdp->fd_spin); 525 if (fdp->fd_ncdir.mount == info->old_nch.mount && 526 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 527 vprele1 = fdp->fd_cdir; 528 vref(info->new_vp); 529 fdp->fd_cdir = info->new_vp; 530 ncdrop1 = fdp->fd_ncdir; 531 cache_copy(&info->new_nch, &fdp->fd_ncdir); 532 } 533 if (fdp->fd_nrdir.mount == info->old_nch.mount && 534 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 535 vprele2 = fdp->fd_rdir; 536 vref(info->new_vp); 537 fdp->fd_rdir = info->new_vp; 538 ncdrop2 = fdp->fd_nrdir; 539 cache_copy(&info->new_nch, &fdp->fd_nrdir); 540 } 541 spin_unlock_wr(&fdp->fd_spin); 542 if (ncdrop1.ncp) 543 cache_drop(&ncdrop1); 544 if (ncdrop2.ncp) 545 cache_drop(&ncdrop2); 546 if (vprele1) 547 vrele(vprele1); 548 if (vprele2) 549 vrele(vprele2); 550 } 551 return(0); 552 } 553 554 /* 555 * Unmount a file system. 556 * 557 * Note: unmount takes a path to the vnode mounted on as argument, 558 * not special file (as before). 559 * 560 * umount_args(char *path, int flags) 561 * 562 * MPALMOSTSAFE 563 */ 564 int 565 sys_unmount(struct unmount_args *uap) 566 { 567 struct thread *td = curthread; 568 struct proc *p __debugvar = td->td_proc; 569 struct mount *mp = NULL; 570 struct nlookupdata nd; 571 int error; 572 573 KKASSERT(p); 574 get_mplock(); 575 if (td->td_ucred->cr_prison != NULL) { 576 error = EPERM; 577 goto done; 578 } 579 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 580 goto done; 581 582 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 583 if (error == 0) 584 error = nlookup(&nd); 585 if (error) 586 goto out; 587 588 mp = nd.nl_nch.mount; 589 590 /* 591 * Only root, or the user that did the original mount is 592 * permitted to unmount this filesystem. 593 */ 594 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 595 (error = priv_check(td, PRIV_ROOT))) 596 goto out; 597 598 /* 599 * Don't allow unmounting the root file system. 600 */ 601 if (mp->mnt_flag & MNT_ROOTFS) { 602 error = EINVAL; 603 goto out; 604 } 605 606 /* 607 * Must be the root of the filesystem 608 */ 609 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 610 error = EINVAL; 611 goto out; 612 } 613 614 out: 615 nlookup_done(&nd); 616 if (error == 0) 617 error = dounmount(mp, uap->flags); 618 done: 619 rel_mplock(); 620 return (error); 621 } 622 623 /* 624 * Do the actual file system unmount. 625 */ 626 static int 627 dounmount_interlock(struct mount *mp) 628 { 629 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 630 return (EBUSY); 631 mp->mnt_kern_flag |= MNTK_UNMOUNT; 632 return(0); 633 } 634 635 static int 636 unmount_allproc_cb(struct proc *p, void *arg) 637 { 638 struct mount *mp; 639 640 if (p->p_textnch.ncp == NULL) 641 return 0; 642 643 mp = (struct mount *)arg; 644 if (p->p_textnch.mount == mp) 645 cache_drop(&p->p_textnch); 646 647 return 0; 648 } 649 650 int 651 dounmount(struct mount *mp, int flags) 652 { 653 struct namecache *ncp; 654 struct nchandle nch; 655 struct vnode *vp; 656 int error; 657 int async_flag; 658 int lflags; 659 int freeok = 1; 660 661 /* 662 * Exclusive access for unmounting purposes 663 */ 664 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 665 return (error); 666 667 /* 668 * Allow filesystems to detect that a forced unmount is in progress. 669 */ 670 if (flags & MNT_FORCE) 671 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 672 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT); 673 error = lockmgr(&mp->mnt_lock, lflags); 674 if (error) { 675 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 676 if (mp->mnt_kern_flag & MNTK_MWAIT) 677 wakeup(mp); 678 return (error); 679 } 680 681 if (mp->mnt_flag & MNT_EXPUBLIC) 682 vfs_setpublicfs(NULL, NULL, NULL); 683 684 vfs_msync(mp, MNT_WAIT); 685 async_flag = mp->mnt_flag & MNT_ASYNC; 686 mp->mnt_flag &=~ MNT_ASYNC; 687 688 /* 689 * If this filesystem isn't aliasing other filesystems, 690 * try to invalidate any remaining namecache entries and 691 * check the count afterwords. 692 */ 693 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 694 cache_lock(&mp->mnt_ncmountpt); 695 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 696 cache_unlock(&mp->mnt_ncmountpt); 697 698 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 699 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 700 allproc_scan(&unmount_allproc_cb, mp); 701 } 702 703 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 704 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 705 706 if ((flags & MNT_FORCE) == 0) { 707 error = EBUSY; 708 mount_warning(mp, "Cannot unmount: " 709 "%d namecache " 710 "references still " 711 "present", 712 ncp->nc_refs - 1); 713 } else { 714 mount_warning(mp, "Forced unmount: " 715 "%d namecache " 716 "references still " 717 "present", 718 ncp->nc_refs - 1); 719 freeok = 0; 720 } 721 } 722 } 723 724 /* 725 * nchandle records ref the mount structure. Expect a count of 1 726 * (our mount->mnt_ncmountpt). 727 */ 728 if (mp->mnt_refs != 1) { 729 if ((flags & MNT_FORCE) == 0) { 730 mount_warning(mp, "Cannot unmount: " 731 "%d process references still " 732 "present", mp->mnt_refs); 733 error = EBUSY; 734 } else { 735 mount_warning(mp, "Forced unmount: " 736 "%d process references still " 737 "present", mp->mnt_refs); 738 freeok = 0; 739 } 740 } 741 742 /* 743 * Decomission our special mnt_syncer vnode. This also stops 744 * the vnlru code. If we are unable to unmount we recommission 745 * the vnode. 746 */ 747 if (error == 0) { 748 if ((vp = mp->mnt_syncer) != NULL) { 749 mp->mnt_syncer = NULL; 750 vrele(vp); 751 } 752 if (((mp->mnt_flag & MNT_RDONLY) || 753 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 754 (flags & MNT_FORCE)) { 755 error = VFS_UNMOUNT(mp, flags); 756 } 757 } 758 if (error) { 759 if (mp->mnt_syncer == NULL) 760 vfs_allocate_syncvnode(mp); 761 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 762 mp->mnt_flag |= async_flag; 763 lockmgr(&mp->mnt_lock, LK_RELEASE); 764 if (mp->mnt_kern_flag & MNTK_MWAIT) 765 wakeup(mp); 766 return (error); 767 } 768 /* 769 * Clean up any journals still associated with the mount after 770 * filesystem activity has ceased. 771 */ 772 journal_remove_all_journals(mp, 773 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 774 775 mountlist_remove(mp); 776 777 /* 778 * Remove any installed vnode ops here so the individual VFSs don't 779 * have to. 780 */ 781 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 782 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 783 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 784 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 785 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 786 787 if (mp->mnt_ncmountpt.ncp != NULL) { 788 nch = mp->mnt_ncmountpt; 789 cache_zero(&mp->mnt_ncmountpt); 790 cache_clrmountpt(&nch); 791 cache_drop(&nch); 792 } 793 if (mp->mnt_ncmounton.ncp != NULL) { 794 nch = mp->mnt_ncmounton; 795 cache_zero(&mp->mnt_ncmounton); 796 cache_clrmountpt(&nch); 797 cache_drop(&nch); 798 } 799 800 mp->mnt_vfc->vfc_refcount--; 801 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 802 panic("unmount: dangling vnode"); 803 lockmgr(&mp->mnt_lock, LK_RELEASE); 804 if (mp->mnt_kern_flag & MNTK_MWAIT) 805 wakeup(mp); 806 if (freeok) 807 kfree(mp, M_MOUNT); 808 return (0); 809 } 810 811 static 812 void 813 mount_warning(struct mount *mp, const char *ctl, ...) 814 { 815 char *ptr; 816 char *buf; 817 __va_list va; 818 819 __va_start(va, ctl); 820 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf, 0) == 0) { 821 kprintf("unmount(%s): ", ptr); 822 kvprintf(ctl, va); 823 kprintf("\n"); 824 kfree(buf, M_TEMP); 825 } else { 826 kprintf("unmount(%p", mp); 827 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 828 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 829 kprintf("): "); 830 kvprintf(ctl, va); 831 kprintf("\n"); 832 } 833 __va_end(va); 834 } 835 836 /* 837 * Shim cache_fullpath() to handle the case where a process is chrooted into 838 * a subdirectory of a mount. In this case if the root mount matches the 839 * process root directory's mount we have to specify the process's root 840 * directory instead of the mount point, because the mount point might 841 * be above the root directory. 842 */ 843 static 844 int 845 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 846 { 847 struct nchandle *nch; 848 849 if (p && p->p_fd->fd_nrdir.mount == mp) 850 nch = &p->p_fd->fd_nrdir; 851 else 852 nch = &mp->mnt_ncmountpt; 853 return(cache_fullpath(p, nch, rb, fb, 0)); 854 } 855 856 /* 857 * Sync each mounted filesystem. 858 */ 859 860 #ifdef DEBUG 861 static int syncprt = 0; 862 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 863 #endif /* DEBUG */ 864 865 static int sync_callback(struct mount *mp, void *data); 866 867 /* 868 * MPALMOSTSAFE 869 */ 870 int 871 sys_sync(struct sync_args *uap) 872 { 873 get_mplock(); 874 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 875 #ifdef DEBUG 876 /* 877 * print out buffer pool stat information on each sync() call. 878 */ 879 if (syncprt) 880 vfs_bufstats(); 881 #endif /* DEBUG */ 882 rel_mplock(); 883 return (0); 884 } 885 886 static 887 int 888 sync_callback(struct mount *mp, void *data __unused) 889 { 890 int asyncflag; 891 892 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 893 asyncflag = mp->mnt_flag & MNT_ASYNC; 894 mp->mnt_flag &= ~MNT_ASYNC; 895 vfs_msync(mp, MNT_NOWAIT); 896 VFS_SYNC(mp, MNT_NOWAIT); 897 mp->mnt_flag |= asyncflag; 898 } 899 return(0); 900 } 901 902 /* XXX PRISON: could be per prison flag */ 903 static int prison_quotas; 904 #if 0 905 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 906 #endif 907 908 /* 909 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 910 * 911 * Change filesystem quotas. 912 * 913 * MPALMOSTSAFE 914 */ 915 int 916 sys_quotactl(struct quotactl_args *uap) 917 { 918 struct nlookupdata nd; 919 struct thread *td; 920 struct proc *p; 921 struct mount *mp; 922 int error; 923 924 get_mplock(); 925 td = curthread; 926 p = td->td_proc; 927 if (td->td_ucred->cr_prison && !prison_quotas) { 928 error = EPERM; 929 goto done; 930 } 931 932 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 933 if (error == 0) 934 error = nlookup(&nd); 935 if (error == 0) { 936 mp = nd.nl_nch.mount; 937 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 938 uap->arg, nd.nl_cred); 939 } 940 nlookup_done(&nd); 941 done: 942 rel_mplock(); 943 return (error); 944 } 945 946 /* 947 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 948 * void *buf, int buflen) 949 * 950 * This function operates on a mount point and executes the specified 951 * operation using the specified control data, and possibly returns data. 952 * 953 * The actual number of bytes stored in the result buffer is returned, 0 954 * if none, otherwise an error is returned. 955 * 956 * MPALMOSTSAFE 957 */ 958 int 959 sys_mountctl(struct mountctl_args *uap) 960 { 961 struct thread *td = curthread; 962 struct proc *p = td->td_proc; 963 struct file *fp; 964 void *ctl = NULL; 965 void *buf = NULL; 966 char *path = NULL; 967 int error; 968 969 /* 970 * Sanity and permissions checks. We must be root. 971 */ 972 KKASSERT(p); 973 if (td->td_ucred->cr_prison != NULL) 974 return (EPERM); 975 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 976 (error = priv_check(td, PRIV_ROOT)) != 0) 977 return (error); 978 979 /* 980 * Argument length checks 981 */ 982 if (uap->ctllen < 0 || uap->ctllen > 1024) 983 return (EINVAL); 984 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 985 return (EINVAL); 986 if (uap->path == NULL) 987 return (EINVAL); 988 989 /* 990 * Allocate the necessary buffers and copyin data 991 */ 992 path = objcache_get(namei_oc, M_WAITOK); 993 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 994 if (error) 995 goto done; 996 997 if (uap->ctllen) { 998 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 999 error = copyin(uap->ctl, ctl, uap->ctllen); 1000 if (error) 1001 goto done; 1002 } 1003 if (uap->buflen) 1004 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1005 1006 /* 1007 * Validate the descriptor 1008 */ 1009 if (uap->fd >= 0) { 1010 fp = holdfp(p->p_fd, uap->fd, -1); 1011 if (fp == NULL) { 1012 error = EBADF; 1013 goto done; 1014 } 1015 } else { 1016 fp = NULL; 1017 } 1018 1019 /* 1020 * Execute the internal kernel function and clean up. 1021 */ 1022 get_mplock(); 1023 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1024 rel_mplock(); 1025 if (fp) 1026 fdrop(fp); 1027 if (error == 0 && uap->sysmsg_result > 0) 1028 error = copyout(buf, uap->buf, uap->sysmsg_result); 1029 done: 1030 if (path) 1031 objcache_put(namei_oc, path); 1032 if (ctl) 1033 kfree(ctl, M_TEMP); 1034 if (buf) 1035 kfree(buf, M_TEMP); 1036 return (error); 1037 } 1038 1039 /* 1040 * Execute a mount control operation by resolving the path to a mount point 1041 * and calling vop_mountctl(). 1042 * 1043 * Use the mount point from the nch instead of the vnode so nullfs mounts 1044 * can properly spike the VOP. 1045 */ 1046 int 1047 kern_mountctl(const char *path, int op, struct file *fp, 1048 const void *ctl, int ctllen, 1049 void *buf, int buflen, int *res) 1050 { 1051 struct vnode *vp; 1052 struct mount *mp; 1053 struct nlookupdata nd; 1054 int error; 1055 1056 *res = 0; 1057 vp = NULL; 1058 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1059 if (error == 0) 1060 error = nlookup(&nd); 1061 if (error == 0) 1062 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1063 mp = nd.nl_nch.mount; 1064 nlookup_done(&nd); 1065 if (error) 1066 return (error); 1067 vn_unlock(vp); 1068 1069 /* 1070 * Must be the root of the filesystem 1071 */ 1072 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1073 vrele(vp); 1074 return (EINVAL); 1075 } 1076 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1077 buf, buflen, res); 1078 vrele(vp); 1079 return (error); 1080 } 1081 1082 int 1083 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1084 { 1085 struct thread *td = curthread; 1086 struct proc *p = td->td_proc; 1087 struct mount *mp; 1088 struct statfs *sp; 1089 char *fullpath, *freepath; 1090 int error; 1091 1092 if ((error = nlookup(nd)) != 0) 1093 return (error); 1094 mp = nd->nl_nch.mount; 1095 sp = &mp->mnt_stat; 1096 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0) 1097 return (error); 1098 1099 error = mount_path(p, mp, &fullpath, &freepath); 1100 if (error) 1101 return(error); 1102 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1103 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1104 kfree(freepath, M_TEMP); 1105 1106 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1107 bcopy(sp, buf, sizeof(*buf)); 1108 /* Only root should have access to the fsid's. */ 1109 if (priv_check(td, PRIV_ROOT)) 1110 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1111 return (0); 1112 } 1113 1114 /* 1115 * statfs_args(char *path, struct statfs *buf) 1116 * 1117 * Get filesystem statistics. 1118 * 1119 * MPALMOSTSAFE 1120 */ 1121 int 1122 sys_statfs(struct statfs_args *uap) 1123 { 1124 struct nlookupdata nd; 1125 struct statfs buf; 1126 int error; 1127 1128 get_mplock(); 1129 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1130 if (error == 0) 1131 error = kern_statfs(&nd, &buf); 1132 nlookup_done(&nd); 1133 if (error == 0) 1134 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1135 rel_mplock(); 1136 return (error); 1137 } 1138 1139 /* 1140 * MPALMOSTSAFE 1141 */ 1142 int 1143 kern_fstatfs(int fd, struct statfs *buf) 1144 { 1145 struct thread *td = curthread; 1146 struct proc *p = td->td_proc; 1147 struct file *fp; 1148 struct mount *mp; 1149 struct statfs *sp; 1150 char *fullpath, *freepath; 1151 int error; 1152 1153 KKASSERT(p); 1154 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1155 return (error); 1156 get_mplock(); 1157 mp = ((struct vnode *)fp->f_data)->v_mount; 1158 if (mp == NULL) { 1159 error = EBADF; 1160 goto done; 1161 } 1162 if (fp->f_cred == NULL) { 1163 error = EINVAL; 1164 goto done; 1165 } 1166 sp = &mp->mnt_stat; 1167 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1168 goto done; 1169 1170 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1171 goto done; 1172 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1173 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1174 kfree(freepath, M_TEMP); 1175 1176 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1177 bcopy(sp, buf, sizeof(*buf)); 1178 1179 /* Only root should have access to the fsid's. */ 1180 if (priv_check(td, PRIV_ROOT)) 1181 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1182 error = 0; 1183 done: 1184 rel_mplock(); 1185 fdrop(fp); 1186 return (error); 1187 } 1188 1189 /* 1190 * fstatfs_args(int fd, struct statfs *buf) 1191 * 1192 * Get filesystem statistics. 1193 * 1194 * MPSAFE 1195 */ 1196 int 1197 sys_fstatfs(struct fstatfs_args *uap) 1198 { 1199 struct statfs buf; 1200 int error; 1201 1202 error = kern_fstatfs(uap->fd, &buf); 1203 1204 if (error == 0) 1205 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1206 return (error); 1207 } 1208 1209 int 1210 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1211 { 1212 struct mount *mp; 1213 struct statvfs *sp; 1214 int error; 1215 1216 if ((error = nlookup(nd)) != 0) 1217 return (error); 1218 mp = nd->nl_nch.mount; 1219 sp = &mp->mnt_vstat; 1220 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1221 return (error); 1222 1223 sp->f_flag = 0; 1224 if (mp->mnt_flag & MNT_RDONLY) 1225 sp->f_flag |= ST_RDONLY; 1226 if (mp->mnt_flag & MNT_NOSUID) 1227 sp->f_flag |= ST_NOSUID; 1228 bcopy(sp, buf, sizeof(*buf)); 1229 return (0); 1230 } 1231 1232 /* 1233 * statfs_args(char *path, struct statfs *buf) 1234 * 1235 * Get filesystem statistics. 1236 * 1237 * MPALMOSTSAFE 1238 */ 1239 int 1240 sys_statvfs(struct statvfs_args *uap) 1241 { 1242 struct nlookupdata nd; 1243 struct statvfs buf; 1244 int error; 1245 1246 get_mplock(); 1247 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1248 if (error == 0) 1249 error = kern_statvfs(&nd, &buf); 1250 nlookup_done(&nd); 1251 if (error == 0) 1252 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1253 rel_mplock(); 1254 return (error); 1255 } 1256 1257 int 1258 kern_fstatvfs(int fd, struct statvfs *buf) 1259 { 1260 struct thread *td = curthread; 1261 struct proc *p = td->td_proc; 1262 struct file *fp; 1263 struct mount *mp; 1264 struct statvfs *sp; 1265 int error; 1266 1267 KKASSERT(p); 1268 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1269 return (error); 1270 mp = ((struct vnode *)fp->f_data)->v_mount; 1271 if (mp == NULL) { 1272 error = EBADF; 1273 goto done; 1274 } 1275 if (fp->f_cred == NULL) { 1276 error = EINVAL; 1277 goto done; 1278 } 1279 sp = &mp->mnt_vstat; 1280 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1281 goto done; 1282 1283 sp->f_flag = 0; 1284 if (mp->mnt_flag & MNT_RDONLY) 1285 sp->f_flag |= ST_RDONLY; 1286 if (mp->mnt_flag & MNT_NOSUID) 1287 sp->f_flag |= ST_NOSUID; 1288 1289 bcopy(sp, buf, sizeof(*buf)); 1290 error = 0; 1291 done: 1292 fdrop(fp); 1293 return (error); 1294 } 1295 1296 /* 1297 * fstatfs_args(int fd, struct statfs *buf) 1298 * 1299 * Get filesystem statistics. 1300 * 1301 * MPALMOSTSAFE 1302 */ 1303 int 1304 sys_fstatvfs(struct fstatvfs_args *uap) 1305 { 1306 struct statvfs buf; 1307 int error; 1308 1309 get_mplock(); 1310 error = kern_fstatvfs(uap->fd, &buf); 1311 rel_mplock(); 1312 1313 if (error == 0) 1314 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1315 return (error); 1316 } 1317 1318 /* 1319 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1320 * 1321 * Get statistics on all filesystems. 1322 */ 1323 1324 struct getfsstat_info { 1325 struct statfs *sfsp; 1326 long count; 1327 long maxcount; 1328 int error; 1329 int flags; 1330 struct thread *td; 1331 }; 1332 1333 static int getfsstat_callback(struct mount *, void *); 1334 1335 /* 1336 * MPALMOSTSAFE 1337 */ 1338 int 1339 sys_getfsstat(struct getfsstat_args *uap) 1340 { 1341 struct thread *td = curthread; 1342 struct getfsstat_info info; 1343 1344 bzero(&info, sizeof(info)); 1345 1346 info.maxcount = uap->bufsize / sizeof(struct statfs); 1347 info.sfsp = uap->buf; 1348 info.count = 0; 1349 info.flags = uap->flags; 1350 info.td = td; 1351 1352 get_mplock(); 1353 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1354 rel_mplock(); 1355 if (info.sfsp && info.count > info.maxcount) 1356 uap->sysmsg_result = info.maxcount; 1357 else 1358 uap->sysmsg_result = info.count; 1359 return (info.error); 1360 } 1361 1362 static int 1363 getfsstat_callback(struct mount *mp, void *data) 1364 { 1365 struct getfsstat_info *info = data; 1366 struct statfs *sp; 1367 char *freepath; 1368 char *fullpath; 1369 int error; 1370 1371 if (info->sfsp && info->count < info->maxcount) { 1372 if (info->td->td_proc && 1373 !chroot_visible_mnt(mp, info->td->td_proc)) { 1374 return(0); 1375 } 1376 sp = &mp->mnt_stat; 1377 1378 /* 1379 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1380 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1381 * overrides MNT_WAIT. 1382 */ 1383 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1384 (info->flags & MNT_WAIT)) && 1385 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1386 return(0); 1387 } 1388 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1389 1390 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1391 if (error) { 1392 info->error = error; 1393 return(-1); 1394 } 1395 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1396 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1397 kfree(freepath, M_TEMP); 1398 1399 error = copyout(sp, info->sfsp, sizeof(*sp)); 1400 if (error) { 1401 info->error = error; 1402 return (-1); 1403 } 1404 ++info->sfsp; 1405 } 1406 info->count++; 1407 return(0); 1408 } 1409 1410 /* 1411 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1412 long bufsize, int flags) 1413 * 1414 * Get statistics on all filesystems. 1415 */ 1416 1417 struct getvfsstat_info { 1418 struct statfs *sfsp; 1419 struct statvfs *vsfsp; 1420 long count; 1421 long maxcount; 1422 int error; 1423 int flags; 1424 struct thread *td; 1425 }; 1426 1427 static int getvfsstat_callback(struct mount *, void *); 1428 1429 /* 1430 * MPALMOSTSAFE 1431 */ 1432 int 1433 sys_getvfsstat(struct getvfsstat_args *uap) 1434 { 1435 struct thread *td = curthread; 1436 struct getvfsstat_info info; 1437 1438 bzero(&info, sizeof(info)); 1439 1440 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1441 info.sfsp = uap->buf; 1442 info.vsfsp = uap->vbuf; 1443 info.count = 0; 1444 info.flags = uap->flags; 1445 info.td = td; 1446 1447 get_mplock(); 1448 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1449 if (info.vsfsp && info.count > info.maxcount) 1450 uap->sysmsg_result = info.maxcount; 1451 else 1452 uap->sysmsg_result = info.count; 1453 rel_mplock(); 1454 return (info.error); 1455 } 1456 1457 static int 1458 getvfsstat_callback(struct mount *mp, void *data) 1459 { 1460 struct getvfsstat_info *info = data; 1461 struct statfs *sp; 1462 struct statvfs *vsp; 1463 char *freepath; 1464 char *fullpath; 1465 int error; 1466 1467 if (info->vsfsp && info->count < info->maxcount) { 1468 if (info->td->td_proc && 1469 !chroot_visible_mnt(mp, info->td->td_proc)) { 1470 return(0); 1471 } 1472 sp = &mp->mnt_stat; 1473 vsp = &mp->mnt_vstat; 1474 1475 /* 1476 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1477 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1478 * overrides MNT_WAIT. 1479 */ 1480 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1481 (info->flags & MNT_WAIT)) && 1482 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1483 return(0); 1484 } 1485 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1486 1487 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1488 (info->flags & MNT_WAIT)) && 1489 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1490 return(0); 1491 } 1492 vsp->f_flag = 0; 1493 if (mp->mnt_flag & MNT_RDONLY) 1494 vsp->f_flag |= ST_RDONLY; 1495 if (mp->mnt_flag & MNT_NOSUID) 1496 vsp->f_flag |= ST_NOSUID; 1497 1498 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1499 if (error) { 1500 info->error = error; 1501 return(-1); 1502 } 1503 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1504 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1505 kfree(freepath, M_TEMP); 1506 1507 error = copyout(sp, info->sfsp, sizeof(*sp)); 1508 if (error == 0) 1509 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1510 if (error) { 1511 info->error = error; 1512 return (-1); 1513 } 1514 ++info->sfsp; 1515 ++info->vsfsp; 1516 } 1517 info->count++; 1518 return(0); 1519 } 1520 1521 1522 /* 1523 * fchdir_args(int fd) 1524 * 1525 * Change current working directory to a given file descriptor. 1526 * 1527 * MPALMOSTSAFE 1528 */ 1529 int 1530 sys_fchdir(struct fchdir_args *uap) 1531 { 1532 struct thread *td = curthread; 1533 struct proc *p = td->td_proc; 1534 struct filedesc *fdp = p->p_fd; 1535 struct vnode *vp, *ovp; 1536 struct mount *mp; 1537 struct file *fp; 1538 struct nchandle nch, onch, tnch; 1539 int error; 1540 1541 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1542 return (error); 1543 get_mplock(); 1544 vp = (struct vnode *)fp->f_data; 1545 vref(vp); 1546 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1547 if (fp->f_nchandle.ncp == NULL) 1548 error = ENOTDIR; 1549 else 1550 error = checkvp_chdir(vp, td); 1551 if (error) { 1552 vput(vp); 1553 goto done; 1554 } 1555 cache_copy(&fp->f_nchandle, &nch); 1556 1557 /* 1558 * If the ncp has become a mount point, traverse through 1559 * the mount point. 1560 */ 1561 1562 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1563 (mp = cache_findmount(&nch)) != NULL 1564 ) { 1565 error = nlookup_mp(mp, &tnch); 1566 if (error == 0) { 1567 cache_unlock(&tnch); /* leave ref intact */ 1568 vput(vp); 1569 vp = tnch.ncp->nc_vp; 1570 error = vget(vp, LK_SHARED); 1571 KKASSERT(error == 0); 1572 cache_drop(&nch); 1573 nch = tnch; 1574 } 1575 } 1576 if (error == 0) { 1577 ovp = fdp->fd_cdir; 1578 onch = fdp->fd_ncdir; 1579 vn_unlock(vp); /* leave ref intact */ 1580 fdp->fd_cdir = vp; 1581 fdp->fd_ncdir = nch; 1582 cache_drop(&onch); 1583 vrele(ovp); 1584 } else { 1585 cache_drop(&nch); 1586 vput(vp); 1587 } 1588 fdrop(fp); 1589 done: 1590 rel_mplock(); 1591 return (error); 1592 } 1593 1594 int 1595 kern_chdir(struct nlookupdata *nd) 1596 { 1597 struct thread *td = curthread; 1598 struct proc *p = td->td_proc; 1599 struct filedesc *fdp = p->p_fd; 1600 struct vnode *vp, *ovp; 1601 struct nchandle onch; 1602 int error; 1603 1604 if ((error = nlookup(nd)) != 0) 1605 return (error); 1606 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1607 return (ENOENT); 1608 if ((error = vget(vp, LK_SHARED)) != 0) 1609 return (error); 1610 1611 error = checkvp_chdir(vp, td); 1612 vn_unlock(vp); 1613 if (error == 0) { 1614 ovp = fdp->fd_cdir; 1615 onch = fdp->fd_ncdir; 1616 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1617 fdp->fd_ncdir = nd->nl_nch; 1618 fdp->fd_cdir = vp; 1619 cache_drop(&onch); 1620 vrele(ovp); 1621 cache_zero(&nd->nl_nch); 1622 } else { 1623 vrele(vp); 1624 } 1625 return (error); 1626 } 1627 1628 /* 1629 * chdir_args(char *path) 1630 * 1631 * Change current working directory (``.''). 1632 * 1633 * MPALMOSTSAFE 1634 */ 1635 int 1636 sys_chdir(struct chdir_args *uap) 1637 { 1638 struct nlookupdata nd; 1639 int error; 1640 1641 get_mplock(); 1642 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1643 if (error == 0) 1644 error = kern_chdir(&nd); 1645 nlookup_done(&nd); 1646 rel_mplock(); 1647 return (error); 1648 } 1649 1650 /* 1651 * Helper function for raised chroot(2) security function: Refuse if 1652 * any filedescriptors are open directories. 1653 */ 1654 static int 1655 chroot_refuse_vdir_fds(struct filedesc *fdp) 1656 { 1657 struct vnode *vp; 1658 struct file *fp; 1659 int error; 1660 int fd; 1661 1662 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1663 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1664 continue; 1665 vp = (struct vnode *)fp->f_data; 1666 if (vp->v_type != VDIR) { 1667 fdrop(fp); 1668 continue; 1669 } 1670 fdrop(fp); 1671 return(EPERM); 1672 } 1673 return (0); 1674 } 1675 1676 /* 1677 * This sysctl determines if we will allow a process to chroot(2) if it 1678 * has a directory open: 1679 * 0: disallowed for all processes. 1680 * 1: allowed for processes that were not already chroot(2)'ed. 1681 * 2: allowed for all processes. 1682 */ 1683 1684 static int chroot_allow_open_directories = 1; 1685 1686 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1687 &chroot_allow_open_directories, 0, ""); 1688 1689 /* 1690 * chroot to the specified namecache entry. We obtain the vp from the 1691 * namecache data. The passed ncp must be locked and referenced and will 1692 * remain locked and referenced on return. 1693 */ 1694 int 1695 kern_chroot(struct nchandle *nch) 1696 { 1697 struct thread *td = curthread; 1698 struct proc *p = td->td_proc; 1699 struct filedesc *fdp = p->p_fd; 1700 struct vnode *vp; 1701 int error; 1702 1703 /* 1704 * Only privileged user can chroot 1705 */ 1706 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1707 if (error) 1708 return (error); 1709 1710 /* 1711 * Disallow open directory descriptors (fchdir() breakouts). 1712 */ 1713 if (chroot_allow_open_directories == 0 || 1714 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1715 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1716 return (error); 1717 } 1718 if ((vp = nch->ncp->nc_vp) == NULL) 1719 return (ENOENT); 1720 1721 if ((error = vget(vp, LK_SHARED)) != 0) 1722 return (error); 1723 1724 /* 1725 * Check the validity of vp as a directory to change to and 1726 * associate it with rdir/jdir. 1727 */ 1728 error = checkvp_chdir(vp, td); 1729 vn_unlock(vp); /* leave reference intact */ 1730 if (error == 0) { 1731 vrele(fdp->fd_rdir); 1732 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1733 cache_drop(&fdp->fd_nrdir); 1734 cache_copy(nch, &fdp->fd_nrdir); 1735 if (fdp->fd_jdir == NULL) { 1736 fdp->fd_jdir = vp; 1737 vref(fdp->fd_jdir); 1738 cache_copy(nch, &fdp->fd_njdir); 1739 } 1740 } else { 1741 vrele(vp); 1742 } 1743 return (error); 1744 } 1745 1746 /* 1747 * chroot_args(char *path) 1748 * 1749 * Change notion of root (``/'') directory. 1750 * 1751 * MPALMOSTSAFE 1752 */ 1753 int 1754 sys_chroot(struct chroot_args *uap) 1755 { 1756 struct thread *td __debugvar = curthread; 1757 struct nlookupdata nd; 1758 int error; 1759 1760 KKASSERT(td->td_proc); 1761 get_mplock(); 1762 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1763 if (error == 0) { 1764 nd.nl_flags |= NLC_EXEC; 1765 error = nlookup(&nd); 1766 if (error == 0) 1767 error = kern_chroot(&nd.nl_nch); 1768 } 1769 nlookup_done(&nd); 1770 rel_mplock(); 1771 return(error); 1772 } 1773 1774 int 1775 sys_chroot_kernel(struct chroot_kernel_args *uap) 1776 { 1777 struct thread *td = curthread; 1778 struct nlookupdata nd; 1779 struct nchandle *nch; 1780 struct vnode *vp; 1781 int error; 1782 1783 get_mplock(); 1784 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1785 if (error) 1786 goto error_nond; 1787 1788 error = nlookup(&nd); 1789 if (error) 1790 goto error_out; 1791 1792 nch = &nd.nl_nch; 1793 1794 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1795 if (error) 1796 goto error_out; 1797 1798 if ((vp = nch->ncp->nc_vp) == NULL) { 1799 error = ENOENT; 1800 goto error_out; 1801 } 1802 1803 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1804 goto error_out; 1805 1806 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1807 vfs_cache_setroot(vp, cache_hold(nch)); 1808 1809 error_out: 1810 nlookup_done(&nd); 1811 error_nond: 1812 rel_mplock(); 1813 return(error); 1814 } 1815 1816 /* 1817 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1818 * determine whether it is legal to chdir to the vnode. The vnode's state 1819 * is not changed by this call. 1820 */ 1821 int 1822 checkvp_chdir(struct vnode *vp, struct thread *td) 1823 { 1824 int error; 1825 1826 if (vp->v_type != VDIR) 1827 error = ENOTDIR; 1828 else 1829 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 1830 return (error); 1831 } 1832 1833 /* 1834 * MPSAFE 1835 */ 1836 int 1837 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1838 { 1839 struct thread *td = curthread; 1840 struct proc *p = td->td_proc; 1841 struct lwp *lp = td->td_lwp; 1842 struct filedesc *fdp = p->p_fd; 1843 int cmode, flags; 1844 struct file *nfp; 1845 struct file *fp; 1846 struct vnode *vp; 1847 int type, indx, error; 1848 struct flock lf; 1849 1850 if ((oflags & O_ACCMODE) == O_ACCMODE) 1851 return (EINVAL); 1852 flags = FFLAGS(oflags); 1853 error = falloc(lp, &nfp, NULL); 1854 if (error) 1855 return (error); 1856 fp = nfp; 1857 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1858 1859 /* 1860 * XXX p_dupfd is a real mess. It allows a device to return a 1861 * file descriptor to be duplicated rather then doing the open 1862 * itself. 1863 */ 1864 lp->lwp_dupfd = -1; 1865 1866 /* 1867 * Call vn_open() to do the lookup and assign the vnode to the 1868 * file pointer. vn_open() does not change the ref count on fp 1869 * and the vnode, on success, will be inherited by the file pointer 1870 * and unlocked. 1871 */ 1872 nd->nl_flags |= NLC_LOCKVP; 1873 error = vn_open(nd, fp, flags, cmode); 1874 nlookup_done(nd); 1875 if (error) { 1876 /* 1877 * handle special fdopen() case. bleh. dupfdopen() is 1878 * responsible for dropping the old contents of ofiles[indx] 1879 * if it succeeds. 1880 * 1881 * Note that fsetfd() will add a ref to fp which represents 1882 * the fd_files[] assignment. We must still drop our 1883 * reference. 1884 */ 1885 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1886 if (fdalloc(p, 0, &indx) == 0) { 1887 error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error); 1888 if (error == 0) { 1889 *res = indx; 1890 fdrop(fp); /* our ref */ 1891 return (0); 1892 } 1893 fsetfd(fdp, NULL, indx); 1894 } 1895 } 1896 fdrop(fp); /* our ref */ 1897 if (error == ERESTART) 1898 error = EINTR; 1899 return (error); 1900 } 1901 1902 /* 1903 * ref the vnode for ourselves so it can't be ripped out from under 1904 * is. XXX need an ND flag to request that the vnode be returned 1905 * anyway. 1906 * 1907 * Reserve a file descriptor but do not assign it until the open 1908 * succeeds. 1909 */ 1910 vp = (struct vnode *)fp->f_data; 1911 vref(vp); 1912 if ((error = fdalloc(p, 0, &indx)) != 0) { 1913 fdrop(fp); 1914 vrele(vp); 1915 return (error); 1916 } 1917 1918 /* 1919 * If no error occurs the vp will have been assigned to the file 1920 * pointer. 1921 */ 1922 lp->lwp_dupfd = 0; 1923 1924 if (flags & (O_EXLOCK | O_SHLOCK)) { 1925 lf.l_whence = SEEK_SET; 1926 lf.l_start = 0; 1927 lf.l_len = 0; 1928 if (flags & O_EXLOCK) 1929 lf.l_type = F_WRLCK; 1930 else 1931 lf.l_type = F_RDLCK; 1932 if (flags & FNONBLOCK) 1933 type = 0; 1934 else 1935 type = F_WAIT; 1936 1937 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1938 /* 1939 * lock request failed. Clean up the reserved 1940 * descriptor. 1941 */ 1942 vrele(vp); 1943 fsetfd(fdp, NULL, indx); 1944 fdrop(fp); 1945 return (error); 1946 } 1947 fp->f_flag |= FHASLOCK; 1948 } 1949 #if 0 1950 /* 1951 * Assert that all regular file vnodes were created with a object. 1952 */ 1953 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1954 ("open: regular file has no backing object after vn_open")); 1955 #endif 1956 1957 vrele(vp); 1958 1959 /* 1960 * release our private reference, leaving the one associated with the 1961 * descriptor table intact. 1962 */ 1963 fsetfd(fdp, fp, indx); 1964 fdrop(fp); 1965 *res = indx; 1966 return (0); 1967 } 1968 1969 /* 1970 * open_args(char *path, int flags, int mode) 1971 * 1972 * Check permissions, allocate an open file structure, 1973 * and call the device open routine if any. 1974 * 1975 * MPALMOSTSAFE 1976 */ 1977 int 1978 sys_open(struct open_args *uap) 1979 { 1980 CACHE_MPLOCK_DECLARE; 1981 struct nlookupdata nd; 1982 int error; 1983 1984 CACHE_GETMPLOCK1(); 1985 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1986 if (error == 0) { 1987 error = kern_open(&nd, uap->flags, 1988 uap->mode, &uap->sysmsg_result); 1989 } 1990 nlookup_done(&nd); 1991 CACHE_RELMPLOCK(); 1992 return (error); 1993 } 1994 1995 /* 1996 * openat_args(int fd, char *path, int flags, int mode) 1997 * 1998 * MPALMOSTSAFE 1999 */ 2000 int 2001 sys_openat(struct openat_args *uap) 2002 { 2003 CACHE_MPLOCK_DECLARE; 2004 struct nlookupdata nd; 2005 int error; 2006 struct file *fp; 2007 2008 CACHE_GETMPLOCK1(); 2009 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2010 if (error == 0) { 2011 error = kern_open(&nd, uap->flags, uap->mode, 2012 &uap->sysmsg_result); 2013 } 2014 nlookup_done_at(&nd, fp); 2015 CACHE_RELMPLOCK(); 2016 return (error); 2017 } 2018 2019 int 2020 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2021 { 2022 struct thread *td = curthread; 2023 struct proc *p = td->td_proc; 2024 struct vnode *vp; 2025 struct vattr vattr; 2026 int error; 2027 int whiteout = 0; 2028 2029 KKASSERT(p); 2030 2031 VATTR_NULL(&vattr); 2032 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2033 vattr.va_rmajor = rmajor; 2034 vattr.va_rminor = rminor; 2035 2036 switch (mode & S_IFMT) { 2037 case S_IFMT: /* used by badsect to flag bad sectors */ 2038 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2039 vattr.va_type = VBAD; 2040 break; 2041 case S_IFCHR: 2042 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2043 vattr.va_type = VCHR; 2044 break; 2045 case S_IFBLK: 2046 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2047 vattr.va_type = VBLK; 2048 break; 2049 case S_IFWHT: 2050 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2051 whiteout = 1; 2052 break; 2053 case S_IFDIR: /* special directories support for HAMMER */ 2054 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2055 vattr.va_type = VDIR; 2056 break; 2057 default: 2058 error = EINVAL; 2059 break; 2060 } 2061 2062 if (error) 2063 return (error); 2064 2065 bwillinode(1); 2066 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2067 if ((error = nlookup(nd)) != 0) 2068 return (error); 2069 if (nd->nl_nch.ncp->nc_vp) 2070 return (EEXIST); 2071 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2072 return (error); 2073 2074 if (whiteout) { 2075 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2076 nd->nl_cred, NAMEI_CREATE); 2077 } else { 2078 vp = NULL; 2079 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2080 &vp, nd->nl_cred, &vattr); 2081 if (error == 0) 2082 vput(vp); 2083 } 2084 return (error); 2085 } 2086 2087 /* 2088 * mknod_args(char *path, int mode, int dev) 2089 * 2090 * Create a special file. 2091 * 2092 * MPALMOSTSAFE 2093 */ 2094 int 2095 sys_mknod(struct mknod_args *uap) 2096 { 2097 struct nlookupdata nd; 2098 int error; 2099 2100 get_mplock(); 2101 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2102 if (error == 0) { 2103 error = kern_mknod(&nd, uap->mode, 2104 umajor(uap->dev), uminor(uap->dev)); 2105 } 2106 nlookup_done(&nd); 2107 rel_mplock(); 2108 return (error); 2109 } 2110 2111 /* 2112 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2113 * 2114 * Create a special file. The path is relative to the directory associated 2115 * with fd. 2116 * 2117 * MPALMOSTSAFE 2118 */ 2119 int 2120 sys_mknodat(struct mknodat_args *uap) 2121 { 2122 struct nlookupdata nd; 2123 struct file *fp; 2124 int error; 2125 2126 get_mplock(); 2127 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2128 if (error == 0) { 2129 error = kern_mknod(&nd, uap->mode, 2130 umajor(uap->dev), uminor(uap->dev)); 2131 } 2132 nlookup_done_at(&nd, fp); 2133 rel_mplock(); 2134 return (error); 2135 } 2136 2137 int 2138 kern_mkfifo(struct nlookupdata *nd, int mode) 2139 { 2140 struct thread *td = curthread; 2141 struct proc *p = td->td_proc; 2142 struct vattr vattr; 2143 struct vnode *vp; 2144 int error; 2145 2146 bwillinode(1); 2147 2148 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2149 if ((error = nlookup(nd)) != 0) 2150 return (error); 2151 if (nd->nl_nch.ncp->nc_vp) 2152 return (EEXIST); 2153 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2154 return (error); 2155 2156 VATTR_NULL(&vattr); 2157 vattr.va_type = VFIFO; 2158 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2159 vp = NULL; 2160 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2161 if (error == 0) 2162 vput(vp); 2163 return (error); 2164 } 2165 2166 /* 2167 * mkfifo_args(char *path, int mode) 2168 * 2169 * Create a named pipe. 2170 * 2171 * MPALMOSTSAFE 2172 */ 2173 int 2174 sys_mkfifo(struct mkfifo_args *uap) 2175 { 2176 struct nlookupdata nd; 2177 int error; 2178 2179 get_mplock(); 2180 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2181 if (error == 0) 2182 error = kern_mkfifo(&nd, uap->mode); 2183 nlookup_done(&nd); 2184 rel_mplock(); 2185 return (error); 2186 } 2187 2188 /* 2189 * mkfifoat_args(int fd, char *path, mode_t mode) 2190 * 2191 * Create a named pipe. The path is relative to the directory associated 2192 * with fd. 2193 * 2194 * MPALMOSTSAFE 2195 */ 2196 int 2197 sys_mkfifoat(struct mkfifoat_args *uap) 2198 { 2199 struct nlookupdata nd; 2200 struct file *fp; 2201 int error; 2202 2203 get_mplock(); 2204 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2205 if (error == 0) 2206 error = kern_mkfifo(&nd, uap->mode); 2207 nlookup_done_at(&nd, fp); 2208 rel_mplock(); 2209 return (error); 2210 } 2211 2212 static int hardlink_check_uid = 0; 2213 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2214 &hardlink_check_uid, 0, 2215 "Unprivileged processes cannot create hard links to files owned by other " 2216 "users"); 2217 static int hardlink_check_gid = 0; 2218 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2219 &hardlink_check_gid, 0, 2220 "Unprivileged processes cannot create hard links to files owned by other " 2221 "groups"); 2222 2223 static int 2224 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2225 { 2226 struct vattr va; 2227 int error; 2228 2229 /* 2230 * Shortcut if disabled 2231 */ 2232 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2233 return (0); 2234 2235 /* 2236 * Privileged user can always hardlink 2237 */ 2238 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2239 return (0); 2240 2241 /* 2242 * Otherwise only if the originating file is owned by the 2243 * same user or group. Note that any group is allowed if 2244 * the file is owned by the caller. 2245 */ 2246 error = VOP_GETATTR(vp, &va); 2247 if (error != 0) 2248 return (error); 2249 2250 if (hardlink_check_uid) { 2251 if (cred->cr_uid != va.va_uid) 2252 return (EPERM); 2253 } 2254 2255 if (hardlink_check_gid) { 2256 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2257 return (EPERM); 2258 } 2259 2260 return (0); 2261 } 2262 2263 int 2264 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2265 { 2266 struct thread *td = curthread; 2267 struct vnode *vp; 2268 int error; 2269 2270 /* 2271 * Lookup the source and obtained a locked vnode. 2272 * 2273 * You may only hardlink a file which you have write permission 2274 * on or which you own. 2275 * 2276 * XXX relookup on vget failure / race ? 2277 */ 2278 bwillinode(1); 2279 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2280 if ((error = nlookup(nd)) != 0) 2281 return (error); 2282 vp = nd->nl_nch.ncp->nc_vp; 2283 KKASSERT(vp != NULL); 2284 if (vp->v_type == VDIR) 2285 return (EPERM); /* POSIX */ 2286 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2287 return (error); 2288 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2289 return (error); 2290 2291 /* 2292 * Unlock the source so we can lookup the target without deadlocking 2293 * (XXX vp is locked already, possible other deadlock?). The target 2294 * must not exist. 2295 */ 2296 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2297 nd->nl_flags &= ~NLC_NCPISLOCKED; 2298 cache_unlock(&nd->nl_nch); 2299 vn_unlock(vp); 2300 2301 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2302 if ((error = nlookup(linknd)) != 0) { 2303 vrele(vp); 2304 return (error); 2305 } 2306 if (linknd->nl_nch.ncp->nc_vp) { 2307 vrele(vp); 2308 return (EEXIST); 2309 } 2310 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 2311 vrele(vp); 2312 return (error); 2313 } 2314 2315 /* 2316 * Finally run the new API VOP. 2317 */ 2318 error = can_hardlink(vp, td, td->td_ucred); 2319 if (error == 0) { 2320 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2321 vp, linknd->nl_cred); 2322 } 2323 vput(vp); 2324 return (error); 2325 } 2326 2327 /* 2328 * link_args(char *path, char *link) 2329 * 2330 * Make a hard file link. 2331 * 2332 * MPALMOSTSAFE 2333 */ 2334 int 2335 sys_link(struct link_args *uap) 2336 { 2337 struct nlookupdata nd, linknd; 2338 int error; 2339 2340 get_mplock(); 2341 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2342 if (error == 0) { 2343 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2344 if (error == 0) 2345 error = kern_link(&nd, &linknd); 2346 nlookup_done(&linknd); 2347 } 2348 nlookup_done(&nd); 2349 rel_mplock(); 2350 return (error); 2351 } 2352 2353 int 2354 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2355 { 2356 struct vattr vattr; 2357 struct vnode *vp; 2358 struct vnode *dvp; 2359 int error; 2360 2361 bwillinode(1); 2362 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2363 if ((error = nlookup(nd)) != 0) 2364 return (error); 2365 if (nd->nl_nch.ncp->nc_vp) 2366 return (EEXIST); 2367 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2368 return (error); 2369 dvp = nd->nl_dvp; 2370 VATTR_NULL(&vattr); 2371 vattr.va_mode = mode; 2372 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2373 if (error == 0) 2374 vput(vp); 2375 return (error); 2376 } 2377 2378 /* 2379 * symlink(char *path, char *link) 2380 * 2381 * Make a symbolic link. 2382 * 2383 * MPALMOSTSAFE 2384 */ 2385 int 2386 sys_symlink(struct symlink_args *uap) 2387 { 2388 struct thread *td = curthread; 2389 struct nlookupdata nd; 2390 char *path; 2391 int error; 2392 int mode; 2393 2394 path = objcache_get(namei_oc, M_WAITOK); 2395 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2396 if (error == 0) { 2397 get_mplock(); 2398 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2399 if (error == 0) { 2400 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2401 error = kern_symlink(&nd, path, mode); 2402 } 2403 nlookup_done(&nd); 2404 rel_mplock(); 2405 } 2406 objcache_put(namei_oc, path); 2407 return (error); 2408 } 2409 2410 /* 2411 * symlinkat_args(char *path1, int fd, char *path2) 2412 * 2413 * Make a symbolic link. The path2 argument is relative to the directory 2414 * associated with fd. 2415 * 2416 * MPALMOSTSAFE 2417 */ 2418 int 2419 sys_symlinkat(struct symlinkat_args *uap) 2420 { 2421 struct thread *td = curthread; 2422 struct nlookupdata nd; 2423 struct file *fp; 2424 char *path1; 2425 int error; 2426 int mode; 2427 2428 path1 = objcache_get(namei_oc, M_WAITOK); 2429 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2430 if (error == 0) { 2431 get_mplock(); 2432 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2433 UIO_USERSPACE, 0); 2434 if (error == 0) { 2435 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2436 error = kern_symlink(&nd, path1, mode); 2437 } 2438 nlookup_done_at(&nd, fp); 2439 rel_mplock(); 2440 } 2441 objcache_put(namei_oc, path1); 2442 return (error); 2443 } 2444 2445 /* 2446 * undelete_args(char *path) 2447 * 2448 * Delete a whiteout from the filesystem. 2449 * 2450 * MPALMOSTSAFE 2451 */ 2452 int 2453 sys_undelete(struct undelete_args *uap) 2454 { 2455 struct nlookupdata nd; 2456 int error; 2457 2458 get_mplock(); 2459 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2460 bwillinode(1); 2461 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2462 if (error == 0) 2463 error = nlookup(&nd); 2464 if (error == 0) 2465 error = ncp_writechk(&nd.nl_nch); 2466 if (error == 0) { 2467 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2468 NAMEI_DELETE); 2469 } 2470 nlookup_done(&nd); 2471 rel_mplock(); 2472 return (error); 2473 } 2474 2475 int 2476 kern_unlink(struct nlookupdata *nd) 2477 { 2478 int error; 2479 2480 bwillinode(1); 2481 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2482 if ((error = nlookup(nd)) != 0) 2483 return (error); 2484 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2485 return (error); 2486 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2487 return (error); 2488 } 2489 2490 /* 2491 * unlink_args(char *path) 2492 * 2493 * Delete a name from the filesystem. 2494 * 2495 * MPALMOSTSAFE 2496 */ 2497 int 2498 sys_unlink(struct unlink_args *uap) 2499 { 2500 struct nlookupdata nd; 2501 int error; 2502 2503 get_mplock(); 2504 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2505 if (error == 0) 2506 error = kern_unlink(&nd); 2507 nlookup_done(&nd); 2508 rel_mplock(); 2509 return (error); 2510 } 2511 2512 2513 /* 2514 * unlinkat_args(int fd, char *path, int flags) 2515 * 2516 * Delete the file or directory entry pointed to by fd/path. 2517 * 2518 * MPALMOSTSAFE 2519 */ 2520 int 2521 sys_unlinkat(struct unlinkat_args *uap) 2522 { 2523 struct nlookupdata nd; 2524 struct file *fp; 2525 int error; 2526 2527 if (uap->flags & ~AT_REMOVEDIR) 2528 return (EINVAL); 2529 2530 get_mplock(); 2531 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2532 if (error == 0) { 2533 if (uap->flags & AT_REMOVEDIR) 2534 error = kern_rmdir(&nd); 2535 else 2536 error = kern_unlink(&nd); 2537 } 2538 nlookup_done_at(&nd, fp); 2539 rel_mplock(); 2540 return (error); 2541 } 2542 2543 /* 2544 * MPALMOSTSAFE 2545 */ 2546 int 2547 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2548 { 2549 struct thread *td = curthread; 2550 struct proc *p = td->td_proc; 2551 struct file *fp; 2552 struct vnode *vp; 2553 struct vattr vattr; 2554 off_t new_offset; 2555 int error; 2556 2557 fp = holdfp(p->p_fd, fd, -1); 2558 if (fp == NULL) 2559 return (EBADF); 2560 if (fp->f_type != DTYPE_VNODE) { 2561 error = ESPIPE; 2562 goto done; 2563 } 2564 vp = (struct vnode *)fp->f_data; 2565 2566 switch (whence) { 2567 case L_INCR: 2568 spin_lock_wr(&fp->f_spin); 2569 new_offset = fp->f_offset + offset; 2570 error = 0; 2571 break; 2572 case L_XTND: 2573 get_mplock(); 2574 error = VOP_GETATTR(vp, &vattr); 2575 rel_mplock(); 2576 spin_lock_wr(&fp->f_spin); 2577 new_offset = offset + vattr.va_size; 2578 break; 2579 case L_SET: 2580 new_offset = offset; 2581 error = 0; 2582 spin_lock_wr(&fp->f_spin); 2583 break; 2584 default: 2585 new_offset = 0; 2586 error = EINVAL; 2587 spin_lock_wr(&fp->f_spin); 2588 break; 2589 } 2590 2591 /* 2592 * Validate the seek position. Negative offsets are not allowed 2593 * for regular files or directories. 2594 * 2595 * Normally we would also not want to allow negative offsets for 2596 * character and block-special devices. However kvm addresses 2597 * on 64 bit architectures might appear to be negative and must 2598 * be allowed. 2599 */ 2600 if (error == 0) { 2601 if (new_offset < 0 && 2602 (vp->v_type == VREG || vp->v_type == VDIR)) { 2603 error = EINVAL; 2604 } else { 2605 fp->f_offset = new_offset; 2606 } 2607 } 2608 *res = fp->f_offset; 2609 spin_unlock_wr(&fp->f_spin); 2610 done: 2611 fdrop(fp); 2612 return (error); 2613 } 2614 2615 /* 2616 * lseek_args(int fd, int pad, off_t offset, int whence) 2617 * 2618 * Reposition read/write file offset. 2619 * 2620 * MPSAFE 2621 */ 2622 int 2623 sys_lseek(struct lseek_args *uap) 2624 { 2625 int error; 2626 2627 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2628 &uap->sysmsg_offset); 2629 2630 return (error); 2631 } 2632 2633 /* 2634 * Check if current process can access given file. amode is a bitmask of *_OK 2635 * access bits. flags is a bitmask of AT_* flags. 2636 */ 2637 int 2638 kern_access(struct nlookupdata *nd, int amode, int flags) 2639 { 2640 struct vnode *vp; 2641 int error, mode; 2642 2643 if (flags & ~AT_EACCESS) 2644 return (EINVAL); 2645 if ((error = nlookup(nd)) != 0) 2646 return (error); 2647 retry: 2648 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2649 if (error) 2650 return (error); 2651 2652 /* Flags == 0 means only check for existence. */ 2653 if (amode) { 2654 mode = 0; 2655 if (amode & R_OK) 2656 mode |= VREAD; 2657 if (amode & W_OK) 2658 mode |= VWRITE; 2659 if (amode & X_OK) 2660 mode |= VEXEC; 2661 if ((mode & VWRITE) == 0 || 2662 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2663 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2664 2665 /* 2666 * If the file handle is stale we have to re-resolve the 2667 * entry. This is a hack at the moment. 2668 */ 2669 if (error == ESTALE) { 2670 vput(vp); 2671 cache_setunresolved(&nd->nl_nch); 2672 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2673 if (error == 0) { 2674 vp = NULL; 2675 goto retry; 2676 } 2677 return(error); 2678 } 2679 } 2680 vput(vp); 2681 return (error); 2682 } 2683 2684 /* 2685 * access_args(char *path, int flags) 2686 * 2687 * Check access permissions. 2688 * 2689 * MPALMOSTSAFE 2690 */ 2691 int 2692 sys_access(struct access_args *uap) 2693 { 2694 struct nlookupdata nd; 2695 int error; 2696 2697 get_mplock(); 2698 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2699 if (error == 0) 2700 error = kern_access(&nd, uap->flags, 0); 2701 nlookup_done(&nd); 2702 rel_mplock(); 2703 return (error); 2704 } 2705 2706 2707 /* 2708 * faccessat_args(int fd, char *path, int amode, int flags) 2709 * 2710 * Check access permissions. 2711 * 2712 * MPALMOSTSAFE 2713 */ 2714 int 2715 sys_faccessat(struct faccessat_args *uap) 2716 { 2717 struct nlookupdata nd; 2718 struct file *fp; 2719 int error; 2720 2721 get_mplock(); 2722 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2723 NLC_FOLLOW); 2724 if (error == 0) 2725 error = kern_access(&nd, uap->amode, uap->flags); 2726 nlookup_done_at(&nd, fp); 2727 rel_mplock(); 2728 return (error); 2729 } 2730 2731 2732 /* 2733 * MPSAFE 2734 */ 2735 int 2736 kern_stat(struct nlookupdata *nd, struct stat *st) 2737 { 2738 int error; 2739 struct vnode *vp; 2740 thread_t td; 2741 2742 if ((error = nlookup(nd)) != 0) 2743 return (error); 2744 again: 2745 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2746 return (ENOENT); 2747 2748 td = curthread; 2749 if ((error = vget(vp, LK_SHARED)) != 0) 2750 return (error); 2751 error = vn_stat(vp, st, nd->nl_cred); 2752 2753 /* 2754 * If the file handle is stale we have to re-resolve the entry. This 2755 * is a hack at the moment. 2756 */ 2757 if (error == ESTALE) { 2758 vput(vp); 2759 cache_setunresolved(&nd->nl_nch); 2760 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2761 if (error == 0) 2762 goto again; 2763 } else { 2764 vput(vp); 2765 } 2766 return (error); 2767 } 2768 2769 /* 2770 * stat_args(char *path, struct stat *ub) 2771 * 2772 * Get file status; this version follows links. 2773 * 2774 * MPSAFE 2775 */ 2776 int 2777 sys_stat(struct stat_args *uap) 2778 { 2779 CACHE_MPLOCK_DECLARE; 2780 struct nlookupdata nd; 2781 struct stat st; 2782 int error; 2783 2784 CACHE_GETMPLOCK1(); 2785 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2786 if (error == 0) { 2787 error = kern_stat(&nd, &st); 2788 if (error == 0) 2789 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2790 } 2791 nlookup_done(&nd); 2792 CACHE_RELMPLOCK(); 2793 return (error); 2794 } 2795 2796 /* 2797 * lstat_args(char *path, struct stat *ub) 2798 * 2799 * Get file status; this version does not follow links. 2800 * 2801 * MPALMOSTSAFE 2802 */ 2803 int 2804 sys_lstat(struct lstat_args *uap) 2805 { 2806 CACHE_MPLOCK_DECLARE; 2807 struct nlookupdata nd; 2808 struct stat st; 2809 int error; 2810 2811 CACHE_GETMPLOCK1(); 2812 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2813 if (error == 0) { 2814 error = kern_stat(&nd, &st); 2815 if (error == 0) 2816 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2817 } 2818 nlookup_done(&nd); 2819 CACHE_RELMPLOCK(); 2820 return (error); 2821 } 2822 2823 /* 2824 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2825 * 2826 * Get status of file pointed to by fd/path. 2827 * 2828 * MPALMOSTSAFE 2829 */ 2830 int 2831 sys_fstatat(struct fstatat_args *uap) 2832 { 2833 CACHE_MPLOCK_DECLARE; 2834 struct nlookupdata nd; 2835 struct stat st; 2836 int error; 2837 int flags; 2838 struct file *fp; 2839 2840 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2841 return (EINVAL); 2842 2843 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2844 2845 CACHE_GETMPLOCK1(); 2846 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2847 UIO_USERSPACE, flags); 2848 if (error == 0) { 2849 error = kern_stat(&nd, &st); 2850 if (error == 0) 2851 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2852 } 2853 nlookup_done_at(&nd, fp); 2854 CACHE_RELMPLOCK(); 2855 return (error); 2856 } 2857 2858 /* 2859 * pathconf_Args(char *path, int name) 2860 * 2861 * Get configurable pathname variables. 2862 * 2863 * MPALMOSTSAFE 2864 */ 2865 int 2866 sys_pathconf(struct pathconf_args *uap) 2867 { 2868 struct nlookupdata nd; 2869 struct vnode *vp; 2870 int error; 2871 2872 vp = NULL; 2873 get_mplock(); 2874 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2875 if (error == 0) 2876 error = nlookup(&nd); 2877 if (error == 0) 2878 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2879 nlookup_done(&nd); 2880 if (error == 0) { 2881 error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg); 2882 vput(vp); 2883 } 2884 rel_mplock(); 2885 return (error); 2886 } 2887 2888 /* 2889 * XXX: daver 2890 * kern_readlink isn't properly split yet. There is a copyin burried 2891 * in VOP_READLINK(). 2892 */ 2893 int 2894 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2895 { 2896 struct thread *td = curthread; 2897 struct vnode *vp; 2898 struct iovec aiov; 2899 struct uio auio; 2900 int error; 2901 2902 if ((error = nlookup(nd)) != 0) 2903 return (error); 2904 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2905 if (error) 2906 return (error); 2907 if (vp->v_type != VLNK) { 2908 error = EINVAL; 2909 } else { 2910 aiov.iov_base = buf; 2911 aiov.iov_len = count; 2912 auio.uio_iov = &aiov; 2913 auio.uio_iovcnt = 1; 2914 auio.uio_offset = 0; 2915 auio.uio_rw = UIO_READ; 2916 auio.uio_segflg = UIO_USERSPACE; 2917 auio.uio_td = td; 2918 auio.uio_resid = count; 2919 error = VOP_READLINK(vp, &auio, td->td_ucred); 2920 } 2921 vput(vp); 2922 *res = count - auio.uio_resid; 2923 return (error); 2924 } 2925 2926 /* 2927 * readlink_args(char *path, char *buf, int count) 2928 * 2929 * Return target name of a symbolic link. 2930 * 2931 * MPALMOSTSAFE 2932 */ 2933 int 2934 sys_readlink(struct readlink_args *uap) 2935 { 2936 struct nlookupdata nd; 2937 int error; 2938 2939 get_mplock(); 2940 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2941 if (error == 0) { 2942 error = kern_readlink(&nd, uap->buf, uap->count, 2943 &uap->sysmsg_result); 2944 } 2945 nlookup_done(&nd); 2946 rel_mplock(); 2947 return (error); 2948 } 2949 2950 /* 2951 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2952 * 2953 * Return target name of a symbolic link. The path is relative to the 2954 * directory associated with fd. 2955 * 2956 * MPALMOSTSAFE 2957 */ 2958 int 2959 sys_readlinkat(struct readlinkat_args *uap) 2960 { 2961 struct nlookupdata nd; 2962 struct file *fp; 2963 int error; 2964 2965 get_mplock(); 2966 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2967 if (error == 0) { 2968 error = kern_readlink(&nd, uap->buf, uap->bufsize, 2969 &uap->sysmsg_result); 2970 } 2971 nlookup_done_at(&nd, fp); 2972 rel_mplock(); 2973 return (error); 2974 } 2975 2976 static int 2977 setfflags(struct vnode *vp, int flags) 2978 { 2979 struct thread *td = curthread; 2980 int error; 2981 struct vattr vattr; 2982 2983 /* 2984 * Prevent non-root users from setting flags on devices. When 2985 * a device is reused, users can retain ownership of the device 2986 * if they are allowed to set flags and programs assume that 2987 * chown can't fail when done as root. 2988 */ 2989 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2990 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 2991 return (error); 2992 2993 /* 2994 * note: vget is required for any operation that might mod the vnode 2995 * so VINACTIVE is properly cleared. 2996 */ 2997 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2998 VATTR_NULL(&vattr); 2999 vattr.va_flags = flags; 3000 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3001 vput(vp); 3002 } 3003 return (error); 3004 } 3005 3006 /* 3007 * chflags(char *path, int flags) 3008 * 3009 * Change flags of a file given a path name. 3010 * 3011 * MPALMOSTSAFE 3012 */ 3013 int 3014 sys_chflags(struct chflags_args *uap) 3015 { 3016 struct nlookupdata nd; 3017 struct vnode *vp; 3018 int error; 3019 3020 vp = NULL; 3021 get_mplock(); 3022 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3023 if (error == 0) 3024 error = nlookup(&nd); 3025 if (error == 0) 3026 error = ncp_writechk(&nd.nl_nch); 3027 if (error == 0) 3028 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3029 nlookup_done(&nd); 3030 if (error == 0) { 3031 error = setfflags(vp, uap->flags); 3032 vrele(vp); 3033 } 3034 rel_mplock(); 3035 return (error); 3036 } 3037 3038 /* 3039 * lchflags(char *path, int flags) 3040 * 3041 * Change flags of a file given a path name, but don't follow symlinks. 3042 * 3043 * MPALMOSTSAFE 3044 */ 3045 int 3046 sys_lchflags(struct lchflags_args *uap) 3047 { 3048 struct nlookupdata nd; 3049 struct vnode *vp; 3050 int error; 3051 3052 vp = NULL; 3053 get_mplock(); 3054 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3055 if (error == 0) 3056 error = nlookup(&nd); 3057 if (error == 0) 3058 error = ncp_writechk(&nd.nl_nch); 3059 if (error == 0) 3060 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3061 nlookup_done(&nd); 3062 if (error == 0) { 3063 error = setfflags(vp, uap->flags); 3064 vrele(vp); 3065 } 3066 rel_mplock(); 3067 return (error); 3068 } 3069 3070 /* 3071 * fchflags_args(int fd, int flags) 3072 * 3073 * Change flags of a file given a file descriptor. 3074 * 3075 * MPALMOSTSAFE 3076 */ 3077 int 3078 sys_fchflags(struct fchflags_args *uap) 3079 { 3080 struct thread *td = curthread; 3081 struct proc *p = td->td_proc; 3082 struct file *fp; 3083 int error; 3084 3085 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3086 return (error); 3087 get_mplock(); 3088 if (fp->f_nchandle.ncp) 3089 error = ncp_writechk(&fp->f_nchandle); 3090 if (error == 0) 3091 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3092 rel_mplock(); 3093 fdrop(fp); 3094 return (error); 3095 } 3096 3097 static int 3098 setfmode(struct vnode *vp, int mode) 3099 { 3100 struct thread *td = curthread; 3101 int error; 3102 struct vattr vattr; 3103 3104 /* 3105 * note: vget is required for any operation that might mod the vnode 3106 * so VINACTIVE is properly cleared. 3107 */ 3108 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3109 VATTR_NULL(&vattr); 3110 vattr.va_mode = mode & ALLPERMS; 3111 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3112 vput(vp); 3113 } 3114 return error; 3115 } 3116 3117 int 3118 kern_chmod(struct nlookupdata *nd, int mode) 3119 { 3120 struct vnode *vp; 3121 int error; 3122 3123 if ((error = nlookup(nd)) != 0) 3124 return (error); 3125 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3126 return (error); 3127 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3128 error = setfmode(vp, mode); 3129 vrele(vp); 3130 return (error); 3131 } 3132 3133 /* 3134 * chmod_args(char *path, int mode) 3135 * 3136 * Change mode of a file given path name. 3137 * 3138 * MPALMOSTSAFE 3139 */ 3140 int 3141 sys_chmod(struct chmod_args *uap) 3142 { 3143 struct nlookupdata nd; 3144 int error; 3145 3146 get_mplock(); 3147 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3148 if (error == 0) 3149 error = kern_chmod(&nd, uap->mode); 3150 nlookup_done(&nd); 3151 rel_mplock(); 3152 return (error); 3153 } 3154 3155 /* 3156 * lchmod_args(char *path, int mode) 3157 * 3158 * Change mode of a file given path name (don't follow links.) 3159 * 3160 * MPALMOSTSAFE 3161 */ 3162 int 3163 sys_lchmod(struct lchmod_args *uap) 3164 { 3165 struct nlookupdata nd; 3166 int error; 3167 3168 get_mplock(); 3169 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3170 if (error == 0) 3171 error = kern_chmod(&nd, uap->mode); 3172 nlookup_done(&nd); 3173 rel_mplock(); 3174 return (error); 3175 } 3176 3177 /* 3178 * fchmod_args(int fd, int mode) 3179 * 3180 * Change mode of a file given a file descriptor. 3181 * 3182 * MPALMOSTSAFE 3183 */ 3184 int 3185 sys_fchmod(struct fchmod_args *uap) 3186 { 3187 struct thread *td = curthread; 3188 struct proc *p = td->td_proc; 3189 struct file *fp; 3190 int error; 3191 3192 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3193 return (error); 3194 get_mplock(); 3195 if (fp->f_nchandle.ncp) 3196 error = ncp_writechk(&fp->f_nchandle); 3197 if (error == 0) 3198 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3199 rel_mplock(); 3200 fdrop(fp); 3201 return (error); 3202 } 3203 3204 /* 3205 * fchmodat_args(char *path, int mode) 3206 * 3207 * Change mode of a file pointed to by fd/path. 3208 * 3209 * MPALMOSTSAFE 3210 */ 3211 int 3212 sys_fchmodat(struct fchmodat_args *uap) 3213 { 3214 struct nlookupdata nd; 3215 struct file *fp; 3216 int error; 3217 int flags; 3218 3219 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3220 return (EINVAL); 3221 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3222 3223 get_mplock(); 3224 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3225 UIO_USERSPACE, flags); 3226 if (error == 0) 3227 error = kern_chmod(&nd, uap->mode); 3228 nlookup_done_at(&nd, fp); 3229 rel_mplock(); 3230 return (error); 3231 } 3232 3233 static int 3234 setfown(struct vnode *vp, uid_t uid, gid_t gid) 3235 { 3236 struct thread *td = curthread; 3237 int error; 3238 struct vattr vattr; 3239 3240 /* 3241 * note: vget is required for any operation that might mod the vnode 3242 * so VINACTIVE is properly cleared. 3243 */ 3244 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3245 VATTR_NULL(&vattr); 3246 vattr.va_uid = uid; 3247 vattr.va_gid = gid; 3248 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3249 vput(vp); 3250 } 3251 return error; 3252 } 3253 3254 int 3255 kern_chown(struct nlookupdata *nd, int uid, int gid) 3256 { 3257 struct vnode *vp; 3258 int error; 3259 3260 if ((error = nlookup(nd)) != 0) 3261 return (error); 3262 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3263 return (error); 3264 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3265 error = setfown(vp, uid, gid); 3266 vrele(vp); 3267 return (error); 3268 } 3269 3270 /* 3271 * chown(char *path, int uid, int gid) 3272 * 3273 * Set ownership given a path name. 3274 * 3275 * MPALMOSTSAFE 3276 */ 3277 int 3278 sys_chown(struct chown_args *uap) 3279 { 3280 struct nlookupdata nd; 3281 int error; 3282 3283 get_mplock(); 3284 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3285 if (error == 0) 3286 error = kern_chown(&nd, uap->uid, uap->gid); 3287 nlookup_done(&nd); 3288 rel_mplock(); 3289 return (error); 3290 } 3291 3292 /* 3293 * lchown_args(char *path, int uid, int gid) 3294 * 3295 * Set ownership given a path name, do not cross symlinks. 3296 * 3297 * MPALMOSTSAFE 3298 */ 3299 int 3300 sys_lchown(struct lchown_args *uap) 3301 { 3302 struct nlookupdata nd; 3303 int error; 3304 3305 get_mplock(); 3306 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3307 if (error == 0) 3308 error = kern_chown(&nd, uap->uid, uap->gid); 3309 nlookup_done(&nd); 3310 rel_mplock(); 3311 return (error); 3312 } 3313 3314 /* 3315 * fchown_args(int fd, int uid, int gid) 3316 * 3317 * Set ownership given a file descriptor. 3318 * 3319 * MPALMOSTSAFE 3320 */ 3321 int 3322 sys_fchown(struct fchown_args *uap) 3323 { 3324 struct thread *td = curthread; 3325 struct proc *p = td->td_proc; 3326 struct file *fp; 3327 int error; 3328 3329 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3330 return (error); 3331 get_mplock(); 3332 if (fp->f_nchandle.ncp) 3333 error = ncp_writechk(&fp->f_nchandle); 3334 if (error == 0) 3335 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 3336 rel_mplock(); 3337 fdrop(fp); 3338 return (error); 3339 } 3340 3341 /* 3342 * fchownat(int fd, char *path, int uid, int gid, int flags) 3343 * 3344 * Set ownership of file pointed to by fd/path. 3345 * 3346 * MPALMOSTSAFE 3347 */ 3348 int 3349 sys_fchownat(struct fchownat_args *uap) 3350 { 3351 struct nlookupdata nd; 3352 struct file *fp; 3353 int error; 3354 int flags; 3355 3356 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3357 return (EINVAL); 3358 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3359 3360 get_mplock(); 3361 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3362 UIO_USERSPACE, flags); 3363 if (error == 0) 3364 error = kern_chown(&nd, uap->uid, uap->gid); 3365 nlookup_done_at(&nd, fp); 3366 rel_mplock(); 3367 return (error); 3368 } 3369 3370 3371 static int 3372 getutimes(const struct timeval *tvp, struct timespec *tsp) 3373 { 3374 struct timeval tv[2]; 3375 3376 if (tvp == NULL) { 3377 microtime(&tv[0]); 3378 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3379 tsp[1] = tsp[0]; 3380 } else { 3381 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3382 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3383 } 3384 return 0; 3385 } 3386 3387 static int 3388 setutimes(struct vnode *vp, struct vattr *vattr, 3389 const struct timespec *ts, int nullflag) 3390 { 3391 struct thread *td = curthread; 3392 int error; 3393 3394 VATTR_NULL(vattr); 3395 vattr->va_atime = ts[0]; 3396 vattr->va_mtime = ts[1]; 3397 if (nullflag) 3398 vattr->va_vaflags |= VA_UTIMES_NULL; 3399 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3400 3401 return error; 3402 } 3403 3404 int 3405 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3406 { 3407 struct timespec ts[2]; 3408 struct vnode *vp; 3409 struct vattr vattr; 3410 int error; 3411 3412 if ((error = getutimes(tptr, ts)) != 0) 3413 return (error); 3414 3415 /* 3416 * NOTE: utimes() succeeds for the owner even if the file 3417 * is not user-writable. 3418 */ 3419 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3420 3421 if ((error = nlookup(nd)) != 0) 3422 return (error); 3423 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3424 return (error); 3425 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3426 return (error); 3427 3428 /* 3429 * note: vget is required for any operation that might mod the vnode 3430 * so VINACTIVE is properly cleared. 3431 */ 3432 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3433 error = vget(vp, LK_EXCLUSIVE); 3434 if (error == 0) { 3435 error = setutimes(vp, &vattr, ts, (tptr == NULL)); 3436 vput(vp); 3437 } 3438 } 3439 vrele(vp); 3440 return (error); 3441 } 3442 3443 /* 3444 * utimes_args(char *path, struct timeval *tptr) 3445 * 3446 * Set the access and modification times of a file. 3447 * 3448 * MPALMOSTSAFE 3449 */ 3450 int 3451 sys_utimes(struct utimes_args *uap) 3452 { 3453 struct timeval tv[2]; 3454 struct nlookupdata nd; 3455 int error; 3456 3457 if (uap->tptr) { 3458 error = copyin(uap->tptr, tv, sizeof(tv)); 3459 if (error) 3460 return (error); 3461 } 3462 get_mplock(); 3463 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3464 if (error == 0) 3465 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3466 nlookup_done(&nd); 3467 rel_mplock(); 3468 return (error); 3469 } 3470 3471 /* 3472 * lutimes_args(char *path, struct timeval *tptr) 3473 * 3474 * Set the access and modification times of a file. 3475 * 3476 * MPALMOSTSAFE 3477 */ 3478 int 3479 sys_lutimes(struct lutimes_args *uap) 3480 { 3481 struct timeval tv[2]; 3482 struct nlookupdata nd; 3483 int error; 3484 3485 if (uap->tptr) { 3486 error = copyin(uap->tptr, tv, sizeof(tv)); 3487 if (error) 3488 return (error); 3489 } 3490 get_mplock(); 3491 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3492 if (error == 0) 3493 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3494 nlookup_done(&nd); 3495 rel_mplock(); 3496 return (error); 3497 } 3498 3499 /* 3500 * Set utimes on a file descriptor. The creds used to open the 3501 * file are used to determine whether the operation is allowed 3502 * or not. 3503 */ 3504 int 3505 kern_futimes(int fd, struct timeval *tptr) 3506 { 3507 struct thread *td = curthread; 3508 struct proc *p = td->td_proc; 3509 struct timespec ts[2]; 3510 struct file *fp; 3511 struct vnode *vp; 3512 struct vattr vattr; 3513 int error; 3514 3515 error = getutimes(tptr, ts); 3516 if (error) 3517 return (error); 3518 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3519 return (error); 3520 if (fp->f_nchandle.ncp) 3521 error = ncp_writechk(&fp->f_nchandle); 3522 if (error == 0) { 3523 vp = fp->f_data; 3524 error = vget(vp, LK_EXCLUSIVE); 3525 if (error == 0) { 3526 error = VOP_GETATTR(vp, &vattr); 3527 if (error == 0) { 3528 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3529 fp->f_cred); 3530 } 3531 if (error == 0) { 3532 error = setutimes(vp, &vattr, ts, 3533 (tptr == NULL)); 3534 } 3535 vput(vp); 3536 } 3537 } 3538 fdrop(fp); 3539 return (error); 3540 } 3541 3542 /* 3543 * futimes_args(int fd, struct timeval *tptr) 3544 * 3545 * Set the access and modification times of a file. 3546 * 3547 * MPALMOSTSAFE 3548 */ 3549 int 3550 sys_futimes(struct futimes_args *uap) 3551 { 3552 struct timeval tv[2]; 3553 int error; 3554 3555 if (uap->tptr) { 3556 error = copyin(uap->tptr, tv, sizeof(tv)); 3557 if (error) 3558 return (error); 3559 } 3560 get_mplock(); 3561 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3562 rel_mplock(); 3563 3564 return (error); 3565 } 3566 3567 int 3568 kern_truncate(struct nlookupdata *nd, off_t length) 3569 { 3570 struct vnode *vp; 3571 struct vattr vattr; 3572 int error; 3573 3574 if (length < 0) 3575 return(EINVAL); 3576 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3577 if ((error = nlookup(nd)) != 0) 3578 return (error); 3579 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3580 return (error); 3581 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3582 return (error); 3583 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 3584 vrele(vp); 3585 return (error); 3586 } 3587 if (vp->v_type == VDIR) { 3588 error = EISDIR; 3589 } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3590 VATTR_NULL(&vattr); 3591 vattr.va_size = length; 3592 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3593 } 3594 vput(vp); 3595 return (error); 3596 } 3597 3598 /* 3599 * truncate(char *path, int pad, off_t length) 3600 * 3601 * Truncate a file given its path name. 3602 * 3603 * MPALMOSTSAFE 3604 */ 3605 int 3606 sys_truncate(struct truncate_args *uap) 3607 { 3608 struct nlookupdata nd; 3609 int error; 3610 3611 get_mplock(); 3612 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3613 if (error == 0) 3614 error = kern_truncate(&nd, uap->length); 3615 nlookup_done(&nd); 3616 rel_mplock(); 3617 return error; 3618 } 3619 3620 int 3621 kern_ftruncate(int fd, off_t length) 3622 { 3623 struct thread *td = curthread; 3624 struct proc *p = td->td_proc; 3625 struct vattr vattr; 3626 struct vnode *vp; 3627 struct file *fp; 3628 int error; 3629 3630 if (length < 0) 3631 return(EINVAL); 3632 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3633 return (error); 3634 if (fp->f_nchandle.ncp) { 3635 error = ncp_writechk(&fp->f_nchandle); 3636 if (error) 3637 goto done; 3638 } 3639 if ((fp->f_flag & FWRITE) == 0) { 3640 error = EINVAL; 3641 goto done; 3642 } 3643 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3644 error = EINVAL; 3645 goto done; 3646 } 3647 vp = (struct vnode *)fp->f_data; 3648 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3649 if (vp->v_type == VDIR) { 3650 error = EISDIR; 3651 } else if ((error = vn_writechk(vp, NULL)) == 0) { 3652 VATTR_NULL(&vattr); 3653 vattr.va_size = length; 3654 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3655 } 3656 vn_unlock(vp); 3657 done: 3658 fdrop(fp); 3659 return (error); 3660 } 3661 3662 /* 3663 * ftruncate_args(int fd, int pad, off_t length) 3664 * 3665 * Truncate a file given a file descriptor. 3666 * 3667 * MPALMOSTSAFE 3668 */ 3669 int 3670 sys_ftruncate(struct ftruncate_args *uap) 3671 { 3672 int error; 3673 3674 get_mplock(); 3675 error = kern_ftruncate(uap->fd, uap->length); 3676 rel_mplock(); 3677 3678 return (error); 3679 } 3680 3681 /* 3682 * fsync(int fd) 3683 * 3684 * Sync an open file. 3685 * 3686 * MPALMOSTSAFE 3687 */ 3688 int 3689 sys_fsync(struct fsync_args *uap) 3690 { 3691 struct thread *td = curthread; 3692 struct proc *p = td->td_proc; 3693 struct vnode *vp; 3694 struct file *fp; 3695 vm_object_t obj; 3696 int error; 3697 3698 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3699 return (error); 3700 get_mplock(); 3701 vp = (struct vnode *)fp->f_data; 3702 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3703 if ((obj = vp->v_object) != NULL) 3704 vm_object_page_clean(obj, 0, 0, 0); 3705 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3706 if (error == 0 && vp->v_mount) 3707 error = buf_fsync(vp); 3708 vn_unlock(vp); 3709 rel_mplock(); 3710 fdrop(fp); 3711 3712 return (error); 3713 } 3714 3715 int 3716 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3717 { 3718 struct nchandle fnchd; 3719 struct nchandle tnchd; 3720 struct namecache *ncp; 3721 struct vnode *fdvp; 3722 struct vnode *tdvp; 3723 struct mount *mp; 3724 int error; 3725 3726 bwillinode(1); 3727 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3728 if ((error = nlookup(fromnd)) != 0) 3729 return (error); 3730 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3731 return (ENOENT); 3732 fnchd.mount = fromnd->nl_nch.mount; 3733 cache_hold(&fnchd); 3734 3735 /* 3736 * unlock the source nch so we can lookup the target nch without 3737 * deadlocking. The target may or may not exist so we do not check 3738 * for a target vp like kern_mkdir() and other creation functions do. 3739 * 3740 * The source and target directories are ref'd and rechecked after 3741 * everything is relocked to determine if the source or target file 3742 * has been renamed. 3743 */ 3744 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3745 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3746 cache_unlock(&fromnd->nl_nch); 3747 3748 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 3749 if ((error = nlookup(tond)) != 0) { 3750 cache_drop(&fnchd); 3751 return (error); 3752 } 3753 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3754 cache_drop(&fnchd); 3755 return (ENOENT); 3756 } 3757 tnchd.mount = tond->nl_nch.mount; 3758 cache_hold(&tnchd); 3759 3760 /* 3761 * If the source and target are the same there is nothing to do 3762 */ 3763 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3764 cache_drop(&fnchd); 3765 cache_drop(&tnchd); 3766 return (0); 3767 } 3768 3769 /* 3770 * Mount points cannot be renamed or overwritten 3771 */ 3772 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3773 NCF_ISMOUNTPT 3774 ) { 3775 cache_drop(&fnchd); 3776 cache_drop(&tnchd); 3777 return (EINVAL); 3778 } 3779 3780 /* 3781 * Relock the source ncp. cache_relock() will deal with any 3782 * deadlocks against the already-locked tond and will also 3783 * make sure both are resolved. 3784 * 3785 * NOTE AFTER RELOCKING: The source or target ncp may have become 3786 * invalid while they were unlocked, nc_vp and nc_mount could 3787 * be NULL. 3788 */ 3789 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 3790 &tond->nl_nch, tond->nl_cred); 3791 fromnd->nl_flags |= NLC_NCPISLOCKED; 3792 3793 /* 3794 * make sure the parent directories linkages are the same 3795 */ 3796 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3797 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3798 cache_drop(&fnchd); 3799 cache_drop(&tnchd); 3800 return (ENOENT); 3801 } 3802 3803 /* 3804 * Both the source and target must be within the same filesystem and 3805 * in the same filesystem as their parent directories within the 3806 * namecache topology. 3807 * 3808 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3809 */ 3810 mp = fnchd.mount; 3811 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3812 mp != tond->nl_nch.mount) { 3813 cache_drop(&fnchd); 3814 cache_drop(&tnchd); 3815 return (EXDEV); 3816 } 3817 3818 /* 3819 * Make sure the mount point is writable 3820 */ 3821 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3822 cache_drop(&fnchd); 3823 cache_drop(&tnchd); 3824 return (error); 3825 } 3826 3827 /* 3828 * If the target exists and either the source or target is a directory, 3829 * then both must be directories. 3830 * 3831 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3832 * have become NULL. 3833 */ 3834 if (tond->nl_nch.ncp->nc_vp) { 3835 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3836 error = ENOENT; 3837 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3838 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3839 error = ENOTDIR; 3840 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3841 error = EISDIR; 3842 } 3843 } 3844 3845 /* 3846 * You cannot rename a source into itself or a subdirectory of itself. 3847 * We check this by travsersing the target directory upwards looking 3848 * for a match against the source. 3849 * 3850 * XXX MPSAFE 3851 */ 3852 if (error == 0) { 3853 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3854 if (fromnd->nl_nch.ncp == ncp) { 3855 error = EINVAL; 3856 break; 3857 } 3858 } 3859 } 3860 3861 cache_drop(&fnchd); 3862 cache_drop(&tnchd); 3863 3864 /* 3865 * Even though the namespaces are different, they may still represent 3866 * hardlinks to the same file. The filesystem might have a hard time 3867 * with this so we issue a NREMOVE of the source instead of a NRENAME 3868 * when we detect the situation. 3869 */ 3870 if (error == 0) { 3871 fdvp = fromnd->nl_dvp; 3872 tdvp = tond->nl_dvp; 3873 if (fdvp == NULL || tdvp == NULL) { 3874 error = EPERM; 3875 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3876 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3877 fromnd->nl_cred); 3878 } else { 3879 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3880 fdvp, tdvp, tond->nl_cred); 3881 } 3882 } 3883 return (error); 3884 } 3885 3886 /* 3887 * rename_args(char *from, char *to) 3888 * 3889 * Rename files. Source and destination must either both be directories, 3890 * or both not be directories. If target is a directory, it must be empty. 3891 * 3892 * MPALMOSTSAFE 3893 */ 3894 int 3895 sys_rename(struct rename_args *uap) 3896 { 3897 struct nlookupdata fromnd, tond; 3898 int error; 3899 3900 get_mplock(); 3901 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3902 if (error == 0) { 3903 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3904 if (error == 0) 3905 error = kern_rename(&fromnd, &tond); 3906 nlookup_done(&tond); 3907 } 3908 nlookup_done(&fromnd); 3909 rel_mplock(); 3910 return (error); 3911 } 3912 3913 /* 3914 * renameat_args(int oldfd, char *old, int newfd, char *new) 3915 * 3916 * Rename files using paths relative to the directories associated with 3917 * oldfd and newfd. Source and destination must either both be directories, 3918 * or both not be directories. If target is a directory, it must be empty. 3919 * 3920 * MPALMOSTSAFE 3921 */ 3922 int 3923 sys_renameat(struct renameat_args *uap) 3924 { 3925 struct nlookupdata oldnd, newnd; 3926 struct file *oldfp, *newfp; 3927 int error; 3928 3929 get_mplock(); 3930 error = nlookup_init_at(&oldnd, &oldfp, uap->oldfd, uap->old, 3931 UIO_USERSPACE, 0); 3932 if (error == 0) { 3933 error = nlookup_init_at(&newnd, &newfp, uap->newfd, uap->new, 3934 UIO_USERSPACE, 0); 3935 if (error == 0) 3936 error = kern_rename(&oldnd, &newnd); 3937 nlookup_done_at(&newnd, newfp); 3938 } 3939 nlookup_done_at(&oldnd, oldfp); 3940 rel_mplock(); 3941 return (error); 3942 } 3943 3944 int 3945 kern_mkdir(struct nlookupdata *nd, int mode) 3946 { 3947 struct thread *td = curthread; 3948 struct proc *p = td->td_proc; 3949 struct vnode *vp; 3950 struct vattr vattr; 3951 int error; 3952 3953 bwillinode(1); 3954 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3955 if ((error = nlookup(nd)) != 0) 3956 return (error); 3957 3958 if (nd->nl_nch.ncp->nc_vp) 3959 return (EEXIST); 3960 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3961 return (error); 3962 VATTR_NULL(&vattr); 3963 vattr.va_type = VDIR; 3964 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3965 3966 vp = NULL; 3967 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 3968 if (error == 0) 3969 vput(vp); 3970 return (error); 3971 } 3972 3973 /* 3974 * mkdir_args(char *path, int mode) 3975 * 3976 * Make a directory file. 3977 * 3978 * MPALMOSTSAFE 3979 */ 3980 int 3981 sys_mkdir(struct mkdir_args *uap) 3982 { 3983 struct nlookupdata nd; 3984 int error; 3985 3986 get_mplock(); 3987 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3988 if (error == 0) 3989 error = kern_mkdir(&nd, uap->mode); 3990 nlookup_done(&nd); 3991 rel_mplock(); 3992 return (error); 3993 } 3994 3995 /* 3996 * mkdirat_args(int fd, char *path, mode_t mode) 3997 * 3998 * Make a directory file. The path is relative to the directory associated 3999 * with fd. 4000 * 4001 * MPALMOSTSAFE 4002 */ 4003 int 4004 sys_mkdirat(struct mkdirat_args *uap) 4005 { 4006 struct nlookupdata nd; 4007 struct file *fp; 4008 int error; 4009 4010 get_mplock(); 4011 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4012 if (error == 0) 4013 error = kern_mkdir(&nd, uap->mode); 4014 nlookup_done_at(&nd, fp); 4015 rel_mplock(); 4016 return (error); 4017 } 4018 4019 int 4020 kern_rmdir(struct nlookupdata *nd) 4021 { 4022 int error; 4023 4024 bwillinode(1); 4025 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4026 if ((error = nlookup(nd)) != 0) 4027 return (error); 4028 4029 /* 4030 * Do not allow directories representing mount points to be 4031 * deleted, even if empty. Check write perms on mount point 4032 * in case the vnode is aliased (aka nullfs). 4033 */ 4034 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4035 return (EINVAL); 4036 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4037 return (error); 4038 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4039 return (error); 4040 } 4041 4042 /* 4043 * rmdir_args(char *path) 4044 * 4045 * Remove a directory file. 4046 * 4047 * MPALMOSTSAFE 4048 */ 4049 int 4050 sys_rmdir(struct rmdir_args *uap) 4051 { 4052 struct nlookupdata nd; 4053 int error; 4054 4055 get_mplock(); 4056 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4057 if (error == 0) 4058 error = kern_rmdir(&nd); 4059 nlookup_done(&nd); 4060 rel_mplock(); 4061 return (error); 4062 } 4063 4064 int 4065 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4066 enum uio_seg direction) 4067 { 4068 struct thread *td = curthread; 4069 struct proc *p = td->td_proc; 4070 struct vnode *vp; 4071 struct file *fp; 4072 struct uio auio; 4073 struct iovec aiov; 4074 off_t loff; 4075 int error, eofflag; 4076 4077 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 4078 return (error); 4079 if ((fp->f_flag & FREAD) == 0) { 4080 error = EBADF; 4081 goto done; 4082 } 4083 vp = (struct vnode *)fp->f_data; 4084 unionread: 4085 if (vp->v_type != VDIR) { 4086 error = EINVAL; 4087 goto done; 4088 } 4089 aiov.iov_base = buf; 4090 aiov.iov_len = count; 4091 auio.uio_iov = &aiov; 4092 auio.uio_iovcnt = 1; 4093 auio.uio_rw = UIO_READ; 4094 auio.uio_segflg = direction; 4095 auio.uio_td = td; 4096 auio.uio_resid = count; 4097 loff = auio.uio_offset = fp->f_offset; 4098 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4099 fp->f_offset = auio.uio_offset; 4100 if (error) 4101 goto done; 4102 if (count == auio.uio_resid) { 4103 if (union_dircheckp) { 4104 error = union_dircheckp(td, &vp, fp); 4105 if (error == -1) 4106 goto unionread; 4107 if (error) 4108 goto done; 4109 } 4110 #if 0 4111 if ((vp->v_flag & VROOT) && 4112 (vp->v_mount->mnt_flag & MNT_UNION)) { 4113 struct vnode *tvp = vp; 4114 vp = vp->v_mount->mnt_vnodecovered; 4115 vref(vp); 4116 fp->f_data = vp; 4117 fp->f_offset = 0; 4118 vrele(tvp); 4119 goto unionread; 4120 } 4121 #endif 4122 } 4123 4124 /* 4125 * WARNING! *basep may not be wide enough to accomodate the 4126 * seek offset. XXX should we hack this to return the upper 32 bits 4127 * for offsets greater then 4G? 4128 */ 4129 if (basep) { 4130 *basep = (long)loff; 4131 } 4132 *res = count - auio.uio_resid; 4133 done: 4134 fdrop(fp); 4135 return (error); 4136 } 4137 4138 /* 4139 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4140 * 4141 * Read a block of directory entries in a file system independent format. 4142 * 4143 * MPALMOSTSAFE 4144 */ 4145 int 4146 sys_getdirentries(struct getdirentries_args *uap) 4147 { 4148 long base; 4149 int error; 4150 4151 get_mplock(); 4152 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4153 &uap->sysmsg_result, UIO_USERSPACE); 4154 rel_mplock(); 4155 4156 if (error == 0 && uap->basep) 4157 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4158 return (error); 4159 } 4160 4161 /* 4162 * getdents_args(int fd, char *buf, size_t count) 4163 * 4164 * MPALMOSTSAFE 4165 */ 4166 int 4167 sys_getdents(struct getdents_args *uap) 4168 { 4169 int error; 4170 4171 get_mplock(); 4172 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4173 &uap->sysmsg_result, UIO_USERSPACE); 4174 rel_mplock(); 4175 4176 return (error); 4177 } 4178 4179 /* 4180 * Set the mode mask for creation of filesystem nodes. 4181 * 4182 * umask(int newmask) 4183 * 4184 * MPSAFE 4185 */ 4186 int 4187 sys_umask(struct umask_args *uap) 4188 { 4189 struct thread *td = curthread; 4190 struct proc *p = td->td_proc; 4191 struct filedesc *fdp; 4192 4193 fdp = p->p_fd; 4194 uap->sysmsg_result = fdp->fd_cmask; 4195 fdp->fd_cmask = uap->newmask & ALLPERMS; 4196 return (0); 4197 } 4198 4199 /* 4200 * revoke(char *path) 4201 * 4202 * Void all references to file by ripping underlying filesystem 4203 * away from vnode. 4204 * 4205 * MPALMOSTSAFE 4206 */ 4207 int 4208 sys_revoke(struct revoke_args *uap) 4209 { 4210 struct nlookupdata nd; 4211 struct vattr vattr; 4212 struct vnode *vp; 4213 struct ucred *cred; 4214 int error; 4215 4216 vp = NULL; 4217 get_mplock(); 4218 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4219 if (error == 0) 4220 error = nlookup(&nd); 4221 if (error == 0) 4222 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4223 cred = crhold(nd.nl_cred); 4224 nlookup_done(&nd); 4225 if (error == 0) { 4226 if (error == 0) 4227 error = VOP_GETATTR(vp, &vattr); 4228 if (error == 0 && cred->cr_uid != vattr.va_uid) 4229 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4230 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4231 if (vcount(vp) > 0) 4232 error = vrevoke(vp, cred); 4233 } else if (error == 0) { 4234 error = vrevoke(vp, cred); 4235 } 4236 vrele(vp); 4237 } 4238 if (cred) 4239 crfree(cred); 4240 rel_mplock(); 4241 return (error); 4242 } 4243 4244 /* 4245 * getfh_args(char *fname, fhandle_t *fhp) 4246 * 4247 * Get (NFS) file handle 4248 * 4249 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4250 * mount. This allows nullfs mounts to be explicitly exported. 4251 * 4252 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4253 * 4254 * nullfs mounts of subdirectories are not safe. That is, it will 4255 * work, but you do not really have protection against access to 4256 * the related parent directories. 4257 * 4258 * MPALMOSTSAFE 4259 */ 4260 int 4261 sys_getfh(struct getfh_args *uap) 4262 { 4263 struct thread *td = curthread; 4264 struct nlookupdata nd; 4265 fhandle_t fh; 4266 struct vnode *vp; 4267 struct mount *mp; 4268 int error; 4269 4270 /* 4271 * Must be super user 4272 */ 4273 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4274 return (error); 4275 4276 vp = NULL; 4277 get_mplock(); 4278 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4279 if (error == 0) 4280 error = nlookup(&nd); 4281 if (error == 0) 4282 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4283 mp = nd.nl_nch.mount; 4284 nlookup_done(&nd); 4285 if (error == 0) { 4286 bzero(&fh, sizeof(fh)); 4287 fh.fh_fsid = mp->mnt_stat.f_fsid; 4288 error = VFS_VPTOFH(vp, &fh.fh_fid); 4289 vput(vp); 4290 if (error == 0) 4291 error = copyout(&fh, uap->fhp, sizeof(fh)); 4292 } 4293 rel_mplock(); 4294 return (error); 4295 } 4296 4297 /* 4298 * fhopen_args(const struct fhandle *u_fhp, int flags) 4299 * 4300 * syscall for the rpc.lockd to use to translate a NFS file handle into 4301 * an open descriptor. 4302 * 4303 * warning: do not remove the priv_check() call or this becomes one giant 4304 * security hole. 4305 * 4306 * MPALMOSTSAFE 4307 */ 4308 int 4309 sys_fhopen(struct fhopen_args *uap) 4310 { 4311 struct thread *td = curthread; 4312 struct filedesc *fdp = td->td_proc->p_fd; 4313 struct mount *mp; 4314 struct vnode *vp; 4315 struct fhandle fhp; 4316 struct vattr vat; 4317 struct vattr *vap = &vat; 4318 struct flock lf; 4319 int fmode, mode, error, type; 4320 struct file *nfp; 4321 struct file *fp; 4322 int indx; 4323 4324 /* 4325 * Must be super user 4326 */ 4327 error = priv_check(td, PRIV_ROOT); 4328 if (error) 4329 return (error); 4330 4331 fmode = FFLAGS(uap->flags); 4332 4333 /* 4334 * Why not allow a non-read/write open for our lockd? 4335 */ 4336 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4337 return (EINVAL); 4338 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4339 if (error) 4340 return(error); 4341 4342 /* 4343 * Find the mount point 4344 */ 4345 get_mplock(); 4346 mp = vfs_getvfs(&fhp.fh_fsid); 4347 if (mp == NULL) { 4348 error = ESTALE; 4349 goto done; 4350 } 4351 /* now give me my vnode, it gets returned to me locked */ 4352 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4353 if (error) 4354 goto done; 4355 /* 4356 * from now on we have to make sure not 4357 * to forget about the vnode 4358 * any error that causes an abort must vput(vp) 4359 * just set error = err and 'goto bad;'. 4360 */ 4361 4362 /* 4363 * from vn_open 4364 */ 4365 if (vp->v_type == VLNK) { 4366 error = EMLINK; 4367 goto bad; 4368 } 4369 if (vp->v_type == VSOCK) { 4370 error = EOPNOTSUPP; 4371 goto bad; 4372 } 4373 mode = 0; 4374 if (fmode & (FWRITE | O_TRUNC)) { 4375 if (vp->v_type == VDIR) { 4376 error = EISDIR; 4377 goto bad; 4378 } 4379 error = vn_writechk(vp, NULL); 4380 if (error) 4381 goto bad; 4382 mode |= VWRITE; 4383 } 4384 if (fmode & FREAD) 4385 mode |= VREAD; 4386 if (mode) { 4387 error = VOP_ACCESS(vp, mode, td->td_ucred); 4388 if (error) 4389 goto bad; 4390 } 4391 if (fmode & O_TRUNC) { 4392 vn_unlock(vp); /* XXX */ 4393 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4394 VATTR_NULL(vap); 4395 vap->va_size = 0; 4396 error = VOP_SETATTR(vp, vap, td->td_ucred); 4397 if (error) 4398 goto bad; 4399 } 4400 4401 /* 4402 * VOP_OPEN needs the file pointer so it can potentially override 4403 * it. 4404 * 4405 * WARNING! no f_nchandle will be associated when fhopen()ing a 4406 * directory. XXX 4407 */ 4408 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4409 goto bad; 4410 fp = nfp; 4411 4412 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4413 if (error) { 4414 /* 4415 * setting f_ops this way prevents VOP_CLOSE from being 4416 * called or fdrop() releasing the vp from v_data. Since 4417 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4418 */ 4419 fp->f_ops = &badfileops; 4420 fp->f_data = NULL; 4421 goto bad_drop; 4422 } 4423 4424 /* 4425 * The fp is given its own reference, we still have our ref and lock. 4426 * 4427 * Assert that all regular files must be created with a VM object. 4428 */ 4429 if (vp->v_type == VREG && vp->v_object == NULL) { 4430 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4431 goto bad_drop; 4432 } 4433 4434 /* 4435 * The open was successful. Handle any locking requirements. 4436 */ 4437 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4438 lf.l_whence = SEEK_SET; 4439 lf.l_start = 0; 4440 lf.l_len = 0; 4441 if (fmode & O_EXLOCK) 4442 lf.l_type = F_WRLCK; 4443 else 4444 lf.l_type = F_RDLCK; 4445 if (fmode & FNONBLOCK) 4446 type = 0; 4447 else 4448 type = F_WAIT; 4449 vn_unlock(vp); 4450 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4451 /* 4452 * release our private reference. 4453 */ 4454 fsetfd(fdp, NULL, indx); 4455 fdrop(fp); 4456 vrele(vp); 4457 goto done; 4458 } 4459 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4460 fp->f_flag |= FHASLOCK; 4461 } 4462 4463 /* 4464 * Clean up. Associate the file pointer with the previously 4465 * reserved descriptor and return it. 4466 */ 4467 vput(vp); 4468 rel_mplock(); 4469 fsetfd(fdp, fp, indx); 4470 fdrop(fp); 4471 uap->sysmsg_result = indx; 4472 return (0); 4473 4474 bad_drop: 4475 fsetfd(fdp, NULL, indx); 4476 fdrop(fp); 4477 bad: 4478 vput(vp); 4479 done: 4480 rel_mplock(); 4481 return (error); 4482 } 4483 4484 /* 4485 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4486 * 4487 * MPALMOSTSAFE 4488 */ 4489 int 4490 sys_fhstat(struct fhstat_args *uap) 4491 { 4492 struct thread *td = curthread; 4493 struct stat sb; 4494 fhandle_t fh; 4495 struct mount *mp; 4496 struct vnode *vp; 4497 int error; 4498 4499 /* 4500 * Must be super user 4501 */ 4502 error = priv_check(td, PRIV_ROOT); 4503 if (error) 4504 return (error); 4505 4506 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4507 if (error) 4508 return (error); 4509 4510 get_mplock(); 4511 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4512 error = ESTALE; 4513 if (error == 0) { 4514 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4515 error = vn_stat(vp, &sb, td->td_ucred); 4516 vput(vp); 4517 } 4518 } 4519 rel_mplock(); 4520 if (error == 0) 4521 error = copyout(&sb, uap->sb, sizeof(sb)); 4522 return (error); 4523 } 4524 4525 /* 4526 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4527 * 4528 * MPALMOSTSAFE 4529 */ 4530 int 4531 sys_fhstatfs(struct fhstatfs_args *uap) 4532 { 4533 struct thread *td = curthread; 4534 struct proc *p = td->td_proc; 4535 struct statfs *sp; 4536 struct mount *mp; 4537 struct vnode *vp; 4538 struct statfs sb; 4539 char *fullpath, *freepath; 4540 fhandle_t fh; 4541 int error; 4542 4543 /* 4544 * Must be super user 4545 */ 4546 if ((error = priv_check(td, PRIV_ROOT))) 4547 return (error); 4548 4549 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4550 return (error); 4551 4552 get_mplock(); 4553 4554 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4555 error = ESTALE; 4556 goto done; 4557 } 4558 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4559 error = ESTALE; 4560 goto done; 4561 } 4562 4563 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4564 goto done; 4565 mp = vp->v_mount; 4566 sp = &mp->mnt_stat; 4567 vput(vp); 4568 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4569 goto done; 4570 4571 error = mount_path(p, mp, &fullpath, &freepath); 4572 if (error) 4573 goto done; 4574 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4575 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4576 kfree(freepath, M_TEMP); 4577 4578 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4579 if (priv_check(td, PRIV_ROOT)) { 4580 bcopy(sp, &sb, sizeof(sb)); 4581 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4582 sp = &sb; 4583 } 4584 error = copyout(sp, uap->buf, sizeof(*sp)); 4585 done: 4586 rel_mplock(); 4587 return (error); 4588 } 4589 4590 /* 4591 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4592 * 4593 * MPALMOSTSAFE 4594 */ 4595 int 4596 sys_fhstatvfs(struct fhstatvfs_args *uap) 4597 { 4598 struct thread *td = curthread; 4599 struct proc *p = td->td_proc; 4600 struct statvfs *sp; 4601 struct mount *mp; 4602 struct vnode *vp; 4603 fhandle_t fh; 4604 int error; 4605 4606 /* 4607 * Must be super user 4608 */ 4609 if ((error = priv_check(td, PRIV_ROOT))) 4610 return (error); 4611 4612 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4613 return (error); 4614 4615 get_mplock(); 4616 4617 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4618 error = ESTALE; 4619 goto done; 4620 } 4621 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4622 error = ESTALE; 4623 goto done; 4624 } 4625 4626 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4627 goto done; 4628 mp = vp->v_mount; 4629 sp = &mp->mnt_vstat; 4630 vput(vp); 4631 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4632 goto done; 4633 4634 sp->f_flag = 0; 4635 if (mp->mnt_flag & MNT_RDONLY) 4636 sp->f_flag |= ST_RDONLY; 4637 if (mp->mnt_flag & MNT_NOSUID) 4638 sp->f_flag |= ST_NOSUID; 4639 error = copyout(sp, uap->buf, sizeof(*sp)); 4640 done: 4641 rel_mplock(); 4642 return (error); 4643 } 4644 4645 4646 /* 4647 * Syscall to push extended attribute configuration information into the 4648 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4649 * a command (int cmd), and attribute name and misc data. For now, the 4650 * attribute name is left in userspace for consumption by the VFS_op. 4651 * It will probably be changed to be copied into sysspace by the 4652 * syscall in the future, once issues with various consumers of the 4653 * attribute code have raised their hands. 4654 * 4655 * Currently this is used only by UFS Extended Attributes. 4656 * 4657 * MPALMOSTSAFE 4658 */ 4659 int 4660 sys_extattrctl(struct extattrctl_args *uap) 4661 { 4662 struct nlookupdata nd; 4663 struct vnode *vp; 4664 char attrname[EXTATTR_MAXNAMELEN]; 4665 int error; 4666 size_t size; 4667 4668 get_mplock(); 4669 4670 attrname[0] = 0; 4671 vp = NULL; 4672 error = 0; 4673 4674 if (error == 0 && uap->filename) { 4675 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4676 NLC_FOLLOW); 4677 if (error == 0) 4678 error = nlookup(&nd); 4679 if (error == 0) 4680 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4681 nlookup_done(&nd); 4682 } 4683 4684 if (error == 0 && uap->attrname) { 4685 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4686 &size); 4687 } 4688 4689 if (error == 0) { 4690 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4691 if (error == 0) 4692 error = nlookup(&nd); 4693 if (error == 0) 4694 error = ncp_writechk(&nd.nl_nch); 4695 if (error == 0) { 4696 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4697 uap->attrnamespace, 4698 uap->attrname, nd.nl_cred); 4699 } 4700 nlookup_done(&nd); 4701 } 4702 4703 rel_mplock(); 4704 4705 return (error); 4706 } 4707 4708 /* 4709 * Syscall to get a named extended attribute on a file or directory. 4710 * 4711 * MPALMOSTSAFE 4712 */ 4713 int 4714 sys_extattr_set_file(struct extattr_set_file_args *uap) 4715 { 4716 char attrname[EXTATTR_MAXNAMELEN]; 4717 struct nlookupdata nd; 4718 struct vnode *vp; 4719 struct uio auio; 4720 struct iovec aiov; 4721 int error; 4722 4723 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4724 if (error) 4725 return (error); 4726 4727 vp = NULL; 4728 get_mplock(); 4729 4730 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4731 if (error == 0) 4732 error = nlookup(&nd); 4733 if (error == 0) 4734 error = ncp_writechk(&nd.nl_nch); 4735 if (error == 0) 4736 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4737 if (error) { 4738 nlookup_done(&nd); 4739 rel_mplock(); 4740 return (error); 4741 } 4742 4743 bzero(&auio, sizeof(auio)); 4744 aiov.iov_base = uap->data; 4745 aiov.iov_len = uap->nbytes; 4746 auio.uio_iov = &aiov; 4747 auio.uio_iovcnt = 1; 4748 auio.uio_offset = 0; 4749 auio.uio_resid = uap->nbytes; 4750 auio.uio_rw = UIO_WRITE; 4751 auio.uio_td = curthread; 4752 4753 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4754 &auio, nd.nl_cred); 4755 4756 vput(vp); 4757 nlookup_done(&nd); 4758 rel_mplock(); 4759 return (error); 4760 } 4761 4762 /* 4763 * Syscall to get a named extended attribute on a file or directory. 4764 * 4765 * MPALMOSTSAFE 4766 */ 4767 int 4768 sys_extattr_get_file(struct extattr_get_file_args *uap) 4769 { 4770 char attrname[EXTATTR_MAXNAMELEN]; 4771 struct nlookupdata nd; 4772 struct uio auio; 4773 struct iovec aiov; 4774 struct vnode *vp; 4775 int error; 4776 4777 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4778 if (error) 4779 return (error); 4780 4781 vp = NULL; 4782 get_mplock(); 4783 4784 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4785 if (error == 0) 4786 error = nlookup(&nd); 4787 if (error == 0) 4788 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4789 if (error) { 4790 nlookup_done(&nd); 4791 rel_mplock(); 4792 return (error); 4793 } 4794 4795 bzero(&auio, sizeof(auio)); 4796 aiov.iov_base = uap->data; 4797 aiov.iov_len = uap->nbytes; 4798 auio.uio_iov = &aiov; 4799 auio.uio_iovcnt = 1; 4800 auio.uio_offset = 0; 4801 auio.uio_resid = uap->nbytes; 4802 auio.uio_rw = UIO_READ; 4803 auio.uio_td = curthread; 4804 4805 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 4806 &auio, nd.nl_cred); 4807 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 4808 4809 vput(vp); 4810 nlookup_done(&nd); 4811 rel_mplock(); 4812 return(error); 4813 } 4814 4815 /* 4816 * Syscall to delete a named extended attribute from a file or directory. 4817 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 4818 * 4819 * MPALMOSTSAFE 4820 */ 4821 int 4822 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4823 { 4824 char attrname[EXTATTR_MAXNAMELEN]; 4825 struct nlookupdata nd; 4826 struct vnode *vp; 4827 int error; 4828 4829 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4830 if (error) 4831 return(error); 4832 4833 get_mplock(); 4834 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4835 if (error == 0) 4836 error = nlookup(&nd); 4837 if (error == 0) 4838 error = ncp_writechk(&nd.nl_nch); 4839 if (error == 0) { 4840 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4841 if (error == 0) { 4842 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 4843 attrname, NULL, nd.nl_cred); 4844 vput(vp); 4845 } 4846 } 4847 nlookup_done(&nd); 4848 rel_mplock(); 4849 return(error); 4850 } 4851 4852 /* 4853 * Determine if the mount is visible to the process. 4854 */ 4855 static int 4856 chroot_visible_mnt(struct mount *mp, struct proc *p) 4857 { 4858 struct nchandle nch; 4859 4860 /* 4861 * Traverse from the mount point upwards. If we hit the process 4862 * root then the mount point is visible to the process. 4863 */ 4864 nch = mp->mnt_ncmountpt; 4865 while (nch.ncp) { 4866 if (nch.mount == p->p_fd->fd_nrdir.mount && 4867 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4868 return(1); 4869 } 4870 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4871 nch = nch.mount->mnt_ncmounton; 4872 } else { 4873 nch.ncp = nch.ncp->nc_parent; 4874 } 4875 } 4876 4877 /* 4878 * If the mount point is not visible to the process, but the 4879 * process root is in a subdirectory of the mount, return 4880 * TRUE anyway. 4881 */ 4882 if (p->p_fd->fd_nrdir.mount == mp) 4883 return(1); 4884 4885 return(0); 4886 } 4887 4888