1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 */ 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/buf.h> 45 #include <sys/conf.h> 46 #include <sys/sysent.h> 47 #include <sys/malloc.h> 48 #include <sys/mount.h> 49 #include <sys/mountctl.h> 50 #include <sys/sysproto.h> 51 #include <sys/filedesc.h> 52 #include <sys/kernel.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/linker.h> 56 #include <sys/stat.h> 57 #include <sys/unistd.h> 58 #include <sys/vnode.h> 59 #include <sys/proc.h> 60 #include <sys/priv.h> 61 #include <sys/jail.h> 62 #include <sys/namei.h> 63 #include <sys/nlookup.h> 64 #include <sys/dirent.h> 65 #include <sys/extattr.h> 66 #include <sys/spinlock.h> 67 #include <sys/kern_syscall.h> 68 #include <sys/objcache.h> 69 #include <sys/sysctl.h> 70 71 #include <sys/buf2.h> 72 #include <sys/file2.h> 73 #include <sys/spinlock2.h> 74 #include <sys/mplock2.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_object.h> 78 #include <vm/vm_page.h> 79 80 #include <machine/limits.h> 81 #include <machine/stdarg.h> 82 83 #include <vfs/union/union.h> 84 85 static void mount_warning(struct mount *mp, const char *ctl, ...) 
		    __printflike(2, 3);
static int	mount_path(struct proc *p, struct mount *mp,
			char **rb, char **fb);
static int	checkvp_chdir (struct vnode *vn, struct thread *td);
static void	checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int	chroot_refuse_vdir_fds (struct filedesc *fdp);
static int	chroot_visible_mnt(struct mount *mp, struct proc *p);
static int	getutimes (const struct timeval *, struct timespec *);
static int	setfown (struct vnode *, uid_t, gid_t);
static int	setfmode (struct vnode *, int);
static int	setfflags (struct vnode *, int);
static int	setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);

/* Tunable: if non-zero, non-root users may mount filesystems. */
static int	usermount = 0;	/* if 1, non-root can mount fs. */

int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Handles both fresh mounts and MNT_UPDATE on an existing mount.
 * On entry nothing is held; on every exit path both the namecache
 * handle (nch) and the vnode reference acquired along the way have
 * been released.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2 save mnt flags for rollback */
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	get_mplock();
	cred = td->td_ucred;

	/* Jailed processes may never mount. */
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system.  We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/* Remember whether something is already mounted on this ncp. */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch))
		hasmount = 1;
	else
		hasmount = 0;


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type.  We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* Updates are only legal on the root of a mount. */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * NOTE: length 5 includes the NUL, so this is an exact
		 * match against "null" (equivalent to strcmp here since
		 * fstypename was NUL-terminated by copyinstr).
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Save flags so a failed VFS_MOUNT can roll them back. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		vsetflags(vp, VMOUNT);
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		vn_unlock(vp);
		goto update;
	}
	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* The underlying fs may forbid stacked mounts on top of it. */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}
	vsetflags(vp, VMOUNT);

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	vn_unlock(vp);
update:
	/*
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* Roll the flags back to their pre-update values. */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		vfs_unbusy(mp);
		vclrflags(vp, VMOUNT);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;

		/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
		vclrflags(vp, VMOUNT);
		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): any error from vfs_allocate_syncvnode() is
		 * overwritten by the VFS_START() result below and thus never
		 * reported to the caller — confirm this is intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		/* Mount failed: tear down the partially built mount. */
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		vclrflags(vp, VMOUNT);
		mp->mnt_vfc->vfc_refcount--;
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	rel_mplock();
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * The passed ncp is ref'd and locked (from the mount code) and
 * must be associated with the vnode representing the root of the
 * mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* namecache handle being covered */
	struct nchandle new_nch;	/* root of the new mount */
	struct vnode *old_vp;		/* NOTE(review): never initialized or
					 * read by checkdirs_callback — looks
					 * vestigial; confirm before use. */
	struct vnode *new_vp;		/* resolved root vnode of new mount */
};

static int checkdirs_callback(struct proc *p, void *data);

/*
 * Walk all processes and retarget any cwd/root references that point
 * at the directory just covered by a new mount (old_nch) to the root
 * of the new mount (new_nch).
 */
static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || olddp->v_sysref.refcnt == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info);
	vput(newdp);
}

/*
 * Per-process worker for checkdirs(): swap fd_cdir/fd_ncdir and
 * fd_rdir/fd_nrdir to the new mount root when they match the covered
 * directory.  References are dropped only after fd_spin is released.
 *
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);

		/* Drop old references outside the spinlock. */
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	int error;

	KKASSERT(p);
	get_mplock();
	if (td->td_ucred->cr_prison != NULL) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, PRIV_ROOT)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

out:
	/* nd must be released before dounmount() tears the mount down. */
	nlookup_done(&nd);
	if (error == 0)
		error = dounmount(mp, uap->flags);
done:
	rel_mplock();
	return (error);
}

/*
 * Do the actual file system unmount.
 */

/*
 * mountlist_interlock() callback: atomically claim MNTK_UNMOUNT or
 * fail with EBUSY if an unmount is already in progress.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback: drop a process's text namecache handle if
 * it references the mount being unmounted.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * Core unmount path shared by sys_unmount() and kernel callers.
 *
 * flags may include MNT_FORCE; a forced unmount proceeds despite
 * lingering namecache/process references but then leaves the mount
 * structure allocated (freeok = 0) since stale handles may still
 * point at it.  Returns 0 on success or an errno.
 */
int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;

	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		return (error);

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* Back out the MNTK_UNMOUNT claim and wake any waiters. */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		/*
		 * If references remain, try releasing processes' text
		 * namecache handles on this mount, then re-check.
		 */
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp);
		}

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 */
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d process references still "
					  "present", mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d process references still "
					  "present", mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 */
	if (error == 0) {
		if ((vp = mp->mnt_syncer) != NULL) {
			mp->mnt_syncer = NULL;
			vrele(vp);
		}
		/* RDONLY mounts skip the sync; MNT_FORCE ignores failure. */
		if (((mp->mnt_flag & MNT_RDONLY) ||
		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}
	if (error) {
		/* Unmount failed: restore the syncer, flags, and lock. */
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	/* Release the mount's own namecache anchor points. */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	/* Forced unmounts with lingering refs leak mp intentionally. */
	if (freeok)
		kfree(mp, M_MOUNT);
	return (0);
}

/*
 * Emit a kernel warning about a mount, prefixed with the mount path
 * when it can be resolved (printf-style ctl/args).
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
816 { 817 char *ptr; 818 char *buf; 819 __va_list va; 820 821 __va_start(va, ctl); 822 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf, 0) == 0) { 823 kprintf("unmount(%s): ", ptr); 824 kvprintf(ctl, va); 825 kprintf("\n"); 826 kfree(buf, M_TEMP); 827 } else { 828 kprintf("unmount(%p", mp); 829 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 830 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 831 kprintf("): "); 832 kvprintf(ctl, va); 833 kprintf("\n"); 834 } 835 __va_end(va); 836 } 837 838 /* 839 * Shim cache_fullpath() to handle the case where a process is chrooted into 840 * a subdirectory of a mount. In this case if the root mount matches the 841 * process root directory's mount we have to specify the process's root 842 * directory instead of the mount point, because the mount point might 843 * be above the root directory. 844 */ 845 static 846 int 847 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 848 { 849 struct nchandle *nch; 850 851 if (p && p->p_fd->fd_nrdir.mount == mp) 852 nch = &p->p_fd->fd_nrdir; 853 else 854 nch = &mp->mnt_ncmountpt; 855 return(cache_fullpath(p, nch, rb, fb, 0)); 856 } 857 858 /* 859 * Sync each mounted filesystem. 860 */ 861 862 #ifdef DEBUG 863 static int syncprt = 0; 864 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 865 #endif /* DEBUG */ 866 867 static int sync_callback(struct mount *mp, void *data); 868 869 /* 870 * MPALMOSTSAFE 871 */ 872 int 873 sys_sync(struct sync_args *uap) 874 { 875 get_mplock(); 876 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 877 #ifdef DEBUG 878 /* 879 * print out buffer pool stat information on each sync() call. 
880 */ 881 if (syncprt) 882 vfs_bufstats(); 883 #endif /* DEBUG */ 884 rel_mplock(); 885 return (0); 886 } 887 888 static 889 int 890 sync_callback(struct mount *mp, void *data __unused) 891 { 892 int asyncflag; 893 894 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 895 asyncflag = mp->mnt_flag & MNT_ASYNC; 896 mp->mnt_flag &= ~MNT_ASYNC; 897 vfs_msync(mp, MNT_NOWAIT); 898 VFS_SYNC(mp, MNT_NOWAIT | MNT_LAZY); 899 mp->mnt_flag |= asyncflag; 900 } 901 return(0); 902 } 903 904 /* XXX PRISON: could be per prison flag */ 905 static int prison_quotas; 906 #if 0 907 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 908 #endif 909 910 /* 911 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 912 * 913 * Change filesystem quotas. 914 * 915 * MPALMOSTSAFE 916 */ 917 int 918 sys_quotactl(struct quotactl_args *uap) 919 { 920 struct nlookupdata nd; 921 struct thread *td; 922 struct proc *p; 923 struct mount *mp; 924 int error; 925 926 get_mplock(); 927 td = curthread; 928 p = td->td_proc; 929 if (td->td_ucred->cr_prison && !prison_quotas) { 930 error = EPERM; 931 goto done; 932 } 933 934 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 935 if (error == 0) 936 error = nlookup(&nd); 937 if (error == 0) { 938 mp = nd.nl_nch.mount; 939 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 940 uap->arg, nd.nl_cred); 941 } 942 nlookup_done(&nd); 943 done: 944 rel_mplock(); 945 return (error); 946 } 947 948 /* 949 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 950 * void *buf, int buflen) 951 * 952 * This function operates on a mount point and executes the specified 953 * operation using the specified control data, and possibly returns data. 954 * 955 * The actual number of bytes stored in the result buffer is returned, 0 956 * if none, otherwise an error is returned. 
957 * 958 * MPALMOSTSAFE 959 */ 960 int 961 sys_mountctl(struct mountctl_args *uap) 962 { 963 struct thread *td = curthread; 964 struct proc *p = td->td_proc; 965 struct file *fp; 966 void *ctl = NULL; 967 void *buf = NULL; 968 char *path = NULL; 969 int error; 970 971 /* 972 * Sanity and permissions checks. We must be root. 973 */ 974 KKASSERT(p); 975 if (td->td_ucred->cr_prison != NULL) 976 return (EPERM); 977 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 978 (error = priv_check(td, PRIV_ROOT)) != 0) 979 return (error); 980 981 /* 982 * Argument length checks 983 */ 984 if (uap->ctllen < 0 || uap->ctllen > 1024) 985 return (EINVAL); 986 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 987 return (EINVAL); 988 if (uap->path == NULL) 989 return (EINVAL); 990 991 /* 992 * Allocate the necessary buffers and copyin data 993 */ 994 path = objcache_get(namei_oc, M_WAITOK); 995 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 996 if (error) 997 goto done; 998 999 if (uap->ctllen) { 1000 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1001 error = copyin(uap->ctl, ctl, uap->ctllen); 1002 if (error) 1003 goto done; 1004 } 1005 if (uap->buflen) 1006 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1007 1008 /* 1009 * Validate the descriptor 1010 */ 1011 if (uap->fd >= 0) { 1012 fp = holdfp(p->p_fd, uap->fd, -1); 1013 if (fp == NULL) { 1014 error = EBADF; 1015 goto done; 1016 } 1017 } else { 1018 fp = NULL; 1019 } 1020 1021 /* 1022 * Execute the internal kernel function and clean up. 
1023 */ 1024 get_mplock(); 1025 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 1026 rel_mplock(); 1027 if (fp) 1028 fdrop(fp); 1029 if (error == 0 && uap->sysmsg_result > 0) 1030 error = copyout(buf, uap->buf, uap->sysmsg_result); 1031 done: 1032 if (path) 1033 objcache_put(namei_oc, path); 1034 if (ctl) 1035 kfree(ctl, M_TEMP); 1036 if (buf) 1037 kfree(buf, M_TEMP); 1038 return (error); 1039 } 1040 1041 /* 1042 * Execute a mount control operation by resolving the path to a mount point 1043 * and calling vop_mountctl(). 1044 * 1045 * Use the mount point from the nch instead of the vnode so nullfs mounts 1046 * can properly spike the VOP. 1047 */ 1048 int 1049 kern_mountctl(const char *path, int op, struct file *fp, 1050 const void *ctl, int ctllen, 1051 void *buf, int buflen, int *res) 1052 { 1053 struct vnode *vp; 1054 struct mount *mp; 1055 struct nlookupdata nd; 1056 int error; 1057 1058 *res = 0; 1059 vp = NULL; 1060 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1061 if (error == 0) 1062 error = nlookup(&nd); 1063 if (error == 0) 1064 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1065 mp = nd.nl_nch.mount; 1066 nlookup_done(&nd); 1067 if (error) 1068 return (error); 1069 vn_unlock(vp); 1070 1071 /* 1072 * Must be the root of the filesystem 1073 */ 1074 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1075 vrele(vp); 1076 return (EINVAL); 1077 } 1078 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1079 buf, buflen, res); 1080 vrele(vp); 1081 return (error); 1082 } 1083 1084 int 1085 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1086 { 1087 struct thread *td = curthread; 1088 struct proc *p = td->td_proc; 1089 struct mount *mp; 1090 struct statfs *sp; 1091 char *fullpath, *freepath; 1092 int error; 1093 1094 if ((error = nlookup(nd)) != 0) 1095 return (error); 1096 mp = nd->nl_nch.mount; 1097 sp = &mp->mnt_stat; 1098 if ((error = VFS_STATFS(mp, sp, 
nd->nl_cred)) != 0) 1099 return (error); 1100 1101 error = mount_path(p, mp, &fullpath, &freepath); 1102 if (error) 1103 return(error); 1104 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1105 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1106 kfree(freepath, M_TEMP); 1107 1108 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1109 bcopy(sp, buf, sizeof(*buf)); 1110 /* Only root should have access to the fsid's. */ 1111 if (priv_check(td, PRIV_ROOT)) 1112 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1113 return (0); 1114 } 1115 1116 /* 1117 * statfs_args(char *path, struct statfs *buf) 1118 * 1119 * Get filesystem statistics. 1120 * 1121 * MPALMOSTSAFE 1122 */ 1123 int 1124 sys_statfs(struct statfs_args *uap) 1125 { 1126 struct nlookupdata nd; 1127 struct statfs buf; 1128 int error; 1129 1130 get_mplock(); 1131 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1132 if (error == 0) 1133 error = kern_statfs(&nd, &buf); 1134 nlookup_done(&nd); 1135 if (error == 0) 1136 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1137 rel_mplock(); 1138 return (error); 1139 } 1140 1141 /* 1142 * MPALMOSTSAFE 1143 */ 1144 int 1145 kern_fstatfs(int fd, struct statfs *buf) 1146 { 1147 struct thread *td = curthread; 1148 struct proc *p = td->td_proc; 1149 struct file *fp; 1150 struct mount *mp; 1151 struct statfs *sp; 1152 char *fullpath, *freepath; 1153 int error; 1154 1155 KKASSERT(p); 1156 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1157 return (error); 1158 get_mplock(); 1159 mp = ((struct vnode *)fp->f_data)->v_mount; 1160 if (mp == NULL) { 1161 error = EBADF; 1162 goto done; 1163 } 1164 if (fp->f_cred == NULL) { 1165 error = EINVAL; 1166 goto done; 1167 } 1168 sp = &mp->mnt_stat; 1169 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1170 goto done; 1171 1172 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1173 goto done; 1174 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1175 strlcpy(sp->f_mntonname, fullpath, 
sizeof(sp->f_mntonname)); 1176 kfree(freepath, M_TEMP); 1177 1178 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1179 bcopy(sp, buf, sizeof(*buf)); 1180 1181 /* Only root should have access to the fsid's. */ 1182 if (priv_check(td, PRIV_ROOT)) 1183 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1184 error = 0; 1185 done: 1186 rel_mplock(); 1187 fdrop(fp); 1188 return (error); 1189 } 1190 1191 /* 1192 * fstatfs_args(int fd, struct statfs *buf) 1193 * 1194 * Get filesystem statistics. 1195 * 1196 * MPSAFE 1197 */ 1198 int 1199 sys_fstatfs(struct fstatfs_args *uap) 1200 { 1201 struct statfs buf; 1202 int error; 1203 1204 error = kern_fstatfs(uap->fd, &buf); 1205 1206 if (error == 0) 1207 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1208 return (error); 1209 } 1210 1211 int 1212 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1213 { 1214 struct mount *mp; 1215 struct statvfs *sp; 1216 int error; 1217 1218 if ((error = nlookup(nd)) != 0) 1219 return (error); 1220 mp = nd->nl_nch.mount; 1221 sp = &mp->mnt_vstat; 1222 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1223 return (error); 1224 1225 sp->f_flag = 0; 1226 if (mp->mnt_flag & MNT_RDONLY) 1227 sp->f_flag |= ST_RDONLY; 1228 if (mp->mnt_flag & MNT_NOSUID) 1229 sp->f_flag |= ST_NOSUID; 1230 bcopy(sp, buf, sizeof(*buf)); 1231 return (0); 1232 } 1233 1234 /* 1235 * statfs_args(char *path, struct statfs *buf) 1236 * 1237 * Get filesystem statistics. 
 *
 * MPALMOSTSAFE
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	rel_mplock();
	return (error);
}

/*
 * Get statvfs information for the filesystem underlying descriptor fd.
 * Mirrors kern_fstatfs() but reports through the mnt_vstat cache.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	/* Translate mount flags into the statvfs f_flag bits. */
	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 *
 * MPALMOSTSAFE
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	get_mplock();
	error = kern_fstatvfs(uap->fd, &buf);
	rel_mplock();

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/* Shared state passed to getfsstat_callback() for each scanned mount. */
struct getfsstat_info {
	struct statfs *sfsp;	/* user buffer cursor, NULL = count only */
	long count;		/* number of visible mounts seen */
	long maxcount;		/* user buffer capacity in entries */
	int error;
	int flags;
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

/*
 * MPALMOSTSAFE
 */
int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	get_mplock();
	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	rel_mplock();
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		/* Skip mounts the (possibly chrooted) caller cannot see. */
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/* Shared state passed to getvfsstat_callback() for each scanned mount. */
struct getvfsstat_info {
	struct statfs *sfsp;	/* statfs user buffer cursor */
	struct statvfs *vsfsp;	/* statvfs user buffer cursor */
	long count;
	long maxcount;
	int error;
	int flags;
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

/*
 * MPALMOSTSAFE
 */
int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	get_mplock();
	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	rel_mplock();
	return (info.error);
}

static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->vsfsp && info->count < info->maxcount) {
		/* Skip mounts the (possibly chrooted) caller cannot see. */
		if (info->td->td_proc &&
		    !chroot_visible_mnt(mp, info->td->td_proc)) {
			return(0);
		}
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		/* Same refresh policy for the statvfs side. */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			return(0);
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
1528 * 1529 * MPALMOSTSAFE 1530 */ 1531 int 1532 sys_fchdir(struct fchdir_args *uap) 1533 { 1534 struct thread *td = curthread; 1535 struct proc *p = td->td_proc; 1536 struct filedesc *fdp = p->p_fd; 1537 struct vnode *vp, *ovp; 1538 struct mount *mp; 1539 struct file *fp; 1540 struct nchandle nch, onch, tnch; 1541 int error; 1542 1543 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1544 return (error); 1545 get_mplock(); 1546 vp = (struct vnode *)fp->f_data; 1547 vref(vp); 1548 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1549 if (fp->f_nchandle.ncp == NULL) 1550 error = ENOTDIR; 1551 else 1552 error = checkvp_chdir(vp, td); 1553 if (error) { 1554 vput(vp); 1555 goto done; 1556 } 1557 cache_copy(&fp->f_nchandle, &nch); 1558 1559 /* 1560 * If the ncp has become a mount point, traverse through 1561 * the mount point. 1562 */ 1563 1564 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1565 (mp = cache_findmount(&nch)) != NULL 1566 ) { 1567 error = nlookup_mp(mp, &tnch); 1568 if (error == 0) { 1569 cache_unlock(&tnch); /* leave ref intact */ 1570 vput(vp); 1571 vp = tnch.ncp->nc_vp; 1572 error = vget(vp, LK_SHARED); 1573 KKASSERT(error == 0); 1574 cache_drop(&nch); 1575 nch = tnch; 1576 } 1577 } 1578 if (error == 0) { 1579 ovp = fdp->fd_cdir; 1580 onch = fdp->fd_ncdir; 1581 vn_unlock(vp); /* leave ref intact */ 1582 fdp->fd_cdir = vp; 1583 fdp->fd_ncdir = nch; 1584 cache_drop(&onch); 1585 vrele(ovp); 1586 } else { 1587 cache_drop(&nch); 1588 vput(vp); 1589 } 1590 fdrop(fp); 1591 done: 1592 rel_mplock(); 1593 return (error); 1594 } 1595 1596 int 1597 kern_chdir(struct nlookupdata *nd) 1598 { 1599 struct thread *td = curthread; 1600 struct proc *p = td->td_proc; 1601 struct filedesc *fdp = p->p_fd; 1602 struct vnode *vp, *ovp; 1603 struct nchandle onch; 1604 int error; 1605 1606 if ((error = nlookup(nd)) != 0) 1607 return (error); 1608 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1609 return (ENOENT); 1610 if ((error = vget(vp, LK_SHARED)) != 0) 1611 return (error); 1612 
1613 error = checkvp_chdir(vp, td); 1614 vn_unlock(vp); 1615 if (error == 0) { 1616 ovp = fdp->fd_cdir; 1617 onch = fdp->fd_ncdir; 1618 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1619 fdp->fd_ncdir = nd->nl_nch; 1620 fdp->fd_cdir = vp; 1621 cache_drop(&onch); 1622 vrele(ovp); 1623 cache_zero(&nd->nl_nch); 1624 } else { 1625 vrele(vp); 1626 } 1627 return (error); 1628 } 1629 1630 /* 1631 * chdir_args(char *path) 1632 * 1633 * Change current working directory (``.''). 1634 * 1635 * MPALMOSTSAFE 1636 */ 1637 int 1638 sys_chdir(struct chdir_args *uap) 1639 { 1640 struct nlookupdata nd; 1641 int error; 1642 1643 get_mplock(); 1644 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1645 if (error == 0) 1646 error = kern_chdir(&nd); 1647 nlookup_done(&nd); 1648 rel_mplock(); 1649 return (error); 1650 } 1651 1652 /* 1653 * Helper function for raised chroot(2) security function: Refuse if 1654 * any filedescriptors are open directories. 1655 */ 1656 static int 1657 chroot_refuse_vdir_fds(struct filedesc *fdp) 1658 { 1659 struct vnode *vp; 1660 struct file *fp; 1661 int error; 1662 int fd; 1663 1664 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1665 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1666 continue; 1667 vp = (struct vnode *)fp->f_data; 1668 if (vp->v_type != VDIR) { 1669 fdrop(fp); 1670 continue; 1671 } 1672 fdrop(fp); 1673 return(EPERM); 1674 } 1675 return (0); 1676 } 1677 1678 /* 1679 * This sysctl determines if we will allow a process to chroot(2) if it 1680 * has a directory open: 1681 * 0: disallowed for all processes. 1682 * 1: allowed for processes that were not already chroot(2)'ed. 1683 * 2: allowed for all processes. 1684 */ 1685 1686 static int chroot_allow_open_directories = 1; 1687 1688 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1689 &chroot_allow_open_directories, 0, ""); 1690 1691 /* 1692 * chroot to the specified namecache entry. We obtain the vp from the 1693 * namecache data. 
 The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);		/* leave reference intact */
	if (error == 0) {
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		/* The first chroot also establishes the jail directory. */
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 *
 * MPALMOSTSAFE
 */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	rel_mplock();
	return(error);
}

/*
 * Point the kernel-wide root nch/vnode at the given path via
 * vfs_cache_setroot().  Requires PRIV_VFS_CHROOT.
 */
int
sys_chroot_kernel(struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path);
	vfs_cache_setroot(vp, cache_hold(nch));

error_out:
	nlookup_done(&nd);
error_nond:
	rel_mplock();
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Common code for open(2)/openat(2): perform the lookup described by
 * *nd, open the object, reserve and assign a file descriptor, and
 * return the new descriptor index in *res.
 *
 * MPSAFE
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error;
	struct flock lf;

	/* O_RDONLY|O_WRONLY|O_RDWR all set at once is invalid. */
	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);
	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(fdp, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * is.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	/* Apply an advisory lock at open time when O_EXLOCK/O_SHLOCK given. */
	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;
	return (0);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 *
 * MPALMOSTSAFE
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				  uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 *
 * MPALMOSTSAFE
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
				  &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Common code for mknod(2)/mknodat(2): create a special file at the
 * location resolved by *nd.  mode carries both the S_IF* file type and
 * the permission bits; rmajor/rminor are the device numbers used for
 * VCHR/VBLK nodes.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	/* The S_IF* type selects the vnode type and the required privilege. */
	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		error =
		    priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 *
 * MPALMOSTSAFE
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
 *
 * MPALMOSTSAFE
 */
int
sys_mknodat(struct mknodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}

/*
 * Common code for mkfifo(2)/mkfifoat(2): create a FIFO at the location
 * resolved by *nd with the given permission bits (masked by the
 * process umask).
 */
int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 *
 * MPALMOSTSAFE
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * mkfifoat_args(int fd, char *path, mode_t mode)
 *
 * Create a named pipe.  The path is relative to the directory associated
 * with fd.
 *
 * MPALMOSTSAFE
 */
int
sys_mkfifoat(struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Enforce the hardlink_check_uid/gid policy for an unprivileged caller
 * attempting to hardlink vp.  Returns 0 if the link is permitted.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Common code for link(2): hardlink the file resolved by *nd at the
 * location resolved by *linknd.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtained a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 *
 * MPALMOSTSAFE
 */
int
sys_link(struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * Common code for symlink(2)/symlinkat(2): create a symbolic link at
 * the location resolved by *nd whose target text is path.  mode
 * supplies the permission bits for the new link.
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 *
 * MPALMOSTSAFE
 */
int
sys_symlink(struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		get_mplock();
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
		rel_mplock();
	}
	objcache_put(namei_oc, path);
	return (error);
}

/*
 * symlinkat_args(char *path1, int fd, char *path2)
 *
 * Make a symbolic link.  The path2 argument is relative to the directory
 * associated with fd.
 *
 * MPALMOSTSAFE
 */
int
sys_symlinkat(struct symlinkat_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct file *fp;
	char *path1;
	int error;
	int mode;

	path1 = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
	if (error == 0) {
		get_mplock();
		error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
					UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path1, mode);
		}
		nlookup_done_at(&nd, fp);
		rel_mplock();
	}
	objcache_put(namei_oc, path1);
	return (error);
}

/*
 * undelete_args(char *path)
 *
 * Delete a whiteout from the filesystem.
 *
 * MPALMOSTSAFE
 */
int
sys_undelete(struct undelete_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	bwillinode(1);
	nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
				      NAMEI_DELETE);
	}
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * Common code for unlink(2)/unlinkat(2): remove the name resolved by
 * *nd after the write check passes.
 */
int
kern_unlink(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * unlink_args(char *path)
 *
 * Delete a name from the filesystem.
 *
 * MPALMOSTSAFE
 */
int
sys_unlink(struct unlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_unlink(&nd);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}


/*
 * unlinkat_args(int fd, char *path, int flags)
 *
 * Delete the file or directory entry pointed to by fd/path.
 *
 * MPALMOSTSAFE
 */
int
sys_unlinkat(struct unlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	/* AT_REMOVEDIR is the only flag accepted by this syscall. */
	if (uap->flags & ~AT_REMOVEDIR)
		return (EINVAL);

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		if (uap->flags & AT_REMOVEDIR)
			error = kern_rmdir(&nd);
		else
			error = kern_unlink(&nd);
	}
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}

/*
 * Common code for lseek(2).  Computes the new offset per whence and
 * stores the resulting file offset in *res.  Each switch case acquires
 * f_spin before new_offset is computed so the f_offset update below
 * occurs under the spinlock.
 *
 * MPALMOSTSAFE
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	switch (whence) {
	case L_INCR:
		spin_lock(&fp->f_spin);
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		/* Seek relative to EOF; fetch the size before locking. */
		get_mplock();
		error = VOP_GETATTR(vp, &vattr);
		rel_mplock();
		spin_lock(&fp->f_spin);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		spin_lock(&fp->f_spin);
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		spin_lock(&fp->f_spin);
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files or directories.
	 *
	 * Normally we would also not want to allow negative offsets for
	 * character and block-special devices.  However kvm addresses
	 * on 64 bit architectures might appear to be negative and must
	 * be allowed.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	*res = fp->f_offset;
	spin_unlock(&fp->f_spin);
done:
	fdrop(fp);
	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 *
 * MPSAFE
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
			   &uap->sysmsg_offset);

	return (error);
}

/*
 * Check if current process can access given file.  amode is a bitmask
 * of *_OK access bits.  flags is a bitmask of AT_* flags.
 */
int
kern_access(struct nlookupdata *nd, int amode, int flags)
{
	struct vnode *vp;
	int error, mode;

	if (flags & ~AT_EACCESS)
		return (EINVAL);
	if ((error = nlookup(nd)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	if (error)
		return (error);

	/* Flags == 0 means only check for existence. */
	if (amode) {
		mode = 0;
		if (amode & R_OK)
			mode |= VREAD;
		if (amode & W_OK)
			mode |= VWRITE;
		if (amode & X_OK)
			mode |= VEXEC;
		if ((mode & VWRITE) == 0 ||
		    (error = vn_writechk(vp, &nd->nl_nch)) == 0)
			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry.  This is a hack at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 *
 * MPALMOSTSAFE
 */
int
sys_access(struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, 0);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}


/*
 * faccessat_args(int fd, char *path, int amode, int flags)
 *
 * Check access permissions.
 *
 * MPALMOSTSAFE
 */
int
sys_faccessat(struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
				NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}


/*
 * MPSAFE
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;
	thread_t td;

	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	td = curthread;
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the entry.  This
	 * is a hack at the moment.
2752 */ 2753 if (error == ESTALE) { 2754 vput(vp); 2755 cache_setunresolved(&nd->nl_nch); 2756 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2757 if (error == 0) 2758 goto again; 2759 } else { 2760 vput(vp); 2761 } 2762 return (error); 2763 } 2764 2765 /* 2766 * stat_args(char *path, struct stat *ub) 2767 * 2768 * Get file status; this version follows links. 2769 * 2770 * MPSAFE 2771 */ 2772 int 2773 sys_stat(struct stat_args *uap) 2774 { 2775 struct nlookupdata nd; 2776 struct stat st; 2777 int error; 2778 2779 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2780 if (error == 0) { 2781 error = kern_stat(&nd, &st); 2782 if (error == 0) 2783 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2784 } 2785 nlookup_done(&nd); 2786 return (error); 2787 } 2788 2789 /* 2790 * lstat_args(char *path, struct stat *ub) 2791 * 2792 * Get file status; this version does not follow links. 2793 * 2794 * MPALMOSTSAFE 2795 */ 2796 int 2797 sys_lstat(struct lstat_args *uap) 2798 { 2799 struct nlookupdata nd; 2800 struct stat st; 2801 int error; 2802 2803 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2804 if (error == 0) { 2805 error = kern_stat(&nd, &st); 2806 if (error == 0) 2807 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2808 } 2809 nlookup_done(&nd); 2810 return (error); 2811 } 2812 2813 /* 2814 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2815 * 2816 * Get status of file pointed to by fd/path. 2817 * 2818 * MPALMOSTSAFE 2819 */ 2820 int 2821 sys_fstatat(struct fstatat_args *uap) 2822 { 2823 struct nlookupdata nd; 2824 struct stat st; 2825 int error; 2826 int flags; 2827 struct file *fp; 2828 2829 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2830 return (EINVAL); 2831 2832 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 2833 2834 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2835 UIO_USERSPACE, flags); 2836 if (error == 0) { 2837 error = kern_stat(&nd, &st); 2838 if (error == 0) 2839 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2840 } 2841 nlookup_done_at(&nd, fp); 2842 return (error); 2843 } 2844 2845 /* 2846 * pathconf_Args(char *path, int name) 2847 * 2848 * Get configurable pathname variables. 2849 * 2850 * MPALMOSTSAFE 2851 */ 2852 int 2853 sys_pathconf(struct pathconf_args *uap) 2854 { 2855 struct nlookupdata nd; 2856 struct vnode *vp; 2857 int error; 2858 2859 vp = NULL; 2860 get_mplock(); 2861 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2862 if (error == 0) 2863 error = nlookup(&nd); 2864 if (error == 0) 2865 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2866 nlookup_done(&nd); 2867 if (error == 0) { 2868 error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg); 2869 vput(vp); 2870 } 2871 rel_mplock(); 2872 return (error); 2873 } 2874 2875 /* 2876 * XXX: daver 2877 * kern_readlink isn't properly split yet. There is a copyin burried 2878 * in VOP_READLINK(). 
2879 */ 2880 int 2881 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2882 { 2883 struct thread *td = curthread; 2884 struct vnode *vp; 2885 struct iovec aiov; 2886 struct uio auio; 2887 int error; 2888 2889 if ((error = nlookup(nd)) != 0) 2890 return (error); 2891 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2892 if (error) 2893 return (error); 2894 if (vp->v_type != VLNK) { 2895 error = EINVAL; 2896 } else { 2897 aiov.iov_base = buf; 2898 aiov.iov_len = count; 2899 auio.uio_iov = &aiov; 2900 auio.uio_iovcnt = 1; 2901 auio.uio_offset = 0; 2902 auio.uio_rw = UIO_READ; 2903 auio.uio_segflg = UIO_USERSPACE; 2904 auio.uio_td = td; 2905 auio.uio_resid = count; 2906 error = VOP_READLINK(vp, &auio, td->td_ucred); 2907 } 2908 vput(vp); 2909 *res = count - auio.uio_resid; 2910 return (error); 2911 } 2912 2913 /* 2914 * readlink_args(char *path, char *buf, int count) 2915 * 2916 * Return target name of a symbolic link. 2917 * 2918 * MPALMOSTSAFE 2919 */ 2920 int 2921 sys_readlink(struct readlink_args *uap) 2922 { 2923 struct nlookupdata nd; 2924 int error; 2925 2926 get_mplock(); 2927 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2928 if (error == 0) { 2929 error = kern_readlink(&nd, uap->buf, uap->count, 2930 &uap->sysmsg_result); 2931 } 2932 nlookup_done(&nd); 2933 rel_mplock(); 2934 return (error); 2935 } 2936 2937 /* 2938 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 2939 * 2940 * Return target name of a symbolic link. The path is relative to the 2941 * directory associated with fd. 
 *
 * MPALMOSTSAFE
 */
int
sys_readlinkat(struct readlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->bufsize,
				      &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}

/*
 * Common worker: set the flags word on a vnode via VOP_SETATTR().
 * The caller supplies an unlocked, referenced vnode.
 */
static int
setfflags(struct vnode *vp, int flags)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
	    ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_flags = flags;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return (error);
}

/*
 * chflags(char *path, int flags)
 *
 * Change flags of a file given a path name.
 *
 * MPALMOSTSAFE
 */
int
sys_chflags(struct chflags_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		/* cache_vref() succeeded; setfflags() locks vp itself */
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	rel_mplock();
	return (error);
}

/*
 * lchflags(char *path, int flags)
 *
 * Change flags of a file given a path name, but don't follow symlinks.
 *
 * MPALMOSTSAFE
 */
int
sys_lchflags(struct lchflags_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	get_mplock();
	/* identical to sys_chflags() except NLC_FOLLOW is not set */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	rel_mplock();
	return (error);
}

/*
 * fchflags_args(int fd, int flags)
 *
 * Change flags of a file given a file descriptor.
 *
 * MPALMOSTSAFE
 */
int
sys_fchflags(struct fchflags_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	get_mplock();
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfflags((struct vnode *) fp->f_data, uap->flags);
	rel_mplock();
	fdrop(fp);
	return (error);
}

/*
 * Common worker: set the file mode on a vnode via VOP_SETATTR().
 * The caller supplies an unlocked, referenced vnode.
 */
static int
setfmode(struct vnode *vp, int mode)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_mode = mode & ALLPERMS;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return error;
}

/*
 * chmod worker.  Resolves *nd, checks that the mount is writable, and
 * applies the new mode.  The caller remains responsible for
 * nlookup_done().
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfmode(vp, mode);
	vrele(vp);
	return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 *
 * MPALMOSTSAFE
 */
int
sys_chmod(struct chmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
 *
 * MPALMOSTSAFE
 */
int
sys_lchmod(struct lchmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * fchmod_args(int fd, int mode)
 *
 * Change mode of a file given a file descriptor.
 *
 * MPALMOSTSAFE
 */
int
sys_fchmod(struct fchmod_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	get_mplock();
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfmode((struct vnode *)fp->f_data, uap->mode);
	rel_mplock();
	fdrop(fp);
	return (error);
}

/*
 * fchmodat_args(char *path, int mode)
 *
 * Change mode of a file pointed to by fd/path.
 *
 * MPALMOSTSAFE
 */
int
sys_fchmodat(struct fchmodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}

/*
 * Common worker: set owner/group on a vnode via VOP_SETATTR().
 * The caller supplies an unlocked, referenced vnode.
 */
static int
setfown(struct vnode *vp, uid_t uid, gid_t gid)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_uid = uid;
		vattr.va_gid = gid;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return error;
}

/*
 * chown worker.  Resolves *nd, checks that the mount is writable, and
 * applies the new ownership.  The caller remains responsible for
 * nlookup_done().
 */
int
kern_chown(struct nlookupdata *nd, int uid, int gid)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfown(vp, uid, gid);
	vrele(vp);
	return (error);
}

/*
 * chown(char *path, int uid, int gid)
 *
 * Set ownership given a path name.
 *
 * MPALMOSTSAFE
 */
int
sys_chown(struct chown_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * lchown_args(char *path, int uid, int gid)
 *
 * Set ownership given a path name, do not cross symlinks.
 *
 * MPALMOSTSAFE
 */
int
sys_lchown(struct lchown_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * fchown_args(int fd, int uid, int gid)
 *
 * Set ownership given a file descriptor.
 *
 * MPALMOSTSAFE
 */
int
sys_fchown(struct fchown_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	get_mplock();
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid);
	rel_mplock();
	fdrop(fp);
	return (error);
}

/*
 * fchownat(int fd, char *path, int uid, int gid, int flags)
 *
 * Set ownership of file pointed to by fd/path.
 *
 * MPALMOSTSAFE
 */
int
sys_fchownat(struct fchownat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}


/*
 * Convert a utimes()-style timeval pair to a timespec pair.  A NULL
 * tvp means "use the current time" for both access and modification.
 */
static int
getutimes(const struct timeval *tvp, struct timespec *tsp)
{
	struct timeval tv[2];

	if (tvp == NULL) {
		microtime(&tv[0]);
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	}
	return 0;
}

/*
 * Apply access/modification times to a vnode via VOP_SETATTR().  The
 * caller supplies a locked vnode.  nullflag indicates the original
 * user pointer was NULL (sets VA_UTIMES_NULL for permission checks).
 */
static int
setutimes(struct vnode *vp, struct vattr *vattr,
	  const struct timespec *ts, int nullflag)
{
	struct thread *td = curthread;
	int error;

	VATTR_NULL(vattr);
	vattr->va_atime = ts[0];
	vattr->va_mtime = ts[1];
	if (nullflag)
		vattr->va_vaflags |= VA_UTIMES_NULL;
	error = VOP_SETATTR(vp, vattr, td->td_ucred);

	return error;
}

/*
 * utimes worker.  Resolves *nd and applies the times in tptr (NULL
 * means "now").  The caller remains responsible for nlookup_done().
 */
int
kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
{
	struct timespec ts[2];
	struct vnode *vp;
	struct vattr vattr;
	int error;

	if ((error = getutimes(tptr, ts)) != 0)
		return (error);

	/*
	 * NOTE: utimes() succeeds for the owner even if the file
	 * is not user-writable.
	 */
	nd->nl_flags |= NLC_OWN | NLC_WRITE;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = setutimes(vp, &vattr, ts, (tptr == NULL));
			vput(vp);
		}
	}
	/* drop the reference acquired by cache_vref() above */
	vrele(vp);
	return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 *
 * MPALMOSTSAFE
 */
int
sys_utimes(struct utimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 *
 * MPALMOSTSAFE
 */
int
sys_lutimes(struct lutimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	get_mplock();
	/* identical to sys_utimes() except NLC_FOLLOW is not set */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * Set utimes on a file descriptor.  The creds used to open the
 * file are used to determine whether the operation is allowed
 * or not.
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct timespec ts[2];
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	error = getutimes(tptr, ts);
	if (error)
		return (error);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0) {
		vp = fp->f_data;
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			/*
			 * Permission is checked against fp->f_cred (the
			 * credentials used at open time), not the current
			 * thread's credentials.
			 */
			error = VOP_GETATTR(vp, &vattr);
			if (error == 0) {
				error = naccess_va(&vattr, NLC_OWN | NLC_WRITE,
						   fp->f_cred);
			}
			if (error == 0) {
				error = setutimes(vp, &vattr, ts,
						  (tptr == NULL));
			}
			vput(vp);
		}
	}
	fdrop(fp);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 *
 * MPALMOSTSAFE
 */
int
sys_futimes(struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	get_mplock();
	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	rel_mplock();

	return (error);
}

/*
 * truncate worker.  Resolves *nd with truncate intent and sets the
 * file size via VOP_SETATTR().  The caller remains responsible for
 * nlookup_done().
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;

	if (length < 0)
		return(EINVAL);
	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) {
		vrele(vp);
		return (error);
	}
	if (vp->v_type == VDIR) {
		error = EISDIR;
	} else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
 *
 * MPALMOSTSAFE
 */
int
sys_truncate(struct truncate_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_truncate(&nd, uap->length);
	nlookup_done(&nd);
	rel_mplock();
	return error;
}

/*
 * ftruncate worker.  The descriptor must be open for writing and must
 * not be append-only.
 */
int
kern_ftruncate(int fd, off_t length)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	struct file *fp;
	int error;

	if (length < 0)
		return(EINVAL);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp) {
		error = ncp_writechk(&fp->f_nchandle);
		if (error)
			goto done;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EINVAL;
		goto done;
	}
	if (fp->f_flag & FAPPENDONLY) {	/* inode was set append-only */
		error = EINVAL;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
	} else if ((error = vn_writechk(vp, NULL)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, fp->f_cred);
	}
	vn_unlock(vp);
done:
	fdrop(fp);
	return (error);
}

/*
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.
 *
 * MPALMOSTSAFE
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	get_mplock();
	error = kern_ftruncate(uap->fd, uap->length);
	rel_mplock();

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 *
 * MPALMOSTSAFE
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0)
		return (error);
	get_mplock();
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* flush dirty VM pages backing the vnode before the fsync proper */
	if ((obj = vp->v_object) != NULL)
		vm_object_page_clean(obj, 0, 0, 0);
	error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	rel_mplock();
	fdrop(fp);

	return (error);
}

/*
 * rename worker.  Resolves both source and destination, performs an
 * elaborate unlock/relock/recheck dance to avoid deadlocks, validates
 * that the operation stays within one filesystem and does not create a
 * loop, then issues VOP_NRENAME() (or VOP_NREMOVE() for the hardlink
 * special case).  Callers remain responsible for nlookup_done() on
 * both fromnd and tond.
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * make sure the parent directories linkages are the same
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 *
 * MPALMOSTSAFE
 */
int
sys_rename(struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	get_mplock();
	error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
	if (error == 0) {
		error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_rename(&fromnd, &tond);
		nlookup_done(&tond);
	}
	nlookup_done(&fromnd);
	rel_mplock();
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 *
 * MPALMOSTSAFE
 */
int
sys_renameat(struct renameat_args *uap)
{
	struct nlookupdata oldnd, newnd;
	struct file *oldfp, *newfp;
	int error;

	get_mplock();
	error = nlookup_init_at(&oldnd, &oldfp, uap->oldfd, uap->old,
				UIO_USERSPACE, 0);
	if (error == 0) {
		error = nlookup_init_at(&newnd, &newfp, uap->newfd, uap->new,
					UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_rename(&oldnd, &newnd);
		nlookup_done_at(&newnd, newfp);
	}
	nlookup_done_at(&oldnd, oldfp);
	rel_mplock();
	return (error);
}

/*
 * Common code for mkdir(2) and mkdirat(2).  The caller owns the
 * nlookupdata and is responsible for calling nlookup_done() on it
 * afterwards (the sys_* wrappers below do so).
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	/* The target name must not already exist */
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	/* Requested mode is filtered through the process umask */
	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

	vp = NULL;
	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
	if (error == 0)
		vput(vp);	/* new vnode comes back locked+ref'd on success */
	return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 *
 * MPALMOSTSAFE
 */
int
sys_mkdir(struct mkdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * mkdirat_args(int fd, char *path, mode_t mode)
 *
 * Make a directory file.  The path is relative to the directory associated
 * with fd.
 *
 * MPALMOSTSAFE
 */
int
sys_mkdirat(struct mkdirat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	get_mplock();
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	rel_mplock();
	return (error);
}

/*
 * Common code for rmdir(2).  The caller owns the nlookupdata and must
 * call nlookup_done() on it afterwards.
 */
int
kern_rmdir(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	/*
	 * Do not allow directories representing mount points to be
	 * deleted, even if empty.  Check write perms on mount point
	 * in case the vnode is aliased (aka nullfs).
	 */
	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 *
 * MPALMOSTSAFE
 */
int
sys_rmdir(struct rmdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_rmdir(&nd);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * Common code for getdirentries(2) and getdents(2).  Reads up to count
 * bytes of directory entries from fd into buf.  On success *res is set
 * to the number of bytes transferred and, if basep is not NULL, *basep
 * receives the seek offset of the first entry read.  direction selects
 * whether buf is a user or system space address.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
		   enum uio_seg direction)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	off_t loff;
	int error, eofflag;

	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
unionread:
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto done;
	}
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = direction;
	auio.uio_td = td;
	auio.uio_resid = count;
	loff = auio.uio_offset = fp->f_offset;
	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
	fp->f_offset = auio.uio_offset;
	if (error)
		goto done;

	/*
	 * Nothing was transferred.  If the union filesystem hook is
	 * installed, give it a chance to substitute an underlying
	 * directory vnode (it returns -1 to request a retry).
	 */
	if (count == auio.uio_resid) {
		if (union_dircheckp) {
			error = union_dircheckp(td, &vp, fp);
			if (error == -1)
				goto unionread;
			if (error)
				goto done;
		}
#if 0
		if ((vp->v_flag & VROOT) &&
		    (vp->v_mount->mnt_flag & MNT_UNION)) {
			struct vnode *tvp = vp;
			vp = vp->v_mount->mnt_vnodecovered;
			vref(vp);
			fp->f_data = vp;
			fp->f_offset = 0;
			vrele(tvp);
			goto unionread;
		}
#endif
	}

	/*
	 * WARNING!  *basep may not be wide enough to accommodate the
	 * seek offset.   XXX should we hack this to return the upper 32 bits
	 * for offsets greater than 4G?
	 */
	if (basep) {
		*basep = (long)loff;
	}
	*res = count - auio.uio_resid;
done:
	fdrop(fp);
	return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
 *
 * MPALMOSTSAFE
 */
int
sys_getdirentries(struct getdirentries_args *uap)
{
	long base;
	int error;

	get_mplock();
	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
				   &uap->sysmsg_result, UIO_USERSPACE);
	rel_mplock();

	if (error == 0 && uap->basep)
		error = copyout(&base, uap->basep, sizeof(*uap->basep));
	return (error);
}

/*
 * getdents_args(int fd, char *buf, size_t count)
 *
 * MPALMOSTSAFE
 */
int
sys_getdents(struct getdents_args *uap)
{
	int error;

	get_mplock();
	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
				   &uap->sysmsg_result, UIO_USERSPACE);
	rel_mplock();

	return (error);
}

/*
 * Set the mode mask for creation of filesystem nodes.
 *
 * umask(int newmask)
 *
 * MPSAFE
 */
int
sys_umask(struct umask_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;

	fdp = p->p_fd;
	uap->sysmsg_result = fdp->fd_cmask;	/* return the previous mask */
	fdp->fd_cmask = uap->newmask & ALLPERMS;
	return (0);
}

/*
 * revoke(char *path)
 *
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
4191 * 4192 * MPALMOSTSAFE 4193 */ 4194 int 4195 sys_revoke(struct revoke_args *uap) 4196 { 4197 struct nlookupdata nd; 4198 struct vattr vattr; 4199 struct vnode *vp; 4200 struct ucred *cred; 4201 int error; 4202 4203 vp = NULL; 4204 get_mplock(); 4205 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4206 if (error == 0) 4207 error = nlookup(&nd); 4208 if (error == 0) 4209 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4210 cred = crhold(nd.nl_cred); 4211 nlookup_done(&nd); 4212 if (error == 0) { 4213 if (error == 0) 4214 error = VOP_GETATTR(vp, &vattr); 4215 if (error == 0 && cred->cr_uid != vattr.va_uid) 4216 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4217 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4218 if (vcount(vp) > 0) 4219 error = vrevoke(vp, cred); 4220 } else if (error == 0) { 4221 error = vrevoke(vp, cred); 4222 } 4223 vrele(vp); 4224 } 4225 if (cred) 4226 crfree(cred); 4227 rel_mplock(); 4228 return (error); 4229 } 4230 4231 /* 4232 * getfh_args(char *fname, fhandle_t *fhp) 4233 * 4234 * Get (NFS) file handle 4235 * 4236 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4237 * mount. This allows nullfs mounts to be explicitly exported. 4238 * 4239 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4240 * 4241 * nullfs mounts of subdirectories are not safe. That is, it will 4242 * work, but you do not really have protection against access to 4243 * the related parent directories. 
4244 * 4245 * MPALMOSTSAFE 4246 */ 4247 int 4248 sys_getfh(struct getfh_args *uap) 4249 { 4250 struct thread *td = curthread; 4251 struct nlookupdata nd; 4252 fhandle_t fh; 4253 struct vnode *vp; 4254 struct mount *mp; 4255 int error; 4256 4257 /* 4258 * Must be super user 4259 */ 4260 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4261 return (error); 4262 4263 vp = NULL; 4264 get_mplock(); 4265 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4266 if (error == 0) 4267 error = nlookup(&nd); 4268 if (error == 0) 4269 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4270 mp = nd.nl_nch.mount; 4271 nlookup_done(&nd); 4272 if (error == 0) { 4273 bzero(&fh, sizeof(fh)); 4274 fh.fh_fsid = mp->mnt_stat.f_fsid; 4275 error = VFS_VPTOFH(vp, &fh.fh_fid); 4276 vput(vp); 4277 if (error == 0) 4278 error = copyout(&fh, uap->fhp, sizeof(fh)); 4279 } 4280 rel_mplock(); 4281 return (error); 4282 } 4283 4284 /* 4285 * fhopen_args(const struct fhandle *u_fhp, int flags) 4286 * 4287 * syscall for the rpc.lockd to use to translate a NFS file handle into 4288 * an open descriptor. 4289 * 4290 * warning: do not remove the priv_check() call or this becomes one giant 4291 * security hole. 4292 * 4293 * MPALMOSTSAFE 4294 */ 4295 int 4296 sys_fhopen(struct fhopen_args *uap) 4297 { 4298 struct thread *td = curthread; 4299 struct filedesc *fdp = td->td_proc->p_fd; 4300 struct mount *mp; 4301 struct vnode *vp; 4302 struct fhandle fhp; 4303 struct vattr vat; 4304 struct vattr *vap = &vat; 4305 struct flock lf; 4306 int fmode, mode, error, type; 4307 struct file *nfp; 4308 struct file *fp; 4309 int indx; 4310 4311 /* 4312 * Must be super user 4313 */ 4314 error = priv_check(td, PRIV_ROOT); 4315 if (error) 4316 return (error); 4317 4318 fmode = FFLAGS(uap->flags); 4319 4320 /* 4321 * Why not allow a non-read/write open for our lockd? 
4322 */ 4323 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4324 return (EINVAL); 4325 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4326 if (error) 4327 return(error); 4328 4329 /* 4330 * Find the mount point 4331 */ 4332 get_mplock(); 4333 mp = vfs_getvfs(&fhp.fh_fsid); 4334 if (mp == NULL) { 4335 error = ESTALE; 4336 goto done; 4337 } 4338 /* now give me my vnode, it gets returned to me locked */ 4339 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4340 if (error) 4341 goto done; 4342 /* 4343 * from now on we have to make sure not 4344 * to forget about the vnode 4345 * any error that causes an abort must vput(vp) 4346 * just set error = err and 'goto bad;'. 4347 */ 4348 4349 /* 4350 * from vn_open 4351 */ 4352 if (vp->v_type == VLNK) { 4353 error = EMLINK; 4354 goto bad; 4355 } 4356 if (vp->v_type == VSOCK) { 4357 error = EOPNOTSUPP; 4358 goto bad; 4359 } 4360 mode = 0; 4361 if (fmode & (FWRITE | O_TRUNC)) { 4362 if (vp->v_type == VDIR) { 4363 error = EISDIR; 4364 goto bad; 4365 } 4366 error = vn_writechk(vp, NULL); 4367 if (error) 4368 goto bad; 4369 mode |= VWRITE; 4370 } 4371 if (fmode & FREAD) 4372 mode |= VREAD; 4373 if (mode) { 4374 error = VOP_ACCESS(vp, mode, td->td_ucred); 4375 if (error) 4376 goto bad; 4377 } 4378 if (fmode & O_TRUNC) { 4379 vn_unlock(vp); /* XXX */ 4380 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4381 VATTR_NULL(vap); 4382 vap->va_size = 0; 4383 error = VOP_SETATTR(vp, vap, td->td_ucred); 4384 if (error) 4385 goto bad; 4386 } 4387 4388 /* 4389 * VOP_OPEN needs the file pointer so it can potentially override 4390 * it. 4391 * 4392 * WARNING! no f_nchandle will be associated when fhopen()ing a 4393 * directory. XXX 4394 */ 4395 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4396 goto bad; 4397 fp = nfp; 4398 4399 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4400 if (error) { 4401 /* 4402 * setting f_ops this way prevents VOP_CLOSE from being 4403 * called or fdrop() releasing the vp from v_data. 
Since 4404 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4405 */ 4406 fp->f_ops = &badfileops; 4407 fp->f_data = NULL; 4408 goto bad_drop; 4409 } 4410 4411 /* 4412 * The fp is given its own reference, we still have our ref and lock. 4413 * 4414 * Assert that all regular files must be created with a VM object. 4415 */ 4416 if (vp->v_type == VREG && vp->v_object == NULL) { 4417 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 4418 goto bad_drop; 4419 } 4420 4421 /* 4422 * The open was successful. Handle any locking requirements. 4423 */ 4424 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4425 lf.l_whence = SEEK_SET; 4426 lf.l_start = 0; 4427 lf.l_len = 0; 4428 if (fmode & O_EXLOCK) 4429 lf.l_type = F_WRLCK; 4430 else 4431 lf.l_type = F_RDLCK; 4432 if (fmode & FNONBLOCK) 4433 type = 0; 4434 else 4435 type = F_WAIT; 4436 vn_unlock(vp); 4437 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 4438 /* 4439 * release our private reference. 4440 */ 4441 fsetfd(fdp, NULL, indx); 4442 fdrop(fp); 4443 vrele(vp); 4444 goto done; 4445 } 4446 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4447 fp->f_flag |= FHASLOCK; 4448 } 4449 4450 /* 4451 * Clean up. Associate the file pointer with the previously 4452 * reserved descriptor and return it. 
4453 */ 4454 vput(vp); 4455 rel_mplock(); 4456 fsetfd(fdp, fp, indx); 4457 fdrop(fp); 4458 uap->sysmsg_result = indx; 4459 return (0); 4460 4461 bad_drop: 4462 fsetfd(fdp, NULL, indx); 4463 fdrop(fp); 4464 bad: 4465 vput(vp); 4466 done: 4467 rel_mplock(); 4468 return (error); 4469 } 4470 4471 /* 4472 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4473 * 4474 * MPALMOSTSAFE 4475 */ 4476 int 4477 sys_fhstat(struct fhstat_args *uap) 4478 { 4479 struct thread *td = curthread; 4480 struct stat sb; 4481 fhandle_t fh; 4482 struct mount *mp; 4483 struct vnode *vp; 4484 int error; 4485 4486 /* 4487 * Must be super user 4488 */ 4489 error = priv_check(td, PRIV_ROOT); 4490 if (error) 4491 return (error); 4492 4493 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4494 if (error) 4495 return (error); 4496 4497 get_mplock(); 4498 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4499 error = ESTALE; 4500 if (error == 0) { 4501 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4502 error = vn_stat(vp, &sb, td->td_ucred); 4503 vput(vp); 4504 } 4505 } 4506 rel_mplock(); 4507 if (error == 0) 4508 error = copyout(&sb, uap->sb, sizeof(sb)); 4509 return (error); 4510 } 4511 4512 /* 4513 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4514 * 4515 * MPALMOSTSAFE 4516 */ 4517 int 4518 sys_fhstatfs(struct fhstatfs_args *uap) 4519 { 4520 struct thread *td = curthread; 4521 struct proc *p = td->td_proc; 4522 struct statfs *sp; 4523 struct mount *mp; 4524 struct vnode *vp; 4525 struct statfs sb; 4526 char *fullpath, *freepath; 4527 fhandle_t fh; 4528 int error; 4529 4530 /* 4531 * Must be super user 4532 */ 4533 if ((error = priv_check(td, PRIV_ROOT))) 4534 return (error); 4535 4536 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4537 return (error); 4538 4539 get_mplock(); 4540 4541 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4542 error = ESTALE; 4543 goto done; 4544 } 4545 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4546 error = ESTALE; 4547 goto 
done; 4548 } 4549 4550 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4551 goto done; 4552 mp = vp->v_mount; 4553 sp = &mp->mnt_stat; 4554 vput(vp); 4555 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4556 goto done; 4557 4558 error = mount_path(p, mp, &fullpath, &freepath); 4559 if (error) 4560 goto done; 4561 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4562 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4563 kfree(freepath, M_TEMP); 4564 4565 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4566 if (priv_check(td, PRIV_ROOT)) { 4567 bcopy(sp, &sb, sizeof(sb)); 4568 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4569 sp = &sb; 4570 } 4571 error = copyout(sp, uap->buf, sizeof(*sp)); 4572 done: 4573 rel_mplock(); 4574 return (error); 4575 } 4576 4577 /* 4578 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4579 * 4580 * MPALMOSTSAFE 4581 */ 4582 int 4583 sys_fhstatvfs(struct fhstatvfs_args *uap) 4584 { 4585 struct thread *td = curthread; 4586 struct proc *p = td->td_proc; 4587 struct statvfs *sp; 4588 struct mount *mp; 4589 struct vnode *vp; 4590 fhandle_t fh; 4591 int error; 4592 4593 /* 4594 * Must be super user 4595 */ 4596 if ((error = priv_check(td, PRIV_ROOT))) 4597 return (error); 4598 4599 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4600 return (error); 4601 4602 get_mplock(); 4603 4604 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4605 error = ESTALE; 4606 goto done; 4607 } 4608 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4609 error = ESTALE; 4610 goto done; 4611 } 4612 4613 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4614 goto done; 4615 mp = vp->v_mount; 4616 sp = &mp->mnt_vstat; 4617 vput(vp); 4618 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4619 goto done; 4620 4621 sp->f_flag = 0; 4622 if (mp->mnt_flag & MNT_RDONLY) 4623 sp->f_flag |= ST_RDONLY; 4624 if (mp->mnt_flag & MNT_NOSUID) 4625 sp->f_flag |= ST_NOSUID; 4626 error = copyout(sp, uap->buf, sizeof(*sp)); 4627 done: 4628 
rel_mplock(); 4629 return (error); 4630 } 4631 4632 4633 /* 4634 * Syscall to push extended attribute configuration information into the 4635 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4636 * a command (int cmd), and attribute name and misc data. For now, the 4637 * attribute name is left in userspace for consumption by the VFS_op. 4638 * It will probably be changed to be copied into sysspace by the 4639 * syscall in the future, once issues with various consumers of the 4640 * attribute code have raised their hands. 4641 * 4642 * Currently this is used only by UFS Extended Attributes. 4643 * 4644 * MPALMOSTSAFE 4645 */ 4646 int 4647 sys_extattrctl(struct extattrctl_args *uap) 4648 { 4649 struct nlookupdata nd; 4650 struct vnode *vp; 4651 char attrname[EXTATTR_MAXNAMELEN]; 4652 int error; 4653 size_t size; 4654 4655 get_mplock(); 4656 4657 attrname[0] = 0; 4658 vp = NULL; 4659 error = 0; 4660 4661 if (error == 0 && uap->filename) { 4662 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4663 NLC_FOLLOW); 4664 if (error == 0) 4665 error = nlookup(&nd); 4666 if (error == 0) 4667 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4668 nlookup_done(&nd); 4669 } 4670 4671 if (error == 0 && uap->attrname) { 4672 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4673 &size); 4674 } 4675 4676 if (error == 0) { 4677 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4678 if (error == 0) 4679 error = nlookup(&nd); 4680 if (error == 0) 4681 error = ncp_writechk(&nd.nl_nch); 4682 if (error == 0) { 4683 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4684 uap->attrnamespace, 4685 uap->attrname, nd.nl_cred); 4686 } 4687 nlookup_done(&nd); 4688 } 4689 4690 rel_mplock(); 4691 4692 return (error); 4693 } 4694 4695 /* 4696 * Syscall to get a named extended attribute on a file or directory. 
 *
 * MPALMOSTSAFE
 */
int
sys_extattr_set_file(struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;
	get_mplock();

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		rel_mplock();
		return (error);
	}

	/* Describe the user-space attribute data to be written */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	/* nd.nl_cred is used above, so nlookup_done() must come after */
	vput(vp);
	nlookup_done(&nd);
	rel_mplock();
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
 *
 * MPALMOSTSAFE
 */
int
sys_extattr_get_file(struct extattr_get_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct uio auio;
	struct iovec aiov;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;
	get_mplock();

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		rel_mplock();
		return (error);
	}

	/* Describe the user-space buffer the attribute is read into */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_READ;
	auio.uio_td = curthread;

	error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);
	/* Return the number of bytes actually transferred */
	uap->sysmsg_result = uap->nbytes - auio.uio_resid;

	/* nd.nl_cred is used above, so nlookup_done() must come after */
	vput(vp);
	nlookup_done(&nd);
	rel_mplock();
	return(error);
}

/*
 * Syscall to delete a named extended attribute from a file or directory.
 * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
 *
 * MPALMOSTSAFE
 */
int
sys_extattr_delete_file(struct extattr_delete_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return(error);

	get_mplock();
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
		if (error == 0) {
			/* A NULL uio requests deletion of the attribute */
			error = VOP_SETEXTATTR(vp, uap->attrnamespace,
					       attrname, NULL, nd.nl_cred);
			vput(vp);
		}
	}
	nlookup_done(&nd);
	rel_mplock();
	return(error);
}

/*
 * Determine if the mount is visible to the process.
 */
static int
chroot_visible_mnt(struct mount *mp, struct proc *p)
{
	struct nchandle nch;

	/*
	 * Traverse from the mount point upwards.  If we hit the process
	 * root then the mount point is visible to the process.
	 */
	nch = mp->mnt_ncmountpt;
	while (nch.ncp) {
		if (nch.mount == p->p_fd->fd_nrdir.mount &&
		    nch.ncp == p->p_fd->fd_nrdir.ncp) {
			return(1);
		}
		if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
			/* At the root of this mount; hop to the mount-on point */
			nch = nch.mount->mnt_ncmounton;
		} else {
			nch.ncp = nch.ncp->nc_parent;
		}
	}

	/*
	 * If the mount point is not visible to the process, but the
	 * process root is in a subdirectory of the mount, return
	 * TRUE anyway.
	 */
	if (p->p_fd->fd_nrdir.mount == mp)
		return(1);

	return(0);
}
