1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.135 2008/11/11 00:55:49 pavalos Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mountctl.h> 51 #include <sys/sysproto.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/linker.h> 57 #include <sys/stat.h> 58 #include <sys/unistd.h> 59 #include <sys/vnode.h> 60 #include <sys/proc.h> 61 #include <sys/priv.h> 62 #include <sys/jail.h> 63 #include <sys/namei.h> 64 #include <sys/nlookup.h> 65 #include <sys/dirent.h> 66 #include <sys/extattr.h> 67 #include <sys/spinlock.h> 68 #include <sys/kern_syscall.h> 69 #include <sys/objcache.h> 70 #include <sys/sysctl.h> 71 72 #include <sys/buf2.h> 73 #include <sys/file2.h> 74 #include <sys/spinlock2.h> 75 76 #include <vm/vm.h> 77 #include <vm/vm_object.h> 78 #include <vm/vm_page.h> 79 80 #include <machine/limits.h> 81 #include <machine/stdarg.h> 82 83 #include <vfs/union/union.h> 84 85 static void mount_warning(struct mount *mp, const char *ctl, ...); 86 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 87 static int checkvp_chdir (struct vnode *vn, struct thread *td); 88 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 89 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 90 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 91 static int getutimes (const struct timeval *, struct timespec *); 92 static int setfown (struct vnode *, uid_t, gid_t); 93 static int setfmode (struct vnode *, int); 94 static int setfflags (struct vnode *, int); 95 static int setutimes (struct vnode *, const struct timespec *, int); 96 static int usermount = 0; /* if 1, non-root can mount fs. */ 97 98 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 99 100 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 101 102 /* 103 * Virtual File System System Calls 104 */ 105 106 /* 107 * Mount a file system. 108 */ 109 /* 110 * mount_args(char *type, char *path, int flags, caddr_t data) 111 */ 112 /* ARGSUSED */ 113 int 114 sys_mount(struct mount_args *uap) 115 { 116 struct thread *td = curthread; 117 struct proc *p = td->td_proc; 118 struct vnode *vp; 119 struct nchandle nch; 120 struct mount *mp; 121 struct vfsconf *vfsp; 122 int error, flag = 0, flag2 = 0; 123 int hasmount; 124 struct vattr va; 125 struct nlookupdata nd; 126 char fstypename[MFSNAMELEN]; 127 struct ucred *cred = p->p_ucred; 128 129 KKASSERT(p); 130 if (jailed(cred)) 131 return (EPERM); 132 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 133 return (error); 134 /* 135 * Do not allow NFS export by non-root users. 136 */ 137 if (uap->flags & MNT_EXPORTED) { 138 error = priv_check(td, PRIV_ROOT); 139 if (error) 140 return (error); 141 } 142 /* 143 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 144 */ 145 if (priv_check(td, PRIV_ROOT)) 146 uap->flags |= MNT_NOSUID | MNT_NODEV; 147 148 /* 149 * Lookup the requested path and extract the nch and vnode. 150 */ 151 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 152 if (error == 0) { 153 if ((error = nlookup(&nd)) == 0) { 154 if (nd.nl_nch.ncp->nc_vp == NULL) 155 error = ENOENT; 156 } 157 } 158 if (error) { 159 nlookup_done(&nd); 160 return (error); 161 } 162 163 /* 164 * Extract the locked+refd ncp and cleanup the nd structure 165 */ 166 nch = nd.nl_nch; 167 cache_zero(&nd.nl_nch); 168 nlookup_done(&nd); 169 170 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch)) 171 hasmount = 1; 172 else 173 hasmount = 0; 174 175 176 /* 177 * now we have the locked ref'd nch and unreferenced vnode. 178 */ 179 vp = nch.ncp->nc_vp; 180 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 181 cache_put(&nch); 182 return (error); 183 } 184 cache_unlock(&nch); 185 186 /* 187 * Now we have an unlocked ref'd nch and a locked ref'd vp 188 */ 189 if (uap->flags & MNT_UPDATE) { 190 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 191 cache_drop(&nch); 192 vput(vp); 193 return (EINVAL); 194 } 195 mp = vp->v_mount; 196 flag = mp->mnt_flag; 197 flag2 = mp->mnt_kern_flag; 198 /* 199 * We only allow the filesystem to be reloaded if it 200 * is currently mounted read-only. 201 */ 202 if ((uap->flags & MNT_RELOAD) && 203 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 204 cache_drop(&nch); 205 vput(vp); 206 return (EOPNOTSUPP); /* Needs translation */ 207 } 208 /* 209 * Only root, or the user that did the original mount is 210 * permitted to update it. 211 */ 212 if (mp->mnt_stat.f_owner != cred->cr_uid && 213 (error = priv_check(td, PRIV_ROOT))) { 214 cache_drop(&nch); 215 vput(vp); 216 return (error); 217 } 218 if (vfs_busy(mp, LK_NOWAIT)) { 219 cache_drop(&nch); 220 vput(vp); 221 return (EBUSY); 222 } 223 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 224 cache_drop(&nch); 225 vfs_unbusy(mp); 226 vput(vp); 227 return (EBUSY); 228 } 229 vp->v_flag |= VMOUNT; 230 mp->mnt_flag |= 231 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 232 vn_unlock(vp); 233 goto update; 234 } 235 /* 236 * If the user is not root, ensure that they own the directory 237 * onto which we are attempting to mount. 238 */ 239 if ((error = VOP_GETATTR(vp, &va)) || 240 (va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) { 241 cache_drop(&nch); 242 vput(vp); 243 return (error); 244 } 245 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 246 cache_drop(&nch); 247 vput(vp); 248 return (error); 249 } 250 if (vp->v_type != VDIR) { 251 cache_drop(&nch); 252 vput(vp); 253 return (ENOTDIR); 254 } 255 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 256 cache_drop(&nch); 257 vput(vp); 258 return (EPERM); 259 } 260 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 261 cache_drop(&nch); 262 vput(vp); 263 return (error); 264 } 265 vfsp = vfsconf_find_by_name(fstypename); 266 if (vfsp == NULL) { 267 linker_file_t lf; 268 269 /* Only load modules for root (very important!) */ 270 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 271 cache_drop(&nch); 272 vput(vp); 273 return error; 274 } 275 error = linker_load_file(fstypename, &lf); 276 if (error || lf == NULL) { 277 cache_drop(&nch); 278 vput(vp); 279 if (lf == NULL) 280 error = ENODEV; 281 return error; 282 } 283 lf->userrefs++; 284 /* lookup again, see if the VFS was loaded */ 285 vfsp = vfsconf_find_by_name(fstypename); 286 if (vfsp == NULL) { 287 lf->userrefs--; 288 linker_file_unload(lf); 289 cache_drop(&nch); 290 vput(vp); 291 return (ENODEV); 292 } 293 } 294 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 295 cache_drop(&nch); 296 vput(vp); 297 return (EBUSY); 298 } 299 vp->v_flag |= VMOUNT; 300 301 /* 302 * Allocate and initialize the filesystem. 303 */ 304 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 305 TAILQ_INIT(&mp->mnt_nvnodelist); 306 TAILQ_INIT(&mp->mnt_reservedvnlist); 307 TAILQ_INIT(&mp->mnt_jlist); 308 mp->mnt_nvnodelistsize = 0; 309 lockinit(&mp->mnt_lock, "vfslock", 0, 0); 310 vfs_busy(mp, LK_NOWAIT); 311 mp->mnt_op = vfsp->vfc_vfsops; 312 mp->mnt_vfc = vfsp; 313 vfsp->vfc_refcount++; 314 mp->mnt_stat.f_type = vfsp->vfc_typenum; 315 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 316 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 317 mp->mnt_stat.f_owner = cred->cr_uid; 318 mp->mnt_iosize_max = DFLTPHYS; 319 vn_unlock(vp); 320 update: 321 /* 322 * Set the mount level flags. 323 */ 324 if (uap->flags & MNT_RDONLY) 325 mp->mnt_flag |= MNT_RDONLY; 326 else if (mp->mnt_flag & MNT_RDONLY) 327 mp->mnt_kern_flag |= MNTK_WANTRDWR; 328 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 329 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 330 MNT_NOSYMFOLLOW | MNT_IGNORE | 331 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 332 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 333 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 334 MNT_NOSYMFOLLOW | MNT_IGNORE | 335 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 336 /* 337 * Mount the filesystem. 338 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 339 * get. 340 */ 341 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 342 if (mp->mnt_flag & MNT_UPDATE) { 343 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 344 mp->mnt_flag &= ~MNT_RDONLY; 345 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 346 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 347 if (error) { 348 mp->mnt_flag = flag; 349 mp->mnt_kern_flag = flag2; 350 } 351 vfs_unbusy(mp); 352 vp->v_flag &= ~VMOUNT; 353 vrele(vp); 354 cache_drop(&nch); 355 return (error); 356 } 357 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 358 /* 359 * Put the new filesystem on the mount list after root. The mount 360 * point gets its own mnt_ncmountpt (unless the VFS already set one 361 * up) which represents the root of the mount. The lookup code 362 * detects the mount point going forward and checks the root of 363 * the mount going backwards. 364 * 365 * It is not necessary to invalidate or purge the vnode underneath 366 * because elements under the mount will be given their own glue 367 * namecache record. 368 */ 369 if (!error) { 370 if (mp->mnt_ncmountpt.ncp == NULL) { 371 /* 372 * allocate, then unlock, but leave the ref intact 373 */ 374 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 375 cache_unlock(&mp->mnt_ncmountpt); 376 } 377 mp->mnt_ncmounton = nch; /* inherits ref */ 378 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 379 380 /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */ 381 vp->v_flag &= ~VMOUNT; 382 mountlist_insert(mp, MNTINS_LAST); 383 vn_unlock(vp); 384 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 385 error = vfs_allocate_syncvnode(mp); 386 vfs_unbusy(mp); 387 error = VFS_START(mp, 0); 388 vrele(vp); 389 } else { 390 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 391 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 392 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 393 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 394 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 395 vp->v_flag &= ~VMOUNT; 396 mp->mnt_vfc->vfc_refcount--; 397 vfs_unbusy(mp); 398 kfree(mp, M_MOUNT); 399 cache_drop(&nch); 400 vput(vp); 401 } 402 return (error); 403 } 404 405 /* 406 * Scan all active processes to see if any of them have a current 407 * or root directory onto which the new filesystem has just been 408 * mounted. If so, replace them with the new mount point. 409 * 410 * The passed ncp is ref'd and locked (from the mount code) and 411 * must be associated with the vnode representing the root of the 412 * mount point. 413 */ 414 struct checkdirs_info { 415 struct nchandle old_nch; 416 struct nchandle new_nch; 417 struct vnode *old_vp; 418 struct vnode *new_vp; 419 }; 420 421 static int checkdirs_callback(struct proc *p, void *data); 422 423 static void 424 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 425 { 426 struct checkdirs_info info; 427 struct vnode *olddp; 428 struct vnode *newdp; 429 struct mount *mp; 430 431 /* 432 * If the old mount point's vnode has a usecount of 1, it is not 433 * being held as a descriptor anywhere. 434 */ 435 olddp = old_nch->ncp->nc_vp; 436 if (olddp == NULL || olddp->v_sysref.refcnt == 1) 437 return; 438 439 /* 440 * Force the root vnode of the new mount point to be resolved 441 * so we can update any matching processes. 442 */ 443 mp = new_nch->mount; 444 if (VFS_ROOT(mp, &newdp)) 445 panic("mount: lost mount"); 446 cache_setunresolved(new_nch); 447 cache_setvp(new_nch, newdp); 448 449 /* 450 * Special handling of the root node 451 */ 452 if (rootvnode == olddp) { 453 vref(newdp); 454 vfs_cache_setroot(newdp, cache_hold(new_nch)); 455 } 456 457 /* 458 * Pass newdp separately so the callback does not have to access 459 * it via new_nch->ncp->nc_vp. 460 */ 461 info.old_nch = *old_nch; 462 info.new_nch = *new_nch; 463 info.new_vp = newdp; 464 allproc_scan(checkdirs_callback, &info); 465 vput(newdp); 466 } 467 468 /* 469 * NOTE: callback is not MP safe because the scanned process's filedesc 470 * structure can be ripped out from under us, amoung other things. 471 */ 472 static int 473 checkdirs_callback(struct proc *p, void *data) 474 { 475 struct checkdirs_info *info = data; 476 struct filedesc *fdp; 477 struct nchandle ncdrop1; 478 struct nchandle ncdrop2; 479 struct vnode *vprele1; 480 struct vnode *vprele2; 481 482 if ((fdp = p->p_fd) != NULL) { 483 cache_zero(&ncdrop1); 484 cache_zero(&ncdrop2); 485 vprele1 = NULL; 486 vprele2 = NULL; 487 488 /* 489 * MPUNSAFE - XXX fdp can be pulled out from under a 490 * foreign process. 491 * 492 * A shared filedesc is ok, we don't have to copy it 493 * because we are making this change globally. 494 */ 495 spin_lock_wr(&fdp->fd_spin); 496 if (fdp->fd_ncdir.mount == info->old_nch.mount && 497 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 498 vprele1 = fdp->fd_cdir; 499 vref(info->new_vp); 500 fdp->fd_cdir = info->new_vp; 501 ncdrop1 = fdp->fd_ncdir; 502 cache_copy(&info->new_nch, &fdp->fd_ncdir); 503 } 504 if (fdp->fd_nrdir.mount == info->old_nch.mount && 505 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 506 vprele2 = fdp->fd_rdir; 507 vref(info->new_vp); 508 fdp->fd_rdir = info->new_vp; 509 ncdrop2 = fdp->fd_nrdir; 510 cache_copy(&info->new_nch, &fdp->fd_nrdir); 511 } 512 spin_unlock_wr(&fdp->fd_spin); 513 if (ncdrop1.ncp) 514 cache_drop(&ncdrop1); 515 if (ncdrop2.ncp) 516 cache_drop(&ncdrop2); 517 if (vprele1) 518 vrele(vprele1); 519 if (vprele2) 520 vrele(vprele2); 521 } 522 return(0); 523 } 524 525 /* 526 * Unmount a file system. 527 * 528 * Note: unmount takes a path to the vnode mounted on as argument, 529 * not special file (as before). 530 */ 531 /* 532 * umount_args(char *path, int flags) 533 */ 534 /* ARGSUSED */ 535 int 536 sys_unmount(struct unmount_args *uap) 537 { 538 struct thread *td = curthread; 539 struct proc *p = td->td_proc; 540 struct mount *mp = NULL; 541 int error; 542 struct nlookupdata nd; 543 544 KKASSERT(p); 545 if (p->p_ucred->cr_prison != NULL) 546 return (EPERM); 547 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 548 return (error); 549 550 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 551 if (error == 0) 552 error = nlookup(&nd); 553 if (error) 554 goto out; 555 556 mp = nd.nl_nch.mount; 557 558 /* 559 * Only root, or the user that did the original mount is 560 * permitted to unmount this filesystem. 561 */ 562 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && 563 (error = priv_check(td, PRIV_ROOT))) 564 goto out; 565 566 /* 567 * Don't allow unmounting the root file system. 568 */ 569 if (mp->mnt_flag & MNT_ROOTFS) { 570 error = EINVAL; 571 goto out; 572 } 573 574 /* 575 * Must be the root of the filesystem 576 */ 577 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 578 error = EINVAL; 579 goto out; 580 } 581 582 out: 583 nlookup_done(&nd); 584 if (error) 585 return (error); 586 return (dounmount(mp, uap->flags)); 587 } 588 589 /* 590 * Do the actual file system unmount. 591 */ 592 static int 593 dounmount_interlock(struct mount *mp) 594 { 595 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 596 return (EBUSY); 597 mp->mnt_kern_flag |= MNTK_UNMOUNT; 598 return(0); 599 } 600 601 int 602 dounmount(struct mount *mp, int flags) 603 { 604 struct namecache *ncp; 605 struct nchandle nch; 606 struct vnode *vp; 607 int error; 608 int async_flag; 609 int lflags; 610 int freeok = 1; 611 612 /* 613 * Exclusive access for unmounting purposes 614 */ 615 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 616 return (error); 617 618 /* 619 * Allow filesystems to detect that a forced unmount is in progress. 620 */ 621 if (flags & MNT_FORCE) 622 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 623 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT); 624 error = lockmgr(&mp->mnt_lock, lflags); 625 if (error) { 626 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 627 if (mp->mnt_kern_flag & MNTK_MWAIT) 628 wakeup(mp); 629 return (error); 630 } 631 632 if (mp->mnt_flag & MNT_EXPUBLIC) 633 vfs_setpublicfs(NULL, NULL, NULL); 634 635 vfs_msync(mp, MNT_WAIT); 636 async_flag = mp->mnt_flag & MNT_ASYNC; 637 mp->mnt_flag &=~ MNT_ASYNC; 638 639 /* 640 * If this filesystem isn't aliasing other filesystems, 641 * try to invalidate any remaining namecache entries and 642 * check the count afterwords. 643 */ 644 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 645 cache_lock(&mp->mnt_ncmountpt); 646 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 647 cache_unlock(&mp->mnt_ncmountpt); 648 649 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 650 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 651 652 if ((flags & MNT_FORCE) == 0) { 653 error = EBUSY; 654 mount_warning(mp, "Cannot unmount: " 655 "%d namecache " 656 "references still " 657 "present", 658 ncp->nc_refs - 1); 659 } else { 660 mount_warning(mp, "Forced unmount: " 661 "%d namecache " 662 "references still " 663 "present", 664 ncp->nc_refs - 1); 665 freeok = 0; 666 } 667 } 668 } 669 670 /* 671 * nchandle records ref the mount structure. Expect a count of 1 672 * (our mount->mnt_ncmountpt). 673 */ 674 if (mp->mnt_refs != 1) { 675 if ((flags & MNT_FORCE) == 0) { 676 mount_warning(mp, "Cannot unmount: " 677 "%d process references still " 678 "present", mp->mnt_refs); 679 error = EBUSY; 680 } else { 681 mount_warning(mp, "Forced unmount: " 682 "%d process references still " 683 "present", mp->mnt_refs); 684 freeok = 0; 685 } 686 } 687 688 /* 689 * Decomission our special mnt_syncer vnode. This also stops 690 * the vnlru code. If we are unable to unmount we recommission 691 * the vnode. 692 */ 693 if (error == 0) { 694 if ((vp = mp->mnt_syncer) != NULL) { 695 mp->mnt_syncer = NULL; 696 vrele(vp); 697 } 698 if (((mp->mnt_flag & MNT_RDONLY) || 699 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 700 (flags & MNT_FORCE)) { 701 error = VFS_UNMOUNT(mp, flags); 702 } 703 } 704 if (error) { 705 if (mp->mnt_syncer == NULL) 706 vfs_allocate_syncvnode(mp); 707 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 708 mp->mnt_flag |= async_flag; 709 lockmgr(&mp->mnt_lock, LK_RELEASE); 710 if (mp->mnt_kern_flag & MNTK_MWAIT) 711 wakeup(mp); 712 return (error); 713 } 714 /* 715 * Clean up any journals still associated with the mount after 716 * filesystem activity has ceased. 717 */ 718 journal_remove_all_journals(mp, 719 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 720 721 mountlist_remove(mp); 722 723 /* 724 * Remove any installed vnode ops here so the individual VFSs don't 725 * have to. 726 */ 727 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 728 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 729 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 730 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 731 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 732 733 if (mp->mnt_ncmountpt.ncp != NULL) { 734 nch = mp->mnt_ncmountpt; 735 cache_zero(&mp->mnt_ncmountpt); 736 cache_clrmountpt(&nch); 737 cache_drop(&nch); 738 } 739 if (mp->mnt_ncmounton.ncp != NULL) { 740 nch = mp->mnt_ncmounton; 741 cache_zero(&mp->mnt_ncmounton); 742 cache_clrmountpt(&nch); 743 cache_drop(&nch); 744 } 745 746 mp->mnt_vfc->vfc_refcount--; 747 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 748 panic("unmount: dangling vnode"); 749 lockmgr(&mp->mnt_lock, LK_RELEASE); 750 if (mp->mnt_kern_flag & MNTK_MWAIT) 751 wakeup(mp); 752 if (freeok) 753 kfree(mp, M_MOUNT); 754 return (0); 755 } 756 757 static 758 void 759 mount_warning(struct mount *mp, const char *ctl, ...) 760 { 761 char *ptr; 762 char *buf; 763 __va_list va; 764 765 __va_start(va, ctl); 766 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) { 767 kprintf("unmount(%s): ", ptr); 768 kvprintf(ctl, va); 769 kprintf("\n"); 770 kfree(buf, M_TEMP); 771 } else { 772 kprintf("unmount(%p", mp); 773 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 774 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 775 kprintf("): "); 776 kvprintf(ctl, va); 777 kprintf("\n"); 778 } 779 __va_end(va); 780 } 781 782 /* 783 * Shim cache_fullpath() to handle the case where a process is chrooted into 784 * a subdirectory of a mount. In this case if the root mount matches the 785 * process root directory's mount we have to specify the process's root 786 * directory instead of the mount point, because the mount point might 787 * be above the root directory. 788 */ 789 static 790 int 791 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 792 { 793 struct nchandle *nch; 794 795 if (p && p->p_fd->fd_nrdir.mount == mp) 796 nch = &p->p_fd->fd_nrdir; 797 else 798 nch = &mp->mnt_ncmountpt; 799 return(cache_fullpath(p, nch, rb, fb)); 800 } 801 802 /* 803 * Sync each mounted filesystem. 804 */ 805 806 #ifdef DEBUG 807 static int syncprt = 0; 808 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 809 #endif /* DEBUG */ 810 811 static int sync_callback(struct mount *mp, void *data); 812 813 /* ARGSUSED */ 814 int 815 sys_sync(struct sync_args *uap) 816 { 817 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 818 #ifdef DEBUG 819 /* 820 * print out buffer pool stat information on each sync() call. 821 */ 822 if (syncprt) 823 vfs_bufstats(); 824 #endif /* DEBUG */ 825 return (0); 826 } 827 828 static 829 int 830 sync_callback(struct mount *mp, void *data __unused) 831 { 832 int asyncflag; 833 834 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 835 asyncflag = mp->mnt_flag & MNT_ASYNC; 836 mp->mnt_flag &= ~MNT_ASYNC; 837 vfs_msync(mp, MNT_NOWAIT); 838 VFS_SYNC(mp, MNT_NOWAIT); 839 mp->mnt_flag |= asyncflag; 840 } 841 return(0); 842 } 843 844 /* XXX PRISON: could be per prison flag */ 845 static int prison_quotas; 846 #if 0 847 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 848 #endif 849 850 /* 851 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 852 * 853 * Change filesystem quotas. 854 */ 855 /* ARGSUSED */ 856 int 857 sys_quotactl(struct quotactl_args *uap) 858 { 859 struct nlookupdata nd; 860 struct thread *td; 861 struct proc *p; 862 struct mount *mp; 863 int error; 864 865 td = curthread; 866 p = td->td_proc; 867 if (p->p_ucred->cr_prison && !prison_quotas) 868 return (EPERM); 869 870 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 871 if (error == 0) 872 error = nlookup(&nd); 873 if (error == 0) { 874 mp = nd.nl_nch.mount; 875 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 876 uap->arg, nd.nl_cred); 877 } 878 nlookup_done(&nd); 879 return (error); 880 } 881 882 /* 883 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 884 * void *buf, int buflen) 885 * 886 * This function operates on a mount point and executes the specified 887 * operation using the specified control data, and possibly returns data. 888 * 889 * The actual number of bytes stored in the result buffer is returned, 0 890 * if none, otherwise an error is returned. 891 */ 892 /* ARGSUSED */ 893 int 894 sys_mountctl(struct mountctl_args *uap) 895 { 896 struct thread *td = curthread; 897 struct proc *p = td->td_proc; 898 struct file *fp; 899 void *ctl = NULL; 900 void *buf = NULL; 901 char *path = NULL; 902 int error; 903 904 /* 905 * Sanity and permissions checks. We must be root. 906 */ 907 KKASSERT(p); 908 if (p->p_ucred->cr_prison != NULL) 909 return (EPERM); 910 if ((error = priv_check(td, PRIV_ROOT)) != 0) 911 return (error); 912 913 /* 914 * Argument length checks 915 */ 916 if (uap->ctllen < 0 || uap->ctllen > 1024) 917 return (EINVAL); 918 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 919 return (EINVAL); 920 if (uap->path == NULL) 921 return (EINVAL); 922 923 /* 924 * Allocate the necessary buffers and copyin data 925 */ 926 path = objcache_get(namei_oc, M_WAITOK); 927 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 928 if (error) 929 goto done; 930 931 if (uap->ctllen) { 932 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 933 error = copyin(uap->ctl, ctl, uap->ctllen); 934 if (error) 935 goto done; 936 } 937 if (uap->buflen) 938 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 939 940 /* 941 * Validate the descriptor 942 */ 943 if (uap->fd >= 0) { 944 fp = holdfp(p->p_fd, uap->fd, -1); 945 if (fp == NULL) { 946 error = EBADF; 947 goto done; 948 } 949 } else { 950 fp = NULL; 951 } 952 953 /* 954 * Execute the internal kernel function and clean up. 955 */ 956 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 957 if (fp) 958 fdrop(fp); 959 if (error == 0 && uap->sysmsg_result > 0) 960 error = copyout(buf, uap->buf, uap->sysmsg_result); 961 done: 962 if (path) 963 objcache_put(namei_oc, path); 964 if (ctl) 965 kfree(ctl, M_TEMP); 966 if (buf) 967 kfree(buf, M_TEMP); 968 return (error); 969 } 970 971 /* 972 * Execute a mount control operation by resolving the path to a mount point 973 * and calling vop_mountctl(). 974 * 975 * Use the mount point from the nch instead of the vnode so nullfs mounts 976 * can properly spike the VOP. 977 */ 978 int 979 kern_mountctl(const char *path, int op, struct file *fp, 980 const void *ctl, int ctllen, 981 void *buf, int buflen, int *res) 982 { 983 struct vnode *vp; 984 struct mount *mp; 985 struct nlookupdata nd; 986 int error; 987 988 *res = 0; 989 vp = NULL; 990 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 991 if (error == 0) 992 error = nlookup(&nd); 993 if (error == 0) 994 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 995 mp = nd.nl_nch.mount; 996 nlookup_done(&nd); 997 if (error) 998 return (error); 999 1000 /* 1001 * Must be the root of the filesystem 1002 */ 1003 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1004 vput(vp); 1005 return (EINVAL); 1006 } 1007 error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen, 1008 buf, buflen, res); 1009 vput(vp); 1010 return (error); 1011 } 1012 1013 int 1014 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1015 { 1016 struct thread *td = curthread; 1017 struct proc *p = td->td_proc; 1018 struct mount *mp; 1019 struct statfs *sp; 1020 char *fullpath, *freepath; 1021 int error; 1022 1023 if ((error = nlookup(nd)) != 0) 1024 return (error); 1025 mp = nd->nl_nch.mount; 1026 sp = &mp->mnt_stat; 1027 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0) 1028 return (error); 1029 1030 error = mount_path(p, mp, &fullpath, &freepath); 1031 if (error) 1032 return(error); 1033 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1034 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1035 kfree(freepath, M_TEMP); 1036 1037 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1038 bcopy(sp, buf, sizeof(*buf)); 1039 /* Only root should have access to the fsid's. */ 1040 if (priv_check(td, PRIV_ROOT)) 1041 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1042 return (0); 1043 } 1044 1045 /* 1046 * statfs_args(char *path, struct statfs *buf) 1047 * 1048 * Get filesystem statistics. 1049 */ 1050 int 1051 sys_statfs(struct statfs_args *uap) 1052 { 1053 struct nlookupdata nd; 1054 struct statfs buf; 1055 int error; 1056 1057 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1058 if (error == 0) 1059 error = kern_statfs(&nd, &buf); 1060 nlookup_done(&nd); 1061 if (error == 0) 1062 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1063 return (error); 1064 } 1065 1066 int 1067 kern_fstatfs(int fd, struct statfs *buf) 1068 { 1069 struct thread *td = curthread; 1070 struct proc *p = td->td_proc; 1071 struct file *fp; 1072 struct mount *mp; 1073 struct statfs *sp; 1074 char *fullpath, *freepath; 1075 int error; 1076 1077 KKASSERT(p); 1078 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1079 return (error); 1080 mp = ((struct vnode *)fp->f_data)->v_mount; 1081 if (mp == NULL) { 1082 error = EBADF; 1083 goto done; 1084 } 1085 if (fp->f_cred == NULL) { 1086 error = EINVAL; 1087 goto done; 1088 } 1089 sp = &mp->mnt_stat; 1090 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1091 goto done; 1092 1093 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1094 goto done; 1095 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1096 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1097 kfree(freepath, M_TEMP); 1098 1099 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1100 bcopy(sp, buf, sizeof(*buf)); 1101 1102 /* Only root should have access to the fsid's. */ 1103 if (priv_check(td, PRIV_ROOT)) 1104 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1105 error = 0; 1106 done: 1107 fdrop(fp); 1108 return (error); 1109 } 1110 1111 /* 1112 * fstatfs_args(int fd, struct statfs *buf) 1113 * 1114 * Get filesystem statistics. 1115 */ 1116 int 1117 sys_fstatfs(struct fstatfs_args *uap) 1118 { 1119 struct statfs buf; 1120 int error; 1121 1122 error = kern_fstatfs(uap->fd, &buf); 1123 1124 if (error == 0) 1125 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1126 return (error); 1127 } 1128 1129 int 1130 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1131 { 1132 struct mount *mp; 1133 struct statvfs *sp; 1134 int error; 1135 1136 if ((error = nlookup(nd)) != 0) 1137 return (error); 1138 mp = nd->nl_nch.mount; 1139 sp = &mp->mnt_vstat; 1140 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1141 return (error); 1142 1143 sp->f_flag = 0; 1144 if (mp->mnt_flag & MNT_RDONLY) 1145 sp->f_flag |= ST_RDONLY; 1146 if (mp->mnt_flag & MNT_NOSUID) 1147 sp->f_flag |= ST_NOSUID; 1148 bcopy(sp, buf, sizeof(*buf)); 1149 return (0); 1150 } 1151 1152 /* 1153 * statfs_args(char *path, struct statfs *buf) 1154 * 1155 * Get filesystem statistics. 1156 */ 1157 int 1158 sys_statvfs(struct statvfs_args *uap) 1159 { 1160 struct nlookupdata nd; 1161 struct statvfs buf; 1162 int error; 1163 1164 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1165 if (error == 0) 1166 error = kern_statvfs(&nd, &buf); 1167 nlookup_done(&nd); 1168 if (error == 0) 1169 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1170 return (error); 1171 } 1172 1173 int 1174 kern_fstatvfs(int fd, struct statvfs *buf) 1175 { 1176 struct thread *td = curthread; 1177 struct proc *p = td->td_proc; 1178 struct file *fp; 1179 struct mount *mp; 1180 struct statvfs *sp; 1181 int error; 1182 1183 KKASSERT(p); 1184 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1185 return (error); 1186 mp = ((struct vnode *)fp->f_data)->v_mount; 1187 if (mp == NULL) { 1188 error = EBADF; 1189 goto done; 1190 } 1191 if (fp->f_cred == NULL) { 1192 error = EINVAL; 1193 goto done; 1194 } 1195 sp = &mp->mnt_vstat; 1196 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1197 goto done; 1198 1199 sp->f_flag = 0; 1200 if (mp->mnt_flag & MNT_RDONLY) 1201 sp->f_flag |= ST_RDONLY; 1202 if (mp->mnt_flag & MNT_NOSUID) 1203 sp->f_flag |= ST_NOSUID; 1204 1205 bcopy(sp, buf, sizeof(*buf)); 1206 error = 0; 1207 done: 1208 fdrop(fp); 1209 return (error); 1210 } 1211 1212 /* 1213 * fstatfs_args(int fd, struct statfs *buf) 1214 * 1215 * Get filesystem statistics. 1216 */ 1217 int 1218 sys_fstatvfs(struct fstatvfs_args *uap) 1219 { 1220 struct statvfs buf; 1221 int error; 1222 1223 error = kern_fstatvfs(uap->fd, &buf); 1224 1225 if (error == 0) 1226 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1227 return (error); 1228 } 1229 1230 /* 1231 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1232 * 1233 * Get statistics on all filesystems. 1234 */ 1235 1236 struct getfsstat_info { 1237 struct statfs *sfsp; 1238 long count; 1239 long maxcount; 1240 int error; 1241 int flags; 1242 struct proc *p; 1243 }; 1244 1245 static int getfsstat_callback(struct mount *, void *); 1246 1247 /* ARGSUSED */ 1248 int 1249 sys_getfsstat(struct getfsstat_args *uap) 1250 { 1251 struct thread *td = curthread; 1252 struct proc *p = td->td_proc; 1253 struct getfsstat_info info; 1254 1255 bzero(&info, sizeof(info)); 1256 1257 info.maxcount = uap->bufsize / sizeof(struct statfs); 1258 info.sfsp = uap->buf; 1259 info.count = 0; 1260 info.flags = uap->flags; 1261 info.p = p; 1262 1263 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1264 if (info.sfsp && info.count > info.maxcount) 1265 uap->sysmsg_result = info.maxcount; 1266 else 1267 uap->sysmsg_result = info.count; 1268 return (info.error); 1269 } 1270 1271 static int 1272 getfsstat_callback(struct mount *mp, void *data) 1273 { 1274 struct getfsstat_info *info = data; 1275 struct statfs *sp; 1276 char *freepath; 1277 char *fullpath; 1278 int error; 1279 1280 if (info->sfsp && info->count < info->maxcount) { 1281 if (info->p && !chroot_visible_mnt(mp, info->p)) 1282 return(0); 1283 sp = &mp->mnt_stat; 1284 1285 /* 1286 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1287 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1288 * overrides MNT_WAIT. 1289 */ 1290 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1291 (info->flags & MNT_WAIT)) && 1292 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1293 return(0); 1294 } 1295 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1296 1297 error = mount_path(info->p, mp, &fullpath, &freepath); 1298 if (error) { 1299 info->error = error; 1300 return(-1); 1301 } 1302 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1303 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1304 kfree(freepath, M_TEMP); 1305 1306 error = copyout(sp, info->sfsp, sizeof(*sp)); 1307 if (error) { 1308 info->error = error; 1309 return (-1); 1310 } 1311 ++info->sfsp; 1312 } 1313 info->count++; 1314 return(0); 1315 } 1316 1317 /* 1318 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1319 long bufsize, int flags) 1320 * 1321 * Get statistics on all filesystems. 1322 */ 1323 1324 struct getvfsstat_info { 1325 struct statfs *sfsp; 1326 struct statvfs *vsfsp; 1327 long count; 1328 long maxcount; 1329 int error; 1330 int flags; 1331 struct proc *p; 1332 }; 1333 1334 static int getvfsstat_callback(struct mount *, void *); 1335 1336 /* ARGSUSED */ 1337 int 1338 sys_getvfsstat(struct getvfsstat_args *uap) 1339 { 1340 struct thread *td = curthread; 1341 struct proc *p = td->td_proc; 1342 struct getvfsstat_info info; 1343 1344 bzero(&info, sizeof(info)); 1345 1346 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1347 info.sfsp = uap->buf; 1348 info.vsfsp = uap->vbuf; 1349 info.count = 0; 1350 info.flags = uap->flags; 1351 info.p = p; 1352 1353 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1354 if (info.vsfsp && info.count > info.maxcount) 1355 uap->sysmsg_result = info.maxcount; 1356 else 1357 uap->sysmsg_result = info.count; 1358 return (info.error); 1359 } 1360 1361 static int 1362 getvfsstat_callback(struct mount *mp, void *data) 1363 { 1364 struct getvfsstat_info *info = data; 1365 struct statfs *sp; 1366 struct statvfs *vsp; 1367 char *freepath; 1368 char *fullpath; 1369 int error; 1370 1371 if (info->vsfsp && info->count < info->maxcount) { 1372 if (info->p && !chroot_visible_mnt(mp, info->p)) 1373 return(0); 1374 sp = &mp->mnt_stat; 1375 vsp = &mp->mnt_vstat; 1376 1377 /* 1378 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1379 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1380 * overrides MNT_WAIT. 1381 */ 1382 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1383 (info->flags & MNT_WAIT)) && 1384 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1385 return(0); 1386 } 1387 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1388 1389 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1390 (info->flags & MNT_WAIT)) && 1391 (error = VFS_STATVFS(mp, vsp, info->p->p_ucred))) { 1392 return(0); 1393 } 1394 vsp->f_flag = 0; 1395 if (mp->mnt_flag & MNT_RDONLY) 1396 vsp->f_flag |= ST_RDONLY; 1397 if (mp->mnt_flag & MNT_NOSUID) 1398 vsp->f_flag |= ST_NOSUID; 1399 1400 error = mount_path(info->p, mp, &fullpath, &freepath); 1401 if (error) { 1402 info->error = error; 1403 return(-1); 1404 } 1405 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1406 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1407 kfree(freepath, M_TEMP); 1408 1409 error = copyout(sp, info->sfsp, sizeof(*sp)); 1410 if (error == 0) 1411 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1412 if (error) { 1413 info->error = error; 1414 return (-1); 1415 } 1416 ++info->sfsp; 1417 ++info->vsfsp; 1418 } 1419 info->count++; 1420 return(0); 1421 } 1422 1423 1424 /* 1425 * fchdir_args(int fd) 1426 * 1427 * Change current working directory to a given file descriptor. 1428 */ 1429 /* ARGSUSED */ 1430 int 1431 sys_fchdir(struct fchdir_args *uap) 1432 { 1433 struct thread *td = curthread; 1434 struct proc *p = td->td_proc; 1435 struct filedesc *fdp = p->p_fd; 1436 struct vnode *vp, *ovp; 1437 struct mount *mp; 1438 struct file *fp; 1439 struct nchandle nch, onch, tnch; 1440 int error; 1441 1442 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1443 return (error); 1444 vp = (struct vnode *)fp->f_data; 1445 vref(vp); 1446 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1447 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) 1448 error = ENOTDIR; 1449 else 1450 error = VOP_ACCESS(vp, VEXEC, p->p_ucred); 1451 if (error) { 1452 vput(vp); 1453 fdrop(fp); 1454 return (error); 1455 } 1456 cache_copy(&fp->f_nchandle, &nch); 1457 1458 /* 1459 * If the ncp has become a mount point, traverse through 1460 * the mount point. 1461 */ 1462 1463 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1464 (mp = cache_findmount(&nch)) != NULL 1465 ) { 1466 error = nlookup_mp(mp, &tnch); 1467 if (error == 0) { 1468 cache_unlock(&tnch); /* leave ref intact */ 1469 vput(vp); 1470 vp = tnch.ncp->nc_vp; 1471 error = vget(vp, LK_SHARED); 1472 KKASSERT(error == 0); 1473 cache_drop(&nch); 1474 nch = tnch; 1475 } 1476 } 1477 if (error == 0) { 1478 ovp = fdp->fd_cdir; 1479 onch = fdp->fd_ncdir; 1480 vn_unlock(vp); /* leave ref intact */ 1481 fdp->fd_cdir = vp; 1482 fdp->fd_ncdir = nch; 1483 cache_drop(&onch); 1484 vrele(ovp); 1485 } else { 1486 cache_drop(&nch); 1487 vput(vp); 1488 } 1489 fdrop(fp); 1490 return (error); 1491 } 1492 1493 int 1494 kern_chdir(struct nlookupdata *nd) 1495 { 1496 struct thread *td = curthread; 1497 struct proc *p = td->td_proc; 1498 struct filedesc *fdp = p->p_fd; 1499 struct vnode *vp, *ovp; 1500 struct nchandle onch; 1501 int error; 1502 1503 if ((error = nlookup(nd)) != 0) 1504 return (error); 1505 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1506 return (ENOENT); 1507 if ((error = vget(vp, LK_SHARED)) != 0) 1508 return (error); 1509 1510 error = checkvp_chdir(vp, td); 1511 vn_unlock(vp); 1512 if (error == 0) { 1513 ovp = fdp->fd_cdir; 1514 onch = fdp->fd_ncdir; 1515 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1516 fdp->fd_ncdir = nd->nl_nch; 1517 fdp->fd_cdir = vp; 1518 cache_drop(&onch); 1519 vrele(ovp); 1520 cache_zero(&nd->nl_nch); 1521 } else { 1522 vrele(vp); 1523 } 1524 return (error); 1525 } 1526 1527 /* 1528 * chdir_args(char *path) 1529 * 1530 * Change current working directory (``.''). 1531 */ 1532 int 1533 sys_chdir(struct chdir_args *uap) 1534 { 1535 struct nlookupdata nd; 1536 int error; 1537 1538 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1539 if (error == 0) 1540 error = kern_chdir(&nd); 1541 nlookup_done(&nd); 1542 return (error); 1543 } 1544 1545 /* 1546 * Helper function for raised chroot(2) security function: Refuse if 1547 * any filedescriptors are open directories. 1548 */ 1549 static int 1550 chroot_refuse_vdir_fds(struct filedesc *fdp) 1551 { 1552 struct vnode *vp; 1553 struct file *fp; 1554 int error; 1555 int fd; 1556 1557 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1558 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1559 continue; 1560 vp = (struct vnode *)fp->f_data; 1561 if (vp->v_type != VDIR) { 1562 fdrop(fp); 1563 continue; 1564 } 1565 fdrop(fp); 1566 return(EPERM); 1567 } 1568 return (0); 1569 } 1570 1571 /* 1572 * This sysctl determines if we will allow a process to chroot(2) if it 1573 * has a directory open: 1574 * 0: disallowed for all processes. 1575 * 1: allowed for processes that were not already chroot(2)'ed. 1576 * 2: allowed for all processes. 1577 */ 1578 1579 static int chroot_allow_open_directories = 1; 1580 1581 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1582 &chroot_allow_open_directories, 0, ""); 1583 1584 /* 1585 * chroot to the specified namecache entry. We obtain the vp from the 1586 * namecache data. The passed ncp must be locked and referenced and will 1587 * remain locked and referenced on return. 1588 */ 1589 int 1590 kern_chroot(struct nchandle *nch) 1591 { 1592 struct thread *td = curthread; 1593 struct proc *p = td->td_proc; 1594 struct filedesc *fdp = p->p_fd; 1595 struct vnode *vp; 1596 int error; 1597 1598 /* 1599 * Only root can chroot 1600 */ 1601 if ((error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT)) != 0) 1602 return (error); 1603 1604 /* 1605 * Disallow open directory descriptors (fchdir() breakouts). 1606 */ 1607 if (chroot_allow_open_directories == 0 || 1608 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1609 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1610 return (error); 1611 } 1612 if ((vp = nch->ncp->nc_vp) == NULL) 1613 return (ENOENT); 1614 1615 if ((error = vget(vp, LK_SHARED)) != 0) 1616 return (error); 1617 1618 /* 1619 * Check the validity of vp as a directory to change to and 1620 * associate it with rdir/jdir. 1621 */ 1622 error = checkvp_chdir(vp, td); 1623 vn_unlock(vp); /* leave reference intact */ 1624 if (error == 0) { 1625 vrele(fdp->fd_rdir); 1626 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1627 cache_drop(&fdp->fd_nrdir); 1628 cache_copy(nch, &fdp->fd_nrdir); 1629 if (fdp->fd_jdir == NULL) { 1630 fdp->fd_jdir = vp; 1631 vref(fdp->fd_jdir); 1632 cache_copy(nch, &fdp->fd_njdir); 1633 } 1634 } else { 1635 vrele(vp); 1636 } 1637 return (error); 1638 } 1639 1640 /* 1641 * chroot_args(char *path) 1642 * 1643 * Change notion of root (``/'') directory. 1644 */ 1645 /* ARGSUSED */ 1646 int 1647 sys_chroot(struct chroot_args *uap) 1648 { 1649 struct thread *td = curthread; 1650 struct nlookupdata nd; 1651 int error; 1652 1653 KKASSERT(td->td_proc); 1654 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1655 if (error) { 1656 nlookup_done(&nd); 1657 return(error); 1658 } 1659 error = nlookup(&nd); 1660 if (error == 0) 1661 error = kern_chroot(&nd.nl_nch); 1662 nlookup_done(&nd); 1663 return(error); 1664 } 1665 1666 /* 1667 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1668 * determine whether it is legal to chdir to the vnode. The vnode's state 1669 * is not changed by this call. 1670 */ 1671 int 1672 checkvp_chdir(struct vnode *vp, struct thread *td) 1673 { 1674 int error; 1675 1676 if (vp->v_type != VDIR) 1677 error = ENOTDIR; 1678 else 1679 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred); 1680 return (error); 1681 } 1682 1683 int 1684 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1685 { 1686 struct thread *td = curthread; 1687 struct proc *p = td->td_proc; 1688 struct lwp *lp = td->td_lwp; 1689 struct filedesc *fdp = p->p_fd; 1690 int cmode, flags; 1691 struct file *nfp; 1692 struct file *fp; 1693 struct vnode *vp; 1694 int type, indx, error; 1695 struct flock lf; 1696 1697 if ((oflags & O_ACCMODE) == O_ACCMODE) 1698 return (EINVAL); 1699 flags = FFLAGS(oflags); 1700 error = falloc(p, &nfp, NULL); 1701 if (error) 1702 return (error); 1703 fp = nfp; 1704 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1705 1706 /* 1707 * XXX p_dupfd is a real mess. It allows a device to return a 1708 * file descriptor to be duplicated rather then doing the open 1709 * itself. 1710 */ 1711 lp->lwp_dupfd = -1; 1712 1713 /* 1714 * Call vn_open() to do the lookup and assign the vnode to the 1715 * file pointer. vn_open() does not change the ref count on fp 1716 * and the vnode, on success, will be inherited by the file pointer 1717 * and unlocked. 1718 */ 1719 nd->nl_flags |= NLC_LOCKVP; 1720 error = vn_open(nd, fp, flags, cmode); 1721 nlookup_done(nd); 1722 if (error) { 1723 /* 1724 * handle special fdopen() case. bleh. dupfdopen() is 1725 * responsible for dropping the old contents of ofiles[indx] 1726 * if it succeeds. 1727 * 1728 * Note that fsetfd() will add a ref to fp which represents 1729 * the fd_files[] assignment. We must still drop our 1730 * reference. 1731 */ 1732 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1733 if (fdalloc(p, 0, &indx) == 0) { 1734 error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error); 1735 if (error == 0) { 1736 *res = indx; 1737 fdrop(fp); /* our ref */ 1738 return (0); 1739 } 1740 fsetfd(p, NULL, indx); 1741 } 1742 } 1743 fdrop(fp); /* our ref */ 1744 if (error == ERESTART) 1745 error = EINTR; 1746 return (error); 1747 } 1748 1749 /* 1750 * ref the vnode for ourselves so it can't be ripped out from under 1751 * is. XXX need an ND flag to request that the vnode be returned 1752 * anyway. 1753 * 1754 * Reserve a file descriptor but do not assign it until the open 1755 * succeeds. 1756 */ 1757 vp = (struct vnode *)fp->f_data; 1758 vref(vp); 1759 if ((error = fdalloc(p, 0, &indx)) != 0) { 1760 fdrop(fp); 1761 vrele(vp); 1762 return (error); 1763 } 1764 1765 /* 1766 * If no error occurs the vp will have been assigned to the file 1767 * pointer. 1768 */ 1769 lp->lwp_dupfd = 0; 1770 1771 if (flags & (O_EXLOCK | O_SHLOCK)) { 1772 lf.l_whence = SEEK_SET; 1773 lf.l_start = 0; 1774 lf.l_len = 0; 1775 if (flags & O_EXLOCK) 1776 lf.l_type = F_WRLCK; 1777 else 1778 lf.l_type = F_RDLCK; 1779 if (flags & FNONBLOCK) 1780 type = 0; 1781 else 1782 type = F_WAIT; 1783 1784 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1785 /* 1786 * lock request failed. Clean up the reserved 1787 * descriptor. 1788 */ 1789 vrele(vp); 1790 fsetfd(p, NULL, indx); 1791 fdrop(fp); 1792 return (error); 1793 } 1794 fp->f_flag |= FHASLOCK; 1795 } 1796 #if 0 1797 /* 1798 * Assert that all regular file vnodes were created with a object. 1799 */ 1800 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1801 ("open: regular file has no backing object after vn_open")); 1802 #endif 1803 1804 vrele(vp); 1805 1806 /* 1807 * release our private reference, leaving the one associated with the 1808 * descriptor table intact. 1809 */ 1810 fsetfd(p, fp, indx); 1811 fdrop(fp); 1812 *res = indx; 1813 return (0); 1814 } 1815 1816 /* 1817 * open_args(char *path, int flags, int mode) 1818 * 1819 * Check permissions, allocate an open file structure, 1820 * and call the device open routine if any. 1821 */ 1822 int 1823 sys_open(struct open_args *uap) 1824 { 1825 struct nlookupdata nd; 1826 int error; 1827 1828 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1829 if (error == 0) { 1830 error = kern_open(&nd, uap->flags, 1831 uap->mode, &uap->sysmsg_result); 1832 } 1833 nlookup_done(&nd); 1834 return (error); 1835 } 1836 1837 int 1838 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 1839 { 1840 struct thread *td = curthread; 1841 struct proc *p = td->td_proc; 1842 struct vnode *vp; 1843 struct vattr vattr; 1844 int error; 1845 int whiteout = 0; 1846 1847 KKASSERT(p); 1848 1849 switch (mode & S_IFMT) { 1850 case S_IFCHR: 1851 case S_IFBLK: 1852 error = priv_check(td, PRIV_ROOT); 1853 break; 1854 default: 1855 error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT); 1856 break; 1857 } 1858 if (error) 1859 return (error); 1860 1861 bwillinode(1); 1862 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 1863 if ((error = nlookup(nd)) != 0) 1864 return (error); 1865 if (nd->nl_nch.ncp->nc_vp) 1866 return (EEXIST); 1867 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1868 return (error); 1869 1870 VATTR_NULL(&vattr); 1871 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1872 vattr.va_rmajor = rmajor; 1873 vattr.va_rminor = rminor; 1874 whiteout = 0; 1875 1876 switch (mode & S_IFMT) { 1877 case S_IFMT: /* used by badsect to flag bad sectors */ 1878 vattr.va_type = VBAD; 1879 break; 1880 case S_IFCHR: 1881 vattr.va_type = VCHR; 1882 break; 1883 case S_IFBLK: 1884 vattr.va_type = VBLK; 1885 break; 1886 case S_IFWHT: 1887 whiteout = 1; 1888 break; 1889 case S_IFDIR: 1890 /* special directories support for HAMMER */ 1891 vattr.va_type = VDIR; 1892 break; 1893 default: 1894 error = EINVAL; 1895 break; 1896 } 1897 if (error == 0) { 1898 if (whiteout) { 1899 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 1900 nd->nl_cred, NAMEI_CREATE); 1901 } else { 1902 vp = NULL; 1903 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 1904 &vp, nd->nl_cred, &vattr); 1905 if (error == 0) 1906 vput(vp); 1907 } 1908 } 1909 return (error); 1910 } 1911 1912 /* 1913 * mknod_args(char *path, int mode, int dev) 1914 * 1915 * Create a special file. 1916 */ 1917 int 1918 sys_mknod(struct mknod_args *uap) 1919 { 1920 struct nlookupdata nd; 1921 int error; 1922 1923 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1924 if (error == 0) { 1925 error = kern_mknod(&nd, uap->mode, 1926 umajor(uap->dev), uminor(uap->dev)); 1927 } 1928 nlookup_done(&nd); 1929 return (error); 1930 } 1931 1932 int 1933 kern_mkfifo(struct nlookupdata *nd, int mode) 1934 { 1935 struct thread *td = curthread; 1936 struct proc *p = td->td_proc; 1937 struct vattr vattr; 1938 struct vnode *vp; 1939 int error; 1940 1941 bwillinode(1); 1942 1943 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 1944 if ((error = nlookup(nd)) != 0) 1945 return (error); 1946 if (nd->nl_nch.ncp->nc_vp) 1947 return (EEXIST); 1948 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1949 return (error); 1950 1951 VATTR_NULL(&vattr); 1952 vattr.va_type = VFIFO; 1953 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1954 vp = NULL; 1955 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 1956 if (error == 0) 1957 vput(vp); 1958 return (error); 1959 } 1960 1961 /* 1962 * mkfifo_args(char *path, int mode) 1963 * 1964 * Create a named pipe. 1965 */ 1966 int 1967 sys_mkfifo(struct mkfifo_args *uap) 1968 { 1969 struct nlookupdata nd; 1970 int error; 1971 1972 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1973 if (error == 0) 1974 error = kern_mkfifo(&nd, uap->mode); 1975 nlookup_done(&nd); 1976 return (error); 1977 } 1978 1979 static int hardlink_check_uid = 0; 1980 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1981 &hardlink_check_uid, 0, 1982 "Unprivileged processes cannot create hard links to files owned by other " 1983 "users"); 1984 static int hardlink_check_gid = 0; 1985 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1986 &hardlink_check_gid, 0, 1987 "Unprivileged processes cannot create hard links to files owned by other " 1988 "groups"); 1989 1990 static int 1991 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 1992 { 1993 struct vattr va; 1994 int error; 1995 1996 /* 1997 * Shortcut if disabled 1998 */ 1999 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2000 return (0); 2001 2002 /* 2003 * root cred can always hardlink 2004 */ 2005 if (priv_check_cred(cred, PRIV_ROOT, PRISON_ROOT) == 0) 2006 return (0); 2007 2008 /* 2009 * Otherwise only if the originating file is owned by the 2010 * same user or group. Note that any group is allowed if 2011 * the file is owned by the caller. 2012 */ 2013 error = VOP_GETATTR(vp, &va); 2014 if (error != 0) 2015 return (error); 2016 2017 if (hardlink_check_uid) { 2018 if (cred->cr_uid != va.va_uid) 2019 return (EPERM); 2020 } 2021 2022 if (hardlink_check_gid) { 2023 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2024 return (EPERM); 2025 } 2026 2027 return (0); 2028 } 2029 2030 int 2031 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2032 { 2033 struct thread *td = curthread; 2034 struct vnode *vp; 2035 int error; 2036 2037 /* 2038 * Lookup the source and obtained a locked vnode. 2039 * 2040 * XXX relookup on vget failure / race ? 2041 */ 2042 bwillinode(1); 2043 if ((error = nlookup(nd)) != 0) 2044 return (error); 2045 vp = nd->nl_nch.ncp->nc_vp; 2046 KKASSERT(vp != NULL); 2047 if (vp->v_type == VDIR) 2048 return (EPERM); /* POSIX */ 2049 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2050 return (error); 2051 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2052 return (error); 2053 2054 /* 2055 * Unlock the source so we can lookup the target without deadlocking 2056 * (XXX vp is locked already, possible other deadlock?). The target 2057 * must not exist. 2058 */ 2059 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2060 nd->nl_flags &= ~NLC_NCPISLOCKED; 2061 cache_unlock(&nd->nl_nch); 2062 2063 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2064 if ((error = nlookup(linknd)) != 0) { 2065 vput(vp); 2066 return (error); 2067 } 2068 if (linknd->nl_nch.ncp->nc_vp) { 2069 vput(vp); 2070 return (EEXIST); 2071 } 2072 2073 /* 2074 * Finally run the new API VOP. 2075 */ 2076 error = can_hardlink(vp, td, td->td_proc->p_ucred); 2077 if (error == 0) { 2078 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2079 vp, linknd->nl_cred); 2080 } 2081 vput(vp); 2082 return (error); 2083 } 2084 2085 /* 2086 * link_args(char *path, char *link) 2087 * 2088 * Make a hard file link. 2089 */ 2090 int 2091 sys_link(struct link_args *uap) 2092 { 2093 struct nlookupdata nd, linknd; 2094 int error; 2095 2096 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2097 if (error == 0) { 2098 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2099 if (error == 0) 2100 error = kern_link(&nd, &linknd); 2101 nlookup_done(&linknd); 2102 } 2103 nlookup_done(&nd); 2104 return (error); 2105 } 2106 2107 int 2108 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2109 { 2110 struct vattr vattr; 2111 struct vnode *vp; 2112 struct vnode *dvp; 2113 int error; 2114 2115 bwillinode(1); 2116 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2117 if ((error = nlookup(nd)) != 0) 2118 return (error); 2119 if (nd->nl_nch.ncp->nc_vp) 2120 return (EEXIST); 2121 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2122 return (error); 2123 dvp = nd->nl_dvp; 2124 VATTR_NULL(&vattr); 2125 vattr.va_mode = mode; 2126 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2127 if (error == 0) 2128 vput(vp); 2129 return (error); 2130 } 2131 2132 /* 2133 * symlink(char *path, char *link) 2134 * 2135 * Make a symbolic link. 2136 */ 2137 int 2138 sys_symlink(struct symlink_args *uap) 2139 { 2140 struct thread *td = curthread; 2141 struct nlookupdata nd; 2142 char *path; 2143 int error; 2144 int mode; 2145 2146 path = objcache_get(namei_oc, M_WAITOK); 2147 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2148 if (error == 0) { 2149 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2150 if (error == 0) { 2151 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2152 error = kern_symlink(&nd, path, mode); 2153 } 2154 nlookup_done(&nd); 2155 } 2156 objcache_put(namei_oc, path); 2157 return (error); 2158 } 2159 2160 /* 2161 * undelete_args(char *path) 2162 * 2163 * Delete a whiteout from the filesystem. 2164 */ 2165 /* ARGSUSED */ 2166 int 2167 sys_undelete(struct undelete_args *uap) 2168 { 2169 struct nlookupdata nd; 2170 int error; 2171 2172 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2173 bwillinode(1); 2174 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2175 if (error == 0) 2176 error = nlookup(&nd); 2177 if (error == 0) 2178 error = ncp_writechk(&nd.nl_nch); 2179 if (error == 0) { 2180 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2181 NAMEI_DELETE); 2182 } 2183 nlookup_done(&nd); 2184 return (error); 2185 } 2186 2187 int 2188 kern_unlink(struct nlookupdata *nd) 2189 { 2190 int error; 2191 2192 bwillinode(1); 2193 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2194 if ((error = nlookup(nd)) != 0) 2195 return (error); 2196 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2197 return (error); 2198 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2199 return (error); 2200 } 2201 2202 /* 2203 * unlink_args(char *path) 2204 * 2205 * Delete a name from the filesystem. 2206 */ 2207 int 2208 sys_unlink(struct unlink_args *uap) 2209 { 2210 struct nlookupdata nd; 2211 int error; 2212 2213 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2214 if (error == 0) 2215 error = kern_unlink(&nd); 2216 nlookup_done(&nd); 2217 return (error); 2218 } 2219 2220 int 2221 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2222 { 2223 struct thread *td = curthread; 2224 struct proc *p = td->td_proc; 2225 struct file *fp; 2226 struct vnode *vp; 2227 struct vattr vattr; 2228 off_t new_offset; 2229 int error; 2230 2231 fp = holdfp(p->p_fd, fd, -1); 2232 if (fp == NULL) 2233 return (EBADF); 2234 if (fp->f_type != DTYPE_VNODE) { 2235 error = ESPIPE; 2236 goto done; 2237 } 2238 vp = (struct vnode *)fp->f_data; 2239 2240 switch (whence) { 2241 case L_INCR: 2242 new_offset = fp->f_offset + offset; 2243 error = 0; 2244 break; 2245 case L_XTND: 2246 error = VOP_GETATTR(vp, &vattr); 2247 new_offset = offset + vattr.va_size; 2248 break; 2249 case L_SET: 2250 new_offset = offset; 2251 error = 0; 2252 break; 2253 default: 2254 new_offset = 0; 2255 error = EINVAL; 2256 break; 2257 } 2258 2259 /* 2260 * Validate the seek position. Negative offsets are not allowed 2261 * for regular files, block specials, or directories. 2262 */ 2263 if (error == 0) { 2264 if (new_offset < 0 && 2265 (vp->v_type == VREG || vp->v_type == VDIR || 2266 vp->v_type == VCHR || vp->v_type == VBLK)) { 2267 error = EINVAL; 2268 } else { 2269 fp->f_offset = new_offset; 2270 } 2271 } 2272 *res = fp->f_offset; 2273 done: 2274 fdrop(fp); 2275 return (error); 2276 } 2277 2278 /* 2279 * lseek_args(int fd, int pad, off_t offset, int whence) 2280 * 2281 * Reposition read/write file offset. 2282 */ 2283 int 2284 sys_lseek(struct lseek_args *uap) 2285 { 2286 int error; 2287 2288 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2289 &uap->sysmsg_offset); 2290 2291 return (error); 2292 } 2293 2294 int 2295 kern_access(struct nlookupdata *nd, int aflags) 2296 { 2297 struct vnode *vp; 2298 int error, flags; 2299 2300 if ((error = nlookup(nd)) != 0) 2301 return (error); 2302 retry: 2303 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2304 if (error) 2305 return (error); 2306 2307 /* Flags == 0 means only check for existence. */ 2308 if (aflags) { 2309 flags = 0; 2310 if (aflags & R_OK) 2311 flags |= VREAD; 2312 if (aflags & W_OK) 2313 flags |= VWRITE; 2314 if (aflags & X_OK) 2315 flags |= VEXEC; 2316 if ((flags & VWRITE) == 0 || 2317 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2318 error = VOP_ACCESS(vp, flags, nd->nl_cred); 2319 2320 /* 2321 * If the file handle is stale we have to re-resolve the 2322 * entry. This is a hack at the moment. 2323 */ 2324 if (error == ESTALE) { 2325 vput(vp); 2326 cache_setunresolved(&nd->nl_nch); 2327 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2328 if (error == 0) { 2329 vp = NULL; 2330 goto retry; 2331 } 2332 return(error); 2333 } 2334 } 2335 vput(vp); 2336 return (error); 2337 } 2338 2339 /* 2340 * access_args(char *path, int flags) 2341 * 2342 * Check access permissions. 2343 */ 2344 int 2345 sys_access(struct access_args *uap) 2346 { 2347 struct nlookupdata nd; 2348 int error; 2349 2350 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2351 if (error == 0) 2352 error = kern_access(&nd, uap->flags); 2353 nlookup_done(&nd); 2354 return (error); 2355 } 2356 2357 int 2358 kern_stat(struct nlookupdata *nd, struct stat *st) 2359 { 2360 int error; 2361 struct vnode *vp; 2362 thread_t td; 2363 2364 if ((error = nlookup(nd)) != 0) 2365 return (error); 2366 again: 2367 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2368 return (ENOENT); 2369 2370 td = curthread; 2371 if ((error = vget(vp, LK_SHARED)) != 0) 2372 return (error); 2373 error = vn_stat(vp, st, nd->nl_cred); 2374 2375 /* 2376 * If the file handle is stale we have to re-resolve the entry. This 2377 * is a hack at the moment. 2378 */ 2379 if (error == ESTALE) { 2380 vput(vp); 2381 cache_setunresolved(&nd->nl_nch); 2382 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2383 if (error == 0) 2384 goto again; 2385 } else { 2386 vput(vp); 2387 } 2388 return (error); 2389 } 2390 2391 /* 2392 * stat_args(char *path, struct stat *ub) 2393 * 2394 * Get file status; this version follows links. 2395 */ 2396 int 2397 sys_stat(struct stat_args *uap) 2398 { 2399 struct nlookupdata nd; 2400 struct stat st; 2401 int error; 2402 2403 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2404 if (error == 0) { 2405 error = kern_stat(&nd, &st); 2406 if (error == 0) 2407 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2408 } 2409 nlookup_done(&nd); 2410 return (error); 2411 } 2412 2413 /* 2414 * lstat_args(char *path, struct stat *ub) 2415 * 2416 * Get file status; this version does not follow links. 2417 */ 2418 int 2419 sys_lstat(struct lstat_args *uap) 2420 { 2421 struct nlookupdata nd; 2422 struct stat st; 2423 int error; 2424 2425 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2426 if (error == 0) { 2427 error = kern_stat(&nd, &st); 2428 if (error == 0) 2429 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2430 } 2431 nlookup_done(&nd); 2432 return (error); 2433 } 2434 2435 /* 2436 * pathconf_Args(char *path, int name) 2437 * 2438 * Get configurable pathname variables. 2439 */ 2440 /* ARGSUSED */ 2441 int 2442 sys_pathconf(struct pathconf_args *uap) 2443 { 2444 struct nlookupdata nd; 2445 struct vnode *vp; 2446 int error; 2447 2448 vp = NULL; 2449 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2450 if (error == 0) 2451 error = nlookup(&nd); 2452 if (error == 0) 2453 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2454 nlookup_done(&nd); 2455 if (error == 0) { 2456 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 2457 vput(vp); 2458 } 2459 return (error); 2460 } 2461 2462 /* 2463 * XXX: daver 2464 * kern_readlink isn't properly split yet. There is a copyin burried 2465 * in VOP_READLINK(). 2466 */ 2467 int 2468 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2469 { 2470 struct thread *td = curthread; 2471 struct proc *p = td->td_proc; 2472 struct vnode *vp; 2473 struct iovec aiov; 2474 struct uio auio; 2475 int error; 2476 2477 if ((error = nlookup(nd)) != 0) 2478 return (error); 2479 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2480 if (error) 2481 return (error); 2482 if (vp->v_type != VLNK) { 2483 error = EINVAL; 2484 } else { 2485 aiov.iov_base = buf; 2486 aiov.iov_len = count; 2487 auio.uio_iov = &aiov; 2488 auio.uio_iovcnt = 1; 2489 auio.uio_offset = 0; 2490 auio.uio_rw = UIO_READ; 2491 auio.uio_segflg = UIO_USERSPACE; 2492 auio.uio_td = td; 2493 auio.uio_resid = count; 2494 error = VOP_READLINK(vp, &auio, p->p_ucred); 2495 } 2496 vput(vp); 2497 *res = count - auio.uio_resid; 2498 return (error); 2499 } 2500 2501 /* 2502 * readlink_args(char *path, char *buf, int count) 2503 * 2504 * Return target name of a symbolic link. 2505 */ 2506 int 2507 sys_readlink(struct readlink_args *uap) 2508 { 2509 struct nlookupdata nd; 2510 int error; 2511 2512 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2513 if (error == 0) { 2514 error = kern_readlink(&nd, uap->buf, uap->count, 2515 &uap->sysmsg_result); 2516 } 2517 nlookup_done(&nd); 2518 return (error); 2519 } 2520 2521 static int 2522 setfflags(struct vnode *vp, int flags) 2523 { 2524 struct thread *td = curthread; 2525 struct proc *p = td->td_proc; 2526 int error; 2527 struct vattr vattr; 2528 2529 /* 2530 * Prevent non-root users from setting flags on devices. When 2531 * a device is reused, users can retain ownership of the device 2532 * if they are allowed to set flags and programs assume that 2533 * chown can't fail when done as root. 2534 */ 2535 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2536 ((error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT)) != 0)) 2537 return (error); 2538 2539 /* 2540 * note: vget is required for any operation that might mod the vnode 2541 * so VINACTIVE is properly cleared. 2542 */ 2543 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2544 VATTR_NULL(&vattr); 2545 vattr.va_flags = flags; 2546 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2547 vput(vp); 2548 } 2549 return (error); 2550 } 2551 2552 /* 2553 * chflags(char *path, int flags) 2554 * 2555 * Change flags of a file given a path name. 2556 */ 2557 /* ARGSUSED */ 2558 int 2559 sys_chflags(struct chflags_args *uap) 2560 { 2561 struct nlookupdata nd; 2562 struct vnode *vp; 2563 int error; 2564 2565 vp = NULL; 2566 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2567 /* XXX Add NLC flag indicating modifying operation? */ 2568 if (error == 0) 2569 error = nlookup(&nd); 2570 if (error == 0) 2571 error = ncp_writechk(&nd.nl_nch); 2572 if (error == 0) 2573 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2574 nlookup_done(&nd); 2575 if (error == 0) { 2576 error = setfflags(vp, uap->flags); 2577 vrele(vp); 2578 } 2579 return (error); 2580 } 2581 2582 /* 2583 * lchflags(char *path, int flags) 2584 * 2585 * Change flags of a file given a path name, but don't follow symlinks. 2586 */ 2587 /* ARGSUSED */ 2588 int 2589 sys_lchflags(struct lchflags_args *uap) 2590 { 2591 struct nlookupdata nd; 2592 struct vnode *vp; 2593 int error; 2594 2595 vp = NULL; 2596 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2597 /* XXX Add NLC flag indicating modifying operation? */ 2598 if (error == 0) 2599 error = nlookup(&nd); 2600 if (error == 0) 2601 error = ncp_writechk(&nd.nl_nch); 2602 if (error == 0) 2603 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2604 nlookup_done(&nd); 2605 if (error == 0) { 2606 error = setfflags(vp, uap->flags); 2607 vrele(vp); 2608 } 2609 return (error); 2610 } 2611 2612 /* 2613 * fchflags_args(int fd, int flags) 2614 * 2615 * Change flags of a file given a file descriptor. 2616 */ 2617 /* ARGSUSED */ 2618 int 2619 sys_fchflags(struct fchflags_args *uap) 2620 { 2621 struct thread *td = curthread; 2622 struct proc *p = td->td_proc; 2623 struct file *fp; 2624 int error; 2625 2626 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2627 return (error); 2628 if (fp->f_nchandle.ncp) 2629 error = ncp_writechk(&fp->f_nchandle); 2630 if (error == 0) 2631 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2632 fdrop(fp); 2633 return (error); 2634 } 2635 2636 static int 2637 setfmode(struct vnode *vp, int mode) 2638 { 2639 struct thread *td = curthread; 2640 struct proc *p = td->td_proc; 2641 int error; 2642 struct vattr vattr; 2643 2644 /* 2645 * note: vget is required for any operation that might mod the vnode 2646 * so VINACTIVE is properly cleared. 2647 */ 2648 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2649 VATTR_NULL(&vattr); 2650 vattr.va_mode = mode & ALLPERMS; 2651 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2652 vput(vp); 2653 } 2654 return error; 2655 } 2656 2657 int 2658 kern_chmod(struct nlookupdata *nd, int mode) 2659 { 2660 struct vnode *vp; 2661 int error; 2662 2663 /* XXX Add NLC flag indicating modifying operation? */ 2664 if ((error = nlookup(nd)) != 0) 2665 return (error); 2666 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2667 return (error); 2668 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2669 error = setfmode(vp, mode); 2670 vrele(vp); 2671 return (error); 2672 } 2673 2674 /* 2675 * chmod_args(char *path, int mode) 2676 * 2677 * Change mode of a file given path name. 2678 */ 2679 /* ARGSUSED */ 2680 int 2681 sys_chmod(struct chmod_args *uap) 2682 { 2683 struct nlookupdata nd; 2684 int error; 2685 2686 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2687 if (error == 0) 2688 error = kern_chmod(&nd, uap->mode); 2689 nlookup_done(&nd); 2690 return (error); 2691 } 2692 2693 /* 2694 * lchmod_args(char *path, int mode) 2695 * 2696 * Change mode of a file given path name (don't follow links.) 2697 */ 2698 /* ARGSUSED */ 2699 int 2700 sys_lchmod(struct lchmod_args *uap) 2701 { 2702 struct nlookupdata nd; 2703 int error; 2704 2705 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2706 if (error == 0) 2707 error = kern_chmod(&nd, uap->mode); 2708 nlookup_done(&nd); 2709 return (error); 2710 } 2711 2712 /* 2713 * fchmod_args(int fd, int mode) 2714 * 2715 * Change mode of a file given a file descriptor. 2716 */ 2717 /* ARGSUSED */ 2718 int 2719 sys_fchmod(struct fchmod_args *uap) 2720 { 2721 struct thread *td = curthread; 2722 struct proc *p = td->td_proc; 2723 struct file *fp; 2724 int error; 2725 2726 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2727 return (error); 2728 if (fp->f_nchandle.ncp) 2729 error = ncp_writechk(&fp->f_nchandle); 2730 if (error == 0) 2731 error = setfmode((struct vnode *)fp->f_data, uap->mode); 2732 fdrop(fp); 2733 return (error); 2734 } 2735 2736 static int 2737 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2738 { 2739 struct thread *td = curthread; 2740 struct proc *p = td->td_proc; 2741 int error; 2742 struct vattr vattr; 2743 2744 /* 2745 * note: vget is required for any operation that might mod the vnode 2746 * so VINACTIVE is properly cleared. 2747 */ 2748 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2749 VATTR_NULL(&vattr); 2750 vattr.va_uid = uid; 2751 vattr.va_gid = gid; 2752 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2753 vput(vp); 2754 } 2755 return error; 2756 } 2757 2758 int 2759 kern_chown(struct nlookupdata *nd, int uid, int gid) 2760 { 2761 struct vnode *vp; 2762 int error; 2763 2764 /* XXX Add NLC flag indicating modifying operation? */ 2765 if ((error = nlookup(nd)) != 0) 2766 return (error); 2767 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2768 return (error); 2769 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2770 error = setfown(vp, uid, gid); 2771 vrele(vp); 2772 return (error); 2773 } 2774 2775 /* 2776 * chown(char *path, int uid, int gid) 2777 * 2778 * Set ownership given a path name. 2779 */ 2780 int 2781 sys_chown(struct chown_args *uap) 2782 { 2783 struct nlookupdata nd; 2784 int error; 2785 2786 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2787 if (error == 0) 2788 error = kern_chown(&nd, uap->uid, uap->gid); 2789 nlookup_done(&nd); 2790 return (error); 2791 } 2792 2793 /* 2794 * lchown_args(char *path, int uid, int gid) 2795 * 2796 * Set ownership given a path name, do not cross symlinks. 2797 */ 2798 int 2799 sys_lchown(struct lchown_args *uap) 2800 { 2801 struct nlookupdata nd; 2802 int error; 2803 2804 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2805 if (error == 0) 2806 error = kern_chown(&nd, uap->uid, uap->gid); 2807 nlookup_done(&nd); 2808 return (error); 2809 } 2810 2811 /* 2812 * fchown_args(int fd, int uid, int gid) 2813 * 2814 * Set ownership given a file descriptor. 2815 */ 2816 /* ARGSUSED */ 2817 int 2818 sys_fchown(struct fchown_args *uap) 2819 { 2820 struct thread *td = curthread; 2821 struct proc *p = td->td_proc; 2822 struct file *fp; 2823 int error; 2824 2825 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2826 return (error); 2827 if (fp->f_nchandle.ncp) 2828 error = ncp_writechk(&fp->f_nchandle); 2829 if (error == 0) 2830 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 2831 fdrop(fp); 2832 return (error); 2833 } 2834 2835 static int 2836 getutimes(const struct timeval *tvp, struct timespec *tsp) 2837 { 2838 struct timeval tv[2]; 2839 2840 if (tvp == NULL) { 2841 microtime(&tv[0]); 2842 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2843 tsp[1] = tsp[0]; 2844 } else { 2845 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2846 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2847 } 2848 return 0; 2849 } 2850 2851 static int 2852 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2853 { 2854 struct thread *td = curthread; 2855 struct proc *p = td->td_proc; 2856 int error; 2857 struct vattr vattr; 2858 2859 /* 2860 * note: vget is required for any operation that might mod the vnode 2861 * so VINACTIVE is properly cleared. 2862 */ 2863 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2864 VATTR_NULL(&vattr); 2865 vattr.va_atime = ts[0]; 2866 vattr.va_mtime = ts[1]; 2867 if (nullflag) 2868 vattr.va_vaflags |= VA_UTIMES_NULL; 2869 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2870 vput(vp); 2871 } 2872 return error; 2873 } 2874 2875 int 2876 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2877 { 2878 struct timespec ts[2]; 2879 struct vnode *vp; 2880 int error; 2881 2882 if ((error = getutimes(tptr, ts)) != 0) 2883 return (error); 2884 /* XXX Add NLC flag indicating modifying operation? */ 2885 if ((error = nlookup(nd)) != 0) 2886 return (error); 2887 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2888 return (error); 2889 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2890 return (error); 2891 error = setutimes(vp, ts, tptr == NULL); 2892 vrele(vp); 2893 return (error); 2894 } 2895 2896 /* 2897 * utimes_args(char *path, struct timeval *tptr) 2898 * 2899 * Set the access and modification times of a file. 2900 */ 2901 int 2902 sys_utimes(struct utimes_args *uap) 2903 { 2904 struct timeval tv[2]; 2905 struct nlookupdata nd; 2906 int error; 2907 2908 if (uap->tptr) { 2909 error = copyin(uap->tptr, tv, sizeof(tv)); 2910 if (error) 2911 return (error); 2912 } 2913 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2914 if (error == 0) 2915 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2916 nlookup_done(&nd); 2917 return (error); 2918 } 2919 2920 /* 2921 * lutimes_args(char *path, struct timeval *tptr) 2922 * 2923 * Set the access and modification times of a file. 2924 */ 2925 int 2926 sys_lutimes(struct lutimes_args *uap) 2927 { 2928 struct timeval tv[2]; 2929 struct nlookupdata nd; 2930 int error; 2931 2932 if (uap->tptr) { 2933 error = copyin(uap->tptr, tv, sizeof(tv)); 2934 if (error) 2935 return (error); 2936 } 2937 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2938 if (error == 0) 2939 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2940 nlookup_done(&nd); 2941 return (error); 2942 } 2943 2944 int 2945 kern_futimes(int fd, struct timeval *tptr) 2946 { 2947 struct thread *td = curthread; 2948 struct proc *p = td->td_proc; 2949 struct timespec ts[2]; 2950 struct file *fp; 2951 int error; 2952 2953 error = getutimes(tptr, ts); 2954 if (error) 2955 return (error); 2956 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 2957 return (error); 2958 if (fp->f_nchandle.ncp) 2959 error = ncp_writechk(&fp->f_nchandle); 2960 if (error == 0) 2961 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2962 fdrop(fp); 2963 return (error); 2964 } 2965 2966 /* 2967 * futimes_args(int fd, struct timeval *tptr) 2968 * 2969 * Set the access and modification times of a file. 2970 */ 2971 int 2972 sys_futimes(struct futimes_args *uap) 2973 { 2974 struct timeval tv[2]; 2975 int error; 2976 2977 if (uap->tptr) { 2978 error = copyin(uap->tptr, tv, sizeof(tv)); 2979 if (error) 2980 return (error); 2981 } 2982 2983 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2984 2985 return (error); 2986 } 2987 2988 int 2989 kern_truncate(struct nlookupdata *nd, off_t length) 2990 { 2991 struct vnode *vp; 2992 struct vattr vattr; 2993 int error; 2994 2995 if (length < 0) 2996 return(EINVAL); 2997 /* XXX Add NLC flag indicating modifying operation? */ 2998 if ((error = nlookup(nd)) != 0) 2999 return (error); 3000 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3001 return (error); 3002 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3003 return (error); 3004 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 3005 vrele(vp); 3006 return (error); 3007 } 3008 if (vp->v_type == VDIR) { 3009 error = EISDIR; 3010 } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 && 3011 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) { 3012 VATTR_NULL(&vattr); 3013 vattr.va_size = length; 3014 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3015 } 3016 vput(vp); 3017 return (error); 3018 } 3019 3020 /* 3021 * truncate(char *path, int pad, off_t length) 3022 * 3023 * Truncate a file given its path name. 3024 */ 3025 int 3026 sys_truncate(struct truncate_args *uap) 3027 { 3028 struct nlookupdata nd; 3029 int error; 3030 3031 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3032 if (error == 0) 3033 error = kern_truncate(&nd, uap->length); 3034 nlookup_done(&nd); 3035 return error; 3036 } 3037 3038 int 3039 kern_ftruncate(int fd, off_t length) 3040 { 3041 struct thread *td = curthread; 3042 struct proc *p = td->td_proc; 3043 struct vattr vattr; 3044 struct vnode *vp; 3045 struct file *fp; 3046 int error; 3047 3048 if (length < 0) 3049 return(EINVAL); 3050 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3051 return (error); 3052 if (fp->f_nchandle.ncp) { 3053 error = ncp_writechk(&fp->f_nchandle); 3054 if (error) 3055 goto done; 3056 } 3057 if ((fp->f_flag & FWRITE) == 0) { 3058 error = EINVAL; 3059 goto done; 3060 } 3061 vp = (struct vnode *)fp->f_data; 3062 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3063 if (vp->v_type == VDIR) { 3064 error = EISDIR; 3065 } else if ((error = vn_writechk(vp, NULL)) == 0) { 3066 VATTR_NULL(&vattr); 3067 vattr.va_size = length; 3068 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3069 } 3070 vn_unlock(vp); 3071 done: 3072 fdrop(fp); 3073 return (error); 3074 } 3075 3076 /* 3077 * ftruncate_args(int fd, int pad, off_t length) 3078 * 3079 * Truncate a file given a file descriptor. 3080 */ 3081 int 3082 sys_ftruncate(struct ftruncate_args *uap) 3083 { 3084 int error; 3085 3086 error = kern_ftruncate(uap->fd, uap->length); 3087 3088 return (error); 3089 } 3090 3091 /* 3092 * fsync(int fd) 3093 * 3094 * Sync an open file. 3095 */ 3096 /* ARGSUSED */ 3097 int 3098 sys_fsync(struct fsync_args *uap) 3099 { 3100 struct thread *td = curthread; 3101 struct proc *p = td->td_proc; 3102 struct vnode *vp; 3103 struct file *fp; 3104 vm_object_t obj; 3105 int error; 3106 3107 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3108 return (error); 3109 vp = (struct vnode *)fp->f_data; 3110 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3111 if ((obj = vp->v_object) != NULL) 3112 vm_object_page_clean(obj, 0, 0, 0); 3113 if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && vp->v_mount) 3114 error = buf_fsync(vp); 3115 vn_unlock(vp); 3116 fdrop(fp); 3117 return (error); 3118 } 3119 3120 int 3121 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3122 { 3123 struct nchandle fnchd; 3124 struct nchandle tnchd; 3125 struct namecache *ncp; 3126 struct vnode *fdvp; 3127 struct vnode *tdvp; 3128 struct mount *mp; 3129 int error; 3130 3131 bwillinode(1); 3132 fromnd->nl_flags |= NLC_REFDVP; 3133 if ((error = nlookup(fromnd)) != 0) 3134 return (error); 3135 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3136 return (ENOENT); 3137 fnchd.mount = fromnd->nl_nch.mount; 3138 cache_hold(&fnchd); 3139 3140 /* 3141 * unlock the source nch so we can lookup the target nch without 3142 * deadlocking. The target may or may not exist so we do not check 3143 * for a target vp like kern_mkdir() and other creation functions do. 3144 * 3145 * The source and target directories are ref'd and rechecked after 3146 * everything is relocked to determine if the source or target file 3147 * has been renamed. 3148 */ 3149 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3150 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3151 cache_unlock(&fromnd->nl_nch); 3152 3153 tond->nl_flags |= NLC_CREATE | NLC_REFDVP; 3154 if ((error = nlookup(tond)) != 0) { 3155 cache_drop(&fnchd); 3156 return (error); 3157 } 3158 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3159 cache_drop(&fnchd); 3160 return (ENOENT); 3161 } 3162 tnchd.mount = tond->nl_nch.mount; 3163 cache_hold(&tnchd); 3164 3165 /* 3166 * If the source and target are the same there is nothing to do 3167 */ 3168 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3169 cache_drop(&fnchd); 3170 cache_drop(&tnchd); 3171 return (0); 3172 } 3173 3174 /* 3175 * Mount points cannot be renamed or overwritten 3176 */ 3177 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3178 NCF_ISMOUNTPT 3179 ) { 3180 cache_drop(&fnchd); 3181 cache_drop(&tnchd); 3182 return (EINVAL); 3183 } 3184 3185 /* 3186 * relock the source ncp. NOTE AFTER RELOCKING: the source ncp 3187 * may have become invalid while it was unlocked, nc_vp and nc_mount 3188 * could be NULL. 3189 */ 3190 if (cache_lock_nonblock(&fromnd->nl_nch) == 0) { 3191 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3192 } else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) { 3193 cache_lock(&fromnd->nl_nch); 3194 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3195 } else { 3196 cache_unlock(&tond->nl_nch); 3197 cache_lock(&fromnd->nl_nch); 3198 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3199 cache_lock(&tond->nl_nch); 3200 cache_resolve(&tond->nl_nch, tond->nl_cred); 3201 } 3202 fromnd->nl_flags |= NLC_NCPISLOCKED; 3203 3204 /* 3205 * make sure the parent directories linkages are the same 3206 */ 3207 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3208 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3209 cache_drop(&fnchd); 3210 cache_drop(&tnchd); 3211 return (ENOENT); 3212 } 3213 3214 /* 3215 * Both the source and target must be within the same filesystem and 3216 * in the same filesystem as their parent directories within the 3217 * namecache topology. 3218 * 3219 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3220 */ 3221 mp = fnchd.mount; 3222 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3223 mp != tond->nl_nch.mount) { 3224 cache_drop(&fnchd); 3225 cache_drop(&tnchd); 3226 return (EXDEV); 3227 } 3228 3229 /* 3230 * Make sure the mount point is writable 3231 */ 3232 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3233 cache_drop(&fnchd); 3234 cache_drop(&tnchd); 3235 return (error); 3236 } 3237 3238 /* 3239 * If the target exists and either the source or target is a directory, 3240 * then both must be directories. 3241 * 3242 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3243 * have become NULL. 3244 */ 3245 if (tond->nl_nch.ncp->nc_vp) { 3246 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3247 error = ENOENT; 3248 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3249 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3250 error = ENOTDIR; 3251 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3252 error = EISDIR; 3253 } 3254 } 3255 3256 /* 3257 * You cannot rename a source into itself or a subdirectory of itself. 3258 * We check this by travsersing the target directory upwards looking 3259 * for a match against the source. 3260 */ 3261 if (error == 0) { 3262 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3263 if (fromnd->nl_nch.ncp == ncp) { 3264 error = EINVAL; 3265 break; 3266 } 3267 } 3268 } 3269 3270 cache_drop(&fnchd); 3271 cache_drop(&tnchd); 3272 3273 /* 3274 * Even though the namespaces are different, they may still represent 3275 * hardlinks to the same file. The filesystem might have a hard time 3276 * with this so we issue a NREMOVE of the source instead of a NRENAME 3277 * when we detect the situation. 3278 */ 3279 if (error == 0) { 3280 fdvp = fromnd->nl_dvp; 3281 tdvp = tond->nl_dvp; 3282 if (fdvp == NULL || tdvp == NULL) { 3283 error = EPERM; 3284 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3285 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3286 fromnd->nl_cred); 3287 } else { 3288 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3289 fdvp, tdvp, tond->nl_cred); 3290 } 3291 } 3292 return (error); 3293 } 3294 3295 /* 3296 * rename_args(char *from, char *to) 3297 * 3298 * Rename files. Source and destination must either both be directories, 3299 * or both not be directories. If target is a directory, it must be empty. 3300 */ 3301 int 3302 sys_rename(struct rename_args *uap) 3303 { 3304 struct nlookupdata fromnd, tond; 3305 int error; 3306 3307 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3308 if (error == 0) { 3309 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3310 if (error == 0) 3311 error = kern_rename(&fromnd, &tond); 3312 nlookup_done(&tond); 3313 } 3314 nlookup_done(&fromnd); 3315 return (error); 3316 } 3317 3318 int 3319 kern_mkdir(struct nlookupdata *nd, int mode) 3320 { 3321 struct thread *td = curthread; 3322 struct proc *p = td->td_proc; 3323 struct vnode *vp; 3324 struct vattr vattr; 3325 int error; 3326 3327 bwillinode(1); 3328 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3329 if ((error = nlookup(nd)) != 0) 3330 return (error); 3331 3332 if (nd->nl_nch.ncp->nc_vp) 3333 return (EEXIST); 3334 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3335 return (error); 3336 VATTR_NULL(&vattr); 3337 vattr.va_type = VDIR; 3338 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3339 3340 vp = NULL; 3341 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr); 3342 if (error == 0) 3343 vput(vp); 3344 return (error); 3345 } 3346 3347 /* 3348 * mkdir_args(char *path, int mode) 3349 * 3350 * Make a directory file. 3351 */ 3352 /* ARGSUSED */ 3353 int 3354 sys_mkdir(struct mkdir_args *uap) 3355 { 3356 struct nlookupdata nd; 3357 int error; 3358 3359 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3360 if (error == 0) 3361 error = kern_mkdir(&nd, uap->mode); 3362 nlookup_done(&nd); 3363 return (error); 3364 } 3365 3366 int 3367 kern_rmdir(struct nlookupdata *nd) 3368 { 3369 int error; 3370 3371 bwillinode(1); 3372 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 3373 if ((error = nlookup(nd)) != 0) 3374 return (error); 3375 3376 /* 3377 * Do not allow directories representing mount points to be 3378 * deleted, even if empty. Check write perms on mount point 3379 * in case the vnode is aliased (aka nullfs). 3380 */ 3381 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 3382 return (EINVAL); 3383 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3384 return (error); 3385 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 3386 return (error); 3387 } 3388 3389 /* 3390 * rmdir_args(char *path) 3391 * 3392 * Remove a directory file. 3393 */ 3394 /* ARGSUSED */ 3395 int 3396 sys_rmdir(struct rmdir_args *uap) 3397 { 3398 struct nlookupdata nd; 3399 int error; 3400 3401 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3402 if (error == 0) 3403 error = kern_rmdir(&nd); 3404 nlookup_done(&nd); 3405 return (error); 3406 } 3407 3408 int 3409 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3410 enum uio_seg direction) 3411 { 3412 struct thread *td = curthread; 3413 struct proc *p = td->td_proc; 3414 struct vnode *vp; 3415 struct file *fp; 3416 struct uio auio; 3417 struct iovec aiov; 3418 off_t loff; 3419 int error, eofflag; 3420 3421 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3422 return (error); 3423 if ((fp->f_flag & FREAD) == 0) { 3424 error = EBADF; 3425 goto done; 3426 } 3427 vp = (struct vnode *)fp->f_data; 3428 unionread: 3429 if (vp->v_type != VDIR) { 3430 error = EINVAL; 3431 goto done; 3432 } 3433 aiov.iov_base = buf; 3434 aiov.iov_len = count; 3435 auio.uio_iov = &aiov; 3436 auio.uio_iovcnt = 1; 3437 auio.uio_rw = UIO_READ; 3438 auio.uio_segflg = direction; 3439 auio.uio_td = td; 3440 auio.uio_resid = count; 3441 loff = auio.uio_offset = fp->f_offset; 3442 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3443 fp->f_offset = auio.uio_offset; 3444 if (error) 3445 goto done; 3446 if (count == auio.uio_resid) { 3447 if (union_dircheckp) { 3448 error = union_dircheckp(td, &vp, fp); 3449 if (error == -1) 3450 goto unionread; 3451 if (error) 3452 goto done; 3453 } 3454 #if 0 3455 if ((vp->v_flag & VROOT) && 3456 (vp->v_mount->mnt_flag & MNT_UNION)) { 3457 struct vnode *tvp = vp; 3458 vp = vp->v_mount->mnt_vnodecovered; 3459 vref(vp); 3460 fp->f_data = vp; 3461 fp->f_offset = 0; 3462 vrele(tvp); 3463 goto unionread; 3464 } 3465 #endif 3466 } 3467 3468 /* 3469 * WARNING! *basep may not be wide enough to accomodate the 3470 * seek offset. XXX should we hack this to return the upper 32 bits 3471 * for offsets greater then 4G? 3472 */ 3473 if (basep) { 3474 *basep = (long)loff; 3475 } 3476 *res = count - auio.uio_resid; 3477 done: 3478 fdrop(fp); 3479 return (error); 3480 } 3481 3482 /* 3483 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 3484 * 3485 * Read a block of directory entries in a file system independent format. 3486 */ 3487 int 3488 sys_getdirentries(struct getdirentries_args *uap) 3489 { 3490 long base; 3491 int error; 3492 3493 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 3494 &uap->sysmsg_result, UIO_USERSPACE); 3495 3496 if (error == 0 && uap->basep) 3497 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 3498 return (error); 3499 } 3500 3501 /* 3502 * getdents_args(int fd, char *buf, size_t count) 3503 */ 3504 int 3505 sys_getdents(struct getdents_args *uap) 3506 { 3507 int error; 3508 3509 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 3510 &uap->sysmsg_result, UIO_USERSPACE); 3511 3512 return (error); 3513 } 3514 3515 /* 3516 * umask(int newmask) 3517 * 3518 * Set the mode mask for creation of filesystem nodes. 3519 * 3520 * MP SAFE 3521 */ 3522 int 3523 sys_umask(struct umask_args *uap) 3524 { 3525 struct thread *td = curthread; 3526 struct proc *p = td->td_proc; 3527 struct filedesc *fdp; 3528 3529 fdp = p->p_fd; 3530 uap->sysmsg_result = fdp->fd_cmask; 3531 fdp->fd_cmask = uap->newmask & ALLPERMS; 3532 return (0); 3533 } 3534 3535 /* 3536 * revoke(char *path) 3537 * 3538 * Void all references to file by ripping underlying filesystem 3539 * away from vnode. 3540 */ 3541 /* ARGSUSED */ 3542 int 3543 sys_revoke(struct revoke_args *uap) 3544 { 3545 struct nlookupdata nd; 3546 struct vattr vattr; 3547 struct vnode *vp; 3548 struct ucred *cred; 3549 int error; 3550 3551 vp = NULL; 3552 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3553 if (error == 0) 3554 error = nlookup(&nd); 3555 if (error == 0) 3556 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3557 cred = crhold(nd.nl_cred); 3558 nlookup_done(&nd); 3559 if (error == 0) { 3560 if (vp->v_type != VCHR && vp->v_type != VBLK) 3561 error = EINVAL; 3562 if (error == 0) 3563 error = VOP_GETATTR(vp, &vattr); 3564 if (error == 0 && cred->cr_uid != vattr.va_uid) 3565 error = priv_check_cred(cred, PRIV_ROOT, PRISON_ROOT); 3566 if (error == 0 && count_udev(vp->v_umajor, vp->v_uminor) > 0) { 3567 error = 0; 3568 vx_lock(vp); 3569 VOP_REVOKE(vp, REVOKEALL); 3570 vx_unlock(vp); 3571 } 3572 vrele(vp); 3573 } 3574 if (cred) 3575 crfree(cred); 3576 return (error); 3577 } 3578 3579 /* 3580 * getfh_args(char *fname, fhandle_t *fhp) 3581 * 3582 * Get (NFS) file handle 3583 * 3584 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 3585 * mount. This allows nullfs mounts to be explicitly exported. 3586 * 3587 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 3588 * 3589 * nullfs mounts of subdirectories are not safe. That is, it will 3590 * work, but you do not really have protection against access to 3591 * the related parent directories. 3592 */ 3593 int 3594 sys_getfh(struct getfh_args *uap) 3595 { 3596 struct thread *td = curthread; 3597 struct nlookupdata nd; 3598 fhandle_t fh; 3599 struct vnode *vp; 3600 struct mount *mp; 3601 int error; 3602 3603 /* 3604 * Must be super user 3605 */ 3606 if ((error = priv_check(td, PRIV_ROOT)) != 0) 3607 return (error); 3608 3609 vp = NULL; 3610 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 3611 if (error == 0) 3612 error = nlookup(&nd); 3613 if (error == 0) 3614 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3615 mp = nd.nl_nch.mount; 3616 nlookup_done(&nd); 3617 if (error == 0) { 3618 bzero(&fh, sizeof(fh)); 3619 fh.fh_fsid = mp->mnt_stat.f_fsid; 3620 error = VFS_VPTOFH(vp, &fh.fh_fid); 3621 vput(vp); 3622 if (error == 0) 3623 error = copyout(&fh, uap->fhp, sizeof(fh)); 3624 } 3625 return (error); 3626 } 3627 3628 /* 3629 * fhopen_args(const struct fhandle *u_fhp, int flags) 3630 * 3631 * syscall for the rpc.lockd to use to translate a NFS file handle into 3632 * an open descriptor. 3633 * 3634 * warning: do not remove the priv_check() call or this becomes one giant 3635 * security hole. 3636 */ 3637 int 3638 sys_fhopen(struct fhopen_args *uap) 3639 { 3640 struct thread *td = curthread; 3641 struct proc *p = td->td_proc; 3642 struct mount *mp; 3643 struct vnode *vp; 3644 struct fhandle fhp; 3645 struct vattr vat; 3646 struct vattr *vap = &vat; 3647 struct flock lf; 3648 int fmode, mode, error, type; 3649 struct file *nfp; 3650 struct file *fp; 3651 int indx; 3652 3653 /* 3654 * Must be super user 3655 */ 3656 error = priv_check(td, PRIV_ROOT); 3657 if (error) 3658 return (error); 3659 3660 fmode = FFLAGS(uap->flags); 3661 /* why not allow a non-read/write open for our lockd? */ 3662 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3663 return (EINVAL); 3664 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3665 if (error) 3666 return(error); 3667 /* find the mount point */ 3668 mp = vfs_getvfs(&fhp.fh_fsid); 3669 if (mp == NULL) 3670 return (ESTALE); 3671 /* now give me my vnode, it gets returned to me locked */ 3672 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 3673 if (error) 3674 return (error); 3675 /* 3676 * from now on we have to make sure not 3677 * to forget about the vnode 3678 * any error that causes an abort must vput(vp) 3679 * just set error = err and 'goto bad;'. 3680 */ 3681 3682 /* 3683 * from vn_open 3684 */ 3685 if (vp->v_type == VLNK) { 3686 error = EMLINK; 3687 goto bad; 3688 } 3689 if (vp->v_type == VSOCK) { 3690 error = EOPNOTSUPP; 3691 goto bad; 3692 } 3693 mode = 0; 3694 if (fmode & (FWRITE | O_TRUNC)) { 3695 if (vp->v_type == VDIR) { 3696 error = EISDIR; 3697 goto bad; 3698 } 3699 error = vn_writechk(vp, NULL); 3700 if (error) 3701 goto bad; 3702 mode |= VWRITE; 3703 } 3704 if (fmode & FREAD) 3705 mode |= VREAD; 3706 if (mode) { 3707 error = VOP_ACCESS(vp, mode, p->p_ucred); 3708 if (error) 3709 goto bad; 3710 } 3711 if (fmode & O_TRUNC) { 3712 vn_unlock(vp); /* XXX */ 3713 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 3714 VATTR_NULL(vap); 3715 vap->va_size = 0; 3716 error = VOP_SETATTR(vp, vap, p->p_ucred); 3717 if (error) 3718 goto bad; 3719 } 3720 3721 /* 3722 * VOP_OPEN needs the file pointer so it can potentially override 3723 * it. 3724 * 3725 * WARNING! no f_nchandle will be associated when fhopen()ing a 3726 * directory. XXX 3727 */ 3728 if ((error = falloc(p, &nfp, &indx)) != 0) 3729 goto bad; 3730 fp = nfp; 3731 3732 error = VOP_OPEN(vp, fmode, p->p_ucred, fp); 3733 if (error) { 3734 /* 3735 * setting f_ops this way prevents VOP_CLOSE from being 3736 * called or fdrop() releasing the vp from v_data. Since 3737 * the VOP_OPEN failed we don't want to VOP_CLOSE. 3738 */ 3739 fp->f_ops = &badfileops; 3740 fp->f_data = NULL; 3741 goto bad_drop; 3742 } 3743 3744 /* 3745 * The fp is given its own reference, we still have our ref and lock. 3746 * 3747 * Assert that all regular files must be created with a VM object. 3748 */ 3749 if (vp->v_type == VREG && vp->v_object == NULL) { 3750 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 3751 goto bad_drop; 3752 } 3753 3754 /* 3755 * The open was successful. Handle any locking requirements. 3756 */ 3757 if (fmode & (O_EXLOCK | O_SHLOCK)) { 3758 lf.l_whence = SEEK_SET; 3759 lf.l_start = 0; 3760 lf.l_len = 0; 3761 if (fmode & O_EXLOCK) 3762 lf.l_type = F_WRLCK; 3763 else 3764 lf.l_type = F_RDLCK; 3765 if (fmode & FNONBLOCK) 3766 type = 0; 3767 else 3768 type = F_WAIT; 3769 vn_unlock(vp); 3770 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 3771 /* 3772 * release our private reference. 3773 */ 3774 fsetfd(p, NULL, indx); 3775 fdrop(fp); 3776 vrele(vp); 3777 return (error); 3778 } 3779 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3780 fp->f_flag |= FHASLOCK; 3781 } 3782 3783 /* 3784 * Clean up. Associate the file pointer with the previously 3785 * reserved descriptor and return it. 3786 */ 3787 vput(vp); 3788 fsetfd(p, fp, indx); 3789 fdrop(fp); 3790 uap->sysmsg_result = indx; 3791 return (0); 3792 3793 bad_drop: 3794 fsetfd(p, NULL, indx); 3795 fdrop(fp); 3796 bad: 3797 vput(vp); 3798 return (error); 3799 } 3800 3801 /* 3802 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 3803 */ 3804 int 3805 sys_fhstat(struct fhstat_args *uap) 3806 { 3807 struct thread *td = curthread; 3808 struct stat sb; 3809 fhandle_t fh; 3810 struct mount *mp; 3811 struct vnode *vp; 3812 int error; 3813 3814 /* 3815 * Must be super user 3816 */ 3817 error = priv_check(td, PRIV_ROOT); 3818 if (error) 3819 return (error); 3820 3821 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 3822 if (error) 3823 return (error); 3824 3825 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3826 return (ESTALE); 3827 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 3828 return (error); 3829 error = vn_stat(vp, &sb, td->td_proc->p_ucred); 3830 vput(vp); 3831 if (error) 3832 return (error); 3833 error = copyout(&sb, uap->sb, sizeof(sb)); 3834 return (error); 3835 } 3836 3837 /* 3838 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 3839 */ 3840 int 3841 sys_fhstatfs(struct fhstatfs_args *uap) 3842 { 3843 struct thread *td = curthread; 3844 struct proc *p = td->td_proc; 3845 struct statfs *sp; 3846 struct mount *mp; 3847 struct vnode *vp; 3848 struct statfs sb; 3849 char *fullpath, *freepath; 3850 fhandle_t fh; 3851 int error; 3852 3853 /* 3854 * Must be super user 3855 */ 3856 if ((error = priv_check(td, PRIV_ROOT))) 3857 return (error); 3858 3859 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3860 return (error); 3861 3862 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3863 return (ESTALE); 3864 3865 if (p != NULL && !chroot_visible_mnt(mp, p)) 3866 return (ESTALE); 3867 3868 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 3869 return (error); 3870 mp = vp->v_mount; 3871 sp = &mp->mnt_stat; 3872 vput(vp); 3873 if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0) 3874 return (error); 3875 3876 error = mount_path(p, mp, &fullpath, &freepath); 3877 if (error) 3878 return(error); 3879 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3880 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 3881 kfree(freepath, M_TEMP); 3882 3883 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 3884 if (priv_check(td, PRIV_ROOT)) { 3885 bcopy(sp, &sb, sizeof(sb)); 3886 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 3887 sp = &sb; 3888 } 3889 return (copyout(sp, uap->buf, sizeof(*sp))); 3890 } 3891 3892 /* 3893 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 3894 */ 3895 int 3896 sys_fhstatvfs(struct fhstatvfs_args *uap) 3897 { 3898 struct thread *td = curthread; 3899 struct proc *p = td->td_proc; 3900 struct statvfs *sp; 3901 struct mount *mp; 3902 struct vnode *vp; 3903 fhandle_t fh; 3904 int error; 3905 3906 /* 3907 * Must be super user 3908 */ 3909 if ((error = priv_check(td, PRIV_ROOT))) 3910 return (error); 3911 3912 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3913 return (error); 3914 3915 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3916 return (ESTALE); 3917 3918 if (p != NULL && !chroot_visible_mnt(mp, p)) 3919 return (ESTALE); 3920 3921 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 3922 return (error); 3923 mp = vp->v_mount; 3924 sp = &mp->mnt_vstat; 3925 vput(vp); 3926 if ((error = VFS_STATVFS(mp, sp, p->p_ucred)) != 0) 3927 return (error); 3928 3929 sp->f_flag = 0; 3930 if (mp->mnt_flag & MNT_RDONLY) 3931 sp->f_flag |= ST_RDONLY; 3932 if (mp->mnt_flag & MNT_NOSUID) 3933 sp->f_flag |= ST_NOSUID; 3934 3935 return (copyout(sp, uap->buf, sizeof(*sp))); 3936 } 3937 3938 3939 /* 3940 * Syscall to push extended attribute configuration information into the 3941 * VFS. Accepts a path, which it converts to a mountpoint, as well as 3942 * a command (int cmd), and attribute name and misc data. For now, the 3943 * attribute name is left in userspace for consumption by the VFS_op. 3944 * It will probably be changed to be copied into sysspace by the 3945 * syscall in the future, once issues with various consumers of the 3946 * attribute code have raised their hands. 3947 * 3948 * Currently this is used only by UFS Extended Attributes. 3949 */ 3950 int 3951 sys_extattrctl(struct extattrctl_args *uap) 3952 { 3953 struct nlookupdata nd; 3954 struct mount *mp; 3955 struct vnode *vp; 3956 int error; 3957 3958 vp = NULL; 3959 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3960 if (error == 0) 3961 error = nlookup(&nd); 3962 if (error == 0) { 3963 mp = nd.nl_nch.mount; 3964 error = VFS_EXTATTRCTL(mp, uap->cmd, 3965 uap->attrname, uap->arg, 3966 nd.nl_cred); 3967 } 3968 nlookup_done(&nd); 3969 return (error); 3970 } 3971 3972 /* 3973 * Syscall to set a named extended attribute on a file or directory. 3974 * Accepts attribute name, and a uio structure pointing to the data to set. 3975 * The uio is consumed in the style of writev(). The real work happens 3976 * in VOP_SETEXTATTR(). 3977 */ 3978 int 3979 sys_extattr_set_file(struct extattr_set_file_args *uap) 3980 { 3981 char attrname[EXTATTR_MAXNAMELEN]; 3982 struct iovec aiov[UIO_SMALLIOV]; 3983 struct iovec *needfree; 3984 struct nlookupdata nd; 3985 struct iovec *iov; 3986 struct vnode *vp; 3987 struct uio auio; 3988 u_int iovlen; 3989 u_int cnt; 3990 int error; 3991 int i; 3992 3993 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3994 if (error) 3995 return (error); 3996 3997 vp = NULL; 3998 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3999 if (error == 0) 4000 error = nlookup(&nd); 4001 if (error == 0) 4002 error = ncp_writechk(&nd.nl_nch); 4003 if (error == 0) 4004 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4005 if (error) { 4006 nlookup_done(&nd); 4007 return (error); 4008 } 4009 4010 needfree = NULL; 4011 iovlen = uap->iovcnt * sizeof(struct iovec); 4012 if (uap->iovcnt > UIO_SMALLIOV) { 4013 if (uap->iovcnt > UIO_MAXIOV) { 4014 error = EINVAL; 4015 goto done; 4016 } 4017 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4018 needfree = iov; 4019 } else { 4020 iov = aiov; 4021 } 4022 auio.uio_iov = iov; 4023 auio.uio_iovcnt = uap->iovcnt; 4024 auio.uio_rw = UIO_WRITE; 4025 auio.uio_segflg = UIO_USERSPACE; 4026 auio.uio_td = nd.nl_td; 4027 auio.uio_offset = 0; 4028 if ((error = copyin(uap->iovp, iov, iovlen))) 4029 goto done; 4030 auio.uio_resid = 0; 4031 for (i = 0; i < uap->iovcnt; i++) { 4032 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4033 error = EINVAL; 4034 goto done; 4035 } 4036 auio.uio_resid += iov->iov_len; 4037 iov++; 4038 } 4039 cnt = auio.uio_resid; 4040 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4041 cnt -= auio.uio_resid; 4042 uap->sysmsg_result = cnt; 4043 done: 4044 vput(vp); 4045 nlookup_done(&nd); 4046 if (needfree) 4047 FREE(needfree, M_IOV); 4048 return (error); 4049 } 4050 4051 /* 4052 * Syscall to get a named extended attribute on a file or directory. 4053 * Accepts attribute name, and a uio structure pointing to a buffer for the 4054 * data. The uio is consumed in the style of readv(). The real work 4055 * happens in VOP_GETEXTATTR(); 4056 */ 4057 int 4058 sys_extattr_get_file(struct extattr_get_file_args *uap) 4059 { 4060 char attrname[EXTATTR_MAXNAMELEN]; 4061 struct iovec aiov[UIO_SMALLIOV]; 4062 struct iovec *needfree; 4063 struct nlookupdata nd; 4064 struct iovec *iov; 4065 struct vnode *vp; 4066 struct uio auio; 4067 u_int iovlen; 4068 u_int cnt; 4069 int error; 4070 int i; 4071 4072 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4073 if (error) 4074 return (error); 4075 4076 vp = NULL; 4077 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4078 if (error == 0) 4079 error = nlookup(&nd); 4080 if (error == 0) 4081 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4082 if (error) { 4083 nlookup_done(&nd); 4084 return (error); 4085 } 4086 4087 iovlen = uap->iovcnt * sizeof (struct iovec); 4088 needfree = NULL; 4089 if (uap->iovcnt > UIO_SMALLIOV) { 4090 if (uap->iovcnt > UIO_MAXIOV) { 4091 error = EINVAL; 4092 goto done; 4093 } 4094 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4095 needfree = iov; 4096 } else { 4097 iov = aiov; 4098 } 4099 auio.uio_iov = iov; 4100 auio.uio_iovcnt = uap->iovcnt; 4101 auio.uio_rw = UIO_READ; 4102 auio.uio_segflg = UIO_USERSPACE; 4103 auio.uio_td = nd.nl_td; 4104 auio.uio_offset = 0; 4105 if ((error = copyin(uap->iovp, iov, iovlen))) 4106 goto done; 4107 auio.uio_resid = 0; 4108 for (i = 0; i < uap->iovcnt; i++) { 4109 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4110 error = EINVAL; 4111 goto done; 4112 } 4113 auio.uio_resid += iov->iov_len; 4114 iov++; 4115 } 4116 cnt = auio.uio_resid; 4117 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4118 cnt -= auio.uio_resid; 4119 uap->sysmsg_result = cnt; 4120 done: 4121 vput(vp); 4122 nlookup_done(&nd); 4123 if (needfree) 4124 FREE(needfree, M_IOV); 4125 return(error); 4126 } 4127 4128 /* 4129 * Syscall to delete a named extended attribute from a file or directory. 4130 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 4131 */ 4132 int 4133 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4134 { 4135 char attrname[EXTATTR_MAXNAMELEN]; 4136 struct nlookupdata nd; 4137 struct vnode *vp; 4138 int error; 4139 4140 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4141 if (error) 4142 return(error); 4143 4144 vp = NULL; 4145 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4146 if (error == 0) 4147 error = nlookup(&nd); 4148 if (error == 0) 4149 error = ncp_writechk(&nd.nl_nch); 4150 if (error == 0) 4151 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4152 if (error) { 4153 nlookup_done(&nd); 4154 return (error); 4155 } 4156 4157 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred); 4158 vput(vp); 4159 nlookup_done(&nd); 4160 return(error); 4161 } 4162 4163 /* 4164 * Determine if the mount is visible to the process. 4165 */ 4166 static int 4167 chroot_visible_mnt(struct mount *mp, struct proc *p) 4168 { 4169 struct nchandle nch; 4170 4171 /* 4172 * Traverse from the mount point upwards. If we hit the process 4173 * root then the mount point is visible to the process. 4174 */ 4175 nch = mp->mnt_ncmountpt; 4176 while (nch.ncp) { 4177 if (nch.mount == p->p_fd->fd_nrdir.mount && 4178 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4179 return(1); 4180 } 4181 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4182 nch = nch.mount->mnt_ncmounton; 4183 } else { 4184 nch.ncp = nch.ncp->nc_parent; 4185 } 4186 } 4187 4188 /* 4189 * If the mount point is not visible to the process, but the 4190 * process root is in a subdirectory of the mount, return 4191 * TRUE anyway. 4192 */ 4193 if (p->p_fd->fd_nrdir.mount == mp) 4194 return(1); 4195 4196 return(0); 4197 } 4198 4199