1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * @(#)vfs_syscalls.c	8.13 (Berkeley) 4/15/94
 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $
 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.135 2008/11/11 00:55:49 pavalos Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/sysent.h>
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/mountctl.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/linker.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/vnode.h>
#include <sys/proc.h>
#include <sys/priv.h>
#include <sys/jail.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/dirent.h>
#include <sys/extattr.h>
#include <sys/spinlock.h>
#include <sys/kern_syscall.h>
#include <sys/objcache.h>
#include <sys/sysctl.h>

#include <sys/buf2.h>
#include <sys/file2.h>
#include <sys/spinlock2.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>

#include <machine/limits.h>
#include <machine/stdarg.h>

#include <vfs/union/union.h>

/*
 * Forward declarations for file-local helpers.  The chdir/chroot,
 * utimes, and owner/mode/flags helpers are defined later in this file.
 */
static void mount_warning(struct mount *mp, const char *ctl, ...);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (const struct timeval *, struct timespec *);
static int setfown (struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, const struct timespec *, int);

static int usermount = 0;	/* if 1, non-root can mount fs. */

/* Hook filled in by the union filesystem to vet directory reads. */
int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

/* Exposes the usermount policy knob as sysctl vfs.usermount. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * Resolves the mount-on path, performs permission/sanity checks, then
 * either updates an existing mount (MNT_UPDATE) or allocates and wires
 * up a brand new struct mount and calls the VFS's mount entry point.
 */
/*
 * mount_args(char *type, char *path, int flags, caddr_t data)
 */
/* ARGSUSED */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2 save mnt flags for rollback */
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred = p->p_ucred;

	KKASSERT(p);
	/* Jailed processes may never mount. */
	if (jailed(cred))
		return (EPERM);
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		return (error);
	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			return (error);
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this ncp so the
	 * check can be repeated after we drop and reacquire locks below.
	 * NOTE(review): the mount returned by cache_findmount() is discarded;
	 * confirm this cache_findmount() does not leave a reference behind.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch))
		hasmount = 1;
	else
		hasmount = 0;


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		return (error);
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* Updates only make sense on the root of a filesystem. */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			return (EINVAL);
		}
		mp = vp->v_mount;
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			return (EOPNOTSUPP);	/* Needs translation */
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			return (error);
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			return (EBUSY);
		}
		/* Refuse if a mount is racing us on this vnode. */
		if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			return (EBUSY);
		}
		vp->v_flag |= VMOUNT;
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		vn_unlock(vp);
		goto update;
	}
	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid && (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		return (error);
	}
	/* Flush dirty buffers on the covered vnode before covering it. */
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		return (error);
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		return (ENOTDIR);
	}
	/* Underlying filesystem forbids stacked mounts on top of it. */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		return (EPERM);
	}
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		return (error);
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			return error;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			return error;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			/* Module loaded but did not register the VFS. */
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			return (ENODEV);
		}
	}
	/* Re-check the race window now that we hold the vnode lock. */
	if ((vp->v_flag & VMOUNT) != 0 || hasmount) {
		cache_drop(&nch);
		vput(vp);
		return (EBUSY);
	}
	vp->v_flag |= VMOUNT;

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	TAILQ_INIT(&mp->mnt_nvnodelist);
	TAILQ_INIT(&mp->mnt_reservedvnlist);
	TAILQ_INIT(&mp->mnt_jlist);
	mp->mnt_nvnodelistsize = 0;
	lockinit(&mp->mnt_lock, "vfslock", 0, 0);
	vfs_busy(mp, LK_NOWAIT);	/* cannot fail: mp is not yet visible */
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	mp->mnt_iosize_max = DFLTPHYS;
	vn_unlock(vp);
update:
	/*
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* Roll the flags back to their pre-update values. */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		vfs_unbusy(mp);
		vp->v_flag &= ~VMOUNT;
		vrele(vp);	/* vp was unlocked before the goto update */
		cache_drop(&nch);
		return (error);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * allocate, then unlock, but leave the ref intact
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		mp->mnt_ncmounton = nch;		/* inherits ref */
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;

		/* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */
		vp->v_flag &= ~VMOUNT;
		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): this error value is immediately overwritten
		 * by VFS_START() below, so a syncvnode allocation failure is
		 * silently ignored -- confirm that this is intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
	} else {
		/* Mount failed: strip vops, undo refs, and free the mount. */
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		vp->v_flag &= ~VMOUNT;
		mp->mnt_vfc->vfc_refcount--;
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * The passed ncp is ref'd and locked (from the mount code) and
 * must be associated with the vnode representing the root of the
 * mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* mount-on point being covered */
	struct nchandle new_nch;	/* root of the new mount */
	struct vnode *old_vp;		/* NOTE(review): never written or read here */
	struct vnode *new_vp;		/* resolved root vnode of new mount */
};

static int checkdirs_callback(struct proc *p, void *data);

/*
 * Retarget any process cwd/root that sits on the just-covered mount
 * point to the root of the new mount.  See block comment above.
 */
static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || olddp->v_sysref.refcnt == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info);
	vput(newdp);	/* drop the ref+lock from VFS_ROOT */
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, amoung other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock_wr(&fdp->fd_spin);
		/* Retarget the current directory if it was on old_nch. */
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		/* Retarget the root directory if it was on old_nch. */
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock_wr(&fdp->fd_spin);
		/* Drop the displaced refs outside the spinlock. */
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 */
/*
 * umount_args(char *path, int flags)
 */
/* ARGSUSED */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp = NULL;
	int error;
	struct nlookupdata nd;

	KKASSERT(p);
	if (p->p_ucred->cr_prison != NULL)
		return (EPERM);
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		return (error);

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) &&
	    (error = priv_check(td, PRIV_ROOT)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

out:
	nlookup_done(&nd);
	if (error)
		return (error);
	/*
	 * NOTE(review): mp is used after nlookup_done() released the nch
	 * reference; presumably safe because dounmount() interlocks against
	 * concurrent unmounts -- confirm.
	 */
	return (dounmount(mp, uap->flags));
}

/*
 * Do the actual file system unmount.
 */
static int
dounmount_interlock(struct mount *mp)
{
	/* Claim MNTK_UNMOUNT atomically under the mountlist interlock. */
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

int
dounmount(struct mount *mp, int flags)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;	/* cleared when lingering refs forbid freeing mp */

	/*
	 * Exclusive access for unmounting purposes
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		return (error);

	/*
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* A forced unmount may sleep for the lock; otherwise fail fast. */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwords.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				/* mp must outlive the dangling refs */
				freeok = 0;
			}
		}
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 */
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d process references still "
					  "present", mp->mnt_refs);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d process references still "
					  "present", mp->mnt_refs);
			freeok = 0;
		}
	}

	/*
	 * Decomission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 */
	if (error == 0) {
		if ((vp = mp->mnt_syncer) != NULL) {
			mp->mnt_syncer = NULL;
			vrele(vp);
		}
		/* Sync first (unless read-only); force overrides sync errors. */
		if (((mp->mnt_flag & MNT_RDONLY) ||
		     (error = VFS_SYNC(mp, MNT_WAIT)) == 0) ||
		    (flags & MNT_FORCE)) {
			error = VFS_UNMOUNT(mp, flags);
		}
	}
	if (error) {
		/* Unmount failed: recommission the syncer and restore flags. */
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT)
			wakeup(mp);
		return (error);
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 */
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
	vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);

	/* Detach and drop the namecache handles anchoring the mount. */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;
	if (!TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT)
		wakeup(mp);
	if (freeok)
		kfree(mp, M_MOUNT);
	return (0);
}

/*
 * Emit a kprintf warning about mp, prefixed with its mount path when
 * cache_fullpath() can resolve one (ctl/va are printf-style).
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		/* No resolvable path; fall back to pointer + ncp name. */
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.
In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	/* Prefer the process root when it lives on this mount. */
	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, rb, fb));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/* ARGSUSED */
int
sys_sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
#ifdef DEBUG
	/*
	 * print out buffer pool stat information on each sync() call.
	 */
	if (syncprt)
		vfs_bufstats();
#endif /* DEBUG */
	return (0);
}

/*
 * Per-mount worker for sys_sync(): asynchronously flush each writable
 * filesystem, temporarily suppressing MNT_ASYNC while doing so.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		mp->mnt_flag |= asyncflag;	/* restore async setting */
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 */
/* ARGSUSED */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct proc *p;
	struct mount *mp;
	int error;

	td = curthread;
	p = td->td_proc;
	/* Jailed processes may only manage quotas if the knob allows it. */
	if (p->p_ucred->cr_prison && !prison_quotas)
		return (EPERM);

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				    uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *		void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 */
/* ARGSUSED */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks.  We must be root.
	 */
	KKASSERT(p);
	if (p->p_ucred->cr_prison != NULL)
		return (EPERM);
	if ((error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		/* +1 guarantees NUL termination of string control data */
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(p->p_fd, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &uap->sysmsg_result);
	if (fp)
		fdrop(fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
		const void *ctl, int ctllen,
		void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct mount *mp;
	struct nlookupdata nd;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	/* Use the nch's mount (not vp->v_mount) so nullfs can intercept. */
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error)
		return (error);

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		vput(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vput(vp);
	return (error);
}

/*
 * Look up the path held in nd and copy the mount's statfs data into *buf,
 * rewriting f_mntonname relative to the caller's root and hiding fsid
 * from non-root callers.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;	/* note: shared per-mount statfs buffer */
	if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * statfs for an open descriptor: resolve fd to its vnode's mount and
 * copy the statfs data out, with the same path rewrite and fsid hiding
 * as kern_statfs().
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_stat;
	if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Kernel backend for statvfs(): resolve *nd and fill *buf from the
 * underlying mount's vstat, synthesizing f_flag from the mount flags.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	/* statvfs f_flag is derived from the mount flags, not cached */
	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Kernel backend for fstatvfs(): like kern_fstatfs() but fills a
 * struct statvfs from the mount's vstat.  Note that unlike
 * kern_fstatfs() this does not rewrite the mount-on name.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(p->p_fd, fd, &fp)) != 0)
		return (error);
	mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/* State carried through the mountlist scan for sys_getfsstat(). */
struct getfsstat_info {
	struct statfs *sfsp;	/* next user buffer slot, NULL = count only */
	long count;		/* mounts visited (visible or not copied) */
	long maxcount;		/* user buffer capacity in entries */
	int error;
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct proc *p;
};

static int getfsstat_callback(struct mount *, void *);

/* ARGSUSED */
int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.p = p;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	/* when the buffer was too small, report only what fit */
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback for sys_getfsstat().  Copies one statfs record to
 * the user buffer.  Returns -1 to abort the scan on a hard error, 0 to
 * continue (stat refresh failures just skip the entry).
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->sfsp && info->count < info->maxcount) {
		/* skip mounts not visible from the caller's chroot */
		if (info->p && !chroot_visible_mnt(mp, info->p))
			return(0);
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->p, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

/* State carried through the mountlist scan for sys_getvfsstat(). */
struct getvfsstat_info {
	struct statfs *sfsp;	/* next statfs slot in user buffer */
	struct statvfs *vsfsp;	/* next statvfs slot, NULL = count only */
	long count;
	long maxcount;		/* capacity in entries (from vbufsize) */
	int error;
	int flags;
	struct proc *p;
};

static int getvfsstat_callback(struct mount *, void *);

/* ARGSUSED */
int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.p = p;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback for sys_getvfsstat().  Copies both a statfs and a
 * statvfs record per mount; the two user pointers advance in lockstep.
 */
static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->vsfsp && info->count < info->maxcount) {
		if (info->p && !chroot_visible_mnt(mp, info->p))
			return(0);
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->p->p_ucred))) {
			return(0);
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->p->p_ucred))) {
			return(0);
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->p, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 */
/* ARGSUSED */
int
sys_fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(fdp, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* must be a directory with an associated namecache entry */
	if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, p->p_ucred);
	if (error) {
		vput(vp);
		fdrop(fp);
		return (error);
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
	}
	if (error == 0) {
		/* install the new cwd, then release the old one */
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		vn_unlock(vp);		/* leave ref intact */
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
	return (error);
}

/*
 * Kernel backend for chdir(): resolve *nd and make it the calling
 * process's current directory.  On success the nlookupdata's nchandle
 * is consumed (zeroed); the vnode reference moves to fd_cdir.
 */
int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		cache_drop(&onch);
		vrele(ovp);
		/* the nchandle reference now belongs to fd_ncdir */
		cache_zero(&nd->nl_nch);
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
sys_chdir(struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		if ((error = holdvnode(fdp, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only root can chroot
	 */
	if ((error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT)) != 0)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	   (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		/* first chroot also establishes the jail directory */
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
/* ARGSUSED */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error) {
		nlookup_done(&nd);
		return(error);
	}
	error = nlookup(&nd);
	if (error == 0)
		error = kern_chroot(&nd.nl_nch);
	nlookup_done(&nd);
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred);
	return (error);
}

/*
 * Kernel backend for open(): open the file described by *nd with the
 * given open flags and creation mode, returning the new descriptor
 * index via *res.  On success *nd has been consumed by vn_open().
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(p, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);
	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(p, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * is.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(p, NULL, indx);
			fdrop(fp);
			return (error);
		}
		fp->f_flag |= FHASLOCK;
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	fsetfd(p, fp, indx);
	fdrop(fp);
	*res = indx;
	return (0);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				    uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel backend for mknod(): create a special file at *nd.  Device
 * nodes (S_IFCHR/S_IFBLK) require PRIV_ROOT; other types require only
 * prison-root.  rmajor/rminor carry the device numbers.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_ROOT);
		break;
	default:
		error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT);
		break;
	}
	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;
	whiteout = 0;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		whiteout = 1;
		break;
	case S_IFDIR:
		/* special directories support for HAMMER */
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}
	if (error == 0) {
		if (whiteout) {
			error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
					      nd->nl_cred, NAMEI_CREATE);
		} else {
			vp = NULL;
			error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
					   &vp, nd->nl_cred, &vattr);
			if (error == 0)
				vput(vp);
		}
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel backend for mkfifo(): create a named pipe at *nd with the
 * given mode (masked by the process cmask).
 */
int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
 */
int
sys_mkfifo(struct mkfifo_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Policy check for hard link creation, controlled by the
 * security.hardlink_check_{uid,gid} sysctls above.  Returns 0 if the
 * link is permitted, EPERM otherwise.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * root cred can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_ROOT, PRISON_ROOT) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Kernel backend for link(): create a hard link at *linknd referring
 * to the file resolved by *nd.  Directories may not be hard linked.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtained a locked vnode.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vput(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vput(vp);
		return (EEXIST);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_proc->p_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 */
int
sys_link(struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel backend for symlink(): create a symbolic link at *nd whose
 * target text is path (a kernel-space string), with the given mode.
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 */
int
sys_symlink(struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	/* copy the target string in via a pathname object cache buffer */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
	}
	objcache_put(namei_oc, path);
	return (error);
}

/*
 * undelete_args(char *path)
 *
 * Delete a whiteout from the filesystem.
 */
/* ARGSUSED */
int
sys_undelete(struct undelete_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	bwillinode(1);
	nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
				      NAMEI_DELETE);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel backend for unlink(): remove the name resolved by *nd from
 * the filesystem.
 */
int
kern_unlink(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * unlink_args(char *path)
 *
 * Delete a name from the filesystem.
 */
int
sys_unlink(struct unlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_unlink(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel backend for lseek(): reposition the file offset of descriptor
 * fd according to whence (L_SET/L_INCR/L_XTND) and return the resulting
 * offset via *res.  Only vnode-backed descriptors are seekable.
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(p->p_fd, fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	switch (whence) {
	case L_INCR:
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		/*
		 * NOTE: if VOP_GETATTR() fails, vattr.va_size is
		 * uninitialized here; new_offset is then garbage but is
		 * discarded below because error is nonzero.
		 */
		error = VOP_GETATTR(vp, &vattr);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files, block specials, or directories.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR ||
		     vp->v_type == VCHR || vp->v_type == VBLK)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	*res = fp->f_offset;
done:
	fdrop(fp);
	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
			   &uap->sysmsg_offset);

	return (error);
}

/*
 * Kernel backend for access(): check R_OK/W_OK/X_OK permissions
 * (aflags) on the file resolved by *nd; aflags == 0 merely checks
 * existence.  Re-resolves the namecache entry once on ESTALE.
 */
int
kern_access(struct nlookupdata *nd, int aflags)
{
	struct vnode *vp;
	int error, flags;

	if ((error = nlookup(nd)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp);
	if (error)
		return (error);

	/* Flags == 0 means only check for existence. */
	if (aflags) {
		flags = 0;
		if (aflags & R_OK)
			flags |= VREAD;
		if (aflags & W_OK)
			flags |= VWRITE;
		if (aflags & X_OK)
			flags |= VEXEC;
		if ((flags & VWRITE) == 0 ||
		    (error = vn_writechk(vp, &nd->nl_nch)) == 0)
			error = VOP_ACCESS(vp, flags, nd->nl_cred);

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry.  This is a hack at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_access(struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags);
	nlookup_done(&nd);
	return (error);
}

/*
 * Kernel backend for stat()/lstat(): resolve *nd and fill *st.
 * Link-following behavior is determined by how the caller initialized
 * *nd (NLC_FOLLOW or not).  Re-resolves on ESTALE.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;
	thread_t td;

	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	td = curthread;
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the entry.  This
	 * is a hack at the moment.
	 */
	if (error == ESTALE) {
		vput(vp);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct lstat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	/* no NLC_FOLLOW: a trailing symlink is stat'ed itself */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * pathconf_Args(char *path, int name)
 *
 * Get configurable pathname variables.
 */
/* ARGSUSED */
int
sys_pathconf(struct pathconf_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = VOP_PATHCONF(vp, uap->name, &uap->sysmsg_reg);
		vput(vp);
	}
	return (error);
}

/*
 * XXX: daver
 * kern_readlink isn't properly split yet.  There is a copyin burried
 * in VOP_READLINK().
2466 */ 2467 int 2468 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2469 { 2470 struct thread *td = curthread; 2471 struct proc *p = td->td_proc; 2472 struct vnode *vp; 2473 struct iovec aiov; 2474 struct uio auio; 2475 int error; 2476 2477 if ((error = nlookup(nd)) != 0) 2478 return (error); 2479 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2480 if (error) 2481 return (error); 2482 if (vp->v_type != VLNK) { 2483 error = EINVAL; 2484 } else { 2485 aiov.iov_base = buf; 2486 aiov.iov_len = count; 2487 auio.uio_iov = &aiov; 2488 auio.uio_iovcnt = 1; 2489 auio.uio_offset = 0; 2490 auio.uio_rw = UIO_READ; 2491 auio.uio_segflg = UIO_USERSPACE; 2492 auio.uio_td = td; 2493 auio.uio_resid = count; 2494 error = VOP_READLINK(vp, &auio, p->p_ucred); 2495 } 2496 vput(vp); 2497 *res = count - auio.uio_resid; 2498 return (error); 2499 } 2500 2501 /* 2502 * readlink_args(char *path, char *buf, int count) 2503 * 2504 * Return target name of a symbolic link. 2505 */ 2506 int 2507 sys_readlink(struct readlink_args *uap) 2508 { 2509 struct nlookupdata nd; 2510 int error; 2511 2512 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2513 if (error == 0) { 2514 error = kern_readlink(&nd, uap->buf, uap->count, 2515 &uap->sysmsg_result); 2516 } 2517 nlookup_done(&nd); 2518 return (error); 2519 } 2520 2521 static int 2522 setfflags(struct vnode *vp, int flags) 2523 { 2524 struct thread *td = curthread; 2525 struct proc *p = td->td_proc; 2526 int error; 2527 struct vattr vattr; 2528 2529 /* 2530 * Prevent non-root users from setting flags on devices. When 2531 * a device is reused, users can retain ownership of the device 2532 * if they are allowed to set flags and programs assume that 2533 * chown can't fail when done as root. 
2534 */ 2535 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2536 ((error = priv_check_cred(p->p_ucred, PRIV_ROOT, PRISON_ROOT)) != 0)) 2537 return (error); 2538 2539 /* 2540 * note: vget is required for any operation that might mod the vnode 2541 * so VINACTIVE is properly cleared. 2542 */ 2543 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2544 VATTR_NULL(&vattr); 2545 vattr.va_flags = flags; 2546 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2547 vput(vp); 2548 } 2549 return (error); 2550 } 2551 2552 /* 2553 * chflags(char *path, int flags) 2554 * 2555 * Change flags of a file given a path name. 2556 */ 2557 /* ARGSUSED */ 2558 int 2559 sys_chflags(struct chflags_args *uap) 2560 { 2561 struct nlookupdata nd; 2562 struct vnode *vp; 2563 int error; 2564 2565 vp = NULL; 2566 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2567 /* XXX Add NLC flag indicating modifying operation? */ 2568 if (error == 0) 2569 error = nlookup(&nd); 2570 if (error == 0) 2571 error = ncp_writechk(&nd.nl_nch); 2572 if (error == 0) 2573 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2574 nlookup_done(&nd); 2575 if (error == 0) { 2576 error = setfflags(vp, uap->flags); 2577 vrele(vp); 2578 } 2579 return (error); 2580 } 2581 2582 /* 2583 * lchflags(char *path, int flags) 2584 * 2585 * Change flags of a file given a path name, but don't follow symlinks. 2586 */ 2587 /* ARGSUSED */ 2588 int 2589 sys_lchflags(struct lchflags_args *uap) 2590 { 2591 struct nlookupdata nd; 2592 struct vnode *vp; 2593 int error; 2594 2595 vp = NULL; 2596 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2597 /* XXX Add NLC flag indicating modifying operation? 
*/ 2598 if (error == 0) 2599 error = nlookup(&nd); 2600 if (error == 0) 2601 error = ncp_writechk(&nd.nl_nch); 2602 if (error == 0) 2603 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2604 nlookup_done(&nd); 2605 if (error == 0) { 2606 error = setfflags(vp, uap->flags); 2607 vrele(vp); 2608 } 2609 return (error); 2610 } 2611 2612 /* 2613 * fchflags_args(int fd, int flags) 2614 * 2615 * Change flags of a file given a file descriptor. 2616 */ 2617 /* ARGSUSED */ 2618 int 2619 sys_fchflags(struct fchflags_args *uap) 2620 { 2621 struct thread *td = curthread; 2622 struct proc *p = td->td_proc; 2623 struct file *fp; 2624 int error; 2625 2626 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2627 return (error); 2628 if (fp->f_nchandle.ncp) 2629 error = ncp_writechk(&fp->f_nchandle); 2630 if (error == 0) 2631 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2632 fdrop(fp); 2633 return (error); 2634 } 2635 2636 static int 2637 setfmode(struct vnode *vp, int mode) 2638 { 2639 struct thread *td = curthread; 2640 struct proc *p = td->td_proc; 2641 int error; 2642 struct vattr vattr; 2643 2644 /* 2645 * note: vget is required for any operation that might mod the vnode 2646 * so VINACTIVE is properly cleared. 2647 */ 2648 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2649 VATTR_NULL(&vattr); 2650 vattr.va_mode = mode & ALLPERMS; 2651 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2652 vput(vp); 2653 } 2654 return error; 2655 } 2656 2657 int 2658 kern_chmod(struct nlookupdata *nd, int mode) 2659 { 2660 struct vnode *vp; 2661 int error; 2662 2663 /* XXX Add NLC flag indicating modifying operation? 
*/ 2664 if ((error = nlookup(nd)) != 0) 2665 return (error); 2666 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2667 return (error); 2668 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2669 error = setfmode(vp, mode); 2670 vrele(vp); 2671 return (error); 2672 } 2673 2674 /* 2675 * chmod_args(char *path, int mode) 2676 * 2677 * Change mode of a file given path name. 2678 */ 2679 /* ARGSUSED */ 2680 int 2681 sys_chmod(struct chmod_args *uap) 2682 { 2683 struct nlookupdata nd; 2684 int error; 2685 2686 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2687 if (error == 0) 2688 error = kern_chmod(&nd, uap->mode); 2689 nlookup_done(&nd); 2690 return (error); 2691 } 2692 2693 /* 2694 * lchmod_args(char *path, int mode) 2695 * 2696 * Change mode of a file given path name (don't follow links.) 2697 */ 2698 /* ARGSUSED */ 2699 int 2700 sys_lchmod(struct lchmod_args *uap) 2701 { 2702 struct nlookupdata nd; 2703 int error; 2704 2705 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2706 if (error == 0) 2707 error = kern_chmod(&nd, uap->mode); 2708 nlookup_done(&nd); 2709 return (error); 2710 } 2711 2712 /* 2713 * fchmod_args(int fd, int mode) 2714 * 2715 * Change mode of a file given a file descriptor. 
2716 */ 2717 /* ARGSUSED */ 2718 int 2719 sys_fchmod(struct fchmod_args *uap) 2720 { 2721 struct thread *td = curthread; 2722 struct proc *p = td->td_proc; 2723 struct file *fp; 2724 int error; 2725 2726 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2727 return (error); 2728 if (fp->f_nchandle.ncp) 2729 error = ncp_writechk(&fp->f_nchandle); 2730 if (error == 0) 2731 error = setfmode((struct vnode *)fp->f_data, uap->mode); 2732 fdrop(fp); 2733 return (error); 2734 } 2735 2736 static int 2737 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2738 { 2739 struct thread *td = curthread; 2740 struct proc *p = td->td_proc; 2741 int error; 2742 struct vattr vattr; 2743 2744 /* 2745 * note: vget is required for any operation that might mod the vnode 2746 * so VINACTIVE is properly cleared. 2747 */ 2748 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2749 VATTR_NULL(&vattr); 2750 vattr.va_uid = uid; 2751 vattr.va_gid = gid; 2752 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2753 vput(vp); 2754 } 2755 return error; 2756 } 2757 2758 int 2759 kern_chown(struct nlookupdata *nd, int uid, int gid) 2760 { 2761 struct vnode *vp; 2762 int error; 2763 2764 /* XXX Add NLC flag indicating modifying operation? */ 2765 if ((error = nlookup(nd)) != 0) 2766 return (error); 2767 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2768 return (error); 2769 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2770 error = setfown(vp, uid, gid); 2771 vrele(vp); 2772 return (error); 2773 } 2774 2775 /* 2776 * chown(char *path, int uid, int gid) 2777 * 2778 * Set ownership given a path name. 
2779 */ 2780 int 2781 sys_chown(struct chown_args *uap) 2782 { 2783 struct nlookupdata nd; 2784 int error; 2785 2786 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2787 if (error == 0) 2788 error = kern_chown(&nd, uap->uid, uap->gid); 2789 nlookup_done(&nd); 2790 return (error); 2791 } 2792 2793 /* 2794 * lchown_args(char *path, int uid, int gid) 2795 * 2796 * Set ownership given a path name, do not cross symlinks. 2797 */ 2798 int 2799 sys_lchown(struct lchown_args *uap) 2800 { 2801 struct nlookupdata nd; 2802 int error; 2803 2804 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2805 if (error == 0) 2806 error = kern_chown(&nd, uap->uid, uap->gid); 2807 nlookup_done(&nd); 2808 return (error); 2809 } 2810 2811 /* 2812 * fchown_args(int fd, int uid, int gid) 2813 * 2814 * Set ownership given a file descriptor. 2815 */ 2816 /* ARGSUSED */ 2817 int 2818 sys_fchown(struct fchown_args *uap) 2819 { 2820 struct thread *td = curthread; 2821 struct proc *p = td->td_proc; 2822 struct file *fp; 2823 int error; 2824 2825 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2826 return (error); 2827 if (fp->f_nchandle.ncp) 2828 error = ncp_writechk(&fp->f_nchandle); 2829 if (error == 0) 2830 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 2831 fdrop(fp); 2832 return (error); 2833 } 2834 2835 static int 2836 getutimes(const struct timeval *tvp, struct timespec *tsp) 2837 { 2838 struct timeval tv[2]; 2839 2840 if (tvp == NULL) { 2841 microtime(&tv[0]); 2842 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2843 tsp[1] = tsp[0]; 2844 } else { 2845 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2846 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2847 } 2848 return 0; 2849 } 2850 2851 static int 2852 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2853 { 2854 struct thread *td = curthread; 2855 struct proc *p = td->td_proc; 2856 int error; 2857 struct vattr vattr; 2858 2859 /* 2860 * note: vget is required for any operation that might mod the vnode 
2861 * so VINACTIVE is properly cleared. 2862 */ 2863 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2864 VATTR_NULL(&vattr); 2865 vattr.va_atime = ts[0]; 2866 vattr.va_mtime = ts[1]; 2867 if (nullflag) 2868 vattr.va_vaflags |= VA_UTIMES_NULL; 2869 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2870 vput(vp); 2871 } 2872 return error; 2873 } 2874 2875 int 2876 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2877 { 2878 struct timespec ts[2]; 2879 struct vnode *vp; 2880 int error; 2881 2882 if ((error = getutimes(tptr, ts)) != 0) 2883 return (error); 2884 /* XXX Add NLC flag indicating modifying operation? */ 2885 if ((error = nlookup(nd)) != 0) 2886 return (error); 2887 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2888 return (error); 2889 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2890 return (error); 2891 2892 /* 2893 * NOTE: utimes() succeeds for the owner even if the file 2894 * is not user-writable. 2895 */ 2896 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 && 2897 (error = VOP_ACCESS(vp, VWRITE | VOWN, nd->nl_cred)) == 0) { 2898 error = setutimes(vp, ts, tptr == NULL); 2899 } 2900 vrele(vp); 2901 return (error); 2902 } 2903 2904 /* 2905 * utimes_args(char *path, struct timeval *tptr) 2906 * 2907 * Set the access and modification times of a file. 2908 */ 2909 int 2910 sys_utimes(struct utimes_args *uap) 2911 { 2912 struct timeval tv[2]; 2913 struct nlookupdata nd; 2914 int error; 2915 2916 if (uap->tptr) { 2917 error = copyin(uap->tptr, tv, sizeof(tv)); 2918 if (error) 2919 return (error); 2920 } 2921 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2922 if (error == 0) 2923 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2924 nlookup_done(&nd); 2925 return (error); 2926 } 2927 2928 /* 2929 * lutimes_args(char *path, struct timeval *tptr) 2930 * 2931 * Set the access and modification times of a file. 
2932 */ 2933 int 2934 sys_lutimes(struct lutimes_args *uap) 2935 { 2936 struct timeval tv[2]; 2937 struct nlookupdata nd; 2938 int error; 2939 2940 if (uap->tptr) { 2941 error = copyin(uap->tptr, tv, sizeof(tv)); 2942 if (error) 2943 return (error); 2944 } 2945 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2946 if (error == 0) 2947 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2948 nlookup_done(&nd); 2949 return (error); 2950 } 2951 2952 int 2953 kern_futimes(int fd, struct timeval *tptr) 2954 { 2955 struct thread *td = curthread; 2956 struct proc *p = td->td_proc; 2957 struct timespec ts[2]; 2958 struct file *fp; 2959 int error; 2960 2961 error = getutimes(tptr, ts); 2962 if (error) 2963 return (error); 2964 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 2965 return (error); 2966 if (fp->f_nchandle.ncp) 2967 error = ncp_writechk(&fp->f_nchandle); 2968 if (error == 0) 2969 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2970 fdrop(fp); 2971 return (error); 2972 } 2973 2974 /* 2975 * futimes_args(int fd, struct timeval *tptr) 2976 * 2977 * Set the access and modification times of a file. 2978 */ 2979 int 2980 sys_futimes(struct futimes_args *uap) 2981 { 2982 struct timeval tv[2]; 2983 int error; 2984 2985 if (uap->tptr) { 2986 error = copyin(uap->tptr, tv, sizeof(tv)); 2987 if (error) 2988 return (error); 2989 } 2990 2991 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2992 2993 return (error); 2994 } 2995 2996 int 2997 kern_truncate(struct nlookupdata *nd, off_t length) 2998 { 2999 struct vnode *vp; 3000 struct vattr vattr; 3001 int error; 3002 3003 if (length < 0) 3004 return(EINVAL); 3005 /* XXX Add NLC flag indicating modifying operation? 
*/ 3006 if ((error = nlookup(nd)) != 0) 3007 return (error); 3008 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3009 return (error); 3010 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3011 return (error); 3012 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 3013 vrele(vp); 3014 return (error); 3015 } 3016 if (vp->v_type == VDIR) { 3017 error = EISDIR; 3018 } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 && 3019 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) { 3020 VATTR_NULL(&vattr); 3021 vattr.va_size = length; 3022 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3023 } 3024 vput(vp); 3025 return (error); 3026 } 3027 3028 /* 3029 * truncate(char *path, int pad, off_t length) 3030 * 3031 * Truncate a file given its path name. 3032 */ 3033 int 3034 sys_truncate(struct truncate_args *uap) 3035 { 3036 struct nlookupdata nd; 3037 int error; 3038 3039 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3040 if (error == 0) 3041 error = kern_truncate(&nd, uap->length); 3042 nlookup_done(&nd); 3043 return error; 3044 } 3045 3046 int 3047 kern_ftruncate(int fd, off_t length) 3048 { 3049 struct thread *td = curthread; 3050 struct proc *p = td->td_proc; 3051 struct vattr vattr; 3052 struct vnode *vp; 3053 struct file *fp; 3054 int error; 3055 3056 if (length < 0) 3057 return(EINVAL); 3058 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3059 return (error); 3060 if (fp->f_nchandle.ncp) { 3061 error = ncp_writechk(&fp->f_nchandle); 3062 if (error) 3063 goto done; 3064 } 3065 if ((fp->f_flag & FWRITE) == 0) { 3066 error = EINVAL; 3067 goto done; 3068 } 3069 vp = (struct vnode *)fp->f_data; 3070 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3071 if (vp->v_type == VDIR) { 3072 error = EISDIR; 3073 } else if ((error = vn_writechk(vp, NULL)) == 0) { 3074 VATTR_NULL(&vattr); 3075 vattr.va_size = length; 3076 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3077 } 3078 vn_unlock(vp); 3079 done: 3080 fdrop(fp); 3081 return (error); 3082 } 3083 
3084 /* 3085 * ftruncate_args(int fd, int pad, off_t length) 3086 * 3087 * Truncate a file given a file descriptor. 3088 */ 3089 int 3090 sys_ftruncate(struct ftruncate_args *uap) 3091 { 3092 int error; 3093 3094 error = kern_ftruncate(uap->fd, uap->length); 3095 3096 return (error); 3097 } 3098 3099 /* 3100 * fsync(int fd) 3101 * 3102 * Sync an open file. 3103 */ 3104 /* ARGSUSED */ 3105 int 3106 sys_fsync(struct fsync_args *uap) 3107 { 3108 struct thread *td = curthread; 3109 struct proc *p = td->td_proc; 3110 struct vnode *vp; 3111 struct file *fp; 3112 vm_object_t obj; 3113 int error; 3114 3115 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3116 return (error); 3117 vp = (struct vnode *)fp->f_data; 3118 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3119 if ((obj = vp->v_object) != NULL) 3120 vm_object_page_clean(obj, 0, 0, 0); 3121 if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && vp->v_mount) 3122 error = buf_fsync(vp); 3123 vn_unlock(vp); 3124 fdrop(fp); 3125 return (error); 3126 } 3127 3128 int 3129 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3130 { 3131 struct nchandle fnchd; 3132 struct nchandle tnchd; 3133 struct namecache *ncp; 3134 struct vnode *fdvp; 3135 struct vnode *tdvp; 3136 struct mount *mp; 3137 int error; 3138 3139 bwillinode(1); 3140 fromnd->nl_flags |= NLC_REFDVP; 3141 if ((error = nlookup(fromnd)) != 0) 3142 return (error); 3143 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3144 return (ENOENT); 3145 fnchd.mount = fromnd->nl_nch.mount; 3146 cache_hold(&fnchd); 3147 3148 /* 3149 * unlock the source nch so we can lookup the target nch without 3150 * deadlocking. The target may or may not exist so we do not check 3151 * for a target vp like kern_mkdir() and other creation functions do. 3152 * 3153 * The source and target directories are ref'd and rechecked after 3154 * everything is relocked to determine if the source or target file 3155 * has been renamed. 
3156 */ 3157 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3158 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3159 cache_unlock(&fromnd->nl_nch); 3160 3161 tond->nl_flags |= NLC_CREATE | NLC_REFDVP; 3162 if ((error = nlookup(tond)) != 0) { 3163 cache_drop(&fnchd); 3164 return (error); 3165 } 3166 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3167 cache_drop(&fnchd); 3168 return (ENOENT); 3169 } 3170 tnchd.mount = tond->nl_nch.mount; 3171 cache_hold(&tnchd); 3172 3173 /* 3174 * If the source and target are the same there is nothing to do 3175 */ 3176 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3177 cache_drop(&fnchd); 3178 cache_drop(&tnchd); 3179 return (0); 3180 } 3181 3182 /* 3183 * Mount points cannot be renamed or overwritten 3184 */ 3185 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3186 NCF_ISMOUNTPT 3187 ) { 3188 cache_drop(&fnchd); 3189 cache_drop(&tnchd); 3190 return (EINVAL); 3191 } 3192 3193 /* 3194 * relock the source ncp. NOTE AFTER RELOCKING: the source ncp 3195 * may have become invalid while it was unlocked, nc_vp and nc_mount 3196 * could be NULL. 
3197 */ 3198 if (cache_lock_nonblock(&fromnd->nl_nch) == 0) { 3199 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3200 } else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) { 3201 cache_lock(&fromnd->nl_nch); 3202 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3203 } else { 3204 cache_unlock(&tond->nl_nch); 3205 cache_lock(&fromnd->nl_nch); 3206 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3207 cache_lock(&tond->nl_nch); 3208 cache_resolve(&tond->nl_nch, tond->nl_cred); 3209 } 3210 fromnd->nl_flags |= NLC_NCPISLOCKED; 3211 3212 /* 3213 * make sure the parent directories linkages are the same 3214 */ 3215 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3216 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3217 cache_drop(&fnchd); 3218 cache_drop(&tnchd); 3219 return (ENOENT); 3220 } 3221 3222 /* 3223 * Both the source and target must be within the same filesystem and 3224 * in the same filesystem as their parent directories within the 3225 * namecache topology. 3226 * 3227 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3228 */ 3229 mp = fnchd.mount; 3230 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3231 mp != tond->nl_nch.mount) { 3232 cache_drop(&fnchd); 3233 cache_drop(&tnchd); 3234 return (EXDEV); 3235 } 3236 3237 /* 3238 * Make sure the mount point is writable 3239 */ 3240 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3241 cache_drop(&fnchd); 3242 cache_drop(&tnchd); 3243 return (error); 3244 } 3245 3246 /* 3247 * If the target exists and either the source or target is a directory, 3248 * then both must be directories. 3249 * 3250 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3251 * have become NULL. 
3252 */ 3253 if (tond->nl_nch.ncp->nc_vp) { 3254 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3255 error = ENOENT; 3256 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3257 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3258 error = ENOTDIR; 3259 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3260 error = EISDIR; 3261 } 3262 } 3263 3264 /* 3265 * You cannot rename a source into itself or a subdirectory of itself. 3266 * We check this by travsersing the target directory upwards looking 3267 * for a match against the source. 3268 */ 3269 if (error == 0) { 3270 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3271 if (fromnd->nl_nch.ncp == ncp) { 3272 error = EINVAL; 3273 break; 3274 } 3275 } 3276 } 3277 3278 cache_drop(&fnchd); 3279 cache_drop(&tnchd); 3280 3281 /* 3282 * Even though the namespaces are different, they may still represent 3283 * hardlinks to the same file. The filesystem might have a hard time 3284 * with this so we issue a NREMOVE of the source instead of a NRENAME 3285 * when we detect the situation. 3286 */ 3287 if (error == 0) { 3288 fdvp = fromnd->nl_dvp; 3289 tdvp = tond->nl_dvp; 3290 if (fdvp == NULL || tdvp == NULL) { 3291 error = EPERM; 3292 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3293 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3294 fromnd->nl_cred); 3295 } else { 3296 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3297 fdvp, tdvp, tond->nl_cred); 3298 } 3299 } 3300 return (error); 3301 } 3302 3303 /* 3304 * rename_args(char *from, char *to) 3305 * 3306 * Rename files. Source and destination must either both be directories, 3307 * or both not be directories. If target is a directory, it must be empty. 
3308 */ 3309 int 3310 sys_rename(struct rename_args *uap) 3311 { 3312 struct nlookupdata fromnd, tond; 3313 int error; 3314 3315 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3316 if (error == 0) { 3317 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3318 if (error == 0) 3319 error = kern_rename(&fromnd, &tond); 3320 nlookup_done(&tond); 3321 } 3322 nlookup_done(&fromnd); 3323 return (error); 3324 } 3325 3326 int 3327 kern_mkdir(struct nlookupdata *nd, int mode) 3328 { 3329 struct thread *td = curthread; 3330 struct proc *p = td->td_proc; 3331 struct vnode *vp; 3332 struct vattr vattr; 3333 int error; 3334 3335 bwillinode(1); 3336 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3337 if ((error = nlookup(nd)) != 0) 3338 return (error); 3339 3340 if (nd->nl_nch.ncp->nc_vp) 3341 return (EEXIST); 3342 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3343 return (error); 3344 VATTR_NULL(&vattr); 3345 vattr.va_type = VDIR; 3346 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3347 3348 vp = NULL; 3349 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr); 3350 if (error == 0) 3351 vput(vp); 3352 return (error); 3353 } 3354 3355 /* 3356 * mkdir_args(char *path, int mode) 3357 * 3358 * Make a directory file. 3359 */ 3360 /* ARGSUSED */ 3361 int 3362 sys_mkdir(struct mkdir_args *uap) 3363 { 3364 struct nlookupdata nd; 3365 int error; 3366 3367 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3368 if (error == 0) 3369 error = kern_mkdir(&nd, uap->mode); 3370 nlookup_done(&nd); 3371 return (error); 3372 } 3373 3374 int 3375 kern_rmdir(struct nlookupdata *nd) 3376 { 3377 int error; 3378 3379 bwillinode(1); 3380 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 3381 if ((error = nlookup(nd)) != 0) 3382 return (error); 3383 3384 /* 3385 * Do not allow directories representing mount points to be 3386 * deleted, even if empty. Check write perms on mount point 3387 * in case the vnode is aliased (aka nullfs). 
3388 */ 3389 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 3390 return (EINVAL); 3391 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3392 return (error); 3393 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 3394 return (error); 3395 } 3396 3397 /* 3398 * rmdir_args(char *path) 3399 * 3400 * Remove a directory file. 3401 */ 3402 /* ARGSUSED */ 3403 int 3404 sys_rmdir(struct rmdir_args *uap) 3405 { 3406 struct nlookupdata nd; 3407 int error; 3408 3409 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3410 if (error == 0) 3411 error = kern_rmdir(&nd); 3412 nlookup_done(&nd); 3413 return (error); 3414 } 3415 3416 int 3417 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3418 enum uio_seg direction) 3419 { 3420 struct thread *td = curthread; 3421 struct proc *p = td->td_proc; 3422 struct vnode *vp; 3423 struct file *fp; 3424 struct uio auio; 3425 struct iovec aiov; 3426 off_t loff; 3427 int error, eofflag; 3428 3429 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3430 return (error); 3431 if ((fp->f_flag & FREAD) == 0) { 3432 error = EBADF; 3433 goto done; 3434 } 3435 vp = (struct vnode *)fp->f_data; 3436 unionread: 3437 if (vp->v_type != VDIR) { 3438 error = EINVAL; 3439 goto done; 3440 } 3441 aiov.iov_base = buf; 3442 aiov.iov_len = count; 3443 auio.uio_iov = &aiov; 3444 auio.uio_iovcnt = 1; 3445 auio.uio_rw = UIO_READ; 3446 auio.uio_segflg = direction; 3447 auio.uio_td = td; 3448 auio.uio_resid = count; 3449 loff = auio.uio_offset = fp->f_offset; 3450 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3451 fp->f_offset = auio.uio_offset; 3452 if (error) 3453 goto done; 3454 if (count == auio.uio_resid) { 3455 if (union_dircheckp) { 3456 error = union_dircheckp(td, &vp, fp); 3457 if (error == -1) 3458 goto unionread; 3459 if (error) 3460 goto done; 3461 } 3462 #if 0 3463 if ((vp->v_flag & VROOT) && 3464 (vp->v_mount->mnt_flag & MNT_UNION)) { 3465 struct vnode *tvp = vp; 3466 vp = vp->v_mount->mnt_vnodecovered; 
3467 vref(vp); 3468 fp->f_data = vp; 3469 fp->f_offset = 0; 3470 vrele(tvp); 3471 goto unionread; 3472 } 3473 #endif 3474 } 3475 3476 /* 3477 * WARNING! *basep may not be wide enough to accomodate the 3478 * seek offset. XXX should we hack this to return the upper 32 bits 3479 * for offsets greater then 4G? 3480 */ 3481 if (basep) { 3482 *basep = (long)loff; 3483 } 3484 *res = count - auio.uio_resid; 3485 done: 3486 fdrop(fp); 3487 return (error); 3488 } 3489 3490 /* 3491 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 3492 * 3493 * Read a block of directory entries in a file system independent format. 3494 */ 3495 int 3496 sys_getdirentries(struct getdirentries_args *uap) 3497 { 3498 long base; 3499 int error; 3500 3501 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 3502 &uap->sysmsg_result, UIO_USERSPACE); 3503 3504 if (error == 0 && uap->basep) 3505 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 3506 return (error); 3507 } 3508 3509 /* 3510 * getdents_args(int fd, char *buf, size_t count) 3511 */ 3512 int 3513 sys_getdents(struct getdents_args *uap) 3514 { 3515 int error; 3516 3517 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 3518 &uap->sysmsg_result, UIO_USERSPACE); 3519 3520 return (error); 3521 } 3522 3523 /* 3524 * umask(int newmask) 3525 * 3526 * Set the mode mask for creation of filesystem nodes. 3527 * 3528 * MP SAFE 3529 */ 3530 int 3531 sys_umask(struct umask_args *uap) 3532 { 3533 struct thread *td = curthread; 3534 struct proc *p = td->td_proc; 3535 struct filedesc *fdp; 3536 3537 fdp = p->p_fd; 3538 uap->sysmsg_result = fdp->fd_cmask; 3539 fdp->fd_cmask = uap->newmask & ALLPERMS; 3540 return (0); 3541 } 3542 3543 /* 3544 * revoke(char *path) 3545 * 3546 * Void all references to file by ripping underlying filesystem 3547 * away from vnode. 
3548 */ 3549 /* ARGSUSED */ 3550 int 3551 sys_revoke(struct revoke_args *uap) 3552 { 3553 struct nlookupdata nd; 3554 struct vattr vattr; 3555 struct vnode *vp; 3556 struct ucred *cred; 3557 int error; 3558 3559 vp = NULL; 3560 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3561 if (error == 0) 3562 error = nlookup(&nd); 3563 if (error == 0) 3564 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3565 cred = crhold(nd.nl_cred); 3566 nlookup_done(&nd); 3567 if (error == 0) { 3568 if (error == 0) 3569 error = VOP_GETATTR(vp, &vattr); 3570 if (error == 0 && cred->cr_uid != vattr.va_uid) 3571 error = priv_check_cred(cred, PRIV_ROOT, PRISON_ROOT); 3572 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 3573 if (count_udev(vp->v_umajor, vp->v_uminor) > 0) 3574 error = vrevoke(vp, cred); 3575 } else if (error == 0) { 3576 error = vrevoke(vp, cred); 3577 } 3578 vrele(vp); 3579 } 3580 if (cred) 3581 crfree(cred); 3582 return (error); 3583 } 3584 3585 /* 3586 * getfh_args(char *fname, fhandle_t *fhp) 3587 * 3588 * Get (NFS) file handle 3589 * 3590 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 3591 * mount. This allows nullfs mounts to be explicitly exported. 3592 * 3593 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 3594 * 3595 * nullfs mounts of subdirectories are not safe. That is, it will 3596 * work, but you do not really have protection against access to 3597 * the related parent directories. 
3598 */ 3599 int 3600 sys_getfh(struct getfh_args *uap) 3601 { 3602 struct thread *td = curthread; 3603 struct nlookupdata nd; 3604 fhandle_t fh; 3605 struct vnode *vp; 3606 struct mount *mp; 3607 int error; 3608 3609 /* 3610 * Must be super user 3611 */ 3612 if ((error = priv_check(td, PRIV_ROOT)) != 0) 3613 return (error); 3614 3615 vp = NULL; 3616 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 3617 if (error == 0) 3618 error = nlookup(&nd); 3619 if (error == 0) 3620 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3621 mp = nd.nl_nch.mount; 3622 nlookup_done(&nd); 3623 if (error == 0) { 3624 bzero(&fh, sizeof(fh)); 3625 fh.fh_fsid = mp->mnt_stat.f_fsid; 3626 error = VFS_VPTOFH(vp, &fh.fh_fid); 3627 vput(vp); 3628 if (error == 0) 3629 error = copyout(&fh, uap->fhp, sizeof(fh)); 3630 } 3631 return (error); 3632 } 3633 3634 /* 3635 * fhopen_args(const struct fhandle *u_fhp, int flags) 3636 * 3637 * syscall for the rpc.lockd to use to translate a NFS file handle into 3638 * an open descriptor. 3639 * 3640 * warning: do not remove the priv_check() call or this becomes one giant 3641 * security hole. 3642 */ 3643 int 3644 sys_fhopen(struct fhopen_args *uap) 3645 { 3646 struct thread *td = curthread; 3647 struct proc *p = td->td_proc; 3648 struct mount *mp; 3649 struct vnode *vp; 3650 struct fhandle fhp; 3651 struct vattr vat; 3652 struct vattr *vap = &vat; 3653 struct flock lf; 3654 int fmode, mode, error, type; 3655 struct file *nfp; 3656 struct file *fp; 3657 int indx; 3658 3659 /* 3660 * Must be super user 3661 */ 3662 error = priv_check(td, PRIV_ROOT); 3663 if (error) 3664 return (error); 3665 3666 fmode = FFLAGS(uap->flags); 3667 /* why not allow a non-read/write open for our lockd? 
 */
	/*
	 * Access mode must include read and/or write, and O_CREAT is
	 * meaningless for an open-by-file-handle (the object must
	 * already exist).
	 */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);

	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);

	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
	if (error)
		return (error);

	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * Same checks vn_open() would perform: symlinks and sockets
	 * cannot be opened through this interface.
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}

	/*
	 * Convert the open mode into a VOP_ACCESS() request mask and
	 * check permissions.  Writing (or truncating) a directory is
	 * never allowed.
	 */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp, NULL);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, p->p_ucred);
		if (error)
			goto bad;
	}

	/*
	 * Handle O_TRUNC by setting the size to 0 via VOP_SETATTR().
	 */
	if (fmode & O_TRUNC) {
		vn_unlock(vp);				/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, p->p_ucred);
		if (error)
			goto bad;
	}

	/*
	 * VOP_OPEN needs the file pointer so it can potentially override
	 * it.
	 *
	 * WARNING! no f_nchandle will be associated when fhopen()ing a
	 * directory.  XXX
	 */
	if ((error = falloc(p, &nfp, &indx)) != 0)
		goto bad;
	fp = nfp;

	error = VOP_OPEN(vp, fmode, p->p_ucred, fp);
	if (error) {
		/*
		 * setting f_ops this way prevents VOP_CLOSE from being
		 * called or fdrop() releasing the vp from v_data.   Since
		 * the VOP_OPEN failed we don't want to VOP_CLOSE.
		 */
		fp->f_ops = &badfileops;
		fp->f_data = NULL;
		goto bad_drop;
	}

	/*
	 * The fp is given its own reference, we still have our ref and lock.
	 *
	 * Assert that all regular files must be created with a VM object.
	 *
	 * NOTE(review): 'error' is still 0 on this path, so the syscall
	 * returns success even though the descriptor is torn down --
	 * confirm whether an explicit error code should be set here.
	 */
	if (vp->v_type == VREG && vp->v_object == NULL) {
		kprintf("fhopen: regular file did not have VM object: %p\n", vp);
		goto bad_drop;
	}

	/*
	 * The open was successful.  Handle any locking requirements.
	 */
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (fmode & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;
		/*
		 * The vnode lock cannot be held across a potentially
		 * blocking advisory-lock acquisition.
		 */
		vn_unlock(vp);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * release our private reference.
			 */
			fsetfd(p, NULL, indx);
			fdrop(fp);
			vrele(vp);
			return (error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		fp->f_flag |= FHASLOCK;
	}

	/*
	 * Clean up.  Associate the file pointer with the previously
	 * reserved descriptor and return it.
	 */
	vput(vp);
	fsetfd(p, fp, indx);
	fdrop(fp);
	uap->sysmsg_result = indx;
	return (0);

bad_drop:
	fsetfd(p, NULL, indx);
	fdrop(fp);
bad:
	vput(vp);
	return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 *
 * stat() a file identified by an NFS-style file handle instead of a
 * path.  Restricted to the superuser because handles bypass the
 * normal path-based permission checks.
 */
int
sys_fhstat(struct fhstat_args *uap)
{
	struct thread *td = curthread;
	struct stat sb;
	fhandle_t fh;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	if (error)
		return (error);

	/* Map the handle's fsid to a mount, then to a locked vnode. */
	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);
	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
		return (error);
	error = vn_stat(vp, &sb, td->td_proc->p_ucred);
	vput(vp);
	if (error)
		return (error);
	error = copyout(&sb, uap->sb, sizeof(sb));
	return (error);
}

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 *
 * statfs() the filesystem containing the file identified by an
 * NFS-style file handle.  Superuser only.
 */
int
sys_fhstatfs(struct fhstatfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statfs *sp;
	struct mount *mp;
	struct vnode *vp;
	struct statfs sb;
	char *fullpath, *freepath;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);

	/* Chrooted processes may not see mounts outside their root. */
	if (p != NULL && !chroot_visible_mnt(mp, p))
		return (ESTALE);

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
		return (error);
	mp = vp->v_mount;
	sp = &mp->mnt_stat;
	vput(vp);
	/*
	 * NOTE(review): the chroot test above tolerates p == NULL, yet
	 * p->p_ucred here and mount_path(p, ...) below would dereference
	 * it -- confirm p can never actually be NULL on this path.
	 */
	if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0)
		return (error);

	/* Report the mount point as seen from the caller's root. */
	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	/* Zero out the fsid for callers that fail the PRIV_ROOT check. */
	if (priv_check(td, PRIV_ROOT)) {
		bcopy(sp, &sb, sizeof(sb));
		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
		sp = &sb;
	}
	return (copyout(sp, uap->buf, sizeof(*sp)));
}

/*
 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 *
 * statvfs() flavour of sys_fhstatfs() above.  Superuser only.
 */
int
sys_fhstatvfs(struct fhstatvfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statvfs *sp;
	struct mount *mp;
	struct vnode *vp;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);

	/* Chrooted processes may not see mounts outside their root. */
	if (p != NULL && !chroot_visible_mnt(mp, p))
		return (ESTALE);

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
		return (error);
	mp = vp->v_mount;
	sp = &mp->mnt_vstat;
	vput(vp);
	if ((error = VFS_STATVFS(mp, sp, p->p_ucred)) != 0)
		return (error);

	/* Translate the mount flags into statvfs f_flag bits. */
	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	return (copyout(sp, uap->buf, sizeof(*sp)));
}


/*
 * Syscall to push extended attribute configuration information into the
 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
3950 * It will probably be changed to be copied into sysspace by the 3951 * syscall in the future, once issues with various consumers of the 3952 * attribute code have raised their hands. 3953 * 3954 * Currently this is used only by UFS Extended Attributes. 3955 */ 3956 int 3957 sys_extattrctl(struct extattrctl_args *uap) 3958 { 3959 struct nlookupdata nd; 3960 struct mount *mp; 3961 struct vnode *vp; 3962 int error; 3963 3964 vp = NULL; 3965 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3966 if (error == 0) 3967 error = nlookup(&nd); 3968 if (error == 0) { 3969 mp = nd.nl_nch.mount; 3970 error = VFS_EXTATTRCTL(mp, uap->cmd, 3971 uap->attrname, uap->arg, 3972 nd.nl_cred); 3973 } 3974 nlookup_done(&nd); 3975 return (error); 3976 } 3977 3978 /* 3979 * Syscall to set a named extended attribute on a file or directory. 3980 * Accepts attribute name, and a uio structure pointing to the data to set. 3981 * The uio is consumed in the style of writev(). The real work happens 3982 * in VOP_SETEXTATTR(). 
3983 */ 3984 int 3985 sys_extattr_set_file(struct extattr_set_file_args *uap) 3986 { 3987 char attrname[EXTATTR_MAXNAMELEN]; 3988 struct iovec aiov[UIO_SMALLIOV]; 3989 struct iovec *needfree; 3990 struct nlookupdata nd; 3991 struct iovec *iov; 3992 struct vnode *vp; 3993 struct uio auio; 3994 u_int iovlen; 3995 u_int cnt; 3996 int error; 3997 int i; 3998 3999 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4000 if (error) 4001 return (error); 4002 4003 vp = NULL; 4004 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4005 if (error == 0) 4006 error = nlookup(&nd); 4007 if (error == 0) 4008 error = ncp_writechk(&nd.nl_nch); 4009 if (error == 0) 4010 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4011 if (error) { 4012 nlookup_done(&nd); 4013 return (error); 4014 } 4015 4016 needfree = NULL; 4017 iovlen = uap->iovcnt * sizeof(struct iovec); 4018 if (uap->iovcnt > UIO_SMALLIOV) { 4019 if (uap->iovcnt > UIO_MAXIOV) { 4020 error = EINVAL; 4021 goto done; 4022 } 4023 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4024 needfree = iov; 4025 } else { 4026 iov = aiov; 4027 } 4028 auio.uio_iov = iov; 4029 auio.uio_iovcnt = uap->iovcnt; 4030 auio.uio_rw = UIO_WRITE; 4031 auio.uio_segflg = UIO_USERSPACE; 4032 auio.uio_td = nd.nl_td; 4033 auio.uio_offset = 0; 4034 if ((error = copyin(uap->iovp, iov, iovlen))) 4035 goto done; 4036 auio.uio_resid = 0; 4037 for (i = 0; i < uap->iovcnt; i++) { 4038 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4039 error = EINVAL; 4040 goto done; 4041 } 4042 auio.uio_resid += iov->iov_len; 4043 iov++; 4044 } 4045 cnt = auio.uio_resid; 4046 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4047 cnt -= auio.uio_resid; 4048 uap->sysmsg_result = cnt; 4049 done: 4050 vput(vp); 4051 nlookup_done(&nd); 4052 if (needfree) 4053 FREE(needfree, M_IOV); 4054 return (error); 4055 } 4056 4057 /* 4058 * Syscall to get a named extended attribute on a file or directory. 
4059 * Accepts attribute name, and a uio structure pointing to a buffer for the 4060 * data. The uio is consumed in the style of readv(). The real work 4061 * happens in VOP_GETEXTATTR(); 4062 */ 4063 int 4064 sys_extattr_get_file(struct extattr_get_file_args *uap) 4065 { 4066 char attrname[EXTATTR_MAXNAMELEN]; 4067 struct iovec aiov[UIO_SMALLIOV]; 4068 struct iovec *needfree; 4069 struct nlookupdata nd; 4070 struct iovec *iov; 4071 struct vnode *vp; 4072 struct uio auio; 4073 u_int iovlen; 4074 u_int cnt; 4075 int error; 4076 int i; 4077 4078 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4079 if (error) 4080 return (error); 4081 4082 vp = NULL; 4083 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4084 if (error == 0) 4085 error = nlookup(&nd); 4086 if (error == 0) 4087 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4088 if (error) { 4089 nlookup_done(&nd); 4090 return (error); 4091 } 4092 4093 iovlen = uap->iovcnt * sizeof (struct iovec); 4094 needfree = NULL; 4095 if (uap->iovcnt > UIO_SMALLIOV) { 4096 if (uap->iovcnt > UIO_MAXIOV) { 4097 error = EINVAL; 4098 goto done; 4099 } 4100 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4101 needfree = iov; 4102 } else { 4103 iov = aiov; 4104 } 4105 auio.uio_iov = iov; 4106 auio.uio_iovcnt = uap->iovcnt; 4107 auio.uio_rw = UIO_READ; 4108 auio.uio_segflg = UIO_USERSPACE; 4109 auio.uio_td = nd.nl_td; 4110 auio.uio_offset = 0; 4111 if ((error = copyin(uap->iovp, iov, iovlen))) 4112 goto done; 4113 auio.uio_resid = 0; 4114 for (i = 0; i < uap->iovcnt; i++) { 4115 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4116 error = EINVAL; 4117 goto done; 4118 } 4119 auio.uio_resid += iov->iov_len; 4120 iov++; 4121 } 4122 cnt = auio.uio_resid; 4123 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4124 cnt -= auio.uio_resid; 4125 uap->sysmsg_result = cnt; 4126 done: 4127 vput(vp); 4128 nlookup_done(&nd); 4129 if (needfree) 4130 FREE(needfree, M_IOV); 4131 return(error); 4132 
} 4133 4134 /* 4135 * Syscall to delete a named extended attribute from a file or directory. 4136 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 4137 */ 4138 int 4139 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4140 { 4141 char attrname[EXTATTR_MAXNAMELEN]; 4142 struct nlookupdata nd; 4143 struct vnode *vp; 4144 int error; 4145 4146 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4147 if (error) 4148 return(error); 4149 4150 vp = NULL; 4151 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4152 if (error == 0) 4153 error = nlookup(&nd); 4154 if (error == 0) 4155 error = ncp_writechk(&nd.nl_nch); 4156 if (error == 0) 4157 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4158 if (error) { 4159 nlookup_done(&nd); 4160 return (error); 4161 } 4162 4163 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred); 4164 vput(vp); 4165 nlookup_done(&nd); 4166 return(error); 4167 } 4168 4169 /* 4170 * Determine if the mount is visible to the process. 4171 */ 4172 static int 4173 chroot_visible_mnt(struct mount *mp, struct proc *p) 4174 { 4175 struct nchandle nch; 4176 4177 /* 4178 * Traverse from the mount point upwards. If we hit the process 4179 * root then the mount point is visible to the process. 4180 */ 4181 nch = mp->mnt_ncmountpt; 4182 while (nch.ncp) { 4183 if (nch.mount == p->p_fd->fd_nrdir.mount && 4184 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4185 return(1); 4186 } 4187 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4188 nch = nch.mount->mnt_ncmounton; 4189 } else { 4190 nch.ncp = nch.ncp->nc_parent; 4191 } 4192 } 4193 4194 /* 4195 * If the mount point is not visible to the process, but the 4196 * process root is in a subdirectory of the mount, return 4197 * TRUE anyway. 4198 */ 4199 if (p->p_fd->fd_nrdir.mount == mp) 4200 return(1); 4201 4202 return(0); 4203 } 4204 4205