1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.133 2008/06/28 17:59:49 dillon Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mountctl.h> 51 #include <sys/sysproto.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/linker.h> 57 #include <sys/stat.h> 58 #include <sys/unistd.h> 59 #include <sys/vnode.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/dirent.h> 64 #include <sys/extattr.h> 65 #include <sys/spinlock.h> 66 #include <sys/kern_syscall.h> 67 #include <sys/objcache.h> 68 #include <sys/sysctl.h> 69 70 #include <sys/buf2.h> 71 #include <sys/file2.h> 72 #include <sys/spinlock2.h> 73 74 #include <vm/vm.h> 75 #include <vm/vm_object.h> 76 #include <vm/vm_page.h> 77 78 #include <machine/limits.h> 79 #include <machine/stdarg.h> 80 81 #include <vfs/union/union.h> 82 83 static void mount_warning(struct mount *mp, const char *ctl, ...); 84 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 85 static int checkvp_chdir (struct vnode *vn, struct thread *td); 86 static void checkdirs (struct 
nchandle *old_nch, struct nchandle *new_nch); 87 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 88 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 89 static int getutimes (const struct timeval *, struct timespec *); 90 static int setfown (struct vnode *, uid_t, gid_t); 91 static int setfmode (struct vnode *, int); 92 static int setfflags (struct vnode *, int); 93 static int setutimes (struct vnode *, const struct timespec *, int); 94 static int usermount = 0; /* if 1, non-root can mount fs. */ 95 96 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 97 98 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 99 100 /* 101 * Virtual File System System Calls 102 */ 103 104 /* 105 * Mount a file system. 106 */ 107 /* 108 * mount_args(char *type, char *path, int flags, caddr_t data) 109 */ 110 /* ARGSUSED */ 111 int 112 sys_mount(struct mount_args *uap) 113 { 114 struct thread *td = curthread; 115 struct proc *p = td->td_proc; 116 struct vnode *vp; 117 struct nchandle nch; 118 struct mount *mp; 119 struct vfsconf *vfsp; 120 int error, flag = 0, flag2 = 0; 121 int hasmount; 122 struct vattr va; 123 struct nlookupdata nd; 124 char fstypename[MFSNAMELEN]; 125 struct ucred *cred = p->p_ucred; 126 127 KKASSERT(p); 128 if (cred->cr_prison != NULL) 129 return (EPERM); 130 if (usermount == 0 && (error = suser(td))) 131 return (error); 132 /* 133 * Do not allow NFS export by non-root users. 134 */ 135 if (uap->flags & MNT_EXPORTED) { 136 error = suser(td); 137 if (error) 138 return (error); 139 } 140 /* 141 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 142 */ 143 if (suser(td)) 144 uap->flags |= MNT_NOSUID | MNT_NODEV; 145 146 /* 147 * Lookup the requested path and extract the nch and vnode. 
148 */ 149 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 150 if (error == 0) { 151 if ((error = nlookup(&nd)) == 0) { 152 if (nd.nl_nch.ncp->nc_vp == NULL) 153 error = ENOENT; 154 } 155 } 156 if (error) { 157 nlookup_done(&nd); 158 return (error); 159 } 160 161 /* 162 * Extract the locked+refd ncp and cleanup the nd structure 163 */ 164 nch = nd.nl_nch; 165 cache_zero(&nd.nl_nch); 166 nlookup_done(&nd); 167 168 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && cache_findmount(&nch)) 169 hasmount = 1; 170 else 171 hasmount = 0; 172 173 174 /* 175 * now we have the locked ref'd nch and unreferenced vnode. 176 */ 177 vp = nch.ncp->nc_vp; 178 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 179 cache_put(&nch); 180 return (error); 181 } 182 cache_unlock(&nch); 183 184 /* 185 * Now we have an unlocked ref'd nch and a locked ref'd vp 186 */ 187 if (uap->flags & MNT_UPDATE) { 188 if ((vp->v_flag & VROOT) == 0) { 189 cache_drop(&nch); 190 vput(vp); 191 return (EINVAL); 192 } 193 mp = vp->v_mount; 194 flag = mp->mnt_flag; 195 flag2 = mp->mnt_kern_flag; 196 /* 197 * We only allow the filesystem to be reloaded if it 198 * is currently mounted read-only. 199 */ 200 if ((uap->flags & MNT_RELOAD) && 201 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 202 cache_drop(&nch); 203 vput(vp); 204 return (EOPNOTSUPP); /* Needs translation */ 205 } 206 /* 207 * Only root, or the user that did the original mount is 208 * permitted to update it. 
209 */ 210 if (mp->mnt_stat.f_owner != cred->cr_uid && 211 (error = suser(td))) { 212 cache_drop(&nch); 213 vput(vp); 214 return (error); 215 } 216 if (vfs_busy(mp, LK_NOWAIT)) { 217 cache_drop(&nch); 218 vput(vp); 219 return (EBUSY); 220 } 221 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 222 cache_drop(&nch); 223 vfs_unbusy(mp); 224 vput(vp); 225 return (EBUSY); 226 } 227 vp->v_flag |= VMOUNT; 228 mp->mnt_flag |= 229 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 230 vn_unlock(vp); 231 goto update; 232 } 233 /* 234 * If the user is not root, ensure that they own the directory 235 * onto which we are attempting to mount. 236 */ 237 if ((error = VOP_GETATTR(vp, &va)) || 238 (va.va_uid != cred->cr_uid && (error = suser(td)))) { 239 cache_drop(&nch); 240 vput(vp); 241 return (error); 242 } 243 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 244 cache_drop(&nch); 245 vput(vp); 246 return (error); 247 } 248 if (vp->v_type != VDIR) { 249 cache_drop(&nch); 250 vput(vp); 251 return (ENOTDIR); 252 } 253 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 254 cache_drop(&nch); 255 vput(vp); 256 return (EPERM); 257 } 258 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 259 cache_drop(&nch); 260 vput(vp); 261 return (error); 262 } 263 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 264 if (!strcmp(vfsp->vfc_name, fstypename)) 265 break; 266 } 267 if (vfsp == NULL) { 268 linker_file_t lf; 269 270 /* Only load modules for root (very important!) 
*/ 271 if ((error = suser(td)) != 0) { 272 cache_drop(&nch); 273 vput(vp); 274 return error; 275 } 276 error = linker_load_file(fstypename, &lf); 277 if (error || lf == NULL) { 278 cache_drop(&nch); 279 vput(vp); 280 if (lf == NULL) 281 error = ENODEV; 282 return error; 283 } 284 lf->userrefs++; 285 /* lookup again, see if the VFS was loaded */ 286 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 287 if (!strcmp(vfsp->vfc_name, fstypename)) 288 break; 289 } 290 if (vfsp == NULL) { 291 lf->userrefs--; 292 linker_file_unload(lf); 293 cache_drop(&nch); 294 vput(vp); 295 return (ENODEV); 296 } 297 } 298 if ((vp->v_flag & VMOUNT) != 0 || hasmount) { 299 cache_drop(&nch); 300 vput(vp); 301 return (EBUSY); 302 } 303 vp->v_flag |= VMOUNT; 304 305 /* 306 * Allocate and initialize the filesystem. 307 */ 308 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 309 TAILQ_INIT(&mp->mnt_nvnodelist); 310 TAILQ_INIT(&mp->mnt_reservedvnlist); 311 TAILQ_INIT(&mp->mnt_jlist); 312 mp->mnt_nvnodelistsize = 0; 313 lockinit(&mp->mnt_lock, "vfslock", 0, 0); 314 vfs_busy(mp, LK_NOWAIT); 315 mp->mnt_op = vfsp->vfc_vfsops; 316 mp->mnt_vfc = vfsp; 317 vfsp->vfc_refcount++; 318 mp->mnt_stat.f_type = vfsp->vfc_typenum; 319 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 320 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 321 mp->mnt_stat.f_owner = cred->cr_uid; 322 mp->mnt_iosize_max = DFLTPHYS; 323 vn_unlock(vp); 324 update: 325 /* 326 * Set the mount level flags. 
327 */ 328 if (uap->flags & MNT_RDONLY) 329 mp->mnt_flag |= MNT_RDONLY; 330 else if (mp->mnt_flag & MNT_RDONLY) 331 mp->mnt_kern_flag |= MNTK_WANTRDWR; 332 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 333 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 334 MNT_NOSYMFOLLOW | MNT_IGNORE | 335 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 336 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 337 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 338 MNT_NOSYMFOLLOW | MNT_IGNORE | 339 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 340 /* 341 * Mount the filesystem. 342 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 343 * get. 344 */ 345 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 346 if (mp->mnt_flag & MNT_UPDATE) { 347 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 348 mp->mnt_flag &= ~MNT_RDONLY; 349 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 350 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 351 if (error) { 352 mp->mnt_flag = flag; 353 mp->mnt_kern_flag = flag2; 354 } 355 vfs_unbusy(mp); 356 vp->v_flag &= ~VMOUNT; 357 vrele(vp); 358 cache_drop(&nch); 359 return (error); 360 } 361 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 362 /* 363 * Put the new filesystem on the mount list after root. The mount 364 * point gets its own mnt_ncmountpt (unless the VFS already set one 365 * up) which represents the root of the mount. The lookup code 366 * detects the mount point going forward and checks the root of 367 * the mount going backwards. 368 * 369 * It is not necessary to invalidate or purge the vnode underneath 370 * because elements under the mount will be given their own glue 371 * namecache record. 
372 */ 373 if (!error) { 374 if (mp->mnt_ncmountpt.ncp == NULL) { 375 /* 376 * allocate, then unlock, but leave the ref intact 377 */ 378 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 379 cache_unlock(&mp->mnt_ncmountpt); 380 } 381 mp->mnt_ncmounton = nch; /* inherits ref */ 382 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 383 384 /* XXX get the root of the fs and cache_setvp(mnt_ncmountpt...) */ 385 vp->v_flag &= ~VMOUNT; 386 mountlist_insert(mp, MNTINS_LAST); 387 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 388 vn_unlock(vp); 389 error = vfs_allocate_syncvnode(mp); 390 vfs_unbusy(mp); 391 error = VFS_START(mp, 0); 392 vrele(vp); 393 } else { 394 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 395 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 396 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 397 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 398 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 399 vp->v_flag &= ~VMOUNT; 400 mp->mnt_vfc->vfc_refcount--; 401 vfs_unbusy(mp); 402 kfree(mp, M_MOUNT); 403 cache_drop(&nch); 404 vput(vp); 405 } 406 return (error); 407 } 408 409 /* 410 * Scan all active processes to see if any of them have a current 411 * or root directory onto which the new filesystem has just been 412 * mounted. If so, replace them with the new mount point. 413 * 414 * The passed ncp is ref'd and locked (from the mount code) and 415 * must be associated with the vnode representing the root of the 416 * mount point. 
417 */ 418 struct checkdirs_info { 419 struct nchandle old_nch; 420 struct nchandle new_nch; 421 struct vnode *old_vp; 422 struct vnode *new_vp; 423 }; 424 425 static int checkdirs_callback(struct proc *p, void *data); 426 427 static void 428 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 429 { 430 struct checkdirs_info info; 431 struct vnode *olddp; 432 struct vnode *newdp; 433 struct mount *mp; 434 435 /* 436 * If the old mount point's vnode has a usecount of 1, it is not 437 * being held as a descriptor anywhere. 438 */ 439 olddp = old_nch->ncp->nc_vp; 440 if (olddp == NULL || olddp->v_sysref.refcnt == 1) 441 return; 442 443 /* 444 * Force the root vnode of the new mount point to be resolved 445 * so we can update any matching processes. 446 */ 447 mp = new_nch->mount; 448 if (VFS_ROOT(mp, &newdp)) 449 panic("mount: lost mount"); 450 cache_setunresolved(new_nch); 451 cache_setvp(new_nch, newdp); 452 453 /* 454 * Special handling of the root node 455 */ 456 if (rootvnode == olddp) { 457 vref(newdp); 458 vfs_cache_setroot(newdp, cache_hold(new_nch)); 459 } 460 461 /* 462 * Pass newdp separately so the callback does not have to access 463 * it via new_nch->ncp->nc_vp. 464 */ 465 info.old_nch = *old_nch; 466 info.new_nch = *new_nch; 467 info.new_vp = newdp; 468 allproc_scan(checkdirs_callback, &info); 469 vput(newdp); 470 } 471 472 /* 473 * NOTE: callback is not MP safe because the scanned process's filedesc 474 * structure can be ripped out from under us, amoung other things. 475 */ 476 static int 477 checkdirs_callback(struct proc *p, void *data) 478 { 479 struct checkdirs_info *info = data; 480 struct filedesc *fdp; 481 struct nchandle ncdrop1; 482 struct nchandle ncdrop2; 483 struct vnode *vprele1; 484 struct vnode *vprele2; 485 486 if ((fdp = p->p_fd) != NULL) { 487 cache_zero(&ncdrop1); 488 cache_zero(&ncdrop2); 489 vprele1 = NULL; 490 vprele2 = NULL; 491 492 /* 493 * MPUNSAFE - XXX fdp can be pulled out from under a 494 * foreign process. 
495 * 496 * A shared filedesc is ok, we don't have to copy it 497 * because we are making this change globally. 498 */ 499 spin_lock_wr(&fdp->fd_spin); 500 if (fdp->fd_ncdir.mount == info->old_nch.mount && 501 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 502 vprele1 = fdp->fd_cdir; 503 vref(info->new_vp); 504 fdp->fd_cdir = info->new_vp; 505 ncdrop1 = fdp->fd_ncdir; 506 cache_copy(&info->new_nch, &fdp->fd_ncdir); 507 } 508 if (fdp->fd_nrdir.mount == info->old_nch.mount && 509 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 510 vprele2 = fdp->fd_rdir; 511 vref(info->new_vp); 512 fdp->fd_rdir = info->new_vp; 513 ncdrop2 = fdp->fd_nrdir; 514 cache_copy(&info->new_nch, &fdp->fd_nrdir); 515 } 516 spin_unlock_wr(&fdp->fd_spin); 517 if (ncdrop1.ncp) 518 cache_drop(&ncdrop1); 519 if (ncdrop2.ncp) 520 cache_drop(&ncdrop2); 521 if (vprele1) 522 vrele(vprele1); 523 if (vprele2) 524 vrele(vprele2); 525 } 526 return(0); 527 } 528 529 /* 530 * Unmount a file system. 531 * 532 * Note: unmount takes a path to the vnode mounted on as argument, 533 * not special file (as before). 534 */ 535 /* 536 * umount_args(char *path, int flags) 537 */ 538 /* ARGSUSED */ 539 int 540 sys_unmount(struct unmount_args *uap) 541 { 542 struct thread *td = curthread; 543 struct proc *p = td->td_proc; 544 struct mount *mp = NULL; 545 int error; 546 struct nlookupdata nd; 547 548 KKASSERT(p); 549 if (p->p_ucred->cr_prison != NULL) 550 return (EPERM); 551 if (usermount == 0 && (error = suser(td))) 552 return (error); 553 554 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 555 if (error == 0) 556 error = nlookup(&nd); 557 if (error) 558 goto out; 559 560 mp = nd.nl_nch.mount; 561 562 /* 563 * Only root, or the user that did the original mount is 564 * permitted to unmount this filesystem. 565 */ 566 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && 567 (error = suser(td))) 568 goto out; 569 570 /* 571 * Don't allow unmounting the root file system. 
572 */ 573 if (mp->mnt_flag & MNT_ROOTFS) { 574 error = EINVAL; 575 goto out; 576 } 577 578 /* 579 * Must be the root of the filesystem 580 */ 581 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 582 error = EINVAL; 583 goto out; 584 } 585 586 out: 587 nlookup_done(&nd); 588 if (error) 589 return (error); 590 return (dounmount(mp, uap->flags)); 591 } 592 593 /* 594 * Do the actual file system unmount. 595 */ 596 static int 597 dounmount_interlock(struct mount *mp) 598 { 599 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 600 return (EBUSY); 601 mp->mnt_kern_flag |= MNTK_UNMOUNT; 602 return(0); 603 } 604 605 int 606 dounmount(struct mount *mp, int flags) 607 { 608 struct namecache *ncp; 609 struct nchandle nch; 610 struct vnode *vp; 611 int error; 612 int async_flag; 613 int lflags; 614 int freeok = 1; 615 616 /* 617 * Exclusive access for unmounting purposes 618 */ 619 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 620 return (error); 621 622 /* 623 * Allow filesystems to detect that a forced unmount is in progress. 624 */ 625 if (flags & MNT_FORCE) 626 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 627 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_NOWAIT); 628 error = lockmgr(&mp->mnt_lock, lflags); 629 if (error) { 630 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 631 if (mp->mnt_kern_flag & MNTK_MWAIT) 632 wakeup(mp); 633 return (error); 634 } 635 636 if (mp->mnt_flag & MNT_EXPUBLIC) 637 vfs_setpublicfs(NULL, NULL, NULL); 638 639 vfs_msync(mp, MNT_WAIT); 640 async_flag = mp->mnt_flag & MNT_ASYNC; 641 mp->mnt_flag &=~ MNT_ASYNC; 642 643 /* 644 * If this filesystem isn't aliasing other filesystems, 645 * try to invalidate any remaining namecache entries and 646 * check the count afterwords. 
647 */ 648 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 649 cache_lock(&mp->mnt_ncmountpt); 650 cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN); 651 cache_unlock(&mp->mnt_ncmountpt); 652 653 if ((ncp = mp->mnt_ncmountpt.ncp) != NULL && 654 (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) { 655 656 if ((flags & MNT_FORCE) == 0) { 657 error = EBUSY; 658 mount_warning(mp, "Cannot unmount: " 659 "%d namecache " 660 "references still " 661 "present", 662 ncp->nc_refs - 1); 663 } else { 664 mount_warning(mp, "Forced unmount: " 665 "%d namecache " 666 "references still " 667 "present", 668 ncp->nc_refs - 1); 669 freeok = 0; 670 } 671 } 672 } 673 674 /* 675 * nchandle records ref the mount structure. Expect a count of 1 676 * (our mount->mnt_ncmountpt). 677 */ 678 if (mp->mnt_refs != 1) { 679 if ((flags & MNT_FORCE) == 0) { 680 mount_warning(mp, "Cannot unmount: " 681 "%d process references still " 682 "present", mp->mnt_refs); 683 error = EBUSY; 684 } else { 685 mount_warning(mp, "Forced unmount: " 686 "%d process references still " 687 "present", mp->mnt_refs); 688 freeok = 0; 689 } 690 } 691 692 /* 693 * Decomission our special mnt_syncer vnode. This also stops 694 * the vnlru code. If we are unable to unmount we recommission 695 * the vnode. 696 */ 697 if (error == 0) { 698 if ((vp = mp->mnt_syncer) != NULL) { 699 mp->mnt_syncer = NULL; 700 vrele(vp); 701 } 702 if (((mp->mnt_flag & MNT_RDONLY) || 703 (error = VFS_SYNC(mp, MNT_WAIT)) == 0) || 704 (flags & MNT_FORCE)) { 705 error = VFS_UNMOUNT(mp, flags); 706 } 707 } 708 if (error) { 709 if (mp->mnt_syncer == NULL) 710 vfs_allocate_syncvnode(mp); 711 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 712 mp->mnt_flag |= async_flag; 713 lockmgr(&mp->mnt_lock, LK_RELEASE); 714 if (mp->mnt_kern_flag & MNTK_MWAIT) 715 wakeup(mp); 716 return (error); 717 } 718 /* 719 * Clean up any journals still associated with the mount after 720 * filesystem activity has ceased. 
721 */ 722 journal_remove_all_journals(mp, 723 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 724 725 mountlist_remove(mp); 726 727 /* 728 * Remove any installed vnode ops here so the individual VFSs don't 729 * have to. 730 */ 731 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 732 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 733 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 734 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 735 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 736 737 if (mp->mnt_ncmountpt.ncp != NULL) { 738 nch = mp->mnt_ncmountpt; 739 cache_zero(&mp->mnt_ncmountpt); 740 cache_clrmountpt(&nch); 741 cache_drop(&nch); 742 } 743 if (mp->mnt_ncmounton.ncp != NULL) { 744 nch = mp->mnt_ncmounton; 745 cache_zero(&mp->mnt_ncmounton); 746 cache_clrmountpt(&nch); 747 cache_drop(&nch); 748 } 749 750 mp->mnt_vfc->vfc_refcount--; 751 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 752 panic("unmount: dangling vnode"); 753 lockmgr(&mp->mnt_lock, LK_RELEASE); 754 if (mp->mnt_kern_flag & MNTK_MWAIT) 755 wakeup(mp); 756 if (freeok) 757 kfree(mp, M_MOUNT); 758 return (0); 759 } 760 761 static 762 void 763 mount_warning(struct mount *mp, const char *ctl, ...) 764 { 765 char *ptr; 766 char *buf; 767 __va_list va; 768 769 __va_start(va, ctl); 770 if (cache_fullpath(NULL, &mp->mnt_ncmounton, &ptr, &buf) == 0) { 771 kprintf("unmount(%s): ", ptr); 772 kvprintf(ctl, va); 773 kprintf("\n"); 774 kfree(buf, M_TEMP); 775 } else { 776 kprintf("unmount(%p", mp); 777 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 778 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 779 kprintf("): "); 780 kvprintf(ctl, va); 781 kprintf("\n"); 782 } 783 __va_end(va); 784 } 785 786 /* 787 * Shim cache_fullpath() to handle the case where a process is chrooted into 788 * a subdirectory of a mount. 
In this case if the root mount matches the 789 * process root directory's mount we have to specify the process's root 790 * directory instead of the mount point, because the mount point might 791 * be above the root directory. 792 */ 793 static 794 int 795 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 796 { 797 struct nchandle *nch; 798 799 if (p && p->p_fd->fd_nrdir.mount == mp) 800 nch = &p->p_fd->fd_nrdir; 801 else 802 nch = &mp->mnt_ncmountpt; 803 return(cache_fullpath(p, nch, rb, fb)); 804 } 805 806 /* 807 * Sync each mounted filesystem. 808 */ 809 810 #ifdef DEBUG 811 static int syncprt = 0; 812 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 813 #endif /* DEBUG */ 814 815 static int sync_callback(struct mount *mp, void *data); 816 817 /* ARGSUSED */ 818 int 819 sys_sync(struct sync_args *uap) 820 { 821 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 822 #ifdef DEBUG 823 /* 824 * print out buffer pool stat information on each sync() call. 825 */ 826 if (syncprt) 827 vfs_bufstats(); 828 #endif /* DEBUG */ 829 return (0); 830 } 831 832 static 833 int 834 sync_callback(struct mount *mp, void *data __unused) 835 { 836 int asyncflag; 837 838 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 839 asyncflag = mp->mnt_flag & MNT_ASYNC; 840 mp->mnt_flag &= ~MNT_ASYNC; 841 vfs_msync(mp, MNT_NOWAIT); 842 VFS_SYNC(mp, MNT_NOWAIT); 843 mp->mnt_flag |= asyncflag; 844 } 845 return(0); 846 } 847 848 /* XXX PRISON: could be per prison flag */ 849 static int prison_quotas; 850 #if 0 851 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 852 #endif 853 854 /* 855 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 856 * 857 * Change filesystem quotas. 
858 */ 859 /* ARGSUSED */ 860 int 861 sys_quotactl(struct quotactl_args *uap) 862 { 863 struct nlookupdata nd; 864 struct thread *td; 865 struct proc *p; 866 struct mount *mp; 867 int error; 868 869 td = curthread; 870 p = td->td_proc; 871 if (p->p_ucred->cr_prison && !prison_quotas) 872 return (EPERM); 873 874 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 875 if (error == 0) 876 error = nlookup(&nd); 877 if (error == 0) { 878 mp = nd.nl_nch.mount; 879 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 880 uap->arg, nd.nl_cred); 881 } 882 nlookup_done(&nd); 883 return (error); 884 } 885 886 /* 887 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 888 * void *buf, int buflen) 889 * 890 * This function operates on a mount point and executes the specified 891 * operation using the specified control data, and possibly returns data. 892 * 893 * The actual number of bytes stored in the result buffer is returned, 0 894 * if none, otherwise an error is returned. 895 */ 896 /* ARGSUSED */ 897 int 898 sys_mountctl(struct mountctl_args *uap) 899 { 900 struct thread *td = curthread; 901 struct proc *p = td->td_proc; 902 struct file *fp; 903 void *ctl = NULL; 904 void *buf = NULL; 905 char *path = NULL; 906 int error; 907 908 /* 909 * Sanity and permissions checks. We must be root. 
910 */ 911 KKASSERT(p); 912 if (p->p_ucred->cr_prison != NULL) 913 return (EPERM); 914 if ((error = suser(td)) != 0) 915 return (error); 916 917 /* 918 * Argument length checks 919 */ 920 if (uap->ctllen < 0 || uap->ctllen > 1024) 921 return (EINVAL); 922 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 923 return (EINVAL); 924 if (uap->path == NULL) 925 return (EINVAL); 926 927 /* 928 * Allocate the necessary buffers and copyin data 929 */ 930 path = objcache_get(namei_oc, M_WAITOK); 931 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 932 if (error) 933 goto done; 934 935 if (uap->ctllen) { 936 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 937 error = copyin(uap->ctl, ctl, uap->ctllen); 938 if (error) 939 goto done; 940 } 941 if (uap->buflen) 942 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 943 944 /* 945 * Validate the descriptor 946 */ 947 if (uap->fd >= 0) { 948 fp = holdfp(p->p_fd, uap->fd, -1); 949 if (fp == NULL) { 950 error = EBADF; 951 goto done; 952 } 953 } else { 954 fp = NULL; 955 } 956 957 /* 958 * Execute the internal kernel function and clean up. 959 */ 960 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 961 if (fp) 962 fdrop(fp); 963 if (error == 0 && uap->sysmsg_result > 0) 964 error = copyout(buf, uap->buf, uap->sysmsg_result); 965 done: 966 if (path) 967 objcache_put(namei_oc, path); 968 if (ctl) 969 kfree(ctl, M_TEMP); 970 if (buf) 971 kfree(buf, M_TEMP); 972 return (error); 973 } 974 975 /* 976 * Execute a mount control operation by resolving the path to a mount point 977 * and calling vop_mountctl(). 
978 */ 979 int 980 kern_mountctl(const char *path, int op, struct file *fp, 981 const void *ctl, int ctllen, 982 void *buf, int buflen, int *res) 983 { 984 struct vnode *vp; 985 struct mount *mp; 986 struct nlookupdata nd; 987 int error; 988 989 *res = 0; 990 vp = NULL; 991 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 992 if (error == 0) 993 error = nlookup(&nd); 994 if (error == 0) 995 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 996 nlookup_done(&nd); 997 if (error) 998 return (error); 999 1000 mp = vp->v_mount; 1001 1002 /* 1003 * Must be the root of the filesystem 1004 */ 1005 if ((vp->v_flag & VROOT) == 0) { 1006 vput(vp); 1007 return (EINVAL); 1008 } 1009 error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen, 1010 buf, buflen, res); 1011 vput(vp); 1012 return (error); 1013 } 1014 1015 int 1016 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1017 { 1018 struct thread *td = curthread; 1019 struct proc *p = td->td_proc; 1020 struct mount *mp; 1021 struct statfs *sp; 1022 char *fullpath, *freepath; 1023 int error; 1024 1025 if ((error = nlookup(nd)) != 0) 1026 return (error); 1027 mp = nd->nl_nch.mount; 1028 sp = &mp->mnt_stat; 1029 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0) 1030 return (error); 1031 1032 error = mount_path(p, mp, &fullpath, &freepath); 1033 if (error) 1034 return(error); 1035 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1036 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1037 kfree(freepath, M_TEMP); 1038 1039 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1040 bcopy(sp, buf, sizeof(*buf)); 1041 /* Only root should have access to the fsid's. */ 1042 if (suser(td)) 1043 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1044 return (0); 1045 } 1046 1047 /* 1048 * statfs_args(char *path, struct statfs *buf) 1049 * 1050 * Get filesystem statistics. 
1051 */ 1052 int 1053 sys_statfs(struct statfs_args *uap) 1054 { 1055 struct nlookupdata nd; 1056 struct statfs buf; 1057 int error; 1058 1059 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1060 if (error == 0) 1061 error = kern_statfs(&nd, &buf); 1062 nlookup_done(&nd); 1063 if (error == 0) 1064 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1065 return (error); 1066 } 1067 1068 int 1069 kern_fstatfs(int fd, struct statfs *buf) 1070 { 1071 struct thread *td = curthread; 1072 struct proc *p = td->td_proc; 1073 struct file *fp; 1074 struct mount *mp; 1075 struct statfs *sp; 1076 char *fullpath, *freepath; 1077 int error; 1078 1079 KKASSERT(p); 1080 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1081 return (error); 1082 mp = ((struct vnode *)fp->f_data)->v_mount; 1083 if (mp == NULL) { 1084 error = EBADF; 1085 goto done; 1086 } 1087 if (fp->f_cred == NULL) { 1088 error = EINVAL; 1089 goto done; 1090 } 1091 sp = &mp->mnt_stat; 1092 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1093 goto done; 1094 1095 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1096 goto done; 1097 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1098 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1099 kfree(freepath, M_TEMP); 1100 1101 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1102 bcopy(sp, buf, sizeof(*buf)); 1103 1104 /* Only root should have access to the fsid's. */ 1105 if (suser(td)) 1106 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1107 error = 0; 1108 done: 1109 fdrop(fp); 1110 return (error); 1111 } 1112 1113 /* 1114 * fstatfs_args(int fd, struct statfs *buf) 1115 * 1116 * Get filesystem statistics. 
1117 */ 1118 int 1119 sys_fstatfs(struct fstatfs_args *uap) 1120 { 1121 struct statfs buf; 1122 int error; 1123 1124 error = kern_fstatfs(uap->fd, &buf); 1125 1126 if (error == 0) 1127 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1128 return (error); 1129 } 1130 1131 int 1132 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1133 { 1134 struct mount *mp; 1135 struct statvfs *sp; 1136 int error; 1137 1138 if ((error = nlookup(nd)) != 0) 1139 return (error); 1140 mp = nd->nl_nch.mount; 1141 sp = &mp->mnt_vstat; 1142 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1143 return (error); 1144 1145 sp->f_flag = 0; 1146 if (mp->mnt_flag & MNT_RDONLY) 1147 sp->f_flag |= ST_RDONLY; 1148 if (mp->mnt_flag & MNT_NOSUID) 1149 sp->f_flag |= ST_NOSUID; 1150 bcopy(sp, buf, sizeof(*buf)); 1151 return (0); 1152 } 1153 1154 /* 1155 * statfs_args(char *path, struct statfs *buf) 1156 * 1157 * Get filesystem statistics. 1158 */ 1159 int 1160 sys_statvfs(struct statvfs_args *uap) 1161 { 1162 struct nlookupdata nd; 1163 struct statvfs buf; 1164 int error; 1165 1166 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1167 if (error == 0) 1168 error = kern_statvfs(&nd, &buf); 1169 nlookup_done(&nd); 1170 if (error == 0) 1171 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1172 return (error); 1173 } 1174 1175 int 1176 kern_fstatvfs(int fd, struct statvfs *buf) 1177 { 1178 struct thread *td = curthread; 1179 struct proc *p = td->td_proc; 1180 struct file *fp; 1181 struct mount *mp; 1182 struct statvfs *sp; 1183 int error; 1184 1185 KKASSERT(p); 1186 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 1187 return (error); 1188 mp = ((struct vnode *)fp->f_data)->v_mount; 1189 if (mp == NULL) { 1190 error = EBADF; 1191 goto done; 1192 } 1193 if (fp->f_cred == NULL) { 1194 error = EINVAL; 1195 goto done; 1196 } 1197 sp = &mp->mnt_vstat; 1198 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1199 goto done; 1200 1201 sp->f_flag = 0; 1202 if (mp->mnt_flag & 
MNT_RDONLY) 1203 sp->f_flag |= ST_RDONLY; 1204 if (mp->mnt_flag & MNT_NOSUID) 1205 sp->f_flag |= ST_NOSUID; 1206 1207 bcopy(sp, buf, sizeof(*buf)); 1208 error = 0; 1209 done: 1210 fdrop(fp); 1211 return (error); 1212 } 1213 1214 /* 1215 * fstatfs_args(int fd, struct statfs *buf) 1216 * 1217 * Get filesystem statistics. 1218 */ 1219 int 1220 sys_fstatvfs(struct fstatvfs_args *uap) 1221 { 1222 struct statvfs buf; 1223 int error; 1224 1225 error = kern_fstatvfs(uap->fd, &buf); 1226 1227 if (error == 0) 1228 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1229 return (error); 1230 } 1231 1232 /* 1233 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1234 * 1235 * Get statistics on all filesystems. 1236 */ 1237 1238 struct getfsstat_info { 1239 struct statfs *sfsp; 1240 long count; 1241 long maxcount; 1242 int error; 1243 int flags; 1244 struct proc *p; 1245 }; 1246 1247 static int getfsstat_callback(struct mount *, void *); 1248 1249 /* ARGSUSED */ 1250 int 1251 sys_getfsstat(struct getfsstat_args *uap) 1252 { 1253 struct thread *td = curthread; 1254 struct proc *p = td->td_proc; 1255 struct getfsstat_info info; 1256 1257 bzero(&info, sizeof(info)); 1258 1259 info.maxcount = uap->bufsize / sizeof(struct statfs); 1260 info.sfsp = uap->buf; 1261 info.count = 0; 1262 info.flags = uap->flags; 1263 info.p = p; 1264 1265 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1266 if (info.sfsp && info.count > info.maxcount) 1267 uap->sysmsg_result = info.maxcount; 1268 else 1269 uap->sysmsg_result = info.count; 1270 return (info.error); 1271 } 1272 1273 static int 1274 getfsstat_callback(struct mount *mp, void *data) 1275 { 1276 struct getfsstat_info *info = data; 1277 struct statfs *sp; 1278 char *freepath; 1279 char *fullpath; 1280 int error; 1281 1282 if (info->sfsp && info->count < info->maxcount) { 1283 if (info->p && !chroot_visible_mnt(mp, info->p)) 1284 return(0); 1285 sp = &mp->mnt_stat; 1286 1287 /* 1288 * If MNT_NOWAIT or MNT_LAZY is 
specified, do not 1289 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1290 * overrides MNT_WAIT. 1291 */ 1292 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1293 (info->flags & MNT_WAIT)) && 1294 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1295 return(0); 1296 } 1297 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1298 1299 error = mount_path(info->p, mp, &fullpath, &freepath); 1300 if (error) { 1301 info->error = error; 1302 return(-1); 1303 } 1304 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1305 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1306 kfree(freepath, M_TEMP); 1307 1308 error = copyout(sp, info->sfsp, sizeof(*sp)); 1309 if (error) { 1310 info->error = error; 1311 return (-1); 1312 } 1313 ++info->sfsp; 1314 } 1315 info->count++; 1316 return(0); 1317 } 1318 1319 /* 1320 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1321 long bufsize, int flags) 1322 * 1323 * Get statistics on all filesystems. 1324 */ 1325 1326 struct getvfsstat_info { 1327 struct statfs *sfsp; 1328 struct statvfs *vsfsp; 1329 long count; 1330 long maxcount; 1331 int error; 1332 int flags; 1333 struct proc *p; 1334 }; 1335 1336 static int getvfsstat_callback(struct mount *, void *); 1337 1338 /* ARGSUSED */ 1339 int 1340 sys_getvfsstat(struct getvfsstat_args *uap) 1341 { 1342 struct thread *td = curthread; 1343 struct proc *p = td->td_proc; 1344 struct getvfsstat_info info; 1345 1346 bzero(&info, sizeof(info)); 1347 1348 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1349 info.sfsp = uap->buf; 1350 info.vsfsp = uap->vbuf; 1351 info.count = 0; 1352 info.flags = uap->flags; 1353 info.p = p; 1354 1355 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1356 if (info.vsfsp && info.count > info.maxcount) 1357 uap->sysmsg_result = info.maxcount; 1358 else 1359 uap->sysmsg_result = info.count; 1360 return (info.error); 1361 } 1362 1363 static int 1364 getvfsstat_callback(struct mount *mp, void *data) 1365 { 1366 struct 
getvfsstat_info *info = data; 1367 struct statfs *sp; 1368 struct statvfs *vsp; 1369 char *freepath; 1370 char *fullpath; 1371 int error; 1372 1373 if (info->vsfsp && info->count < info->maxcount) { 1374 if (info->p && !chroot_visible_mnt(mp, info->p)) 1375 return(0); 1376 sp = &mp->mnt_stat; 1377 vsp = &mp->mnt_vstat; 1378 1379 /* 1380 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1381 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1382 * overrides MNT_WAIT. 1383 */ 1384 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1385 (info->flags & MNT_WAIT)) && 1386 (error = VFS_STATFS(mp, sp, info->p->p_ucred))) { 1387 return(0); 1388 } 1389 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1390 1391 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1392 (info->flags & MNT_WAIT)) && 1393 (error = VFS_STATVFS(mp, vsp, info->p->p_ucred))) { 1394 return(0); 1395 } 1396 vsp->f_flag = 0; 1397 if (mp->mnt_flag & MNT_RDONLY) 1398 vsp->f_flag |= ST_RDONLY; 1399 if (mp->mnt_flag & MNT_NOSUID) 1400 vsp->f_flag |= ST_NOSUID; 1401 1402 error = mount_path(info->p, mp, &fullpath, &freepath); 1403 if (error) { 1404 info->error = error; 1405 return(-1); 1406 } 1407 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1408 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1409 kfree(freepath, M_TEMP); 1410 1411 error = copyout(sp, info->sfsp, sizeof(*sp)); 1412 if (error == 0) 1413 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1414 if (error) { 1415 info->error = error; 1416 return (-1); 1417 } 1418 ++info->sfsp; 1419 ++info->vsfsp; 1420 } 1421 info->count++; 1422 return(0); 1423 } 1424 1425 1426 /* 1427 * fchdir_args(int fd) 1428 * 1429 * Change current working directory to a given file descriptor. 
1430 */ 1431 /* ARGSUSED */ 1432 int 1433 sys_fchdir(struct fchdir_args *uap) 1434 { 1435 struct thread *td = curthread; 1436 struct proc *p = td->td_proc; 1437 struct filedesc *fdp = p->p_fd; 1438 struct vnode *vp, *ovp; 1439 struct mount *mp; 1440 struct file *fp; 1441 struct nchandle nch, onch, tnch; 1442 int error; 1443 1444 if ((error = holdvnode(fdp, uap->fd, &fp)) != 0) 1445 return (error); 1446 vp = (struct vnode *)fp->f_data; 1447 vref(vp); 1448 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 1449 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) 1450 error = ENOTDIR; 1451 else 1452 error = VOP_ACCESS(vp, VEXEC, p->p_ucred); 1453 if (error) { 1454 vput(vp); 1455 fdrop(fp); 1456 return (error); 1457 } 1458 cache_copy(&fp->f_nchandle, &nch); 1459 1460 /* 1461 * If the ncp has become a mount point, traverse through 1462 * the mount point. 1463 */ 1464 1465 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1466 (mp = cache_findmount(&nch)) != NULL 1467 ) { 1468 error = nlookup_mp(mp, &tnch); 1469 if (error == 0) { 1470 cache_unlock(&tnch); /* leave ref intact */ 1471 vput(vp); 1472 vp = tnch.ncp->nc_vp; 1473 error = vget(vp, LK_SHARED); 1474 KKASSERT(error == 0); 1475 cache_drop(&nch); 1476 nch = tnch; 1477 } 1478 } 1479 if (error == 0) { 1480 ovp = fdp->fd_cdir; 1481 onch = fdp->fd_ncdir; 1482 vn_unlock(vp); /* leave ref intact */ 1483 fdp->fd_cdir = vp; 1484 fdp->fd_ncdir = nch; 1485 cache_drop(&onch); 1486 vrele(ovp); 1487 } else { 1488 cache_drop(&nch); 1489 vput(vp); 1490 } 1491 fdrop(fp); 1492 return (error); 1493 } 1494 1495 int 1496 kern_chdir(struct nlookupdata *nd) 1497 { 1498 struct thread *td = curthread; 1499 struct proc *p = td->td_proc; 1500 struct filedesc *fdp = p->p_fd; 1501 struct vnode *vp, *ovp; 1502 struct nchandle onch; 1503 int error; 1504 1505 if ((error = nlookup(nd)) != 0) 1506 return (error); 1507 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1508 return (ENOENT); 1509 if ((error = vget(vp, LK_SHARED)) != 0) 1510 return (error); 1511 
1512 error = checkvp_chdir(vp, td); 1513 vn_unlock(vp); 1514 if (error == 0) { 1515 ovp = fdp->fd_cdir; 1516 onch = fdp->fd_ncdir; 1517 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1518 fdp->fd_ncdir = nd->nl_nch; 1519 fdp->fd_cdir = vp; 1520 cache_drop(&onch); 1521 vrele(ovp); 1522 cache_zero(&nd->nl_nch); 1523 } else { 1524 vrele(vp); 1525 } 1526 return (error); 1527 } 1528 1529 /* 1530 * chdir_args(char *path) 1531 * 1532 * Change current working directory (``.''). 1533 */ 1534 int 1535 sys_chdir(struct chdir_args *uap) 1536 { 1537 struct nlookupdata nd; 1538 int error; 1539 1540 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1541 if (error == 0) 1542 error = kern_chdir(&nd); 1543 nlookup_done(&nd); 1544 return (error); 1545 } 1546 1547 /* 1548 * Helper function for raised chroot(2) security function: Refuse if 1549 * any filedescriptors are open directories. 1550 */ 1551 static int 1552 chroot_refuse_vdir_fds(struct filedesc *fdp) 1553 { 1554 struct vnode *vp; 1555 struct file *fp; 1556 int error; 1557 int fd; 1558 1559 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1560 if ((error = holdvnode(fdp, fd, &fp)) != 0) 1561 continue; 1562 vp = (struct vnode *)fp->f_data; 1563 if (vp->v_type != VDIR) { 1564 fdrop(fp); 1565 continue; 1566 } 1567 fdrop(fp); 1568 return(EPERM); 1569 } 1570 return (0); 1571 } 1572 1573 /* 1574 * This sysctl determines if we will allow a process to chroot(2) if it 1575 * has a directory open: 1576 * 0: disallowed for all processes. 1577 * 1: allowed for processes that were not already chroot(2)'ed. 1578 * 2: allowed for all processes. 1579 */ 1580 1581 static int chroot_allow_open_directories = 1; 1582 1583 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1584 &chroot_allow_open_directories, 0, ""); 1585 1586 /* 1587 * chroot to the specified namecache entry. We obtain the vp from the 1588 * namecache data. 
The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int refuse_open_dirs;
	int error;

	/*
	 * Only root can chroot
	 */
	error = suser_cred(p->p_ucred, PRISON_ROOT);
	if (error != 0)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).  The
	 * check applies when the sysctl is 0, or when it is 1 and the
	 * current root directory is not rootvnode.
	 */
	refuse_open_dirs = (chroot_allow_open_directories == 0);
	if (!refuse_open_dirs && chroot_allow_open_directories == 1)
		refuse_open_dirs = (fdp->fd_rdir != rootvnode);
	if (refuse_open_dirs) {
		error = chroot_refuse_vdir_fds(fdp);
		if (error != 0)
			return (error);
	}

	vp = nch->ncp->nc_vp;
	if (vp == NULL)
		return (ENOENT);

	error = vget(vp, LK_SHARED);
	if (error != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error != 0) {
		vrele(vp);
		return (error);
	}

	vrele(fdp->fd_rdir);
	fdp->fd_rdir = vp;		/* reference inherited by fd_rdir */
	cache_drop(&fdp->fd_nrdir);
	cache_copy(nch, &fdp->fd_nrdir);

	/* The first chroot also sets jdir, with a reference of its own. */
	if (fdp->fd_jdir == NULL) {
		fdp->fd_jdir = vp;
		vref(fdp->fd_jdir);
		cache_copy(nch, &fdp->fd_njdir);
	}
	return (0);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
1646 */ 1647 /* ARGSUSED */ 1648 int 1649 sys_chroot(struct chroot_args *uap) 1650 { 1651 struct thread *td = curthread; 1652 struct nlookupdata nd; 1653 int error; 1654 1655 KKASSERT(td->td_proc); 1656 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1657 if (error) { 1658 nlookup_done(&nd); 1659 return(error); 1660 } 1661 error = nlookup(&nd); 1662 if (error == 0) 1663 error = kern_chroot(&nd.nl_nch); 1664 nlookup_done(&nd); 1665 return(error); 1666 } 1667 1668 /* 1669 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1670 * determine whether it is legal to chdir to the vnode. The vnode's state 1671 * is not changed by this call. 1672 */ 1673 int 1674 checkvp_chdir(struct vnode *vp, struct thread *td) 1675 { 1676 int error; 1677 1678 if (vp->v_type != VDIR) 1679 error = ENOTDIR; 1680 else 1681 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred); 1682 return (error); 1683 } 1684 1685 int 1686 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1687 { 1688 struct thread *td = curthread; 1689 struct proc *p = td->td_proc; 1690 struct lwp *lp = td->td_lwp; 1691 struct filedesc *fdp = p->p_fd; 1692 int cmode, flags; 1693 struct file *nfp; 1694 struct file *fp; 1695 struct vnode *vp; 1696 int type, indx, error; 1697 struct flock lf; 1698 1699 if ((oflags & O_ACCMODE) == O_ACCMODE) 1700 return (EINVAL); 1701 flags = FFLAGS(oflags); 1702 error = falloc(p, &nfp, NULL); 1703 if (error) 1704 return (error); 1705 fp = nfp; 1706 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1707 1708 /* 1709 * XXX p_dupfd is a real mess. It allows a device to return a 1710 * file descriptor to be duplicated rather then doing the open 1711 * itself. 1712 */ 1713 lp->lwp_dupfd = -1; 1714 1715 /* 1716 * Call vn_open() to do the lookup and assign the vnode to the 1717 * file pointer. vn_open() does not change the ref count on fp 1718 * and the vnode, on success, will be inherited by the file pointer 1719 * and unlocked. 
1720 */ 1721 nd->nl_flags |= NLC_LOCKVP; 1722 error = vn_open(nd, fp, flags, cmode); 1723 nlookup_done(nd); 1724 if (error) { 1725 /* 1726 * handle special fdopen() case. bleh. dupfdopen() is 1727 * responsible for dropping the old contents of ofiles[indx] 1728 * if it succeeds. 1729 * 1730 * Note that fsetfd() will add a ref to fp which represents 1731 * the fd_files[] assignment. We must still drop our 1732 * reference. 1733 */ 1734 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 1735 if (fdalloc(p, 0, &indx) == 0) { 1736 error = dupfdopen(p, indx, lp->lwp_dupfd, flags, error); 1737 if (error == 0) { 1738 *res = indx; 1739 fdrop(fp); /* our ref */ 1740 return (0); 1741 } 1742 fsetfd(p, NULL, indx); 1743 } 1744 } 1745 fdrop(fp); /* our ref */ 1746 if (error == ERESTART) 1747 error = EINTR; 1748 return (error); 1749 } 1750 1751 /* 1752 * ref the vnode for ourselves so it can't be ripped out from under 1753 * is. XXX need an ND flag to request that the vnode be returned 1754 * anyway. 1755 * 1756 * Reserve a file descriptor but do not assign it until the open 1757 * succeeds. 1758 */ 1759 vp = (struct vnode *)fp->f_data; 1760 vref(vp); 1761 if ((error = fdalloc(p, 0, &indx)) != 0) { 1762 fdrop(fp); 1763 vrele(vp); 1764 return (error); 1765 } 1766 1767 /* 1768 * If no error occurs the vp will have been assigned to the file 1769 * pointer. 1770 */ 1771 lp->lwp_dupfd = 0; 1772 1773 if (flags & (O_EXLOCK | O_SHLOCK)) { 1774 lf.l_whence = SEEK_SET; 1775 lf.l_start = 0; 1776 lf.l_len = 0; 1777 if (flags & O_EXLOCK) 1778 lf.l_type = F_WRLCK; 1779 else 1780 lf.l_type = F_RDLCK; 1781 if (flags & FNONBLOCK) 1782 type = 0; 1783 else 1784 type = F_WAIT; 1785 1786 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1787 /* 1788 * lock request failed. Clean up the reserved 1789 * descriptor. 
1790 */ 1791 vrele(vp); 1792 fsetfd(p, NULL, indx); 1793 fdrop(fp); 1794 return (error); 1795 } 1796 fp->f_flag |= FHASLOCK; 1797 } 1798 #if 0 1799 /* 1800 * Assert that all regular file vnodes were created with a object. 1801 */ 1802 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 1803 ("open: regular file has no backing object after vn_open")); 1804 #endif 1805 1806 vrele(vp); 1807 1808 /* 1809 * release our private reference, leaving the one associated with the 1810 * descriptor table intact. 1811 */ 1812 fsetfd(p, fp, indx); 1813 fdrop(fp); 1814 *res = indx; 1815 return (0); 1816 } 1817 1818 /* 1819 * open_args(char *path, int flags, int mode) 1820 * 1821 * Check permissions, allocate an open file structure, 1822 * and call the device open routine if any. 1823 */ 1824 int 1825 sys_open(struct open_args *uap) 1826 { 1827 struct nlookupdata nd; 1828 int error; 1829 1830 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1831 if (error == 0) { 1832 error = kern_open(&nd, uap->flags, 1833 uap->mode, &uap->sysmsg_result); 1834 } 1835 nlookup_done(&nd); 1836 return (error); 1837 } 1838 1839 int 1840 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 1841 { 1842 struct thread *td = curthread; 1843 struct proc *p = td->td_proc; 1844 struct vnode *vp; 1845 struct vattr vattr; 1846 int error; 1847 int whiteout = 0; 1848 1849 KKASSERT(p); 1850 1851 switch (mode & S_IFMT) { 1852 case S_IFCHR: 1853 case S_IFBLK: 1854 error = suser(td); 1855 break; 1856 default: 1857 error = suser_cred(p->p_ucred, PRISON_ROOT); 1858 break; 1859 } 1860 if (error) 1861 return (error); 1862 1863 bwillinode(1); 1864 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 1865 if ((error = nlookup(nd)) != 0) 1866 return (error); 1867 if (nd->nl_nch.ncp->nc_vp) 1868 return (EEXIST); 1869 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1870 return (error); 1871 1872 VATTR_NULL(&vattr); 1873 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1874 vattr.va_rmajor = rmajor; 
1875 vattr.va_rminor = rminor; 1876 whiteout = 0; 1877 1878 switch (mode & S_IFMT) { 1879 case S_IFMT: /* used by badsect to flag bad sectors */ 1880 vattr.va_type = VBAD; 1881 break; 1882 case S_IFCHR: 1883 vattr.va_type = VCHR; 1884 break; 1885 case S_IFBLK: 1886 vattr.va_type = VBLK; 1887 break; 1888 case S_IFWHT: 1889 whiteout = 1; 1890 break; 1891 case S_IFDIR: 1892 /* special directories support for HAMMER */ 1893 vattr.va_type = VDIR; 1894 break; 1895 default: 1896 error = EINVAL; 1897 break; 1898 } 1899 if (error == 0) { 1900 if (whiteout) { 1901 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 1902 nd->nl_cred, NAMEI_CREATE); 1903 } else { 1904 vp = NULL; 1905 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 1906 &vp, nd->nl_cred, &vattr); 1907 if (error == 0) 1908 vput(vp); 1909 } 1910 } 1911 return (error); 1912 } 1913 1914 /* 1915 * mknod_args(char *path, int mode, int dev) 1916 * 1917 * Create a special file. 1918 */ 1919 int 1920 sys_mknod(struct mknod_args *uap) 1921 { 1922 struct nlookupdata nd; 1923 int error; 1924 1925 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1926 if (error == 0) { 1927 error = kern_mknod(&nd, uap->mode, 1928 umajor(uap->dev), uminor(uap->dev)); 1929 } 1930 nlookup_done(&nd); 1931 return (error); 1932 } 1933 1934 int 1935 kern_mkfifo(struct nlookupdata *nd, int mode) 1936 { 1937 struct thread *td = curthread; 1938 struct proc *p = td->td_proc; 1939 struct vattr vattr; 1940 struct vnode *vp; 1941 int error; 1942 1943 bwillinode(1); 1944 1945 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 1946 if ((error = nlookup(nd)) != 0) 1947 return (error); 1948 if (nd->nl_nch.ncp->nc_vp) 1949 return (EEXIST); 1950 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 1951 return (error); 1952 1953 VATTR_NULL(&vattr); 1954 vattr.va_type = VFIFO; 1955 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1956 vp = NULL; 1957 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 1958 if (error == 0) 1959 vput(vp); 1960 return 
(error); 1961 } 1962 1963 /* 1964 * mkfifo_args(char *path, int mode) 1965 * 1966 * Create a named pipe. 1967 */ 1968 int 1969 sys_mkfifo(struct mkfifo_args *uap) 1970 { 1971 struct nlookupdata nd; 1972 int error; 1973 1974 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1975 if (error == 0) 1976 error = kern_mkfifo(&nd, uap->mode); 1977 nlookup_done(&nd); 1978 return (error); 1979 } 1980 1981 static int hardlink_check_uid = 0; 1982 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1983 &hardlink_check_uid, 0, 1984 "Unprivileged processes cannot create hard links to files owned by other " 1985 "users"); 1986 static int hardlink_check_gid = 0; 1987 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1988 &hardlink_check_gid, 0, 1989 "Unprivileged processes cannot create hard links to files owned by other " 1990 "groups"); 1991 1992 static int 1993 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 1994 { 1995 struct vattr va; 1996 int error; 1997 1998 /* 1999 * Shortcut if disabled 2000 */ 2001 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2002 return (0); 2003 2004 /* 2005 * root cred can always hardlink 2006 */ 2007 if (suser_cred(cred, PRISON_ROOT) == 0) 2008 return (0); 2009 2010 /* 2011 * Otherwise only if the originating file is owned by the 2012 * same user or group. Note that any group is allowed if 2013 * the file is owned by the caller. 
2014 */ 2015 error = VOP_GETATTR(vp, &va); 2016 if (error != 0) 2017 return (error); 2018 2019 if (hardlink_check_uid) { 2020 if (cred->cr_uid != va.va_uid) 2021 return (EPERM); 2022 } 2023 2024 if (hardlink_check_gid) { 2025 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2026 return (EPERM); 2027 } 2028 2029 return (0); 2030 } 2031 2032 int 2033 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2034 { 2035 struct thread *td = curthread; 2036 struct vnode *vp; 2037 int error; 2038 2039 /* 2040 * Lookup the source and obtained a locked vnode. 2041 * 2042 * XXX relookup on vget failure / race ? 2043 */ 2044 bwillinode(1); 2045 if ((error = nlookup(nd)) != 0) 2046 return (error); 2047 vp = nd->nl_nch.ncp->nc_vp; 2048 KKASSERT(vp != NULL); 2049 if (vp->v_type == VDIR) 2050 return (EPERM); /* POSIX */ 2051 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2052 return (error); 2053 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2054 return (error); 2055 2056 /* 2057 * Unlock the source so we can lookup the target without deadlocking 2058 * (XXX vp is locked already, possible other deadlock?). The target 2059 * must not exist. 2060 */ 2061 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2062 nd->nl_flags &= ~NLC_NCPISLOCKED; 2063 cache_unlock(&nd->nl_nch); 2064 2065 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2066 if ((error = nlookup(linknd)) != 0) { 2067 vput(vp); 2068 return (error); 2069 } 2070 if (linknd->nl_nch.ncp->nc_vp) { 2071 vput(vp); 2072 return (EEXIST); 2073 } 2074 2075 /* 2076 * Finally run the new API VOP. 2077 */ 2078 error = can_hardlink(vp, td, td->td_proc->p_ucred); 2079 if (error == 0) { 2080 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2081 vp, linknd->nl_cred); 2082 } 2083 vput(vp); 2084 return (error); 2085 } 2086 2087 /* 2088 * link_args(char *path, char *link) 2089 * 2090 * Make a hard file link. 
2091 */ 2092 int 2093 sys_link(struct link_args *uap) 2094 { 2095 struct nlookupdata nd, linknd; 2096 int error; 2097 2098 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2099 if (error == 0) { 2100 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2101 if (error == 0) 2102 error = kern_link(&nd, &linknd); 2103 nlookup_done(&linknd); 2104 } 2105 nlookup_done(&nd); 2106 return (error); 2107 } 2108 2109 int 2110 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2111 { 2112 struct vattr vattr; 2113 struct vnode *vp; 2114 struct vnode *dvp; 2115 int error; 2116 2117 bwillinode(1); 2118 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2119 if ((error = nlookup(nd)) != 0) 2120 return (error); 2121 if (nd->nl_nch.ncp->nc_vp) 2122 return (EEXIST); 2123 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2124 return (error); 2125 dvp = nd->nl_dvp; 2126 VATTR_NULL(&vattr); 2127 vattr.va_mode = mode; 2128 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2129 if (error == 0) 2130 vput(vp); 2131 return (error); 2132 } 2133 2134 /* 2135 * symlink(char *path, char *link) 2136 * 2137 * Make a symbolic link. 2138 */ 2139 int 2140 sys_symlink(struct symlink_args *uap) 2141 { 2142 struct thread *td = curthread; 2143 struct nlookupdata nd; 2144 char *path; 2145 int error; 2146 int mode; 2147 2148 path = objcache_get(namei_oc, M_WAITOK); 2149 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2150 if (error == 0) { 2151 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2152 if (error == 0) { 2153 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2154 error = kern_symlink(&nd, path, mode); 2155 } 2156 nlookup_done(&nd); 2157 } 2158 objcache_put(namei_oc, path); 2159 return (error); 2160 } 2161 2162 /* 2163 * undelete_args(char *path) 2164 * 2165 * Delete a whiteout from the filesystem. 
2166 */ 2167 /* ARGSUSED */ 2168 int 2169 sys_undelete(struct undelete_args *uap) 2170 { 2171 struct nlookupdata nd; 2172 int error; 2173 2174 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2175 bwillinode(1); 2176 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2177 if (error == 0) 2178 error = nlookup(&nd); 2179 if (error == 0) 2180 error = ncp_writechk(&nd.nl_nch); 2181 if (error == 0) { 2182 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2183 NAMEI_DELETE); 2184 } 2185 nlookup_done(&nd); 2186 return (error); 2187 } 2188 2189 int 2190 kern_unlink(struct nlookupdata *nd) 2191 { 2192 int error; 2193 2194 bwillinode(1); 2195 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2196 if ((error = nlookup(nd)) != 0) 2197 return (error); 2198 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2199 return (error); 2200 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2201 return (error); 2202 } 2203 2204 /* 2205 * unlink_args(char *path) 2206 * 2207 * Delete a name from the filesystem. 2208 */ 2209 int 2210 sys_unlink(struct unlink_args *uap) 2211 { 2212 struct nlookupdata nd; 2213 int error; 2214 2215 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2216 if (error == 0) 2217 error = kern_unlink(&nd); 2218 nlookup_done(&nd); 2219 return (error); 2220 } 2221 2222 int 2223 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2224 { 2225 struct thread *td = curthread; 2226 struct proc *p = td->td_proc; 2227 struct file *fp; 2228 struct vnode *vp; 2229 struct vattr vattr; 2230 off_t new_offset; 2231 int error; 2232 2233 fp = holdfp(p->p_fd, fd, -1); 2234 if (fp == NULL) 2235 return (EBADF); 2236 if (fp->f_type != DTYPE_VNODE) { 2237 error = ESPIPE; 2238 goto done; 2239 } 2240 vp = (struct vnode *)fp->f_data; 2241 2242 switch (whence) { 2243 case L_INCR: 2244 new_offset = fp->f_offset + offset; 2245 error = 0; 2246 break; 2247 case L_XTND: 2248 error = VOP_GETATTR(vp, &vattr); 2249 new_offset = offset + vattr.va_size; 2250 break; 2251 case L_SET: 2252 
new_offset = offset; 2253 error = 0; 2254 break; 2255 default: 2256 new_offset = 0; 2257 error = EINVAL; 2258 break; 2259 } 2260 2261 /* 2262 * Validate the seek position. Negative offsets are not allowed 2263 * for regular files, block specials, or directories. 2264 */ 2265 if (error == 0) { 2266 if (new_offset < 0 && 2267 (vp->v_type == VREG || vp->v_type == VDIR || 2268 vp->v_type == VCHR || vp->v_type == VBLK)) { 2269 error = EINVAL; 2270 } else { 2271 fp->f_offset = new_offset; 2272 } 2273 } 2274 *res = fp->f_offset; 2275 done: 2276 fdrop(fp); 2277 return (error); 2278 } 2279 2280 /* 2281 * lseek_args(int fd, int pad, off_t offset, int whence) 2282 * 2283 * Reposition read/write file offset. 2284 */ 2285 int 2286 sys_lseek(struct lseek_args *uap) 2287 { 2288 int error; 2289 2290 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2291 &uap->sysmsg_offset); 2292 2293 return (error); 2294 } 2295 2296 int 2297 kern_access(struct nlookupdata *nd, int aflags) 2298 { 2299 struct vnode *vp; 2300 int error, flags; 2301 2302 if ((error = nlookup(nd)) != 0) 2303 return (error); 2304 retry: 2305 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2306 if (error) 2307 return (error); 2308 2309 /* Flags == 0 means only check for existence. */ 2310 if (aflags) { 2311 flags = 0; 2312 if (aflags & R_OK) 2313 flags |= VREAD; 2314 if (aflags & W_OK) 2315 flags |= VWRITE; 2316 if (aflags & X_OK) 2317 flags |= VEXEC; 2318 if ((flags & VWRITE) == 0 || 2319 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2320 error = VOP_ACCESS(vp, flags, nd->nl_cred); 2321 2322 /* 2323 * If the file handle is stale we have to re-resolve the 2324 * entry. This is a hack at the moment. 
2325 */ 2326 if (error == ESTALE) { 2327 vput(vp); 2328 cache_setunresolved(&nd->nl_nch); 2329 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2330 if (error == 0) { 2331 vp = NULL; 2332 goto retry; 2333 } 2334 return(error); 2335 } 2336 } 2337 vput(vp); 2338 return (error); 2339 } 2340 2341 /* 2342 * access_args(char *path, int flags) 2343 * 2344 * Check access permissions. 2345 */ 2346 int 2347 sys_access(struct access_args *uap) 2348 { 2349 struct nlookupdata nd; 2350 int error; 2351 2352 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2353 if (error == 0) 2354 error = kern_access(&nd, uap->flags); 2355 nlookup_done(&nd); 2356 return (error); 2357 } 2358 2359 int 2360 kern_stat(struct nlookupdata *nd, struct stat *st) 2361 { 2362 int error; 2363 struct vnode *vp; 2364 thread_t td; 2365 2366 if ((error = nlookup(nd)) != 0) 2367 return (error); 2368 again: 2369 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2370 return (ENOENT); 2371 2372 td = curthread; 2373 if ((error = vget(vp, LK_SHARED)) != 0) 2374 return (error); 2375 error = vn_stat(vp, st, nd->nl_cred); 2376 2377 /* 2378 * If the file handle is stale we have to re-resolve the entry. This 2379 * is a hack at the moment. 2380 */ 2381 if (error == ESTALE) { 2382 vput(vp); 2383 cache_setunresolved(&nd->nl_nch); 2384 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2385 if (error == 0) 2386 goto again; 2387 } else { 2388 vput(vp); 2389 } 2390 return (error); 2391 } 2392 2393 /* 2394 * stat_args(char *path, struct stat *ub) 2395 * 2396 * Get file status; this version follows links. 
2397 */ 2398 int 2399 sys_stat(struct stat_args *uap) 2400 { 2401 struct nlookupdata nd; 2402 struct stat st; 2403 int error; 2404 2405 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2406 if (error == 0) { 2407 error = kern_stat(&nd, &st); 2408 if (error == 0) 2409 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2410 } 2411 nlookup_done(&nd); 2412 return (error); 2413 } 2414 2415 /* 2416 * lstat_args(char *path, struct stat *ub) 2417 * 2418 * Get file status; this version does not follow links. 2419 */ 2420 int 2421 sys_lstat(struct lstat_args *uap) 2422 { 2423 struct nlookupdata nd; 2424 struct stat st; 2425 int error; 2426 2427 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2428 if (error == 0) { 2429 error = kern_stat(&nd, &st); 2430 if (error == 0) 2431 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2432 } 2433 nlookup_done(&nd); 2434 return (error); 2435 } 2436 2437 /* 2438 * pathconf_Args(char *path, int name) 2439 * 2440 * Get configurable pathname variables. 2441 */ 2442 /* ARGSUSED */ 2443 int 2444 sys_pathconf(struct pathconf_args *uap) 2445 { 2446 struct nlookupdata nd; 2447 struct vnode *vp; 2448 int error; 2449 2450 vp = NULL; 2451 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2452 if (error == 0) 2453 error = nlookup(&nd); 2454 if (error == 0) 2455 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2456 nlookup_done(&nd); 2457 if (error == 0) { 2458 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 2459 vput(vp); 2460 } 2461 return (error); 2462 } 2463 2464 /* 2465 * XXX: daver 2466 * kern_readlink isn't properly split yet. There is a copyin burried 2467 * in VOP_READLINK(). 
2468 */ 2469 int 2470 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 2471 { 2472 struct thread *td = curthread; 2473 struct proc *p = td->td_proc; 2474 struct vnode *vp; 2475 struct iovec aiov; 2476 struct uio auio; 2477 int error; 2478 2479 if ((error = nlookup(nd)) != 0) 2480 return (error); 2481 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_EXCLUSIVE, &vp); 2482 if (error) 2483 return (error); 2484 if (vp->v_type != VLNK) { 2485 error = EINVAL; 2486 } else { 2487 aiov.iov_base = buf; 2488 aiov.iov_len = count; 2489 auio.uio_iov = &aiov; 2490 auio.uio_iovcnt = 1; 2491 auio.uio_offset = 0; 2492 auio.uio_rw = UIO_READ; 2493 auio.uio_segflg = UIO_USERSPACE; 2494 auio.uio_td = td; 2495 auio.uio_resid = count; 2496 error = VOP_READLINK(vp, &auio, p->p_ucred); 2497 } 2498 vput(vp); 2499 *res = count - auio.uio_resid; 2500 return (error); 2501 } 2502 2503 /* 2504 * readlink_args(char *path, char *buf, int count) 2505 * 2506 * Return target name of a symbolic link. 2507 */ 2508 int 2509 sys_readlink(struct readlink_args *uap) 2510 { 2511 struct nlookupdata nd; 2512 int error; 2513 2514 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2515 if (error == 0) { 2516 error = kern_readlink(&nd, uap->buf, uap->count, 2517 &uap->sysmsg_result); 2518 } 2519 nlookup_done(&nd); 2520 return (error); 2521 } 2522 2523 static int 2524 setfflags(struct vnode *vp, int flags) 2525 { 2526 struct thread *td = curthread; 2527 struct proc *p = td->td_proc; 2528 int error; 2529 struct vattr vattr; 2530 2531 /* 2532 * Prevent non-root users from setting flags on devices. When 2533 * a device is reused, users can retain ownership of the device 2534 * if they are allowed to set flags and programs assume that 2535 * chown can't fail when done as root. 
2536 */ 2537 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2538 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 2539 return (error); 2540 2541 /* 2542 * note: vget is required for any operation that might mod the vnode 2543 * so VINACTIVE is properly cleared. 2544 */ 2545 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2546 VATTR_NULL(&vattr); 2547 vattr.va_flags = flags; 2548 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2549 vput(vp); 2550 } 2551 return (error); 2552 } 2553 2554 /* 2555 * chflags(char *path, int flags) 2556 * 2557 * Change flags of a file given a path name. 2558 */ 2559 /* ARGSUSED */ 2560 int 2561 sys_chflags(struct chflags_args *uap) 2562 { 2563 struct nlookupdata nd; 2564 struct vnode *vp; 2565 int error; 2566 2567 vp = NULL; 2568 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2569 /* XXX Add NLC flag indicating modifying operation? */ 2570 if (error == 0) 2571 error = nlookup(&nd); 2572 if (error == 0) 2573 error = ncp_writechk(&nd.nl_nch); 2574 if (error == 0) 2575 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 2576 nlookup_done(&nd); 2577 if (error == 0) { 2578 error = setfflags(vp, uap->flags); 2579 vrele(vp); 2580 } 2581 return (error); 2582 } 2583 2584 /* 2585 * fchflags_args(int fd, int flags) 2586 * 2587 * Change flags of a file given a file descriptor. 
2588 */ 2589 /* ARGSUSED */ 2590 int 2591 sys_fchflags(struct fchflags_args *uap) 2592 { 2593 struct thread *td = curthread; 2594 struct proc *p = td->td_proc; 2595 struct file *fp; 2596 int error; 2597 2598 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2599 return (error); 2600 if (fp->f_nchandle.ncp) 2601 error = ncp_writechk(&fp->f_nchandle); 2602 if (error == 0) 2603 error = setfflags((struct vnode *) fp->f_data, uap->flags); 2604 fdrop(fp); 2605 return (error); 2606 } 2607 2608 static int 2609 setfmode(struct vnode *vp, int mode) 2610 { 2611 struct thread *td = curthread; 2612 struct proc *p = td->td_proc; 2613 int error; 2614 struct vattr vattr; 2615 2616 /* 2617 * note: vget is required for any operation that might mod the vnode 2618 * so VINACTIVE is properly cleared. 2619 */ 2620 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2621 VATTR_NULL(&vattr); 2622 vattr.va_mode = mode & ALLPERMS; 2623 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2624 vput(vp); 2625 } 2626 return error; 2627 } 2628 2629 int 2630 kern_chmod(struct nlookupdata *nd, int mode) 2631 { 2632 struct vnode *vp; 2633 int error; 2634 2635 /* XXX Add NLC flag indicating modifying operation? */ 2636 if ((error = nlookup(nd)) != 0) 2637 return (error); 2638 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2639 return (error); 2640 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2641 error = setfmode(vp, mode); 2642 vrele(vp); 2643 return (error); 2644 } 2645 2646 /* 2647 * chmod_args(char *path, int mode) 2648 * 2649 * Change mode of a file given path name. 
2650 */ 2651 /* ARGSUSED */ 2652 int 2653 sys_chmod(struct chmod_args *uap) 2654 { 2655 struct nlookupdata nd; 2656 int error; 2657 2658 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2659 if (error == 0) 2660 error = kern_chmod(&nd, uap->mode); 2661 nlookup_done(&nd); 2662 return (error); 2663 } 2664 2665 /* 2666 * lchmod_args(char *path, int mode) 2667 * 2668 * Change mode of a file given path name (don't follow links.) 2669 */ 2670 /* ARGSUSED */ 2671 int 2672 sys_lchmod(struct lchmod_args *uap) 2673 { 2674 struct nlookupdata nd; 2675 int error; 2676 2677 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2678 if (error == 0) 2679 error = kern_chmod(&nd, uap->mode); 2680 nlookup_done(&nd); 2681 return (error); 2682 } 2683 2684 /* 2685 * fchmod_args(int fd, int mode) 2686 * 2687 * Change mode of a file given a file descriptor. 2688 */ 2689 /* ARGSUSED */ 2690 int 2691 sys_fchmod(struct fchmod_args *uap) 2692 { 2693 struct thread *td = curthread; 2694 struct proc *p = td->td_proc; 2695 struct file *fp; 2696 int error; 2697 2698 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2699 return (error); 2700 if (fp->f_nchandle.ncp) 2701 error = ncp_writechk(&fp->f_nchandle); 2702 if (error == 0) 2703 error = setfmode((struct vnode *)fp->f_data, uap->mode); 2704 fdrop(fp); 2705 return (error); 2706 } 2707 2708 static int 2709 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2710 { 2711 struct thread *td = curthread; 2712 struct proc *p = td->td_proc; 2713 int error; 2714 struct vattr vattr; 2715 2716 /* 2717 * note: vget is required for any operation that might mod the vnode 2718 * so VINACTIVE is properly cleared. 
2719 */ 2720 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2721 VATTR_NULL(&vattr); 2722 vattr.va_uid = uid; 2723 vattr.va_gid = gid; 2724 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2725 vput(vp); 2726 } 2727 return error; 2728 } 2729 2730 int 2731 kern_chown(struct nlookupdata *nd, int uid, int gid) 2732 { 2733 struct vnode *vp; 2734 int error; 2735 2736 /* XXX Add NLC flag indicating modifying operation? */ 2737 if ((error = nlookup(nd)) != 0) 2738 return (error); 2739 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2740 return (error); 2741 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 2742 error = setfown(vp, uid, gid); 2743 vrele(vp); 2744 return (error); 2745 } 2746 2747 /* 2748 * chown(char *path, int uid, int gid) 2749 * 2750 * Set ownership given a path name. 2751 */ 2752 int 2753 sys_chown(struct chown_args *uap) 2754 { 2755 struct nlookupdata nd; 2756 int error; 2757 2758 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2759 if (error == 0) 2760 error = kern_chown(&nd, uap->uid, uap->gid); 2761 nlookup_done(&nd); 2762 return (error); 2763 } 2764 2765 /* 2766 * lchown_args(char *path, int uid, int gid) 2767 * 2768 * Set ownership given a path name, do not cross symlinks. 2769 */ 2770 int 2771 sys_lchown(struct lchown_args *uap) 2772 { 2773 struct nlookupdata nd; 2774 int error; 2775 2776 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2777 if (error == 0) 2778 error = kern_chown(&nd, uap->uid, uap->gid); 2779 nlookup_done(&nd); 2780 return (error); 2781 } 2782 2783 /* 2784 * fchown_args(int fd, int uid, int gid) 2785 * 2786 * Set ownership given a file descriptor. 
2787 */ 2788 /* ARGSUSED */ 2789 int 2790 sys_fchown(struct fchown_args *uap) 2791 { 2792 struct thread *td = curthread; 2793 struct proc *p = td->td_proc; 2794 struct file *fp; 2795 int error; 2796 2797 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 2798 return (error); 2799 if (fp->f_nchandle.ncp) 2800 error = ncp_writechk(&fp->f_nchandle); 2801 if (error == 0) 2802 error = setfown((struct vnode *)fp->f_data, uap->uid, uap->gid); 2803 fdrop(fp); 2804 return (error); 2805 } 2806 2807 static int 2808 getutimes(const struct timeval *tvp, struct timespec *tsp) 2809 { 2810 struct timeval tv[2]; 2811 2812 if (tvp == NULL) { 2813 microtime(&tv[0]); 2814 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2815 tsp[1] = tsp[0]; 2816 } else { 2817 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2818 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2819 } 2820 return 0; 2821 } 2822 2823 static int 2824 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2825 { 2826 struct thread *td = curthread; 2827 struct proc *p = td->td_proc; 2828 int error; 2829 struct vattr vattr; 2830 2831 /* 2832 * note: vget is required for any operation that might mod the vnode 2833 * so VINACTIVE is properly cleared. 2834 */ 2835 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 2836 VATTR_NULL(&vattr); 2837 vattr.va_atime = ts[0]; 2838 vattr.va_mtime = ts[1]; 2839 if (nullflag) 2840 vattr.va_vaflags |= VA_UTIMES_NULL; 2841 error = VOP_SETATTR(vp, &vattr, p->p_ucred); 2842 vput(vp); 2843 } 2844 return error; 2845 } 2846 2847 int 2848 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2849 { 2850 struct timespec ts[2]; 2851 struct vnode *vp; 2852 int error; 2853 2854 if ((error = getutimes(tptr, ts)) != 0) 2855 return (error); 2856 /* XXX Add NLC flag indicating modifying operation? 
*/ 2857 if ((error = nlookup(nd)) != 0) 2858 return (error); 2859 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2860 return (error); 2861 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2862 return (error); 2863 error = setutimes(vp, ts, tptr == NULL); 2864 vrele(vp); 2865 return (error); 2866 } 2867 2868 /* 2869 * utimes_args(char *path, struct timeval *tptr) 2870 * 2871 * Set the access and modification times of a file. 2872 */ 2873 int 2874 sys_utimes(struct utimes_args *uap) 2875 { 2876 struct timeval tv[2]; 2877 struct nlookupdata nd; 2878 int error; 2879 2880 if (uap->tptr) { 2881 error = copyin(uap->tptr, tv, sizeof(tv)); 2882 if (error) 2883 return (error); 2884 } 2885 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2886 if (error == 0) 2887 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2888 nlookup_done(&nd); 2889 return (error); 2890 } 2891 2892 /* 2893 * lutimes_args(char *path, struct timeval *tptr) 2894 * 2895 * Set the access and modification times of a file. 2896 */ 2897 int 2898 sys_lutimes(struct lutimes_args *uap) 2899 { 2900 struct timeval tv[2]; 2901 struct nlookupdata nd; 2902 int error; 2903 2904 if (uap->tptr) { 2905 error = copyin(uap->tptr, tv, sizeof(tv)); 2906 if (error) 2907 return (error); 2908 } 2909 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2910 if (error == 0) 2911 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 2912 nlookup_done(&nd); 2913 return (error); 2914 } 2915 2916 int 2917 kern_futimes(int fd, struct timeval *tptr) 2918 { 2919 struct thread *td = curthread; 2920 struct proc *p = td->td_proc; 2921 struct timespec ts[2]; 2922 struct file *fp; 2923 int error; 2924 2925 error = getutimes(tptr, ts); 2926 if (error) 2927 return (error); 2928 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 2929 return (error); 2930 if (fp->f_nchandle.ncp) 2931 error = ncp_writechk(&fp->f_nchandle); 2932 if (error == 0) 2933 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2934 fdrop(fp); 2935 return (error); 2936 } 2937 2938 /* 2939 * futimes_args(int fd, struct timeval *tptr) 2940 * 2941 * Set the access and modification times of a file. 2942 */ 2943 int 2944 sys_futimes(struct futimes_args *uap) 2945 { 2946 struct timeval tv[2]; 2947 int error; 2948 2949 if (uap->tptr) { 2950 error = copyin(uap->tptr, tv, sizeof(tv)); 2951 if (error) 2952 return (error); 2953 } 2954 2955 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2956 2957 return (error); 2958 } 2959 2960 int 2961 kern_truncate(struct nlookupdata *nd, off_t length) 2962 { 2963 struct vnode *vp; 2964 struct vattr vattr; 2965 int error; 2966 2967 if (length < 0) 2968 return(EINVAL); 2969 /* XXX Add NLC flag indicating modifying operation? 
*/ 2970 if ((error = nlookup(nd)) != 0) 2971 return (error); 2972 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2973 return (error); 2974 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 2975 return (error); 2976 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY)) != 0) { 2977 vrele(vp); 2978 return (error); 2979 } 2980 if (vp->v_type == VDIR) { 2981 error = EISDIR; 2982 } else if ((error = vn_writechk(vp, &nd->nl_nch)) == 0 && 2983 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred)) == 0) { 2984 VATTR_NULL(&vattr); 2985 vattr.va_size = length; 2986 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 2987 } 2988 vput(vp); 2989 return (error); 2990 } 2991 2992 /* 2993 * truncate(char *path, int pad, off_t length) 2994 * 2995 * Truncate a file given its path name. 2996 */ 2997 int 2998 sys_truncate(struct truncate_args *uap) 2999 { 3000 struct nlookupdata nd; 3001 int error; 3002 3003 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3004 if (error == 0) 3005 error = kern_truncate(&nd, uap->length); 3006 nlookup_done(&nd); 3007 return error; 3008 } 3009 3010 int 3011 kern_ftruncate(int fd, off_t length) 3012 { 3013 struct thread *td = curthread; 3014 struct proc *p = td->td_proc; 3015 struct vattr vattr; 3016 struct vnode *vp; 3017 struct file *fp; 3018 int error; 3019 3020 if (length < 0) 3021 return(EINVAL); 3022 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3023 return (error); 3024 if (fp->f_nchandle.ncp) { 3025 error = ncp_writechk(&fp->f_nchandle); 3026 if (error) 3027 goto done; 3028 } 3029 if ((fp->f_flag & FWRITE) == 0) { 3030 error = EINVAL; 3031 goto done; 3032 } 3033 vp = (struct vnode *)fp->f_data; 3034 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3035 if (vp->v_type == VDIR) { 3036 error = EISDIR; 3037 } else if ((error = vn_writechk(vp, NULL)) == 0) { 3038 VATTR_NULL(&vattr); 3039 vattr.va_size = length; 3040 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3041 } 3042 vn_unlock(vp); 3043 done: 3044 fdrop(fp); 3045 return (error); 3046 } 3047 
3048 /* 3049 * ftruncate_args(int fd, int pad, off_t length) 3050 * 3051 * Truncate a file given a file descriptor. 3052 */ 3053 int 3054 sys_ftruncate(struct ftruncate_args *uap) 3055 { 3056 int error; 3057 3058 error = kern_ftruncate(uap->fd, uap->length); 3059 3060 return (error); 3061 } 3062 3063 /* 3064 * fsync(int fd) 3065 * 3066 * Sync an open file. 3067 */ 3068 /* ARGSUSED */ 3069 int 3070 sys_fsync(struct fsync_args *uap) 3071 { 3072 struct thread *td = curthread; 3073 struct proc *p = td->td_proc; 3074 struct vnode *vp; 3075 struct file *fp; 3076 vm_object_t obj; 3077 int error; 3078 3079 if ((error = holdvnode(p->p_fd, uap->fd, &fp)) != 0) 3080 return (error); 3081 vp = (struct vnode *)fp->f_data; 3082 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3083 if ((obj = vp->v_object) != NULL) 3084 vm_object_page_clean(obj, 0, 0, 0); 3085 if ((error = VOP_FSYNC(vp, MNT_WAIT)) == 0 && vp->v_mount) 3086 error = buf_fsync(vp); 3087 vn_unlock(vp); 3088 fdrop(fp); 3089 return (error); 3090 } 3091 3092 int 3093 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3094 { 3095 struct nchandle fnchd; 3096 struct nchandle tnchd; 3097 struct namecache *ncp; 3098 struct vnode *fdvp; 3099 struct vnode *tdvp; 3100 struct mount *mp; 3101 int error; 3102 3103 bwillinode(1); 3104 fromnd->nl_flags |= NLC_REFDVP; 3105 if ((error = nlookup(fromnd)) != 0) 3106 return (error); 3107 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3108 return (ENOENT); 3109 fnchd.mount = fromnd->nl_nch.mount; 3110 cache_hold(&fnchd); 3111 3112 /* 3113 * unlock the source nch so we can lookup the target nch without 3114 * deadlocking. The target may or may not exist so we do not check 3115 * for a target vp like kern_mkdir() and other creation functions do. 3116 * 3117 * The source and target directories are ref'd and rechecked after 3118 * everything is relocked to determine if the source or target file 3119 * has been renamed. 
3120 */ 3121 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 3122 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 3123 cache_unlock(&fromnd->nl_nch); 3124 3125 tond->nl_flags |= NLC_CREATE | NLC_REFDVP; 3126 if ((error = nlookup(tond)) != 0) { 3127 cache_drop(&fnchd); 3128 return (error); 3129 } 3130 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 3131 cache_drop(&fnchd); 3132 return (ENOENT); 3133 } 3134 tnchd.mount = tond->nl_nch.mount; 3135 cache_hold(&tnchd); 3136 3137 /* 3138 * If the source and target are the same there is nothing to do 3139 */ 3140 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 3141 cache_drop(&fnchd); 3142 cache_drop(&tnchd); 3143 return (0); 3144 } 3145 3146 /* 3147 * Mount points cannot be renamed or overwritten 3148 */ 3149 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 3150 NCF_ISMOUNTPT 3151 ) { 3152 cache_drop(&fnchd); 3153 cache_drop(&tnchd); 3154 return (EINVAL); 3155 } 3156 3157 /* 3158 * relock the source ncp. NOTE AFTER RELOCKING: the source ncp 3159 * may have become invalid while it was unlocked, nc_vp and nc_mount 3160 * could be NULL. 
3161 */ 3162 if (cache_lock_nonblock(&fromnd->nl_nch) == 0) { 3163 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3164 } else if (fromnd->nl_nch.ncp > tond->nl_nch.ncp) { 3165 cache_lock(&fromnd->nl_nch); 3166 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3167 } else { 3168 cache_unlock(&tond->nl_nch); 3169 cache_lock(&fromnd->nl_nch); 3170 cache_resolve(&fromnd->nl_nch, fromnd->nl_cred); 3171 cache_lock(&tond->nl_nch); 3172 cache_resolve(&tond->nl_nch, tond->nl_cred); 3173 } 3174 fromnd->nl_flags |= NLC_NCPISLOCKED; 3175 3176 /* 3177 * make sure the parent directories linkages are the same 3178 */ 3179 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 3180 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 3181 cache_drop(&fnchd); 3182 cache_drop(&tnchd); 3183 return (ENOENT); 3184 } 3185 3186 /* 3187 * Both the source and target must be within the same filesystem and 3188 * in the same filesystem as their parent directories within the 3189 * namecache topology. 3190 * 3191 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 3192 */ 3193 mp = fnchd.mount; 3194 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 3195 mp != tond->nl_nch.mount) { 3196 cache_drop(&fnchd); 3197 cache_drop(&tnchd); 3198 return (EXDEV); 3199 } 3200 3201 /* 3202 * Make sure the mount point is writable 3203 */ 3204 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 3205 cache_drop(&fnchd); 3206 cache_drop(&tnchd); 3207 return (error); 3208 } 3209 3210 /* 3211 * If the target exists and either the source or target is a directory, 3212 * then both must be directories. 3213 * 3214 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 3215 * have become NULL. 
3216 */ 3217 if (tond->nl_nch.ncp->nc_vp) { 3218 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 3219 error = ENOENT; 3220 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 3221 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 3222 error = ENOTDIR; 3223 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 3224 error = EISDIR; 3225 } 3226 } 3227 3228 /* 3229 * You cannot rename a source into itself or a subdirectory of itself. 3230 * We check this by travsersing the target directory upwards looking 3231 * for a match against the source. 3232 */ 3233 if (error == 0) { 3234 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 3235 if (fromnd->nl_nch.ncp == ncp) { 3236 error = EINVAL; 3237 break; 3238 } 3239 } 3240 } 3241 3242 cache_drop(&fnchd); 3243 cache_drop(&tnchd); 3244 3245 /* 3246 * Even though the namespaces are different, they may still represent 3247 * hardlinks to the same file. The filesystem might have a hard time 3248 * with this so we issue a NREMOVE of the source instead of a NRENAME 3249 * when we detect the situation. 3250 */ 3251 if (error == 0) { 3252 fdvp = fromnd->nl_dvp; 3253 tdvp = tond->nl_dvp; 3254 if (fdvp == NULL || tdvp == NULL) { 3255 error = EPERM; 3256 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 3257 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 3258 fromnd->nl_cred); 3259 } else { 3260 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 3261 fdvp, tdvp, tond->nl_cred); 3262 } 3263 } 3264 return (error); 3265 } 3266 3267 /* 3268 * rename_args(char *from, char *to) 3269 * 3270 * Rename files. Source and destination must either both be directories, 3271 * or both not be directories. If target is a directory, it must be empty. 
3272 */ 3273 int 3274 sys_rename(struct rename_args *uap) 3275 { 3276 struct nlookupdata fromnd, tond; 3277 int error; 3278 3279 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 3280 if (error == 0) { 3281 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 3282 if (error == 0) 3283 error = kern_rename(&fromnd, &tond); 3284 nlookup_done(&tond); 3285 } 3286 nlookup_done(&fromnd); 3287 return (error); 3288 } 3289 3290 int 3291 kern_mkdir(struct nlookupdata *nd, int mode) 3292 { 3293 struct thread *td = curthread; 3294 struct proc *p = td->td_proc; 3295 struct vnode *vp; 3296 struct vattr vattr; 3297 int error; 3298 3299 bwillinode(1); 3300 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 3301 if ((error = nlookup(nd)) != 0) 3302 return (error); 3303 3304 if (nd->nl_nch.ncp->nc_vp) 3305 return (EEXIST); 3306 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3307 return (error); 3308 VATTR_NULL(&vattr); 3309 vattr.va_type = VDIR; 3310 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 3311 3312 vp = NULL; 3313 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, p->p_ucred, &vattr); 3314 if (error == 0) 3315 vput(vp); 3316 return (error); 3317 } 3318 3319 /* 3320 * mkdir_args(char *path, int mode) 3321 * 3322 * Make a directory file. 3323 */ 3324 /* ARGSUSED */ 3325 int 3326 sys_mkdir(struct mkdir_args *uap) 3327 { 3328 struct nlookupdata nd; 3329 int error; 3330 3331 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3332 if (error == 0) 3333 error = kern_mkdir(&nd, uap->mode); 3334 nlookup_done(&nd); 3335 return (error); 3336 } 3337 3338 int 3339 kern_rmdir(struct nlookupdata *nd) 3340 { 3341 int error; 3342 3343 bwillinode(1); 3344 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 3345 if ((error = nlookup(nd)) != 0) 3346 return (error); 3347 3348 /* 3349 * Do not allow directories representing mount points to be 3350 * deleted, even if empty. Check write perms on mount point 3351 * in case the vnode is aliased (aka nullfs). 
3352 */ 3353 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 3354 return (EINVAL); 3355 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3356 return (error); 3357 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 3358 return (error); 3359 } 3360 3361 /* 3362 * rmdir_args(char *path) 3363 * 3364 * Remove a directory file. 3365 */ 3366 /* ARGSUSED */ 3367 int 3368 sys_rmdir(struct rmdir_args *uap) 3369 { 3370 struct nlookupdata nd; 3371 int error; 3372 3373 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3374 if (error == 0) 3375 error = kern_rmdir(&nd); 3376 nlookup_done(&nd); 3377 return (error); 3378 } 3379 3380 int 3381 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 3382 enum uio_seg direction) 3383 { 3384 struct thread *td = curthread; 3385 struct proc *p = td->td_proc; 3386 struct vnode *vp; 3387 struct file *fp; 3388 struct uio auio; 3389 struct iovec aiov; 3390 off_t loff; 3391 int error, eofflag; 3392 3393 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 3394 return (error); 3395 if ((fp->f_flag & FREAD) == 0) { 3396 error = EBADF; 3397 goto done; 3398 } 3399 vp = (struct vnode *)fp->f_data; 3400 unionread: 3401 if (vp->v_type != VDIR) { 3402 error = EINVAL; 3403 goto done; 3404 } 3405 aiov.iov_base = buf; 3406 aiov.iov_len = count; 3407 auio.uio_iov = &aiov; 3408 auio.uio_iovcnt = 1; 3409 auio.uio_rw = UIO_READ; 3410 auio.uio_segflg = direction; 3411 auio.uio_td = td; 3412 auio.uio_resid = count; 3413 loff = auio.uio_offset = fp->f_offset; 3414 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 3415 fp->f_offset = auio.uio_offset; 3416 if (error) 3417 goto done; 3418 if (count == auio.uio_resid) { 3419 if (union_dircheckp) { 3420 error = union_dircheckp(td, &vp, fp); 3421 if (error == -1) 3422 goto unionread; 3423 if (error) 3424 goto done; 3425 } 3426 #if 0 3427 if ((vp->v_flag & VROOT) && 3428 (vp->v_mount->mnt_flag & MNT_UNION)) { 3429 struct vnode *tvp = vp; 3430 vp = vp->v_mount->mnt_vnodecovered; 
3431 vref(vp); 3432 fp->f_data = vp; 3433 fp->f_offset = 0; 3434 vrele(tvp); 3435 goto unionread; 3436 } 3437 #endif 3438 } 3439 3440 /* 3441 * WARNING! *basep may not be wide enough to accomodate the 3442 * seek offset. XXX should we hack this to return the upper 32 bits 3443 * for offsets greater then 4G? 3444 */ 3445 if (basep) { 3446 *basep = (long)loff; 3447 } 3448 *res = count - auio.uio_resid; 3449 done: 3450 fdrop(fp); 3451 return (error); 3452 } 3453 3454 /* 3455 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 3456 * 3457 * Read a block of directory entries in a file system independent format. 3458 */ 3459 int 3460 sys_getdirentries(struct getdirentries_args *uap) 3461 { 3462 long base; 3463 int error; 3464 3465 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 3466 &uap->sysmsg_result, UIO_USERSPACE); 3467 3468 if (error == 0 && uap->basep) 3469 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 3470 return (error); 3471 } 3472 3473 /* 3474 * getdents_args(int fd, char *buf, size_t count) 3475 */ 3476 int 3477 sys_getdents(struct getdents_args *uap) 3478 { 3479 int error; 3480 3481 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 3482 &uap->sysmsg_result, UIO_USERSPACE); 3483 3484 return (error); 3485 } 3486 3487 /* 3488 * umask(int newmask) 3489 * 3490 * Set the mode mask for creation of filesystem nodes. 3491 * 3492 * MP SAFE 3493 */ 3494 int 3495 sys_umask(struct umask_args *uap) 3496 { 3497 struct thread *td = curthread; 3498 struct proc *p = td->td_proc; 3499 struct filedesc *fdp; 3500 3501 fdp = p->p_fd; 3502 uap->sysmsg_result = fdp->fd_cmask; 3503 fdp->fd_cmask = uap->newmask & ALLPERMS; 3504 return (0); 3505 } 3506 3507 /* 3508 * revoke(char *path) 3509 * 3510 * Void all references to file by ripping underlying filesystem 3511 * away from vnode. 
3512 */ 3513 /* ARGSUSED */ 3514 int 3515 sys_revoke(struct revoke_args *uap) 3516 { 3517 struct nlookupdata nd; 3518 struct vattr vattr; 3519 struct vnode *vp; 3520 struct ucred *cred; 3521 int error; 3522 3523 vp = NULL; 3524 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3525 if (error == 0) 3526 error = nlookup(&nd); 3527 if (error == 0) 3528 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3529 cred = crhold(nd.nl_cred); 3530 nlookup_done(&nd); 3531 if (error == 0) { 3532 if (vp->v_type != VCHR && vp->v_type != VBLK) 3533 error = EINVAL; 3534 if (error == 0) 3535 error = VOP_GETATTR(vp, &vattr); 3536 if (error == 0 && cred->cr_uid != vattr.va_uid) 3537 error = suser_cred(cred, PRISON_ROOT); 3538 if (error == 0 && count_udev(vp->v_umajor, vp->v_uminor) > 0) { 3539 error = 0; 3540 vx_lock(vp); 3541 VOP_REVOKE(vp, REVOKEALL); 3542 vx_unlock(vp); 3543 } 3544 vrele(vp); 3545 } 3546 if (cred) 3547 crfree(cred); 3548 return (error); 3549 } 3550 3551 /* 3552 * getfh_args(char *fname, fhandle_t *fhp) 3553 * 3554 * Get (NFS) file handle 3555 */ 3556 int 3557 sys_getfh(struct getfh_args *uap) 3558 { 3559 struct thread *td = curthread; 3560 struct nlookupdata nd; 3561 fhandle_t fh; 3562 struct vnode *vp; 3563 int error; 3564 3565 /* 3566 * Must be super user 3567 */ 3568 if ((error = suser(td)) != 0) 3569 return (error); 3570 3571 vp = NULL; 3572 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 3573 if (error == 0) 3574 error = nlookup(&nd); 3575 if (error == 0) 3576 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3577 nlookup_done(&nd); 3578 if (error == 0) { 3579 bzero(&fh, sizeof(fh)); 3580 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3581 error = VFS_VPTOFH(vp, &fh.fh_fid); 3582 vput(vp); 3583 if (error == 0) 3584 error = copyout(&fh, uap->fhp, sizeof(fh)); 3585 } 3586 return (error); 3587 } 3588 3589 /* 3590 * fhopen_args(const struct fhandle *u_fhp, int flags) 3591 * 3592 * syscall for the rpc.lockd to use to translate 
a NFS file handle into 3593 * an open descriptor. 3594 * 3595 * warning: do not remove the suser() call or this becomes one giant 3596 * security hole. 3597 */ 3598 int 3599 sys_fhopen(struct fhopen_args *uap) 3600 { 3601 struct thread *td = curthread; 3602 struct proc *p = td->td_proc; 3603 struct mount *mp; 3604 struct vnode *vp; 3605 struct fhandle fhp; 3606 struct vattr vat; 3607 struct vattr *vap = &vat; 3608 struct flock lf; 3609 int fmode, mode, error, type; 3610 struct file *nfp; 3611 struct file *fp; 3612 int indx; 3613 3614 /* 3615 * Must be super user 3616 */ 3617 error = suser(td); 3618 if (error) 3619 return (error); 3620 3621 fmode = FFLAGS(uap->flags); 3622 /* why not allow a non-read/write open for our lockd? */ 3623 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3624 return (EINVAL); 3625 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3626 if (error) 3627 return(error); 3628 /* find the mount point */ 3629 mp = vfs_getvfs(&fhp.fh_fsid); 3630 if (mp == NULL) 3631 return (ESTALE); 3632 /* now give me my vnode, it gets returned to me locked */ 3633 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); 3634 if (error) 3635 return (error); 3636 /* 3637 * from now on we have to make sure not 3638 * to forget about the vnode 3639 * any error that causes an abort must vput(vp) 3640 * just set error = err and 'goto bad;'. 
3641 */ 3642 3643 /* 3644 * from vn_open 3645 */ 3646 if (vp->v_type == VLNK) { 3647 error = EMLINK; 3648 goto bad; 3649 } 3650 if (vp->v_type == VSOCK) { 3651 error = EOPNOTSUPP; 3652 goto bad; 3653 } 3654 mode = 0; 3655 if (fmode & (FWRITE | O_TRUNC)) { 3656 if (vp->v_type == VDIR) { 3657 error = EISDIR; 3658 goto bad; 3659 } 3660 error = vn_writechk(vp, NULL); 3661 if (error) 3662 goto bad; 3663 mode |= VWRITE; 3664 } 3665 if (fmode & FREAD) 3666 mode |= VREAD; 3667 if (mode) { 3668 error = VOP_ACCESS(vp, mode, p->p_ucred); 3669 if (error) 3670 goto bad; 3671 } 3672 if (fmode & O_TRUNC) { 3673 vn_unlock(vp); /* XXX */ 3674 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 3675 VATTR_NULL(vap); 3676 vap->va_size = 0; 3677 error = VOP_SETATTR(vp, vap, p->p_ucred); 3678 if (error) 3679 goto bad; 3680 } 3681 3682 /* 3683 * VOP_OPEN needs the file pointer so it can potentially override 3684 * it. 3685 * 3686 * WARNING! no f_nchandle will be associated when fhopen()ing a 3687 * directory. XXX 3688 */ 3689 if ((error = falloc(p, &nfp, &indx)) != 0) 3690 goto bad; 3691 fp = nfp; 3692 3693 error = VOP_OPEN(vp, fmode, p->p_ucred, fp); 3694 if (error) { 3695 /* 3696 * setting f_ops this way prevents VOP_CLOSE from being 3697 * called or fdrop() releasing the vp from v_data. Since 3698 * the VOP_OPEN failed we don't want to VOP_CLOSE. 3699 */ 3700 fp->f_ops = &badfileops; 3701 fp->f_data = NULL; 3702 goto bad_drop; 3703 } 3704 3705 /* 3706 * The fp is given its own reference, we still have our ref and lock. 3707 * 3708 * Assert that all regular files must be created with a VM object. 3709 */ 3710 if (vp->v_type == VREG && vp->v_object == NULL) { 3711 kprintf("fhopen: regular file did not have VM object: %p\n", vp); 3712 goto bad_drop; 3713 } 3714 3715 /* 3716 * The open was successful. Handle any locking requirements. 
 */
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		/* Whole-file lock: offset 0, length 0, relative to SEEK_SET. */
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		/* F_WAIT is only passed when FNONBLOCK was not requested. */
		if (fmode & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;
		/* The vnode lock is dropped around VOP_ADVLOCK(). */
		vn_unlock(vp);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * Release our private reference.  The vnode was
			 * unlocked above, so it is released with vrele()
			 * here instead of going through the vput() at "bad".
			 */
			fsetfd(p, NULL, indx);
			fdrop(fp);
			vrele(vp);
			return (error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		fp->f_flag |= FHASLOCK;
	}

	/*
	 * Clean up.  Associate the file pointer with the previously
	 * reserved descriptor and return it.
	 */
	vput(vp);
	fsetfd(p, fp, indx);
	fdrop(fp);
	uap->sysmsg_result = indx;
	return (0);

	/*
	 * Error exits: bad_drop clears descriptor slot indx and drops the
	 * file pointer, then falls through to bad, which vput()s the vnode.
	 */
bad_drop:
	fsetfd(p, NULL, indx);
	fdrop(fp);
bad:
	vput(vp);
	return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 *
 * Stat the object named by a file handle.  The handle is copied in from
 * uap->u_fhp, its fsid is looked up with vfs_getvfs() (ESTALE when no
 * mount is found) and its fid is converted to a vnode with VFS_FHTOVP().
 * The result of vn_stat() is copied out to uap->sb.
 *
 * Returns 0 on success, otherwise the error from suser(), copyin(),
 * VFS_FHTOVP(), vn_stat() or copyout().
 */
int
sys_fhstat(struct fhstat_args *uap)
{
	struct thread *td = curthread;
	struct stat sb;
	fhandle_t fh;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	if (error)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);
	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
		return (error);
	error = vn_stat(vp, &sb, td->td_proc->p_ucred);
	/* The vnode is released before the vn_stat() result is examined. */
	vput(vp);
	if (error)
		return (error);
	error = copyout(&sb, uap->sb, sizeof(sb));
	return (error);
}

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 *
 * Return filesystem statistics for the mount holding the object named
 * by a file handle.  ESTALE is returned when the fsid matches no mount
 * or when chroot_visible_mnt() reports the mount as not visible to the
 * calling process.
 *
 * sp points at the mount's own mnt_stat, so the f_mntonname and f_flags
 * updates below are written into that structure before it is copied out
 * to uap->buf.
 */
int
sys_fhstatfs(struct fhstatfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statfs *sp;
	struct mount *mp;
	struct vnode *vp;
	struct statfs sb;
	char *fullpath, *freepath;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = suser(td)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);

	/*
	 * NOTE(review): p is tested against NULL here but dereferenced
	 * unconditionally (p->p_ucred) further down - confirm whether p
	 * can actually be NULL on this path.
	 */
	if (p != NULL && !chroot_visible_mnt(mp, p))
		return (ESTALE);

	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
		return (error);
	/*
	 * The vnode is only used to reach its mount; it is released
	 * before VFS_STATFS() is called.
	 */
	mp = vp->v_mount;
	sp = &mp->mnt_stat;
	vput(vp);
	if ((error = VFS_STATFS(mp, sp, p->p_ucred)) != 0)
		return (error);

	/*
	 * Replace f_mntonname with the path computed by mount_path().
	 * The field is zeroed first so nothing of the previous contents
	 * remains after the copied string.
	 */
	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	/*
	 * When suser() fails, a private copy with a zeroed fsid is copied
	 * out instead of the mount's structure.
	 *
	 * NOTE(review): suser(td) already had to succeed at the top of
	 * this function, so this branch looks unreachable - confirm.
	 */
	if (suser(td)) {
		bcopy(sp, &sb, sizeof(sb));
		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
		sp = &sb;
	}
	return (copyout(sp, uap->buf, sizeof(*sp)));
}

/*
 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 *
 * statvfs flavour of the call above: same handle lookup and visibility
 * check, but the statistics come from VFS_STATVFS() into the mount's
 * mnt_vstat, and f_flag is rebuilt from the MNT_RDONLY and MNT_NOSUID
 * mount flags before the structure is copied out to uap->buf.
 */
int
sys_fhstatvfs(struct fhstatvfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statvfs *sp;
	struct mount *mp;
	struct vnode *vp;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = suser(td)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);

	/*
	 * NOTE(review): p is NULL-checked here but p->p_ucred is used
	 * unconditionally below - confirm whether p can be NULL.
	 */
	if (p != NULL && !chroot_visible_mnt(mp, p))
		return (ESTALE);

	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
		return (error);
	/* Only the vnode's mount is needed; release the vnode right away. */
	mp = vp->v_mount;
	sp = &mp->mnt_vstat;
	vput(vp);
	if ((error = VFS_STATVFS(mp, sp, p->p_ucred)) != 0)
		return (error);

	/* Translate the mount flags into their ST_* counterparts. */
	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	return (copyout(sp, uap->buf, sizeof(*sp)));
}


/*
 * Syscall to push extended attribute configuration information into the
 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 *
 * The path is resolved with nlookup() (following symlinks) and the
 * mount recorded in the resulting namecache handle is handed to
 * VFS_EXTATTRCTL() together with uap->cmd, uap->attrname and uap->arg.
 */
int
sys_extattrctl(struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/* NOTE(review): vp is assigned but not otherwise used here. */
	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_EXTATTRCTL(mp, uap->cmd,
				uap->attrname, uap->arg,
				nd.nl_cred);
	}
	/* nlookup_done() is called on both the success and error paths. */
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
 * Accepts attribute name, and a uio structure pointing to the data to set.
 * The uio is consumed in the style of writev().  The real work happens
 * in VOP_SETEXTATTR().
3938 */ 3939 int 3940 sys_extattr_set_file(struct extattr_set_file_args *uap) 3941 { 3942 char attrname[EXTATTR_MAXNAMELEN]; 3943 struct iovec aiov[UIO_SMALLIOV]; 3944 struct iovec *needfree; 3945 struct nlookupdata nd; 3946 struct iovec *iov; 3947 struct vnode *vp; 3948 struct uio auio; 3949 u_int iovlen; 3950 u_int cnt; 3951 int error; 3952 int i; 3953 3954 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3955 if (error) 3956 return (error); 3957 3958 vp = NULL; 3959 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3960 if (error == 0) 3961 error = nlookup(&nd); 3962 if (error == 0) 3963 error = ncp_writechk(&nd.nl_nch); 3964 if (error == 0) 3965 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3966 if (error) { 3967 nlookup_done(&nd); 3968 return (error); 3969 } 3970 3971 needfree = NULL; 3972 iovlen = uap->iovcnt * sizeof(struct iovec); 3973 if (uap->iovcnt > UIO_SMALLIOV) { 3974 if (uap->iovcnt > UIO_MAXIOV) { 3975 error = EINVAL; 3976 goto done; 3977 } 3978 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3979 needfree = iov; 3980 } else { 3981 iov = aiov; 3982 } 3983 auio.uio_iov = iov; 3984 auio.uio_iovcnt = uap->iovcnt; 3985 auio.uio_rw = UIO_WRITE; 3986 auio.uio_segflg = UIO_USERSPACE; 3987 auio.uio_td = nd.nl_td; 3988 auio.uio_offset = 0; 3989 if ((error = copyin(uap->iovp, iov, iovlen))) 3990 goto done; 3991 auio.uio_resid = 0; 3992 for (i = 0; i < uap->iovcnt; i++) { 3993 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3994 error = EINVAL; 3995 goto done; 3996 } 3997 auio.uio_resid += iov->iov_len; 3998 iov++; 3999 } 4000 cnt = auio.uio_resid; 4001 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4002 cnt -= auio.uio_resid; 4003 uap->sysmsg_result = cnt; 4004 done: 4005 vput(vp); 4006 nlookup_done(&nd); 4007 if (needfree) 4008 FREE(needfree, M_IOV); 4009 return (error); 4010 } 4011 4012 /* 4013 * Syscall to get a named extended attribute on a file or directory. 
4014 * Accepts attribute name, and a uio structure pointing to a buffer for the 4015 * data. The uio is consumed in the style of readv(). The real work 4016 * happens in VOP_GETEXTATTR(); 4017 */ 4018 int 4019 sys_extattr_get_file(struct extattr_get_file_args *uap) 4020 { 4021 char attrname[EXTATTR_MAXNAMELEN]; 4022 struct iovec aiov[UIO_SMALLIOV]; 4023 struct iovec *needfree; 4024 struct nlookupdata nd; 4025 struct iovec *iov; 4026 struct vnode *vp; 4027 struct uio auio; 4028 u_int iovlen; 4029 u_int cnt; 4030 int error; 4031 int i; 4032 4033 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4034 if (error) 4035 return (error); 4036 4037 vp = NULL; 4038 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4039 if (error == 0) 4040 error = nlookup(&nd); 4041 if (error == 0) 4042 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4043 if (error) { 4044 nlookup_done(&nd); 4045 return (error); 4046 } 4047 4048 iovlen = uap->iovcnt * sizeof (struct iovec); 4049 needfree = NULL; 4050 if (uap->iovcnt > UIO_SMALLIOV) { 4051 if (uap->iovcnt > UIO_MAXIOV) { 4052 error = EINVAL; 4053 goto done; 4054 } 4055 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 4056 needfree = iov; 4057 } else { 4058 iov = aiov; 4059 } 4060 auio.uio_iov = iov; 4061 auio.uio_iovcnt = uap->iovcnt; 4062 auio.uio_rw = UIO_READ; 4063 auio.uio_segflg = UIO_USERSPACE; 4064 auio.uio_td = nd.nl_td; 4065 auio.uio_offset = 0; 4066 if ((error = copyin(uap->iovp, iov, iovlen))) 4067 goto done; 4068 auio.uio_resid = 0; 4069 for (i = 0; i < uap->iovcnt; i++) { 4070 if (iov->iov_len > INT_MAX - auio.uio_resid) { 4071 error = EINVAL; 4072 goto done; 4073 } 4074 auio.uio_resid += iov->iov_len; 4075 iov++; 4076 } 4077 cnt = auio.uio_resid; 4078 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred); 4079 cnt -= auio.uio_resid; 4080 uap->sysmsg_result = cnt; 4081 done: 4082 vput(vp); 4083 nlookup_done(&nd); 4084 if (needfree) 4085 FREE(needfree, M_IOV); 4086 return(error); 4087 
} 4088 4089 /* 4090 * Syscall to delete a named extended attribute from a file or directory. 4091 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 4092 */ 4093 int 4094 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 4095 { 4096 char attrname[EXTATTR_MAXNAMELEN]; 4097 struct nlookupdata nd; 4098 struct vnode *vp; 4099 int error; 4100 4101 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4102 if (error) 4103 return(error); 4104 4105 vp = NULL; 4106 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4107 if (error == 0) 4108 error = nlookup(&nd); 4109 if (error == 0) 4110 error = ncp_writechk(&nd.nl_nch); 4111 if (error == 0) 4112 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4113 if (error) { 4114 nlookup_done(&nd); 4115 return (error); 4116 } 4117 4118 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred); 4119 vput(vp); 4120 nlookup_done(&nd); 4121 return(error); 4122 } 4123 4124 /* 4125 * Determine if the mount is visible to the process. 4126 */ 4127 static int 4128 chroot_visible_mnt(struct mount *mp, struct proc *p) 4129 { 4130 struct nchandle nch; 4131 4132 /* 4133 * Traverse from the mount point upwards. If we hit the process 4134 * root then the mount point is visible to the process. 4135 */ 4136 nch = mp->mnt_ncmountpt; 4137 while (nch.ncp) { 4138 if (nch.mount == p->p_fd->fd_nrdir.mount && 4139 nch.ncp == p->p_fd->fd_nrdir.ncp) { 4140 return(1); 4141 } 4142 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 4143 nch = nch.mount->mnt_ncmounton; 4144 } else { 4145 nch.ncp = nch.ncp->nc_parent; 4146 } 4147 } 4148 4149 /* 4150 * If the mount point is not visible to the process, but the 4151 * process root is in a subdirectory of the mount, return 4152 * TRUE anyway. 4153 */ 4154 if (p->p_fd->fd_nrdir.mount == mp) 4155 return(1); 4156 4157 return(0); 4158 } 4159 4160