1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.70 2005/08/25 18:34:14 dillon Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mountctl.h> 51 #include <sys/sysproto.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/linker.h> 57 #include <sys/stat.h> 58 #include <sys/unistd.h> 59 #include <sys/vnode.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/dirent.h> 64 #include <sys/extattr.h> 65 #include <sys/kern_syscall.h> 66 67 #include <machine/limits.h> 68 #include <vfs/union/union.h> 69 #include <sys/sysctl.h> 70 #include <vm/vm.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_zone.h> 73 #include <vm/vm_page.h> 74 75 #include <sys/file2.h> 76 77 static int checkvp_chdir (struct vnode *vn, struct thread *td); 78 static void checkdirs (struct vnode *olddp, struct namecache *ncp); 79 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 80 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 81 static int getutimes (const struct timeval *, struct timespec *); 82 static int setfown (struct vnode *, 
uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

/* Hook pointer; it is only declared here and not called in this part of the file. */
int (*union_dircheckp) (struct thread *, struct vnode **, struct file *);

/* Exposes usermount as the read/write sysctl vfs.usermount. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, "");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Two modes are handled:
 *
 *  - MNT_UPDATE set: uap->path must resolve to the root vnode (VROOT) of
 *    an existing mount, whose flags are updated through VFS_MOUNT().  The
 *    previous mnt_flag/mnt_kern_flag are restored if VFS_MOUNT() fails.
 *
 *  - otherwise: uap->path must resolve to a directory that is not already
 *    a mount point; a new struct mount is allocated for the filesystem
 *    type named by uap->type and is attached to that directory.
 *
 * Permission rules enforced below:
 *  - callers with a prison credential always get EPERM;
 *  - non-root callers are refused unless the usermount sysctl is set;
 *  - MNT_EXPORTED always requires suser();
 *  - callers failing suser() get MNT_NOSUID | MNT_NODEV forced on;
 *  - a non-root caller must own the mount point directory (new mount)
 *    or be the original mounter (update).
 *
 * Returns 0 on success or an errno value.  The values generated directly
 * here are EPERM, ENOENT, EINVAL, EOPNOTSUPP, EBUSY, ENOTDIR and ENODEV;
 * errors from the called helpers are passed through.
 */
/* ARGSUSED */
int
mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct namecache *ncp;
	struct mount *mp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct nlcomponent nlc;

	KKASSERT(p);
	if (p->p_ucred->cr_prison != NULL)
		return (EPERM);
	if (usermount == 0 && (error = suser(td)))
		return (error);
	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = suser(td);
		if (error)
			return (error);
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (suser(td))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the ncp and vnode.
	 * A name that resolves to an entry without a vnode is ENOENT.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure.
	 * Clearing nd.nl_ncp first keeps the ncp out of nlookup_done()'s
	 * hands; from here on this function owns it.
	 */
	ncp = nd.nl_ncp;
	nd.nl_ncp = NULL;
	nlookup_done(&nd);

	/*
	 * Now we have the locked ref'd ncp and unreferenced vnode.
	 */
	vp = ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) {
		cache_put(ncp);
		return (error);
	}
	cache_unlock(ncp);

	/*
	 * Now we have an unlocked ref'd ncp and a locked ref'd vp.
	 * Every error exit from here to the VFS_MOUNT() call undoes both
	 * with cache_drop(ncp) + vput(vp).
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & VROOT) == 0) {
			cache_drop(ncp);
			vput(vp);
			return (EINVAL);
		}
		mp = vp->v_mount;
		/* Saved so a failed update can be rolled back below. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(ncp);
			vput(vp);
			return (EOPNOTSUPP);	/* Needs translation */
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid &&
		    (error = suser(td))) {
			cache_drop(ncp);
			vput(vp);
			return (error);
		}
		if (vfs_busy(mp, LK_NOWAIT, td)) {
			cache_drop(ncp);
			vput(vp);
			return (EBUSY);
		}
		/*
		 * VMOUNT marks a mount operation in progress on this vnode;
		 * refuse if one is already running or something is mounted
		 * on top of it.
		 */
		if ((vp->v_flag & VMOUNT) != 0 ||
		    vp->v_mountedhere != NULL) {
			cache_drop(ncp);
			vfs_unbusy(mp, td);
			vput(vp);
			return (EBUSY);
		}
		vp->v_flag |= VMOUNT;
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		VOP_UNLOCK(vp, 0, td);
		goto update;
	}
	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va, td)) ||
	    (va.va_uid != p->p_ucred->cr_uid &&
	     (error = suser(td)))) {
		cache_drop(ncp);
		vput(vp);
		return (error);
	}
	if ((error = vinvalbuf(vp, V_SAVE, td, 0, 0)) != 0) {
		cache_drop(ncp);
		vput(vp);
		return (error);
	}
	if (vp->v_type != VDIR) {
		cache_drop(ncp);
		vput(vp);
		return (ENOTDIR);
	}
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(ncp);
		vput(vp);
		return (error);
	}
	/* Look for an already registered filesystem type of that name. */
	for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
		if (!strcmp(vfsp->vfc_name, fstypename))
			break;
	}
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = suser(td)) != 0) {
			cache_drop(ncp);
			vput(vp);
			return error;
		}
		error = linker_load_file(fstypename, &lf);
		/*
		 * NOTE(review): lf is read below even when
		 * linker_load_file() reported an error; this relies on the
		 * callee always storing to lf -- confirm.
		 */
		if (error || lf == NULL) {
			cache_drop(ncp);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			return error;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) {
			if (!strcmp(vfsp->vfc_name, fstypename))
				break;
		}
		if (vfsp == NULL) {
			/* The module did not register the type: undo the load. */
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(ncp);
			vput(vp);
			return (ENODEV);
		}
	}
	if ((vp->v_flag & VMOUNT) != 0 ||
	    vp->v_mountedhere != NULL) {
		cache_drop(ncp);
		vput(vp);
		return (EBUSY);
	}
	vp->v_flag |= VMOUNT;

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = malloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	TAILQ_INIT(&mp->mnt_nvnodelist);
	TAILQ_INIT(&mp->mnt_reservedvnlist);
	TAILQ_INIT(&mp->mnt_jlist);
	mp->mnt_nvnodelistsize = 0;
	lockinit(&mp->mnt_lock, 0, "vfslock", 0, LK_NOPAUSE);
	/* Return value ignored: the mount structure was created just above. */
	vfs_busy(mp, LK_NOWAIT, td);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_vnodecovered = vp;
	mp->mnt_stat.f_owner = p->p_ucred->cr_uid;
	mp->mnt_iosize_max = DFLTPHYS;
	VOP_UNLOCK(vp, 0, td);
update:
	/*
	 * Set the mount level flags.  Common to the new-mount and update
	 * paths; vp is unlocked but still referenced here.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR);
	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, td);
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update path: drop the transient request flags, and on
		 * failure put back the flag words saved before the call.
		 */
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		vfs_unbusy(mp, td);
		vp->v_flag &= ~VMOUNT;
		vrele(vp);
		cache_drop(ncp);
		return (error);
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
	/*
	 * Put the new filesystem on the mount list (the code below inserts
	 * it with MNTINS_LAST).  The mount
	 * point gets its own mnt_ncp which is a special ncp linking the
	 * vnode-under to the root of the new mount.  The lookup code
	 * detects the mount point going forward and detects the special
	 * mnt_ncp via NCP_MOUNTPT going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		nlc.nlc_nameptr = "";
		nlc.nlc_namelen = 0;
		mp->mnt_ncp = cache_nlookup(ncp, &nlc);
		cache_setunresolved(mp->mnt_ncp);
		mp->mnt_ncp->nc_flag |= NCF_MOUNTPT;
		mp->mnt_ncp->nc_mount = mp;
		cache_drop(ncp);
		/* XXX get the root of the fs and cache_setvp(mnt_ncp...) */
		vp->v_flag &= ~VMOUNT;
		vp->v_mountedhere = mp;
		mountlist_insert(mp, MNTINS_LAST);
		checkdirs(vp, mp->mnt_ncp);
		cache_unlock(mp->mnt_ncp);	/* leave ref intact */
		VOP_UNLOCK(vp, 0, td);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by the VFS_START() assignment below and is
		 * therefore never reported.
		 */
		error = vfs_allocate_syncvnode(mp);
		vfs_unbusy(mp, td);
		if ((error = VFS_START(mp, 0, td)) != 0)
			vrele(vp);
	} else {
		/*
		 * VFS_MOUNT() failed for a new mount: remove any vnode ops
		 * that were installed, release the filesystem type
		 * reference and free the mount structure.
		 */
		vfs_rm_vnodeops(&mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(&mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(&mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(&mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(&mp->mnt_vn_fifo_ops);
		vp->v_flag &= ~VMOUNT;
		mp->mnt_vfc->vfc_refcount--;
		vfs_unbusy(mp, td);
		free(mp, M_MOUNT);
		cache_drop(ncp);
		vput(vp);
	}
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * The passed ncp is ref'd and locked (from the mount code) and
 * must be associated with the vnode representing the root of the
 * mount point.
400 */ 401 static void 402 checkdirs(struct vnode *olddp, struct namecache *ncp) 403 { 404 struct filedesc *fdp; 405 struct vnode *newdp; 406 struct mount *mp; 407 struct proc *p; 408 409 if (olddp->v_usecount == 1) 410 return; 411 mp = olddp->v_mountedhere; 412 if (VFS_ROOT(mp, &newdp)) 413 panic("mount: lost mount"); 414 cache_setvp(ncp, newdp); 415 416 if (rootvnode == olddp) { 417 vref(newdp); 418 vfs_cache_setroot(newdp, cache_hold(ncp)); 419 } 420 421 FOREACH_PROC_IN_SYSTEM(p) { 422 fdp = p->p_fd; 423 if (fdp->fd_cdir == olddp) { 424 vrele(fdp->fd_cdir); 425 vref(newdp); 426 fdp->fd_cdir = newdp; 427 cache_drop(fdp->fd_ncdir); 428 fdp->fd_ncdir = cache_hold(ncp); 429 } 430 if (fdp->fd_rdir == olddp) { 431 vrele(fdp->fd_rdir); 432 vref(newdp); 433 fdp->fd_rdir = newdp; 434 cache_drop(fdp->fd_nrdir); 435 fdp->fd_nrdir = cache_hold(ncp); 436 } 437 } 438 vput(newdp); 439 } 440 441 /* 442 * Unmount a file system. 443 * 444 * Note: unmount takes a path to the vnode mounted on as argument, 445 * not special file (as before). 446 */ 447 /* 448 * umount_args(char *path, int flags) 449 */ 450 /* ARGSUSED */ 451 int 452 unmount(struct unmount_args *uap) 453 { 454 struct thread *td = curthread; 455 struct proc *p = td->td_proc; 456 struct vnode *vp; 457 struct mount *mp; 458 int error; 459 struct nlookupdata nd; 460 461 KKASSERT(p); 462 if (p->p_ucred->cr_prison != NULL) 463 return (EPERM); 464 if (usermount == 0 && (error = suser(td))) 465 return (error); 466 467 vp = NULL; 468 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 469 if (error == 0) 470 error = nlookup(&nd); 471 if (error == 0) 472 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 473 nlookup_done(&nd); 474 if (error) 475 return (error); 476 477 mp = vp->v_mount; 478 479 /* 480 * Only root, or the user that did the original mount is 481 * permitted to unmount this filesystem. 
482 */ 483 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && 484 (error = suser(td))) { 485 vput(vp); 486 return (error); 487 } 488 489 /* 490 * Don't allow unmounting the root file system. 491 */ 492 if (mp->mnt_flag & MNT_ROOTFS) { 493 vput(vp); 494 return (EINVAL); 495 } 496 497 /* 498 * Must be the root of the filesystem 499 */ 500 if ((vp->v_flag & VROOT) == 0) { 501 vput(vp); 502 return (EINVAL); 503 } 504 vput(vp); 505 return (dounmount(mp, uap->flags, td)); 506 } 507 508 /* 509 * Do the actual file system unmount. 510 */ 511 static int 512 dounmount_interlock(struct mount *mp) 513 { 514 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 515 return (EBUSY); 516 mp->mnt_kern_flag |= MNTK_UNMOUNT; 517 return(0); 518 } 519 520 int 521 dounmount(struct mount *mp, int flags, struct thread *td) 522 { 523 struct vnode *coveredvp; 524 int error; 525 int async_flag; 526 527 /* 528 * Exclusive access for unmounting purposes 529 */ 530 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 531 return (error); 532 533 /* 534 * Allow filesystems to detect that a forced unmount is in progress. 535 */ 536 if (flags & MNT_FORCE) 537 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 538 error = lockmgr(&mp->mnt_lock, LK_DRAIN | 539 ((flags & MNT_FORCE) ? 
0 : LK_NOWAIT), NULL, td); 540 if (error) { 541 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 542 if (mp->mnt_kern_flag & MNTK_MWAIT) 543 wakeup(mp); 544 return (error); 545 } 546 547 if (mp->mnt_flag & MNT_EXPUBLIC) 548 vfs_setpublicfs(NULL, NULL, NULL); 549 550 vfs_msync(mp, MNT_WAIT); 551 async_flag = mp->mnt_flag & MNT_ASYNC; 552 mp->mnt_flag &=~ MNT_ASYNC; 553 cache_purgevfs(mp); /* remove cache entries for this file sys */ 554 if (mp->mnt_syncer != NULL) 555 vrele(mp->mnt_syncer); 556 if (((mp->mnt_flag & MNT_RDONLY) || 557 (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) || 558 (flags & MNT_FORCE)) 559 error = VFS_UNMOUNT(mp, flags, td); 560 if (error) { 561 if (mp->mnt_syncer == NULL) 562 vfs_allocate_syncvnode(mp); 563 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 564 mp->mnt_flag |= async_flag; 565 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_REENABLE, NULL, td); 566 if (mp->mnt_kern_flag & MNTK_MWAIT) 567 wakeup(mp); 568 return (error); 569 } 570 /* 571 * Clean up any journals still associated with the mount after 572 * filesystem activity has ceased. 573 */ 574 journal_remove_all_journals(mp, 575 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 576 577 mountlist_remove(mp); 578 579 /* 580 * Remove any installed vnode ops here so the individual VFSs don't 581 * have to. 
582 */ 583 vfs_rm_vnodeops(&mp->mnt_vn_coherency_ops); 584 vfs_rm_vnodeops(&mp->mnt_vn_journal_ops); 585 vfs_rm_vnodeops(&mp->mnt_vn_norm_ops); 586 vfs_rm_vnodeops(&mp->mnt_vn_spec_ops); 587 vfs_rm_vnodeops(&mp->mnt_vn_fifo_ops); 588 589 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 590 coveredvp->v_mountedhere = NULL; 591 vrele(coveredvp); 592 cache_drop(mp->mnt_ncp); 593 mp->mnt_ncp = NULL; 594 } 595 mp->mnt_vfc->vfc_refcount--; 596 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 597 panic("unmount: dangling vnode"); 598 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td); 599 if (mp->mnt_kern_flag & MNTK_MWAIT) 600 wakeup(mp); 601 free(mp, M_MOUNT); 602 return (0); 603 } 604 605 /* 606 * Sync each mounted filesystem. 607 */ 608 609 #ifdef DEBUG 610 static int syncprt = 0; 611 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 612 #endif /* DEBUG */ 613 614 static int sync_callback(struct mount *mp, void *data); 615 616 /* ARGSUSED */ 617 int 618 sync(struct sync_args *uap) 619 { 620 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 621 #ifdef DEBUG 622 /* 623 * print out buffer pool stat information on each sync() call. 624 */ 625 if (syncprt) 626 vfs_bufstats(); 627 #endif /* DEBUG */ 628 return (0); 629 } 630 631 static 632 int 633 sync_callback(struct mount *mp, void *data __unused) 634 { 635 int asyncflag; 636 637 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 638 asyncflag = mp->mnt_flag & MNT_ASYNC; 639 mp->mnt_flag &= ~MNT_ASYNC; 640 vfs_msync(mp, MNT_NOWAIT); 641 VFS_SYNC(mp, MNT_NOWAIT, curthread); 642 mp->mnt_flag |= asyncflag; 643 } 644 return(0); 645 } 646 647 /* XXX PRISON: could be per prison flag */ 648 static int prison_quotas; 649 #if 0 650 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 651 #endif 652 653 /* 654 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 655 * 656 * Change filesystem quotas. 
657 */ 658 /* ARGSUSED */ 659 int 660 quotactl(struct quotactl_args *uap) 661 { 662 struct nlookupdata nd; 663 struct thread *td; 664 struct proc *p; 665 struct mount *mp; 666 int error; 667 668 td = curthread; 669 p = td->td_proc; 670 if (p->p_ucred->cr_prison && !prison_quotas) 671 return (EPERM); 672 673 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 674 if (error == 0) 675 error = nlookup(&nd); 676 if (error == 0) { 677 mp = nd.nl_ncp->nc_mount; 678 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 679 uap->arg, nd.nl_td); 680 } 681 nlookup_done(&nd); 682 return (error); 683 } 684 685 /* 686 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 687 * void *buf, int buflen) 688 * 689 * This function operates on a mount point and executes the specified 690 * operation using the specified control data, and possibly returns data. 691 * 692 * The actual number of bytes stored in the result buffer is returned, 0 693 * if none, otherwise an error is returned. 694 */ 695 /* ARGSUSED */ 696 int 697 mountctl(struct mountctl_args *uap) 698 { 699 struct thread *td = curthread; 700 struct proc *p = td->td_proc; 701 struct filedesc *fdp = p->p_fd; 702 struct file *fp; 703 void *ctl = NULL; 704 void *buf = NULL; 705 char *path = NULL; 706 int error; 707 708 /* 709 * Sanity and permissions checks. We must be root. 
710 */ 711 KKASSERT(p); 712 if (p->p_ucred->cr_prison != NULL) 713 return (EPERM); 714 if ((error = suser(td)) != 0) 715 return (error); 716 717 /* 718 * Argument length checks 719 */ 720 if (uap->ctllen < 0 || uap->ctllen > 1024) 721 return (EINVAL); 722 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 723 return (EINVAL); 724 if (uap->path == NULL) 725 return (EINVAL); 726 727 /* 728 * Allocate the necessary buffers and copyin data 729 */ 730 path = zalloc(namei_zone); 731 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 732 if (error) 733 goto done; 734 735 if (uap->ctllen) { 736 ctl = malloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 737 error = copyin(uap->ctl, ctl, uap->ctllen); 738 if (error) 739 goto done; 740 } 741 if (uap->buflen) 742 buf = malloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 743 744 /* 745 * Validate the descriptor 746 */ 747 if (uap->fd == -1) { 748 fp = NULL; 749 } else if ((u_int)uap->fd >= fdp->fd_nfiles || 750 (fp = fdp->fd_files[uap->fd].fp) == NULL) { 751 error = EBADF; 752 goto done; 753 } 754 if (fp) 755 fhold(fp); 756 757 /* 758 * Execute the internal kernel function and clean up. 759 */ 760 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 761 if (fp) 762 fdrop(fp, td); 763 if (error == 0 && uap->sysmsg_result > 0) 764 error = copyout(buf, uap->buf, uap->sysmsg_result); 765 done: 766 if (path) 767 zfree(namei_zone, path); 768 if (ctl) 769 free(ctl, M_TEMP); 770 if (buf) 771 free(buf, M_TEMP); 772 return (error); 773 } 774 775 /* 776 * Execute a mount control operation by resolving the path to a mount point 777 * and calling vop_mountctl(). 
778 */ 779 int 780 kern_mountctl(const char *path, int op, struct file *fp, 781 const void *ctl, int ctllen, 782 void *buf, int buflen, int *res) 783 { 784 struct vnode *vp; 785 struct mount *mp; 786 struct nlookupdata nd; 787 int error; 788 789 *res = 0; 790 vp = NULL; 791 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 792 if (error == 0) 793 error = nlookup(&nd); 794 if (error == 0) 795 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 796 nlookup_done(&nd); 797 if (error) 798 return (error); 799 800 mp = vp->v_mount; 801 802 /* 803 * Must be the root of the filesystem 804 */ 805 if ((vp->v_flag & VROOT) == 0) { 806 vput(vp); 807 return (EINVAL); 808 } 809 error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen, 810 buf, buflen, res); 811 vput(vp); 812 return (error); 813 } 814 815 int 816 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 817 { 818 struct thread *td = curthread; 819 struct proc *p = td->td_proc; 820 struct mount *mp; 821 struct statfs *sp; 822 char *fullpath, *freepath; 823 int error; 824 825 if ((error = nlookup(nd)) != 0) 826 return (error); 827 mp = nd->nl_ncp->nc_mount; 828 sp = &mp->mnt_stat; 829 if ((error = VFS_STATFS(mp, sp, td)) != 0) 830 return (error); 831 832 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 833 if (error) 834 return(error); 835 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 836 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 837 free(freepath, M_TEMP); 838 839 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 840 bcopy(sp, buf, sizeof(*buf)); 841 /* Only root should have access to the fsid's. */ 842 if (suser(td)) 843 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 844 return (0); 845 } 846 847 /* 848 * statfs_args(char *path, struct statfs *buf) 849 * 850 * Get filesystem statistics. 
851 */ 852 int 853 statfs(struct statfs_args *uap) 854 { 855 struct nlookupdata nd; 856 struct statfs buf; 857 int error; 858 859 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 860 if (error == 0) 861 error = kern_statfs(&nd, &buf); 862 nlookup_done(&nd); 863 if (error == 0) 864 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 865 return (error); 866 } 867 868 int 869 kern_fstatfs(int fd, struct statfs *buf) 870 { 871 struct thread *td = curthread; 872 struct proc *p = td->td_proc; 873 struct file *fp; 874 struct mount *mp; 875 struct statfs *sp; 876 char *fullpath, *freepath; 877 int error; 878 879 KKASSERT(p); 880 error = getvnode(p->p_fd, fd, &fp); 881 if (error) 882 return (error); 883 mp = ((struct vnode *)fp->f_data)->v_mount; 884 if (mp == NULL) 885 return (EBADF); 886 sp = &mp->mnt_stat; 887 error = VFS_STATFS(mp, sp, td); 888 if (error) 889 return (error); 890 891 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 892 if (error) 893 return(error); 894 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 895 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 896 free(freepath, M_TEMP); 897 898 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 899 bcopy(sp, buf, sizeof(*buf)); 900 901 /* Only root should have access to the fsid's. */ 902 if (suser(td)) 903 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 904 return (0); 905 } 906 907 /* 908 * fstatfs_args(int fd, struct statfs *buf) 909 * 910 * Get filesystem statistics. 911 */ 912 int 913 fstatfs(struct fstatfs_args *uap) 914 { 915 struct statfs buf; 916 int error; 917 918 error = kern_fstatfs(uap->fd, &buf); 919 920 if (error == 0) 921 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 922 return (error); 923 } 924 925 /* 926 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 927 * 928 * Get statistics on all filesystems. 
929 */ 930 931 struct getfsstat_info { 932 struct statfs *sfsp; 933 long count; 934 long maxcount; 935 int error; 936 int flags; 937 int is_chrooted; 938 struct thread *td; 939 struct proc *p; 940 }; 941 942 static int getfsstat_callback(struct mount *, void *); 943 944 /* ARGSUSED */ 945 int 946 getfsstat(struct getfsstat_args *uap) 947 { 948 struct thread *td = curthread; 949 struct proc *p = td->td_proc; 950 struct getfsstat_info info; 951 952 bzero(&info, sizeof(info)); 953 if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0) 954 info.is_chrooted = 1; 955 else 956 info.is_chrooted = 0; 957 958 info.maxcount = uap->bufsize / sizeof(struct statfs); 959 info.sfsp = uap->buf; 960 info.count = 0; 961 info.flags = uap->flags; 962 info.td = td; 963 info.p = p; 964 965 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 966 if (info.sfsp && info.count > info.maxcount) 967 uap->sysmsg_result = info.maxcount; 968 else 969 uap->sysmsg_result = info.count; 970 return (info.error); 971 } 972 973 static int 974 getfsstat_callback(struct mount *mp, void *data) 975 { 976 struct getfsstat_info *info = data; 977 struct statfs *sp; 978 char *freepath; 979 char *fullpath; 980 int error; 981 982 if (info->sfsp && info->count < info->maxcount) { 983 if (info->is_chrooted && !chroot_visible_mnt(mp, info->p)) 984 return(0); 985 sp = &mp->mnt_stat; 986 987 /* 988 * If MNT_NOWAIT or MNT_LAZY is specified, do not 989 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 990 * overrides MNT_WAIT. 
991 */ 992 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 993 (info->flags & MNT_WAIT)) && 994 (error = VFS_STATFS(mp, sp, info->td))) { 995 return(0); 996 } 997 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 998 999 error = cache_fullpath(info->p, mp->mnt_ncp, 1000 &fullpath, &freepath); 1001 if (error) { 1002 info->error = error; 1003 return(-1); 1004 } 1005 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1006 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1007 free(freepath, M_TEMP); 1008 1009 error = copyout(sp, info->sfsp, sizeof(*sp)); 1010 if (error) { 1011 info->error = error; 1012 return (-1); 1013 } 1014 ++info->sfsp; 1015 } 1016 info->count++; 1017 return(0); 1018 } 1019 1020 /* 1021 * fchdir_args(int fd) 1022 * 1023 * Change current working directory to a given file descriptor. 1024 */ 1025 /* ARGSUSED */ 1026 int 1027 fchdir(struct fchdir_args *uap) 1028 { 1029 struct thread *td = curthread; 1030 struct proc *p = td->td_proc; 1031 struct filedesc *fdp = p->p_fd; 1032 struct vnode *vp, *ovp; 1033 struct mount *mp; 1034 struct file *fp; 1035 struct namecache *ncp, *oncp; 1036 struct namecache *nct; 1037 int error; 1038 1039 if ((error = getvnode(fdp, uap->fd, &fp)) != 0) 1040 return (error); 1041 vp = (struct vnode *)fp->f_data; 1042 vref(vp); 1043 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1044 if (vp->v_type != VDIR || fp->f_ncp == NULL) 1045 error = ENOTDIR; 1046 else 1047 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, td); 1048 if (error) { 1049 vput(vp); 1050 return (error); 1051 } 1052 ncp = cache_hold(fp->f_ncp); 1053 while (!error && (mp = vp->v_mountedhere) != NULL) { 1054 error = nlookup_mp(mp, &nct); 1055 if (error == 0) { 1056 cache_unlock(nct); /* leave ref intact */ 1057 vput(vp); 1058 vp = nct->nc_vp; 1059 error = vget(vp, LK_SHARED, td); 1060 KKASSERT(error == 0); 1061 cache_drop(ncp); 1062 ncp = nct; 1063 } 1064 } 1065 if (error == 0) { 1066 ovp = fdp->fd_cdir; 1067 oncp = fdp->fd_ncdir; 1068 VOP_UNLOCK(vp, 0, td); /* leave 
ref intact */ 1069 fdp->fd_cdir = vp; 1070 fdp->fd_ncdir = ncp; 1071 cache_drop(oncp); 1072 vrele(ovp); 1073 } else { 1074 cache_drop(ncp); 1075 vput(vp); 1076 } 1077 return (error); 1078 } 1079 1080 int 1081 kern_chdir(struct nlookupdata *nd) 1082 { 1083 struct thread *td = curthread; 1084 struct proc *p = td->td_proc; 1085 struct filedesc *fdp = p->p_fd; 1086 struct vnode *vp, *ovp; 1087 struct namecache *oncp; 1088 int error; 1089 1090 if ((error = nlookup(nd)) != 0) 1091 return (error); 1092 if ((vp = nd->nl_ncp->nc_vp) == NULL) 1093 return (ENOENT); 1094 if ((error = vget(vp, LK_SHARED, td)) != 0) 1095 return (error); 1096 1097 error = checkvp_chdir(vp, td); 1098 VOP_UNLOCK(vp, 0, td); 1099 if (error == 0) { 1100 ovp = fdp->fd_cdir; 1101 oncp = fdp->fd_ncdir; 1102 cache_unlock(nd->nl_ncp); /* leave reference intact */ 1103 fdp->fd_ncdir = nd->nl_ncp; 1104 fdp->fd_cdir = vp; 1105 cache_drop(oncp); 1106 vrele(ovp); 1107 nd->nl_ncp = NULL; 1108 } else { 1109 vrele(vp); 1110 } 1111 return (error); 1112 } 1113 1114 /* 1115 * chdir_args(char *path) 1116 * 1117 * Change current working directory (``.''). 1118 */ 1119 int 1120 chdir(struct chdir_args *uap) 1121 { 1122 struct nlookupdata nd; 1123 int error; 1124 1125 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1126 if (error == 0) 1127 error = kern_chdir(&nd); 1128 nlookup_done(&nd); 1129 return (error); 1130 } 1131 1132 /* 1133 * Helper function for raised chroot(2) security function: Refuse if 1134 * any filedescriptors are open directories. 
1135 */ 1136 static int 1137 chroot_refuse_vdir_fds(fdp) 1138 struct filedesc *fdp; 1139 { 1140 struct vnode *vp; 1141 struct file *fp; 1142 int error; 1143 int fd; 1144 1145 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1146 error = getvnode(fdp, fd, &fp); 1147 if (error) 1148 continue; 1149 vp = (struct vnode *)fp->f_data; 1150 if (vp->v_type != VDIR) 1151 continue; 1152 return(EPERM); 1153 } 1154 return (0); 1155 } 1156 1157 /* 1158 * This sysctl determines if we will allow a process to chroot(2) if it 1159 * has a directory open: 1160 * 0: disallowed for all processes. 1161 * 1: allowed for processes that were not already chroot(2)'ed. 1162 * 2: allowed for all processes. 1163 */ 1164 1165 static int chroot_allow_open_directories = 1; 1166 1167 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1168 &chroot_allow_open_directories, 0, ""); 1169 1170 /* 1171 * chroot to the specified namecache entry. We obtain the vp from the 1172 * namecache data. The passed ncp must be locked and referenced and will 1173 * remain locked and referenced on return. 1174 */ 1175 int 1176 kern_chroot(struct namecache *ncp) 1177 { 1178 struct thread *td = curthread; 1179 struct proc *p = td->td_proc; 1180 struct filedesc *fdp = p->p_fd; 1181 struct vnode *vp; 1182 int error; 1183 1184 /* 1185 * Only root can chroot 1186 */ 1187 if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0) 1188 return (error); 1189 1190 /* 1191 * Disallow open directory descriptors (fchdir() breakouts). 1192 */ 1193 if (chroot_allow_open_directories == 0 || 1194 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1195 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1196 return (error); 1197 } 1198 if ((vp = ncp->nc_vp) == NULL) 1199 return (ENOENT); 1200 1201 if ((error = vget(vp, LK_SHARED, td)) != 0) 1202 return (error); 1203 1204 /* 1205 * Check the validity of vp as a directory to change to and 1206 * associate it with rdir/jdir. 
1207 */ 1208 error = checkvp_chdir(vp, td); 1209 VOP_UNLOCK(vp, 0, td); /* leave reference intact */ 1210 if (error == 0) { 1211 vrele(fdp->fd_rdir); 1212 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1213 cache_drop(fdp->fd_nrdir); 1214 fdp->fd_nrdir = cache_hold(ncp); 1215 if (fdp->fd_jdir == NULL) { 1216 fdp->fd_jdir = vp; 1217 vref(fdp->fd_jdir); 1218 fdp->fd_njdir = cache_hold(ncp); 1219 } 1220 } else { 1221 vrele(vp); 1222 } 1223 return (error); 1224 } 1225 1226 /* 1227 * chroot_args(char *path) 1228 * 1229 * Change notion of root (``/'') directory. 1230 */ 1231 /* ARGSUSED */ 1232 int 1233 chroot(struct chroot_args *uap) 1234 { 1235 struct thread *td = curthread; 1236 struct nlookupdata nd; 1237 int error; 1238 1239 KKASSERT(td->td_proc); 1240 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1241 if (error) { 1242 nlookup_done(&nd); 1243 return(error); 1244 } 1245 error = nlookup(&nd); 1246 if (error == 0) 1247 error = kern_chroot(nd.nl_ncp); 1248 nlookup_done(&nd); 1249 return(error); 1250 } 1251 1252 /* 1253 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1254 * determine whether it is legal to chdir to the vnode. The vnode's state 1255 * is not changed by this call. 
1256 */ 1257 int 1258 checkvp_chdir(struct vnode *vp, struct thread *td) 1259 { 1260 int error; 1261 1262 if (vp->v_type != VDIR) 1263 error = ENOTDIR; 1264 else 1265 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td); 1266 return (error); 1267 } 1268 1269 int 1270 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1271 { 1272 struct thread *td = curthread; 1273 struct proc *p = td->td_proc; 1274 struct filedesc *fdp = p->p_fd; 1275 int cmode, flags; 1276 struct file *nfp; 1277 struct file *fp; 1278 struct vnode *vp; 1279 int type, indx, error; 1280 struct flock lf; 1281 1282 if ((oflags & O_ACCMODE) == O_ACCMODE) 1283 return (EINVAL); 1284 flags = FFLAGS(oflags); 1285 error = falloc(p, &nfp, NULL); 1286 if (error) 1287 return (error); 1288 fp = nfp; 1289 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1290 1291 /* 1292 * XXX p_dupfd is a real mess. It allows a device to return a 1293 * file descriptor to be duplicated rather then doing the open 1294 * itself. 1295 */ 1296 p->p_dupfd = -1; 1297 1298 /* 1299 * Call vn_open() to do the lookup and assign the vnode to the 1300 * file pointer. vn_open() does not change the ref count on fp 1301 * and the vnode, on success, will be inherited by the file pointer 1302 * and unlocked. 1303 */ 1304 nd->nl_flags |= NLC_LOCKVP; 1305 error = vn_open(nd, fp, flags, cmode); 1306 nlookup_done(nd); 1307 if (error) { 1308 /* 1309 * handle special fdopen() case. bleh. dupfdopen() is 1310 * responsible for dropping the old contents of ofiles[indx] 1311 * if it succeeds. 1312 * 1313 * Note that if fsetfd() succeeds it will add a ref to fp 1314 * which represents the fd_files[] assignment. We must still 1315 * drop our reference. 
1316 */ 1317 if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0) { 1318 if (fsetfd(p, fp, &indx) == 0) { 1319 error = dupfdopen(fdp, indx, p->p_dupfd, flags, error); 1320 if (error == 0) { 1321 *res = indx; 1322 fdrop(fp, td); /* our ref */ 1323 return (0); 1324 } 1325 if (fdp->fd_files[indx].fp == fp) { 1326 funsetfd(fdp, indx); 1327 fdrop(fp, td); /* fd_files[] ref */ 1328 } 1329 } 1330 } 1331 fdrop(fp, td); /* our ref */ 1332 if (error == ERESTART) 1333 error = EINTR; 1334 return (error); 1335 } 1336 1337 /* 1338 * ref the vnode for ourselves so it can't be ripped out from under 1339 * is. XXX need an ND flag to request that the vnode be returned 1340 * anyway. 1341 */ 1342 vp = (struct vnode *)fp->f_data; 1343 vref(vp); 1344 if ((error = fsetfd(p, fp, &indx)) != 0) { 1345 fdrop(fp, td); 1346 vrele(vp); 1347 return (error); 1348 } 1349 1350 /* 1351 * If no error occurs the vp will have been assigned to the file 1352 * pointer. 1353 */ 1354 p->p_dupfd = 0; 1355 1356 /* 1357 * There should be 2 references on the file, one from the descriptor 1358 * table, and one for us. 1359 * 1360 * Handle the case where someone closed the file (via its file 1361 * descriptor) while we were blocked. The end result should look 1362 * like opening the file succeeded but it was immediately closed. 1363 */ 1364 if (fp->f_count == 1) { 1365 KASSERT(fdp->fd_files[indx].fp != fp, 1366 ("Open file descriptor lost all refs")); 1367 vrele(vp); 1368 fo_close(fp, td); 1369 fdrop(fp, td); 1370 *res = indx; 1371 return 0; 1372 } 1373 1374 if (flags & (O_EXLOCK | O_SHLOCK)) { 1375 lf.l_whence = SEEK_SET; 1376 lf.l_start = 0; 1377 lf.l_len = 0; 1378 if (flags & O_EXLOCK) 1379 lf.l_type = F_WRLCK; 1380 else 1381 lf.l_type = F_RDLCK; 1382 type = F_FLOCK; 1383 if ((flags & FNONBLOCK) == 0) 1384 type |= F_WAIT; 1385 1386 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1387 /* 1388 * lock request failed. 
Normally close the descriptor 1389 * but handle the case where someone might have dup()d 1390 * it when we weren't looking. One reference is 1391 * owned by the descriptor array, the other by us. 1392 */ 1393 vrele(vp); 1394 if (fdp->fd_files[indx].fp == fp) { 1395 funsetfd(fdp, indx); 1396 fdrop(fp, td); 1397 } 1398 fdrop(fp, td); 1399 return (error); 1400 } 1401 fp->f_flag |= FHASLOCK; 1402 } 1403 /* assert that vn_open created a backing object if one is needed */ 1404 KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, 1405 ("open: vmio vnode has no backing object after vn_open")); 1406 1407 vrele(vp); 1408 1409 /* 1410 * release our private reference, leaving the one associated with the 1411 * descriptor table intact. 1412 */ 1413 fdrop(fp, td); 1414 *res = indx; 1415 return (0); 1416 } 1417 1418 /* 1419 * open_args(char *path, int flags, int mode) 1420 * 1421 * Check permissions, allocate an open file structure, 1422 * and call the device open routine if any. 1423 */ 1424 int 1425 open(struct open_args *uap) 1426 { 1427 struct nlookupdata nd; 1428 int error; 1429 1430 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1431 if (error == 0) { 1432 error = kern_open(&nd, uap->flags, 1433 uap->mode, &uap->sysmsg_result); 1434 } 1435 nlookup_done(&nd); 1436 return (error); 1437 } 1438 1439 int 1440 kern_mknod(struct nlookupdata *nd, int mode, int dev) 1441 { 1442 struct namecache *ncp; 1443 struct thread *td = curthread; 1444 struct proc *p = td->td_proc; 1445 struct vnode *vp; 1446 struct vattr vattr; 1447 int error; 1448 int whiteout = 0; 1449 1450 KKASSERT(p); 1451 1452 switch (mode & S_IFMT) { 1453 case S_IFCHR: 1454 case S_IFBLK: 1455 error = suser(td); 1456 break; 1457 default: 1458 error = suser_cred(p->p_ucred, PRISON_ROOT); 1459 break; 1460 } 1461 if (error) 1462 return (error); 1463 1464 bwillwrite(); 1465 nd->nl_flags |= NLC_CREATE; 1466 if ((error = nlookup(nd)) != 0) 1467 return (error); 1468 ncp = nd->nl_ncp; 1469 if (ncp->nc_vp) 
1470 return (EEXIST); 1471 1472 VATTR_NULL(&vattr); 1473 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1474 vattr.va_rdev = dev; 1475 whiteout = 0; 1476 1477 switch (mode & S_IFMT) { 1478 case S_IFMT: /* used by badsect to flag bad sectors */ 1479 vattr.va_type = VBAD; 1480 break; 1481 case S_IFCHR: 1482 vattr.va_type = VCHR; 1483 break; 1484 case S_IFBLK: 1485 vattr.va_type = VBLK; 1486 break; 1487 case S_IFWHT: 1488 whiteout = 1; 1489 break; 1490 default: 1491 error = EINVAL; 1492 break; 1493 } 1494 if (error == 0) { 1495 if (whiteout) { 1496 error = VOP_NWHITEOUT(ncp, nd->nl_cred, NAMEI_CREATE); 1497 } else { 1498 vp = NULL; 1499 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr); 1500 if (error == 0) 1501 vput(vp); 1502 } 1503 } 1504 return (error); 1505 } 1506 1507 /* 1508 * mknod_args(char *path, int mode, int dev) 1509 * 1510 * Create a special file. 1511 */ 1512 int 1513 mknod(struct mknod_args *uap) 1514 { 1515 struct nlookupdata nd; 1516 int error; 1517 1518 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1519 if (error == 0) 1520 error = kern_mknod(&nd, uap->mode, uap->dev); 1521 nlookup_done(&nd); 1522 return (error); 1523 } 1524 1525 int 1526 kern_mkfifo(struct nlookupdata *nd, int mode) 1527 { 1528 struct namecache *ncp; 1529 struct thread *td = curthread; 1530 struct proc *p = td->td_proc; 1531 struct vattr vattr; 1532 struct vnode *vp; 1533 int error; 1534 1535 bwillwrite(); 1536 1537 nd->nl_flags |= NLC_CREATE; 1538 if ((error = nlookup(nd)) != 0) 1539 return (error); 1540 ncp = nd->nl_ncp; 1541 if (ncp->nc_vp) 1542 return (EEXIST); 1543 1544 VATTR_NULL(&vattr); 1545 vattr.va_type = VFIFO; 1546 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1547 vp = NULL; 1548 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr); 1549 if (error == 0) 1550 vput(vp); 1551 return (error); 1552 } 1553 1554 /* 1555 * mkfifo_args(char *path, int mode) 1556 * 1557 * Create a named pipe. 
1558 */ 1559 int 1560 mkfifo(struct mkfifo_args *uap) 1561 { 1562 struct nlookupdata nd; 1563 int error; 1564 1565 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1566 if (error == 0) 1567 error = kern_mkfifo(&nd, uap->mode); 1568 nlookup_done(&nd); 1569 return (error); 1570 } 1571 1572 int 1573 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 1574 { 1575 struct thread *td = curthread; 1576 struct vnode *vp; 1577 int error; 1578 1579 /* 1580 * Lookup the source and obtained a locked vnode. 1581 * 1582 * XXX relookup on vget failure / race ? 1583 */ 1584 bwillwrite(); 1585 if ((error = nlookup(nd)) != 0) 1586 return (error); 1587 vp = nd->nl_ncp->nc_vp; 1588 KKASSERT(vp != NULL); 1589 if (vp->v_type == VDIR) 1590 return (EPERM); /* POSIX */ 1591 if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) 1592 return (error); 1593 1594 /* 1595 * Unlock the source so we can lookup the target without deadlocking 1596 * (XXX vp is locked already, possible other deadlock?). The target 1597 * must not exist. 1598 */ 1599 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 1600 nd->nl_flags &= ~NLC_NCPISLOCKED; 1601 cache_unlock(nd->nl_ncp); 1602 1603 linknd->nl_flags |= NLC_CREATE; 1604 if ((error = nlookup(linknd)) != 0) { 1605 vput(vp); 1606 return (error); 1607 } 1608 if (linknd->nl_ncp->nc_vp) { 1609 vput(vp); 1610 return (EEXIST); 1611 } 1612 1613 /* 1614 * Finally run the new API VOP. 1615 */ 1616 error = VOP_NLINK(linknd->nl_ncp, vp, linknd->nl_cred); 1617 vput(vp); 1618 return (error); 1619 } 1620 1621 /* 1622 * link_args(char *path, char *link) 1623 * 1624 * Make a hard file link. 
1625 */ 1626 int 1627 link(struct link_args *uap) 1628 { 1629 struct nlookupdata nd, linknd; 1630 int error; 1631 1632 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1633 if (error == 0) { 1634 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 1635 if (error == 0) 1636 error = kern_link(&nd, &linknd); 1637 nlookup_done(&linknd); 1638 } 1639 nlookup_done(&nd); 1640 return (error); 1641 } 1642 1643 int 1644 kern_symlink(struct nlookupdata *nd, char *path, int mode) 1645 { 1646 struct namecache *ncp; 1647 struct vattr vattr; 1648 struct vnode *vp; 1649 int error; 1650 1651 bwillwrite(); 1652 nd->nl_flags |= NLC_CREATE; 1653 if ((error = nlookup(nd)) != 0) 1654 return (error); 1655 ncp = nd->nl_ncp; 1656 if (ncp->nc_vp) 1657 return (EEXIST); 1658 1659 VATTR_NULL(&vattr); 1660 vattr.va_mode = mode; 1661 error = VOP_NSYMLINK(ncp, &vp, nd->nl_cred, &vattr, path); 1662 if (error == 0) 1663 vput(vp); 1664 return (error); 1665 } 1666 1667 /* 1668 * symlink(char *path, char *link) 1669 * 1670 * Make a symbolic link. 1671 */ 1672 int 1673 symlink(struct symlink_args *uap) 1674 { 1675 struct thread *td = curthread; 1676 struct nlookupdata nd; 1677 char *path; 1678 int error; 1679 int mode; 1680 1681 path = zalloc(namei_zone); 1682 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1683 if (error == 0) { 1684 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 1685 if (error == 0) { 1686 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 1687 error = kern_symlink(&nd, path, mode); 1688 } 1689 nlookup_done(&nd); 1690 } 1691 zfree(namei_zone, path); 1692 return (error); 1693 } 1694 1695 /* 1696 * undelete_args(char *path) 1697 * 1698 * Delete a whiteout from the filesystem. 
1699 */ 1700 /* ARGSUSED */ 1701 int 1702 undelete(struct undelete_args *uap) 1703 { 1704 struct nlookupdata nd; 1705 int error; 1706 1707 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1708 bwillwrite(); 1709 nd.nl_flags |= NLC_DELETE; 1710 if (error == 0) 1711 error = nlookup(&nd); 1712 if (error == 0) 1713 error = VOP_NWHITEOUT(nd.nl_ncp, nd.nl_cred, NAMEI_DELETE); 1714 nlookup_done(&nd); 1715 return (error); 1716 } 1717 1718 int 1719 kern_unlink(struct nlookupdata *nd) 1720 { 1721 struct namecache *ncp; 1722 int error; 1723 1724 bwillwrite(); 1725 nd->nl_flags |= NLC_DELETE; 1726 if ((error = nlookup(nd)) != 0) 1727 return (error); 1728 ncp = nd->nl_ncp; 1729 error = VOP_NREMOVE(ncp, nd->nl_cred); 1730 return (error); 1731 } 1732 1733 /* 1734 * unlink_args(char *path) 1735 * 1736 * Delete a name from the filesystem. 1737 */ 1738 int 1739 unlink(struct unlink_args *uap) 1740 { 1741 struct nlookupdata nd; 1742 int error; 1743 1744 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1745 if (error == 0) 1746 error = kern_unlink(&nd); 1747 nlookup_done(&nd); 1748 return (error); 1749 } 1750 1751 int 1752 kern_lseek(int fd, off_t offset, int whence, off_t *res) 1753 { 1754 struct thread *td = curthread; 1755 struct proc *p = td->td_proc; 1756 struct filedesc *fdp = p->p_fd; 1757 struct file *fp; 1758 struct vattr vattr; 1759 int error; 1760 1761 if ((u_int)fd >= fdp->fd_nfiles || 1762 (fp = fdp->fd_files[fd].fp) == NULL) 1763 return (EBADF); 1764 if (fp->f_type != DTYPE_VNODE) 1765 return (ESPIPE); 1766 switch (whence) { 1767 case L_INCR: 1768 fp->f_offset += offset; 1769 break; 1770 case L_XTND: 1771 error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, td); 1772 if (error) 1773 return (error); 1774 fp->f_offset = offset + vattr.va_size; 1775 break; 1776 case L_SET: 1777 fp->f_offset = offset; 1778 break; 1779 default: 1780 return (EINVAL); 1781 } 1782 *res = fp->f_offset; 1783 return (0); 1784 } 1785 1786 /* 1787 * lseek_args(int fd, int pad, off_t 
offset, int whence) 1788 * 1789 * Reposition read/write file offset. 1790 */ 1791 int 1792 lseek(struct lseek_args *uap) 1793 { 1794 int error; 1795 1796 error = kern_lseek(uap->fd, uap->offset, uap->whence, 1797 &uap->sysmsg_offset); 1798 1799 return (error); 1800 } 1801 1802 int 1803 kern_access(struct nlookupdata *nd, int aflags) 1804 { 1805 struct thread *td = curthread; 1806 struct vnode *vp; 1807 int error, flags; 1808 1809 if ((error = nlookup(nd)) != 0) 1810 return (error); 1811 retry: 1812 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp); 1813 if (error) 1814 return (error); 1815 1816 /* Flags == 0 means only check for existence. */ 1817 if (aflags) { 1818 flags = 0; 1819 if (aflags & R_OK) 1820 flags |= VREAD; 1821 if (aflags & W_OK) 1822 flags |= VWRITE; 1823 if (aflags & X_OK) 1824 flags |= VEXEC; 1825 if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1826 error = VOP_ACCESS(vp, flags, nd->nl_cred, td); 1827 1828 /* 1829 * If the file handle is stale we have to re-resolve the 1830 * entry. This is a hack at the moment. 1831 */ 1832 if (error == ESTALE) { 1833 cache_setunresolved(nd->nl_ncp); 1834 error = cache_resolve(nd->nl_ncp, nd->nl_cred); 1835 if (error == 0) { 1836 vput(vp); 1837 vp = NULL; 1838 goto retry; 1839 } 1840 } 1841 } 1842 vput(vp); 1843 return (error); 1844 } 1845 1846 /* 1847 * access_args(char *path, int flags) 1848 * 1849 * Check access permissions. 
1850 */ 1851 int 1852 access(struct access_args *uap) 1853 { 1854 struct nlookupdata nd; 1855 int error; 1856 1857 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1858 if (error == 0) 1859 error = kern_access(&nd, uap->flags); 1860 nlookup_done(&nd); 1861 return (error); 1862 } 1863 1864 int 1865 kern_stat(struct nlookupdata *nd, struct stat *st) 1866 { 1867 int error; 1868 struct vnode *vp; 1869 thread_t td; 1870 1871 if ((error = nlookup(nd)) != 0) 1872 return (error); 1873 again: 1874 if ((vp = nd->nl_ncp->nc_vp) == NULL) 1875 return (ENOENT); 1876 1877 td = curthread; 1878 if ((error = vget(vp, LK_SHARED, td)) != 0) 1879 return (error); 1880 error = vn_stat(vp, st, td); 1881 1882 /* 1883 * If the file handle is stale we have to re-resolve the entry. This 1884 * is a hack at the moment. 1885 */ 1886 if (error == ESTALE) { 1887 cache_setunresolved(nd->nl_ncp); 1888 error = cache_resolve(nd->nl_ncp, nd->nl_cred); 1889 if (error == 0) { 1890 vput(vp); 1891 goto again; 1892 } 1893 } 1894 1895 /* 1896 * The fsmid can be used to detect that something has changed 1897 * at or below the specified file/dir in the filesystem. At 1898 * a minimum the fsmid is synthesized by the kernel via the 1899 * namecache and requires an open descriptor for deterministic 1900 * operation. Filesystems supporting fsmid may store it in the 1901 * inode, but this is not a requirement. 1902 */ 1903 st->st_fsmid = nd->nl_ncp->nc_fsmid; 1904 1905 vput(vp); 1906 return (error); 1907 } 1908 1909 /* 1910 * stat_args(char *path, struct stat *ub) 1911 * 1912 * Get file status; this version follows links. 
1913 */ 1914 int 1915 stat(struct stat_args *uap) 1916 { 1917 struct nlookupdata nd; 1918 struct stat st; 1919 int error; 1920 1921 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1922 if (error == 0) { 1923 error = kern_stat(&nd, &st); 1924 if (error == 0) 1925 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 1926 } 1927 nlookup_done(&nd); 1928 return (error); 1929 } 1930 1931 /* 1932 * lstat_args(char *path, struct stat *ub) 1933 * 1934 * Get file status; this version does not follow links. 1935 */ 1936 int 1937 lstat(struct lstat_args *uap) 1938 { 1939 struct nlookupdata nd; 1940 struct stat st; 1941 int error; 1942 1943 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1944 if (error == 0) { 1945 error = kern_stat(&nd, &st); 1946 if (error == 0) 1947 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 1948 } 1949 nlookup_done(&nd); 1950 return (error); 1951 } 1952 1953 /* 1954 * pathconf_Args(char *path, int name) 1955 * 1956 * Get configurable pathname variables. 1957 */ 1958 /* ARGSUSED */ 1959 int 1960 pathconf(struct pathconf_args *uap) 1961 { 1962 struct nlookupdata nd; 1963 struct vnode *vp; 1964 int error; 1965 1966 vp = NULL; 1967 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1968 if (error == 0) 1969 error = nlookup(&nd); 1970 if (error == 0) 1971 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 1972 nlookup_done(&nd); 1973 if (error == 0) { 1974 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 1975 vput(vp); 1976 } 1977 return (error); 1978 } 1979 1980 /* 1981 * XXX: daver 1982 * kern_readlink isn't properly split yet. There is a copyin burried 1983 * in VOP_READLINK(). 
1984 */ 1985 int 1986 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 1987 { 1988 struct thread *td = curthread; 1989 struct proc *p = td->td_proc; 1990 struct vnode *vp; 1991 struct iovec aiov; 1992 struct uio auio; 1993 int error; 1994 1995 if ((error = nlookup(nd)) != 0) 1996 return (error); 1997 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp); 1998 if (error) 1999 return (error); 2000 if (vp->v_type != VLNK) { 2001 error = EINVAL; 2002 } else { 2003 aiov.iov_base = buf; 2004 aiov.iov_len = count; 2005 auio.uio_iov = &aiov; 2006 auio.uio_iovcnt = 1; 2007 auio.uio_offset = 0; 2008 auio.uio_rw = UIO_READ; 2009 auio.uio_segflg = UIO_USERSPACE; 2010 auio.uio_td = td; 2011 auio.uio_resid = count; 2012 error = VOP_READLINK(vp, &auio, p->p_ucred); 2013 } 2014 vput(vp); 2015 *res = count - auio.uio_resid; 2016 return (error); 2017 } 2018 2019 /* 2020 * readlink_args(char *path, char *buf, int count) 2021 * 2022 * Return target name of a symbolic link. 2023 */ 2024 int 2025 readlink(struct readlink_args *uap) 2026 { 2027 struct nlookupdata nd; 2028 int error; 2029 2030 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2031 if (error == 0) { 2032 error = kern_readlink(&nd, uap->buf, uap->count, 2033 &uap->sysmsg_result); 2034 } 2035 nlookup_done(&nd); 2036 return (error); 2037 } 2038 2039 static int 2040 setfflags(struct vnode *vp, int flags) 2041 { 2042 struct thread *td = curthread; 2043 struct proc *p = td->td_proc; 2044 int error; 2045 struct vattr vattr; 2046 2047 /* 2048 * Prevent non-root users from setting flags on devices. When 2049 * a device is reused, users can retain ownership of the device 2050 * if they are allowed to set flags and programs assume that 2051 * chown can't fail when done as root. 
2052 */ 2053 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2054 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 2055 return (error); 2056 2057 /* 2058 * note: vget is required for any operation that might mod the vnode 2059 * so VINACTIVE is properly cleared. 2060 */ 2061 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2062 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2063 VATTR_NULL(&vattr); 2064 vattr.va_flags = flags; 2065 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2066 vput(vp); 2067 } 2068 return (error); 2069 } 2070 2071 /* 2072 * chflags(char *path, int flags) 2073 * 2074 * Change flags of a file given a path name. 2075 */ 2076 /* ARGSUSED */ 2077 int 2078 chflags(struct chflags_args *uap) 2079 { 2080 struct nlookupdata nd; 2081 struct vnode *vp; 2082 int error; 2083 2084 vp = NULL; 2085 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2086 /* XXX Add NLC flag indicating modifying operation? */ 2087 if (error == 0) 2088 error = nlookup(&nd); 2089 if (error == 0) 2090 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp); 2091 nlookup_done(&nd); 2092 if (error == 0) { 2093 error = setfflags(vp, uap->flags); 2094 vrele(vp); 2095 } 2096 return (error); 2097 } 2098 2099 /* 2100 * fchflags_args(int fd, int flags) 2101 * 2102 * Change flags of a file given a file descriptor. 2103 */ 2104 /* ARGSUSED */ 2105 int 2106 fchflags(struct fchflags_args *uap) 2107 { 2108 struct thread *td = curthread; 2109 struct proc *p = td->td_proc; 2110 struct file *fp; 2111 int error; 2112 2113 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2114 return (error); 2115 return setfflags((struct vnode *) fp->f_data, uap->flags); 2116 } 2117 2118 static int 2119 setfmode(struct vnode *vp, int mode) 2120 { 2121 struct thread *td = curthread; 2122 struct proc *p = td->td_proc; 2123 int error; 2124 struct vattr vattr; 2125 2126 /* 2127 * note: vget is required for any operation that might mod the vnode 2128 * so VINACTIVE is properly cleared. 
2129 */ 2130 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2131 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2132 VATTR_NULL(&vattr); 2133 vattr.va_mode = mode & ALLPERMS; 2134 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2135 vput(vp); 2136 } 2137 return error; 2138 } 2139 2140 int 2141 kern_chmod(struct nlookupdata *nd, int mode) 2142 { 2143 struct vnode *vp; 2144 int error; 2145 2146 /* XXX Add NLC flag indicating modifying operation? */ 2147 if ((error = nlookup(nd)) != 0) 2148 return (error); 2149 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2150 return (error); 2151 error = setfmode(vp, mode); 2152 vrele(vp); 2153 return (error); 2154 } 2155 2156 /* 2157 * chmod_args(char *path, int mode) 2158 * 2159 * Change mode of a file given path name. 2160 */ 2161 /* ARGSUSED */ 2162 int 2163 chmod(struct chmod_args *uap) 2164 { 2165 struct nlookupdata nd; 2166 int error; 2167 2168 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2169 if (error == 0) 2170 error = kern_chmod(&nd, uap->mode); 2171 nlookup_done(&nd); 2172 return (error); 2173 } 2174 2175 /* 2176 * lchmod_args(char *path, int mode) 2177 * 2178 * Change mode of a file given path name (don't follow links.) 2179 */ 2180 /* ARGSUSED */ 2181 int 2182 lchmod(struct lchmod_args *uap) 2183 { 2184 struct nlookupdata nd; 2185 int error; 2186 2187 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2188 if (error == 0) 2189 error = kern_chmod(&nd, uap->mode); 2190 nlookup_done(&nd); 2191 return (error); 2192 } 2193 2194 /* 2195 * fchmod_args(int fd, int mode) 2196 * 2197 * Change mode of a file given a file descriptor. 
2198 */ 2199 /* ARGSUSED */ 2200 int 2201 fchmod(struct fchmod_args *uap) 2202 { 2203 struct thread *td = curthread; 2204 struct proc *p = td->td_proc; 2205 struct file *fp; 2206 int error; 2207 2208 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2209 return (error); 2210 return setfmode((struct vnode *)fp->f_data, uap->mode); 2211 } 2212 2213 static int 2214 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2215 { 2216 struct thread *td = curthread; 2217 struct proc *p = td->td_proc; 2218 int error; 2219 struct vattr vattr; 2220 2221 /* 2222 * note: vget is required for any operation that might mod the vnode 2223 * so VINACTIVE is properly cleared. 2224 */ 2225 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2226 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2227 VATTR_NULL(&vattr); 2228 vattr.va_uid = uid; 2229 vattr.va_gid = gid; 2230 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2231 vput(vp); 2232 } 2233 return error; 2234 } 2235 2236 int 2237 kern_chown(struct nlookupdata *nd, int uid, int gid) 2238 { 2239 struct vnode *vp; 2240 int error; 2241 2242 /* XXX Add NLC flag indicating modifying operation? */ 2243 if ((error = nlookup(nd)) != 0) 2244 return (error); 2245 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2246 return (error); 2247 error = setfown(vp, uid, gid); 2248 vrele(vp); 2249 return (error); 2250 } 2251 2252 /* 2253 * chown(char *path, int uid, int gid) 2254 * 2255 * Set ownership given a path name. 2256 */ 2257 int 2258 chown(struct chown_args *uap) 2259 { 2260 struct nlookupdata nd; 2261 int error; 2262 2263 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2264 if (error == 0) 2265 error = kern_chown(&nd, uap->uid, uap->gid); 2266 nlookup_done(&nd); 2267 return (error); 2268 } 2269 2270 /* 2271 * lchown_args(char *path, int uid, int gid) 2272 * 2273 * Set ownership given a path name, do not cross symlinks. 
2274 */ 2275 int 2276 lchown(struct lchown_args *uap) 2277 { 2278 struct nlookupdata nd; 2279 int error; 2280 2281 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2282 if (error == 0) 2283 error = kern_chown(&nd, uap->uid, uap->gid); 2284 nlookup_done(&nd); 2285 return (error); 2286 } 2287 2288 /* 2289 * fchown_args(int fd, int uid, int gid) 2290 * 2291 * Set ownership given a file descriptor. 2292 */ 2293 /* ARGSUSED */ 2294 int 2295 fchown(struct fchown_args *uap) 2296 { 2297 struct thread *td = curthread; 2298 struct proc *p = td->td_proc; 2299 struct file *fp; 2300 int error; 2301 2302 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2303 return (error); 2304 return setfown((struct vnode *)fp->f_data, 2305 uap->uid, uap->gid); 2306 } 2307 2308 static int 2309 getutimes(const struct timeval *tvp, struct timespec *tsp) 2310 { 2311 struct timeval tv[2]; 2312 2313 if (tvp == NULL) { 2314 microtime(&tv[0]); 2315 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2316 tsp[1] = tsp[0]; 2317 } else { 2318 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2319 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2320 } 2321 return 0; 2322 } 2323 2324 static int 2325 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2326 { 2327 struct thread *td = curthread; 2328 struct proc *p = td->td_proc; 2329 int error; 2330 struct vattr vattr; 2331 2332 /* 2333 * note: vget is required for any operation that might mod the vnode 2334 * so VINACTIVE is properly cleared. 
2335 */ 2336 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2337 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2338 VATTR_NULL(&vattr); 2339 vattr.va_atime = ts[0]; 2340 vattr.va_mtime = ts[1]; 2341 if (nullflag) 2342 vattr.va_vaflags |= VA_UTIMES_NULL; 2343 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2344 vput(vp); 2345 } 2346 return error; 2347 } 2348 2349 int 2350 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2351 { 2352 struct timespec ts[2]; 2353 struct vnode *vp; 2354 int error; 2355 2356 if ((error = getutimes(tptr, ts)) != 0) 2357 return (error); 2358 /* XXX Add NLC flag indicating modifying operation? */ 2359 if ((error = nlookup(nd)) != 0) 2360 return (error); 2361 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2362 return (error); 2363 error = setutimes(vp, ts, tptr == NULL); 2364 vrele(vp); 2365 return (error); 2366 } 2367 2368 /* 2369 * utimes_args(char *path, struct timeval *tptr) 2370 * 2371 * Set the access and modification times of a file. 2372 */ 2373 int 2374 utimes(struct utimes_args *uap) 2375 { 2376 struct timeval tv[2]; 2377 struct nlookupdata nd; 2378 int error; 2379 2380 if (uap->tptr) { 2381 error = copyin(uap->tptr, tv, sizeof(tv)); 2382 if (error) 2383 return (error); 2384 } 2385 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2386 if (error == 0) 2387 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2388 nlookup_done(&nd); 2389 return (error); 2390 } 2391 2392 /* 2393 * lutimes_args(char *path, struct timeval *tptr) 2394 * 2395 * Set the access and modification times of a file. 2396 */ 2397 int 2398 lutimes(struct lutimes_args *uap) 2399 { 2400 struct timeval tv[2]; 2401 struct nlookupdata nd; 2402 int error; 2403 2404 if (uap->tptr) { 2405 error = copyin(uap->tptr, tv, sizeof(tv)); 2406 if (error) 2407 return (error); 2408 } 2409 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2410 if (error == 0) 2411 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 2412 nlookup_done(&nd); 2413 return (error); 2414 } 2415 2416 int 2417 kern_futimes(int fd, struct timeval *tptr) 2418 { 2419 struct thread *td = curthread; 2420 struct proc *p = td->td_proc; 2421 struct timespec ts[2]; 2422 struct file *fp; 2423 int error; 2424 2425 error = getutimes(tptr, ts); 2426 if (error) 2427 return (error); 2428 error = getvnode(p->p_fd, fd, &fp); 2429 if (error) 2430 return (error); 2431 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2432 return (error); 2433 } 2434 2435 /* 2436 * futimes_args(int fd, struct timeval *tptr) 2437 * 2438 * Set the access and modification times of a file. 2439 */ 2440 int 2441 futimes(struct futimes_args *uap) 2442 { 2443 struct timeval tv[2]; 2444 int error; 2445 2446 if (uap->tptr) { 2447 error = copyin(uap->tptr, tv, sizeof(tv)); 2448 if (error) 2449 return (error); 2450 } 2451 2452 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2453 2454 return (error); 2455 } 2456 2457 int 2458 kern_truncate(struct nlookupdata *nd, off_t length) 2459 { 2460 struct vnode *vp; 2461 struct vattr vattr; 2462 int error; 2463 2464 if (length < 0) 2465 return(EINVAL); 2466 /* XXX Add NLC flag indicating modifying operation? */ 2467 if ((error = nlookup(nd)) != 0) 2468 return (error); 2469 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2470 return (error); 2471 VOP_LEASE(vp, nd->nl_td, nd->nl_cred, LEASE_WRITE); 2472 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, nd->nl_td)) != 0) { 2473 vrele(vp); 2474 return (error); 2475 } 2476 if (vp->v_type == VDIR) { 2477 error = EISDIR; 2478 } else if ((error = vn_writechk(vp)) == 0 && 2479 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred, nd->nl_td)) == 0) { 2480 VATTR_NULL(&vattr); 2481 vattr.va_size = length; 2482 error = VOP_SETATTR(vp, &vattr, nd->nl_cred, nd->nl_td); 2483 } 2484 vput(vp); 2485 return (error); 2486 } 2487 2488 /* 2489 * truncate(char *path, int pad, off_t length) 2490 * 2491 * Truncate a file given its path name. 
2492 */ 2493 int 2494 truncate(struct truncate_args *uap) 2495 { 2496 struct nlookupdata nd; 2497 int error; 2498 2499 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2500 if (error == 0) 2501 error = kern_truncate(&nd, uap->length); 2502 nlookup_done(&nd); 2503 return error; 2504 } 2505 2506 int 2507 kern_ftruncate(int fd, off_t length) 2508 { 2509 struct thread *td = curthread; 2510 struct proc *p = td->td_proc; 2511 struct vattr vattr; 2512 struct vnode *vp; 2513 struct file *fp; 2514 int error; 2515 2516 if (length < 0) 2517 return(EINVAL); 2518 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 2519 return (error); 2520 if ((fp->f_flag & FWRITE) == 0) 2521 return (EINVAL); 2522 vp = (struct vnode *)fp->f_data; 2523 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2524 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2525 if (vp->v_type == VDIR) 2526 error = EISDIR; 2527 else if ((error = vn_writechk(vp)) == 0) { 2528 VATTR_NULL(&vattr); 2529 vattr.va_size = length; 2530 error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); 2531 } 2532 VOP_UNLOCK(vp, 0, td); 2533 return (error); 2534 } 2535 2536 /* 2537 * ftruncate_args(int fd, int pad, off_t length) 2538 * 2539 * Truncate a file given a file descriptor. 2540 */ 2541 int 2542 ftruncate(struct ftruncate_args *uap) 2543 { 2544 int error; 2545 2546 error = kern_ftruncate(uap->fd, uap->length); 2547 2548 return (error); 2549 } 2550 2551 /* 2552 * fsync(int fd) 2553 * 2554 * Sync an open file. 
2555 */ 2556 /* ARGSUSED */ 2557 int 2558 fsync(struct fsync_args *uap) 2559 { 2560 struct thread *td = curthread; 2561 struct proc *p = td->td_proc; 2562 struct vnode *vp; 2563 struct file *fp; 2564 vm_object_t obj; 2565 int error; 2566 2567 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2568 return (error); 2569 vp = (struct vnode *)fp->f_data; 2570 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2571 if (VOP_GETVOBJECT(vp, &obj) == 0) 2572 vm_object_page_clean(obj, 0, 0, 0); 2573 if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) == 0 && 2574 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) && 2575 bioops.io_fsync) 2576 error = (*bioops.io_fsync)(vp); 2577 VOP_UNLOCK(vp, 0, td); 2578 return (error); 2579 } 2580 2581 int 2582 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 2583 { 2584 struct namecache *fncpd; 2585 struct namecache *tncpd; 2586 struct namecache *ncp; 2587 struct mount *mp; 2588 int error; 2589 2590 bwillwrite(); 2591 if ((error = nlookup(fromnd)) != 0) 2592 return (error); 2593 if ((fncpd = fromnd->nl_ncp->nc_parent) == NULL) 2594 return (ENOENT); 2595 cache_hold(fncpd); 2596 2597 /* 2598 * unlock the source ncp so we can lookup the target ncp without 2599 * deadlocking. The target may or may not exist so we do not check 2600 * for a target vp like kern_mkdir() and other creation functions do. 2601 * 2602 * The source and target directories are ref'd and rechecked after 2603 * everything is relocked to determine if the source or target file 2604 * has been renamed. 
2605 */ 2606 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 2607 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 2608 cache_unlock(fromnd->nl_ncp); 2609 2610 tond->nl_flags |= NLC_CREATE; 2611 if ((error = nlookup(tond)) != 0) { 2612 cache_drop(fncpd); 2613 return (error); 2614 } 2615 if ((tncpd = tond->nl_ncp->nc_parent) == NULL) { 2616 cache_drop(fncpd); 2617 return (ENOENT); 2618 } 2619 cache_hold(tncpd); 2620 2621 /* 2622 * If the source and target are the same there is nothing to do 2623 */ 2624 if (fromnd->nl_ncp == tond->nl_ncp) { 2625 cache_drop(fncpd); 2626 cache_drop(tncpd); 2627 return (0); 2628 } 2629 2630 /* 2631 * relock the source ncp. NOTE AFTER RELOCKING: the source ncp 2632 * may have become invalid while it was unlocked, nc_vp and nc_mount 2633 * could be NULL. 2634 */ 2635 if (cache_lock_nonblock(fromnd->nl_ncp) == 0) { 2636 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2637 } else if (fromnd->nl_ncp > tond->nl_ncp) { 2638 cache_lock(fromnd->nl_ncp); 2639 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2640 } else { 2641 cache_unlock(tond->nl_ncp); 2642 cache_lock(fromnd->nl_ncp); 2643 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2644 cache_lock(tond->nl_ncp); 2645 cache_resolve(tond->nl_ncp, tond->nl_cred); 2646 } 2647 fromnd->nl_flags |= NLC_NCPISLOCKED; 2648 2649 /* 2650 * make sure the parent directories linkages are the same 2651 */ 2652 if (fncpd != fromnd->nl_ncp->nc_parent || 2653 tncpd != tond->nl_ncp->nc_parent) { 2654 cache_drop(fncpd); 2655 cache_drop(tncpd); 2656 return (ENOENT); 2657 } 2658 2659 /* 2660 * Both the source and target must be within the same filesystem and 2661 * in the same filesystem as their parent directories within the 2662 * namecache topology. 2663 * 2664 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 
2665 */ 2666 mp = fncpd->nc_mount; 2667 if (mp != tncpd->nc_mount || mp != fromnd->nl_ncp->nc_mount || 2668 mp != tond->nl_ncp->nc_mount) { 2669 cache_drop(fncpd); 2670 cache_drop(tncpd); 2671 return (EXDEV); 2672 } 2673 2674 /* 2675 * If the target exists and either the source or target is a directory, 2676 * then both must be directories. 2677 * 2678 * Due to relocking of the source, fromnd->nl_ncp->nc_vp might have 2679 * become NULL. 2680 */ 2681 if (tond->nl_ncp->nc_vp) { 2682 if (fromnd->nl_ncp->nc_vp == NULL) { 2683 error = ENOENT; 2684 } else if (fromnd->nl_ncp->nc_vp->v_type == VDIR) { 2685 if (tond->nl_ncp->nc_vp->v_type != VDIR) 2686 error = ENOTDIR; 2687 } else if (tond->nl_ncp->nc_vp->v_type == VDIR) { 2688 error = EISDIR; 2689 } 2690 } 2691 2692 /* 2693 * You cannot rename a source into itself or a subdirectory of itself. 2694 * We check this by travsersing the target directory upwards looking 2695 * for a match against the source. 2696 */ 2697 if (error == 0) { 2698 for (ncp = tncpd; ncp; ncp = ncp->nc_parent) { 2699 if (fromnd->nl_ncp == ncp) { 2700 error = EINVAL; 2701 break; 2702 } 2703 } 2704 } 2705 2706 cache_drop(fncpd); 2707 cache_drop(tncpd); 2708 2709 /* 2710 * Even though the namespaces are different, they may still represent 2711 * hardlinks to the same file. The filesystem might have a hard time 2712 * with this so we issue a NREMOVE of the source instead of a NRENAME 2713 * when we detect the situation. 2714 */ 2715 if (error == 0) { 2716 if (fromnd->nl_ncp->nc_vp == tond->nl_ncp->nc_vp) { 2717 error = VOP_NREMOVE(fromnd->nl_ncp, fromnd->nl_cred); 2718 } else { 2719 error = VOP_NRENAME(fromnd->nl_ncp, tond->nl_ncp, 2720 tond->nl_cred); 2721 } 2722 } 2723 return (error); 2724 } 2725 2726 /* 2727 * rename_args(char *from, char *to) 2728 * 2729 * Rename files. Source and destination must either both be directories, 2730 * or both not be directories. If target is a directory, it must be empty. 
2731 */ 2732 int 2733 rename(struct rename_args *uap) 2734 { 2735 struct nlookupdata fromnd, tond; 2736 int error; 2737 2738 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 2739 if (error == 0) { 2740 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 2741 if (error == 0) 2742 error = kern_rename(&fromnd, &tond); 2743 nlookup_done(&tond); 2744 } 2745 nlookup_done(&fromnd); 2746 return (error); 2747 } 2748 2749 int 2750 kern_mkdir(struct nlookupdata *nd, int mode) 2751 { 2752 struct thread *td = curthread; 2753 struct proc *p = td->td_proc; 2754 struct namecache *ncp; 2755 struct vnode *vp; 2756 struct vattr vattr; 2757 int error; 2758 2759 bwillwrite(); 2760 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE; 2761 if ((error = nlookup(nd)) != 0) 2762 return (error); 2763 2764 ncp = nd->nl_ncp; 2765 if (ncp->nc_vp) 2766 return (EEXIST); 2767 2768 VATTR_NULL(&vattr); 2769 vattr.va_type = VDIR; 2770 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 2771 2772 vp = NULL; 2773 error = VOP_NMKDIR(ncp, &vp, p->p_ucred, &vattr); 2774 if (error == 0) 2775 vput(vp); 2776 return (error); 2777 } 2778 2779 /* 2780 * mkdir_args(char *path, int mode) 2781 * 2782 * Make a directory file. 2783 */ 2784 /* ARGSUSED */ 2785 int 2786 mkdir(struct mkdir_args *uap) 2787 { 2788 struct nlookupdata nd; 2789 int error; 2790 2791 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2792 if (error == 0) 2793 error = kern_mkdir(&nd, uap->mode); 2794 nlookup_done(&nd); 2795 return (error); 2796 } 2797 2798 int 2799 kern_rmdir(struct nlookupdata *nd) 2800 { 2801 struct namecache *ncp; 2802 int error; 2803 2804 bwillwrite(); 2805 nd->nl_flags |= NLC_DELETE; 2806 if ((error = nlookup(nd)) != 0) 2807 return (error); 2808 2809 ncp = nd->nl_ncp; 2810 error = VOP_NRMDIR(ncp, nd->nl_cred); 2811 return (error); 2812 } 2813 2814 /* 2815 * rmdir_args(char *path) 2816 * 2817 * Remove a directory file. 
2818 */ 2819 /* ARGSUSED */ 2820 int 2821 rmdir(struct rmdir_args *uap) 2822 { 2823 struct nlookupdata nd; 2824 int error; 2825 2826 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2827 if (error == 0) 2828 error = kern_rmdir(&nd); 2829 nlookup_done(&nd); 2830 return (error); 2831 } 2832 2833 int 2834 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 2835 enum uio_seg direction) 2836 { 2837 struct thread *td = curthread; 2838 struct proc *p = td->td_proc; 2839 struct vnode *vp; 2840 struct file *fp; 2841 struct uio auio; 2842 struct iovec aiov; 2843 long loff; 2844 int error, eofflag; 2845 2846 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 2847 return (error); 2848 if ((fp->f_flag & FREAD) == 0) 2849 return (EBADF); 2850 vp = (struct vnode *)fp->f_data; 2851 unionread: 2852 if (vp->v_type != VDIR) 2853 return (EINVAL); 2854 aiov.iov_base = buf; 2855 aiov.iov_len = count; 2856 auio.uio_iov = &aiov; 2857 auio.uio_iovcnt = 1; 2858 auio.uio_rw = UIO_READ; 2859 auio.uio_segflg = direction; 2860 auio.uio_td = td; 2861 auio.uio_resid = count; 2862 /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */ 2863 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2864 loff = auio.uio_offset = fp->f_offset; 2865 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 2866 fp->f_offset = auio.uio_offset; 2867 VOP_UNLOCK(vp, 0, td); 2868 if (error) 2869 return (error); 2870 if (count == auio.uio_resid) { 2871 if (union_dircheckp) { 2872 error = union_dircheckp(td, &vp, fp); 2873 if (error == -1) 2874 goto unionread; 2875 if (error) 2876 return (error); 2877 } 2878 if ((vp->v_flag & VROOT) && 2879 (vp->v_mount->mnt_flag & MNT_UNION)) { 2880 struct vnode *tvp = vp; 2881 vp = vp->v_mount->mnt_vnodecovered; 2882 vref(vp); 2883 fp->f_data = (caddr_t)vp; 2884 fp->f_offset = 0; 2885 vrele(tvp); 2886 goto unionread; 2887 } 2888 } 2889 if (basep) { 2890 *basep = loff; 2891 } 2892 *res = count - auio.uio_resid; 2893 return (error); 2894 } 2895 2896 /* 2897 * 
getdirentries_args(int fd, char *buf, u_int conut, long *basep) 2898 * 2899 * Read a block of directory entries in a file system independent format. 2900 */ 2901 int 2902 getdirentries(struct getdirentries_args *uap) 2903 { 2904 long base; 2905 int error; 2906 2907 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 2908 &uap->sysmsg_result, UIO_USERSPACE); 2909 2910 if (error == 0) 2911 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 2912 return (error); 2913 } 2914 2915 /* 2916 * getdents_args(int fd, char *buf, size_t count) 2917 */ 2918 int 2919 getdents(struct getdents_args *uap) 2920 { 2921 int error; 2922 2923 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 2924 &uap->sysmsg_result, UIO_USERSPACE); 2925 2926 return (error); 2927 } 2928 2929 /* 2930 * umask(int newmask) 2931 * 2932 * Set the mode mask for creation of filesystem nodes. 2933 * 2934 * MP SAFE 2935 */ 2936 int 2937 umask(struct umask_args *uap) 2938 { 2939 struct thread *td = curthread; 2940 struct proc *p = td->td_proc; 2941 struct filedesc *fdp; 2942 2943 fdp = p->p_fd; 2944 uap->sysmsg_result = fdp->fd_cmask; 2945 fdp->fd_cmask = uap->newmask & ALLPERMS; 2946 return (0); 2947 } 2948 2949 /* 2950 * revoke(char *path) 2951 * 2952 * Void all references to file by ripping underlying filesystem 2953 * away from vnode. 
2954 */ 2955 /* ARGSUSED */ 2956 int 2957 revoke(struct revoke_args *uap) 2958 { 2959 struct thread *td = curthread; 2960 struct nlookupdata nd; 2961 struct vattr vattr; 2962 struct vnode *vp; 2963 struct ucred *cred; 2964 int error; 2965 2966 vp = NULL; 2967 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2968 if (error == 0) 2969 error = nlookup(&nd); 2970 if (error == 0) 2971 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp); 2972 cred = crhold(nd.nl_cred); 2973 nlookup_done(&nd); 2974 if (error == 0) { 2975 if (vp->v_type != VCHR && vp->v_type != VBLK) 2976 error = EINVAL; 2977 if (error == 0) 2978 error = VOP_GETATTR(vp, &vattr, td); 2979 if (error == 0 && cred->cr_uid != vattr.va_uid) 2980 error = suser_cred(cred, PRISON_ROOT); 2981 if (error == 0 && count_udev(vp->v_udev) > 0) { 2982 if ((error = vx_lock(vp)) == 0) { 2983 VOP_REVOKE(vp, REVOKEALL); 2984 vx_unlock(vp); 2985 } 2986 } 2987 vrele(vp); 2988 } 2989 crfree(cred); 2990 return (error); 2991 } 2992 2993 /* 2994 * Convert a user file descriptor to a kernel file entry. 
2995 */ 2996 int 2997 getvnode(struct filedesc *fdp, int fd, struct file **fpp) 2998 { 2999 struct file *fp; 3000 3001 if ((u_int)fd >= fdp->fd_nfiles || 3002 (fp = fdp->fd_files[fd].fp) == NULL) 3003 return (EBADF); 3004 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) 3005 return (EINVAL); 3006 *fpp = fp; 3007 return (0); 3008 } 3009 /* 3010 * getfh_args(char *fname, fhandle_t *fhp) 3011 * 3012 * Get (NFS) file handle 3013 */ 3014 int 3015 getfh(struct getfh_args *uap) 3016 { 3017 struct thread *td = curthread; 3018 struct nlookupdata nd; 3019 fhandle_t fh; 3020 struct vnode *vp; 3021 int error; 3022 3023 /* 3024 * Must be super user 3025 */ 3026 if ((error = suser(td)) != 0) 3027 return (error); 3028 3029 vp = NULL; 3030 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 3031 if (error == 0) 3032 error = nlookup(&nd); 3033 if (error == 0) 3034 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3035 nlookup_done(&nd); 3036 if (error == 0) { 3037 bzero(&fh, sizeof(fh)); 3038 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3039 error = VFS_VPTOFH(vp, &fh.fh_fid); 3040 vput(vp); 3041 if (error == 0) 3042 error = copyout(&fh, uap->fhp, sizeof(fh)); 3043 } 3044 return (error); 3045 } 3046 3047 /* 3048 * fhopen_args(const struct fhandle *u_fhp, int flags) 3049 * 3050 * syscall for the rpc.lockd to use to translate a NFS file handle into 3051 * an open descriptor. 3052 * 3053 * warning: do not remove the suser() call or this becomes one giant 3054 * security hole. 
3055 */ 3056 int 3057 fhopen(struct fhopen_args *uap) 3058 { 3059 struct thread *td = curthread; 3060 struct proc *p = td->td_proc; 3061 struct mount *mp; 3062 struct vnode *vp; 3063 struct fhandle fhp; 3064 struct vattr vat; 3065 struct vattr *vap = &vat; 3066 struct flock lf; 3067 struct filedesc *fdp = p->p_fd; 3068 int fmode, mode, error, type; 3069 struct file *nfp; 3070 struct file *fp; 3071 int indx; 3072 3073 /* 3074 * Must be super user 3075 */ 3076 error = suser(td); 3077 if (error) 3078 return (error); 3079 3080 fmode = FFLAGS(uap->flags); 3081 /* why not allow a non-read/write open for our lockd? */ 3082 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 3083 return (EINVAL); 3084 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 3085 if (error) 3086 return(error); 3087 /* find the mount point */ 3088 mp = vfs_getvfs(&fhp.fh_fsid); 3089 if (mp == NULL) 3090 return (ESTALE); 3091 /* now give me my vnode, it gets returned to me locked */ 3092 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); 3093 if (error) 3094 return (error); 3095 /* 3096 * from now on we have to make sure not 3097 * to forget about the vnode 3098 * any error that causes an abort must vput(vp) 3099 * just set error = err and 'goto bad;'. 
3100 */ 3101 3102 /* 3103 * from vn_open 3104 */ 3105 if (vp->v_type == VLNK) { 3106 error = EMLINK; 3107 goto bad; 3108 } 3109 if (vp->v_type == VSOCK) { 3110 error = EOPNOTSUPP; 3111 goto bad; 3112 } 3113 mode = 0; 3114 if (fmode & (FWRITE | O_TRUNC)) { 3115 if (vp->v_type == VDIR) { 3116 error = EISDIR; 3117 goto bad; 3118 } 3119 error = vn_writechk(vp); 3120 if (error) 3121 goto bad; 3122 mode |= VWRITE; 3123 } 3124 if (fmode & FREAD) 3125 mode |= VREAD; 3126 if (mode) { 3127 error = VOP_ACCESS(vp, mode, p->p_ucred, td); 3128 if (error) 3129 goto bad; 3130 } 3131 if (fmode & O_TRUNC) { 3132 VOP_UNLOCK(vp, 0, td); /* XXX */ 3133 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 3134 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */ 3135 VATTR_NULL(vap); 3136 vap->va_size = 0; 3137 error = VOP_SETATTR(vp, vap, p->p_ucred, td); 3138 if (error) 3139 goto bad; 3140 } 3141 3142 /* 3143 * VOP_OPEN needs the file pointer so it can potentially override 3144 * it. 3145 * 3146 * WARNING! no f_ncp will be associated when fhopen()ing a directory. 3147 * XXX 3148 */ 3149 if ((error = falloc(p, &nfp, NULL)) != 0) 3150 goto bad; 3151 fp = nfp; 3152 3153 fp->f_data = (caddr_t)vp; 3154 fp->f_flag = fmode & FMASK; 3155 fp->f_ops = &vnode_fileops; 3156 fp->f_type = DTYPE_VNODE; 3157 3158 error = VOP_OPEN(vp, fmode, p->p_ucred, fp, td); 3159 if (error) { 3160 /* 3161 * setting f_ops this way prevents VOP_CLOSE from being 3162 * called or fdrop() releasing the vp from v_data. Since 3163 * the VOP_OPEN failed we don't want to VOP_CLOSE. 3164 */ 3165 fp->f_ops = &badfileops; 3166 fp->f_data = NULL; 3167 fdrop(fp, td); 3168 goto bad; 3169 } 3170 if (fmode & FWRITE) 3171 vp->v_writecount++; 3172 3173 /* 3174 * The fp now owns a reference on the vnode. We still have our own 3175 * ref+lock. 3176 */ 3177 vref(vp); 3178 3179 /* 3180 * Make sure that a VM object is created for VMIO support. If this 3181 * fails just fdrop() normally to clean up. 
3182 */ 3183 if (vn_canvmio(vp) == TRUE) { 3184 if ((error = vfs_object_create(vp, td)) != 0) { 3185 fdrop(fp, td); 3186 goto bad; 3187 } 3188 } 3189 3190 /* 3191 * The open was successful, associate it with a file descriptor. 3192 */ 3193 if ((error = fsetfd(p, fp, &indx)) != 0) { 3194 if (fmode & FWRITE) 3195 vp->v_writecount--; 3196 fdrop(fp, td); 3197 goto bad; 3198 } 3199 3200 if (fmode & (O_EXLOCK | O_SHLOCK)) { 3201 lf.l_whence = SEEK_SET; 3202 lf.l_start = 0; 3203 lf.l_len = 0; 3204 if (fmode & O_EXLOCK) 3205 lf.l_type = F_WRLCK; 3206 else 3207 lf.l_type = F_RDLCK; 3208 type = F_FLOCK; 3209 if ((fmode & FNONBLOCK) == 0) 3210 type |= F_WAIT; 3211 VOP_UNLOCK(vp, 0, td); 3212 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 3213 /* 3214 * lock request failed. Normally close the descriptor 3215 * but handle the case where someone might have dup()d 3216 * or close()d it when we weren't looking. 3217 */ 3218 if (fdp->fd_files[indx].fp == fp) { 3219 funsetfd(fdp, indx); 3220 fdrop(fp, td); 3221 } 3222 3223 /* 3224 * release our private reference. 
3225 */ 3226 fdrop(fp, td); 3227 vrele(vp); 3228 return (error); 3229 } 3230 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 3231 fp->f_flag |= FHASLOCK; 3232 } 3233 if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0)) 3234 vfs_object_create(vp, td); 3235 3236 vput(vp); 3237 fdrop(fp, td); 3238 uap->sysmsg_result = indx; 3239 return (0); 3240 3241 bad: 3242 vput(vp); 3243 return (error); 3244 } 3245 3246 /* 3247 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 3248 */ 3249 int 3250 fhstat(struct fhstat_args *uap) 3251 { 3252 struct thread *td = curthread; 3253 struct stat sb; 3254 fhandle_t fh; 3255 struct mount *mp; 3256 struct vnode *vp; 3257 int error; 3258 3259 /* 3260 * Must be super user 3261 */ 3262 error = suser(td); 3263 if (error) 3264 return (error); 3265 3266 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 3267 if (error) 3268 return (error); 3269 3270 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3271 return (ESTALE); 3272 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) 3273 return (error); 3274 error = vn_stat(vp, &sb, td); 3275 vput(vp); 3276 if (error) 3277 return (error); 3278 error = copyout(&sb, uap->sb, sizeof(sb)); 3279 return (error); 3280 } 3281 3282 /* 3283 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 3284 */ 3285 int 3286 fhstatfs(struct fhstatfs_args *uap) 3287 { 3288 struct thread *td = curthread; 3289 struct proc *p = td->td_proc; 3290 struct statfs *sp; 3291 struct mount *mp; 3292 struct vnode *vp; 3293 struct statfs sb; 3294 char *fullpath, *freepath; 3295 fhandle_t fh; 3296 int error; 3297 3298 /* 3299 * Must be super user 3300 */ 3301 if ((error = suser(td))) 3302 return (error); 3303 3304 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 3305 return (error); 3306 3307 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3308 return (ESTALE); 3309 3310 if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0 && 3311 !chroot_visible_mnt(mp, p)) 3312 return (ESTALE); 3313 3314 if ((error = VFS_FHTOVP(mp, 
&fh.fh_fid, &vp))) 3315 return (error); 3316 mp = vp->v_mount; 3317 sp = &mp->mnt_stat; 3318 vput(vp); 3319 if ((error = VFS_STATFS(mp, sp, td)) != 0) 3320 return (error); 3321 3322 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 3323 if (error) 3324 return(error); 3325 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 3326 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 3327 free(freepath, M_TEMP); 3328 3329 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 3330 if (suser(td)) { 3331 bcopy(sp, &sb, sizeof(sb)); 3332 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 3333 sp = &sb; 3334 } 3335 return (copyout(sp, uap->buf, sizeof(*sp))); 3336 } 3337 3338 /* 3339 * Syscall to push extended attribute configuration information into the 3340 * VFS. Accepts a path, which it converts to a mountpoint, as well as 3341 * a command (int cmd), and attribute name and misc data. For now, the 3342 * attribute name is left in userspace for consumption by the VFS_op. 3343 * It will probably be changed to be copied into sysspace by the 3344 * syscall in the future, once issues with various consumers of the 3345 * attribute code have raised their hands. 3346 * 3347 * Currently this is used only by UFS Extended Attributes. 3348 */ 3349 int 3350 extattrctl(struct extattrctl_args *uap) 3351 { 3352 struct nlookupdata nd; 3353 struct mount *mp; 3354 struct vnode *vp; 3355 int error; 3356 3357 vp = NULL; 3358 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3359 if (error == 0) 3360 error = nlookup(&nd); 3361 if (error == 0) { 3362 mp = nd.nl_ncp->nc_mount; 3363 error = VFS_EXTATTRCTL(mp, uap->cmd, 3364 uap->attrname, uap->arg, 3365 nd.nl_td); 3366 } 3367 nlookup_done(&nd); 3368 return (error); 3369 } 3370 3371 /* 3372 * Syscall to set a named extended attribute on a file or directory. 3373 * Accepts attribute name, and a uio structure pointing to the data to set. 3374 * The uio is consumed in the style of writev(). 
The real work happens 3375 * in VOP_SETEXTATTR(). 3376 */ 3377 int 3378 extattr_set_file(struct extattr_set_file_args *uap) 3379 { 3380 char attrname[EXTATTR_MAXNAMELEN]; 3381 struct iovec aiov[UIO_SMALLIOV]; 3382 struct iovec *needfree; 3383 struct nlookupdata nd; 3384 struct iovec *iov; 3385 struct vnode *vp; 3386 struct uio auio; 3387 u_int iovlen; 3388 u_int cnt; 3389 int error; 3390 int i; 3391 3392 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3393 if (error) 3394 return (error); 3395 3396 vp = NULL; 3397 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3398 if (error == 0) 3399 error = nlookup(&nd); 3400 if (error == 0) 3401 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3402 if (error) { 3403 nlookup_done(&nd); 3404 return (error); 3405 } 3406 3407 needfree = NULL; 3408 iovlen = uap->iovcnt * sizeof(struct iovec); 3409 if (uap->iovcnt > UIO_SMALLIOV) { 3410 if (uap->iovcnt > UIO_MAXIOV) { 3411 error = EINVAL; 3412 goto done; 3413 } 3414 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3415 needfree = iov; 3416 } else { 3417 iov = aiov; 3418 } 3419 auio.uio_iov = iov; 3420 auio.uio_iovcnt = uap->iovcnt; 3421 auio.uio_rw = UIO_WRITE; 3422 auio.uio_segflg = UIO_USERSPACE; 3423 auio.uio_td = nd.nl_td; 3424 auio.uio_offset = 0; 3425 if ((error = copyin(uap->iovp, iov, iovlen))) 3426 goto done; 3427 auio.uio_resid = 0; 3428 for (i = 0; i < uap->iovcnt; i++) { 3429 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3430 error = EINVAL; 3431 goto done; 3432 } 3433 auio.uio_resid += iov->iov_len; 3434 iov++; 3435 } 3436 cnt = auio.uio_resid; 3437 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred, nd.nl_td); 3438 cnt -= auio.uio_resid; 3439 uap->sysmsg_result = cnt; 3440 done: 3441 vput(vp); 3442 nlookup_done(&nd); 3443 if (needfree) 3444 FREE(needfree, M_IOV); 3445 return (error); 3446 } 3447 3448 /* 3449 * Syscall to get a named extended attribute on a file or directory. 
3450 * Accepts attribute name, and a uio structure pointing to a buffer for the 3451 * data. The uio is consumed in the style of readv(). The real work 3452 * happens in VOP_GETEXTATTR(); 3453 */ 3454 int 3455 extattr_get_file(struct extattr_get_file_args *uap) 3456 { 3457 char attrname[EXTATTR_MAXNAMELEN]; 3458 struct iovec aiov[UIO_SMALLIOV]; 3459 struct iovec *needfree; 3460 struct nlookupdata nd; 3461 struct iovec *iov; 3462 struct vnode *vp; 3463 struct uio auio; 3464 u_int iovlen; 3465 u_int cnt; 3466 int error; 3467 int i; 3468 3469 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3470 if (error) 3471 return (error); 3472 3473 vp = NULL; 3474 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3475 if (error == 0) 3476 error = nlookup(&nd); 3477 if (error == 0) 3478 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3479 if (error) { 3480 nlookup_done(&nd); 3481 return (error); 3482 } 3483 3484 iovlen = uap->iovcnt * sizeof (struct iovec); 3485 needfree = NULL; 3486 if (uap->iovcnt > UIO_SMALLIOV) { 3487 if (uap->iovcnt > UIO_MAXIOV) { 3488 error = EINVAL; 3489 goto done; 3490 } 3491 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3492 needfree = iov; 3493 } else { 3494 iov = aiov; 3495 } 3496 auio.uio_iov = iov; 3497 auio.uio_iovcnt = uap->iovcnt; 3498 auio.uio_rw = UIO_READ; 3499 auio.uio_segflg = UIO_USERSPACE; 3500 auio.uio_td = nd.nl_td; 3501 auio.uio_offset = 0; 3502 if ((error = copyin(uap->iovp, iov, iovlen))) 3503 goto done; 3504 auio.uio_resid = 0; 3505 for (i = 0; i < uap->iovcnt; i++) { 3506 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3507 error = EINVAL; 3508 goto done; 3509 } 3510 auio.uio_resid += iov->iov_len; 3511 iov++; 3512 } 3513 cnt = auio.uio_resid; 3514 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred, nd.nl_td); 3515 cnt -= auio.uio_resid; 3516 uap->sysmsg_result = cnt; 3517 done: 3518 vput(vp); 3519 nlookup_done(&nd); 3520 if (needfree) 3521 FREE(needfree, M_IOV); 3522 return(error); 
3523 } 3524 3525 /* 3526 * Syscall to delete a named extended attribute from a file or directory. 3527 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 3528 */ 3529 int 3530 extattr_delete_file(struct extattr_delete_file_args *uap) 3531 { 3532 char attrname[EXTATTR_MAXNAMELEN]; 3533 struct nlookupdata nd; 3534 struct vnode *vp; 3535 int error; 3536 3537 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3538 if (error) 3539 return(error); 3540 3541 vp = NULL; 3542 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3543 if (error == 0) 3544 error = nlookup(&nd); 3545 if (error == 0) 3546 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3547 if (error) { 3548 nlookup_done(&nd); 3549 return (error); 3550 } 3551 3552 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred, nd.nl_td); 3553 vput(vp); 3554 nlookup_done(&nd); 3555 return(error); 3556 } 3557 3558 static int 3559 chroot_visible_mnt(struct mount *mp, struct proc *p) 3560 { 3561 struct namecache *ncp; 3562 /* 3563 * First check if this file system is below 3564 * the chroot path. 3565 */ 3566 ncp = mp->mnt_ncp; 3567 while (ncp != NULL && ncp != p->p_fd->fd_nrdir) 3568 ncp = ncp->nc_parent; 3569 if (ncp == NULL) { 3570 /* 3571 * This is not below the chroot path. 3572 * 3573 * Check if the chroot path is on the same filesystem, 3574 * by determing if we have to cross a mount point 3575 * before reaching mp->mnt_ncp. 3576 */ 3577 ncp = p->p_fd->fd_nrdir; 3578 while (ncp != NULL && ncp != mp->mnt_ncp) { 3579 if (ncp->nc_flag & NCF_MOUNTPT) { 3580 ncp = NULL; 3581 break; 3582 } 3583 ncp = ncp->nc_parent; 3584 } 3585 } 3586 return(ncp != NULL); 3587 } 3588