1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.65 2005/07/23 23:26:50 joerg Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/mountctl.h> 51 #include <sys/sysproto.h> 52 #include <sys/filedesc.h> 53 #include <sys/kernel.h> 54 #include <sys/fcntl.h> 55 #include <sys/file.h> 56 #include <sys/linker.h> 57 #include <sys/stat.h> 58 #include <sys/unistd.h> 59 #include <sys/vnode.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/dirent.h> 64 #include <sys/extattr.h> 65 #include <sys/kern_syscall.h> 66 67 #include <machine/limits.h> 68 #include <vfs/union/union.h> 69 #include <sys/sysctl.h> 70 #include <vm/vm.h> 71 #include <vm/vm_object.h> 72 #include <vm/vm_zone.h> 73 #include <vm/vm_page.h> 74 75 #include <sys/file2.h> 76 77 static int checkvp_chdir (struct vnode *vn, struct thread *td); 78 static void checkdirs (struct vnode *olddp, struct namecache *ncp); 79 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 80 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 81 static int getutimes (const struct timeval *, struct timespec *); 82 static int setfown (struct vnode *, 
uid_t, gid_t); 83 static int setfmode (struct vnode *, int); 84 static int setfflags (struct vnode *, int); 85 static int setutimes (struct vnode *, const struct timespec *, int); 86 static int usermount = 0; /* if 1, non-root can mount fs. */ 87 88 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 89 90 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 91 92 /* 93 * Virtual File System System Calls 94 */ 95 96 /* 97 * Mount a file system. 98 */ 99 /* 100 * mount_args(char *type, char *path, int flags, caddr_t data) 101 */ 102 /* ARGSUSED */ 103 int 104 mount(struct mount_args *uap) 105 { 106 struct thread *td = curthread; 107 struct proc *p = td->td_proc; 108 struct vnode *vp; 109 struct namecache *ncp; 110 struct mount *mp; 111 struct vfsconf *vfsp; 112 int error, flag = 0, flag2 = 0; 113 struct vattr va; 114 struct nlookupdata nd; 115 char fstypename[MFSNAMELEN]; 116 struct nlcomponent nlc; 117 118 KKASSERT(p); 119 if (p->p_ucred->cr_prison != NULL) 120 return (EPERM); 121 if (usermount == 0 && (error = suser(td))) 122 return (error); 123 /* 124 * Do not allow NFS export by non-root users. 125 */ 126 if (uap->flags & MNT_EXPORTED) { 127 error = suser(td); 128 if (error) 129 return (error); 130 } 131 /* 132 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 133 */ 134 if (suser(td)) 135 uap->flags |= MNT_NOSUID | MNT_NODEV; 136 137 /* 138 * Lookup the requested path and extract the ncp and vnode. 139 */ 140 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 141 if (error == 0) { 142 if ((error = nlookup(&nd)) == 0) { 143 if (nd.nl_ncp->nc_vp == NULL) 144 error = ENOENT; 145 } 146 } 147 if (error) { 148 nlookup_done(&nd); 149 return (error); 150 } 151 152 /* 153 * Extract the locked+refd ncp and cleanup the nd structure 154 */ 155 ncp = nd.nl_ncp; 156 nd.nl_ncp = NULL; 157 nlookup_done(&nd); 158 159 /* 160 * now we have the locked ref'd ncp and unreferenced vnode. 
161 */ 162 vp = ncp->nc_vp; 163 if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) { 164 cache_put(ncp); 165 return (error); 166 } 167 cache_unlock(ncp); 168 169 /* 170 * Now we have an unlocked ref'd ncp and a locked ref'd vp 171 */ 172 if (uap->flags & MNT_UPDATE) { 173 if ((vp->v_flag & VROOT) == 0) { 174 cache_drop(ncp); 175 vput(vp); 176 return (EINVAL); 177 } 178 mp = vp->v_mount; 179 flag = mp->mnt_flag; 180 flag2 = mp->mnt_kern_flag; 181 /* 182 * We only allow the filesystem to be reloaded if it 183 * is currently mounted read-only. 184 */ 185 if ((uap->flags & MNT_RELOAD) && 186 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 187 cache_drop(ncp); 188 vput(vp); 189 return (EOPNOTSUPP); /* Needs translation */ 190 } 191 /* 192 * Only root, or the user that did the original mount is 193 * permitted to update it. 194 */ 195 if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && 196 (error = suser(td))) { 197 cache_drop(ncp); 198 vput(vp); 199 return (error); 200 } 201 if (vfs_busy(mp, LK_NOWAIT, td)) { 202 cache_drop(ncp); 203 vput(vp); 204 return (EBUSY); 205 } 206 if ((vp->v_flag & VMOUNT) != 0 || 207 vp->v_mountedhere != NULL) { 208 cache_drop(ncp); 209 vfs_unbusy(mp, td); 210 vput(vp); 211 return (EBUSY); 212 } 213 vp->v_flag |= VMOUNT; 214 mp->mnt_flag |= 215 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 216 VOP_UNLOCK(vp, 0, td); 217 goto update; 218 } 219 /* 220 * If the user is not root, ensure that they own the directory 221 * onto which we are attempting to mount. 
222 */ 223 if ((error = VOP_GETATTR(vp, &va, td)) || 224 (va.va_uid != p->p_ucred->cr_uid && 225 (error = suser(td)))) { 226 cache_drop(ncp); 227 vput(vp); 228 return (error); 229 } 230 if ((error = vinvalbuf(vp, V_SAVE, td, 0, 0)) != 0) { 231 cache_drop(ncp); 232 vput(vp); 233 return (error); 234 } 235 if (vp->v_type != VDIR) { 236 cache_drop(ncp); 237 vput(vp); 238 return (ENOTDIR); 239 } 240 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 241 cache_drop(ncp); 242 vput(vp); 243 return (error); 244 } 245 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 246 if (!strcmp(vfsp->vfc_name, fstypename)) 247 break; 248 } 249 if (vfsp == NULL) { 250 linker_file_t lf; 251 252 /* Only load modules for root (very important!) */ 253 if ((error = suser(td)) != 0) { 254 cache_drop(ncp); 255 vput(vp); 256 return error; 257 } 258 error = linker_load_file(fstypename, &lf); 259 if (error || lf == NULL) { 260 cache_drop(ncp); 261 vput(vp); 262 if (lf == NULL) 263 error = ENODEV; 264 return error; 265 } 266 lf->userrefs++; 267 /* lookup again, see if the VFS was loaded */ 268 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) { 269 if (!strcmp(vfsp->vfc_name, fstypename)) 270 break; 271 } 272 if (vfsp == NULL) { 273 lf->userrefs--; 274 linker_file_unload(lf); 275 cache_drop(ncp); 276 vput(vp); 277 return (ENODEV); 278 } 279 } 280 if ((vp->v_flag & VMOUNT) != 0 || 281 vp->v_mountedhere != NULL) { 282 cache_drop(ncp); 283 vput(vp); 284 return (EBUSY); 285 } 286 vp->v_flag |= VMOUNT; 287 288 /* 289 * Allocate and initialize the filesystem. 
290 */ 291 mp = malloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 292 TAILQ_INIT(&mp->mnt_nvnodelist); 293 TAILQ_INIT(&mp->mnt_reservedvnlist); 294 TAILQ_INIT(&mp->mnt_jlist); 295 mp->mnt_nvnodelistsize = 0; 296 lockinit(&mp->mnt_lock, 0, "vfslock", 0, LK_NOPAUSE); 297 vfs_busy(mp, LK_NOWAIT, td); 298 mp->mnt_op = vfsp->vfc_vfsops; 299 mp->mnt_vfc = vfsp; 300 vfsp->vfc_refcount++; 301 mp->mnt_stat.f_type = vfsp->vfc_typenum; 302 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 303 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 304 mp->mnt_vnodecovered = vp; 305 mp->mnt_stat.f_owner = p->p_ucred->cr_uid; 306 mp->mnt_iosize_max = DFLTPHYS; 307 VOP_UNLOCK(vp, 0, td); 308 update: 309 /* 310 * Set the mount level flags. 311 */ 312 if (uap->flags & MNT_RDONLY) 313 mp->mnt_flag |= MNT_RDONLY; 314 else if (mp->mnt_flag & MNT_RDONLY) 315 mp->mnt_kern_flag |= MNTK_WANTRDWR; 316 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 317 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 318 MNT_NOSYMFOLLOW | MNT_IGNORE | 319 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 320 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 321 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 322 MNT_NOSYMFOLLOW | MNT_IGNORE | 323 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 324 /* 325 * Mount the filesystem. 326 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 327 * get. 
328 */ 329 error = VFS_MOUNT(mp, uap->path, uap->data, td); 330 if (mp->mnt_flag & MNT_UPDATE) { 331 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 332 mp->mnt_flag &= ~MNT_RDONLY; 333 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 334 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 335 if (error) { 336 mp->mnt_flag = flag; 337 mp->mnt_kern_flag = flag2; 338 } 339 vfs_unbusy(mp, td); 340 vp->v_flag &= ~VMOUNT; 341 vrele(vp); 342 cache_drop(ncp); 343 return (error); 344 } 345 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 346 /* 347 * Put the new filesystem on the mount list after root. The mount 348 * point gets its own mnt_ncp which is a special ncp linking the 349 * vnode-under to the root of the new mount. The lookup code 350 * detects the mount point going forward and detects the special 351 * mnt_ncp via NCP_MOUNTPT going backwards. 352 * 353 * It is not necessary to invalidate or purge the vnode underneath 354 * because elements under the mount will be given their own glue 355 * namecache record. 356 */ 357 if (!error) { 358 nlc.nlc_nameptr = ""; 359 nlc.nlc_namelen = 0; 360 mp->mnt_ncp = cache_nlookup(ncp, &nlc); 361 cache_setunresolved(mp->mnt_ncp); 362 mp->mnt_ncp->nc_flag |= NCF_MOUNTPT; 363 mp->mnt_ncp->nc_mount = mp; 364 cache_drop(ncp); 365 /* XXX get the root of the fs and cache_setvp(mnt_ncp...) 
*/ 366 vp->v_flag &= ~VMOUNT; 367 vp->v_mountedhere = mp; 368 mountlist_insert(mp, MNTINS_LAST); 369 checkdirs(vp, mp->mnt_ncp); 370 cache_unlock(mp->mnt_ncp); /* leave ref intact */ 371 VOP_UNLOCK(vp, 0, td); 372 error = vfs_allocate_syncvnode(mp); 373 vfs_unbusy(mp, td); 374 if ((error = VFS_START(mp, 0, td)) != 0) 375 vrele(vp); 376 } else { 377 vfs_rm_vnodeops(&mp->mnt_vn_coherency_ops); 378 vfs_rm_vnodeops(&mp->mnt_vn_journal_ops); 379 vfs_rm_vnodeops(&mp->mnt_vn_norm_ops); 380 vfs_rm_vnodeops(&mp->mnt_vn_spec_ops); 381 vfs_rm_vnodeops(&mp->mnt_vn_fifo_ops); 382 vp->v_flag &= ~VMOUNT; 383 mp->mnt_vfc->vfc_refcount--; 384 vfs_unbusy(mp, td); 385 free(mp, M_MOUNT); 386 cache_drop(ncp); 387 vput(vp); 388 } 389 return (error); 390 } 391 392 /* 393 * Scan all active processes to see if any of them have a current 394 * or root directory onto which the new filesystem has just been 395 * mounted. If so, replace them with the new mount point. 396 * 397 * The passed ncp is ref'd and locked (from the mount code) and 398 * must be associated with the vnode representing the root of the 399 * mount point. 
400 */ 401 static void 402 checkdirs(struct vnode *olddp, struct namecache *ncp) 403 { 404 struct filedesc *fdp; 405 struct vnode *newdp; 406 struct mount *mp; 407 struct proc *p; 408 409 if (olddp->v_usecount == 1) 410 return; 411 mp = olddp->v_mountedhere; 412 if (VFS_ROOT(mp, &newdp)) 413 panic("mount: lost mount"); 414 cache_setvp(ncp, newdp); 415 416 if (rootvnode == olddp) { 417 vref(newdp); 418 vfs_cache_setroot(newdp, cache_hold(ncp)); 419 } 420 421 FOREACH_PROC_IN_SYSTEM(p) { 422 fdp = p->p_fd; 423 if (fdp->fd_cdir == olddp) { 424 vrele(fdp->fd_cdir); 425 vref(newdp); 426 fdp->fd_cdir = newdp; 427 cache_drop(fdp->fd_ncdir); 428 fdp->fd_ncdir = cache_hold(ncp); 429 } 430 if (fdp->fd_rdir == olddp) { 431 vrele(fdp->fd_rdir); 432 vref(newdp); 433 fdp->fd_rdir = newdp; 434 cache_drop(fdp->fd_nrdir); 435 fdp->fd_nrdir = cache_hold(ncp); 436 } 437 } 438 vput(newdp); 439 } 440 441 /* 442 * Unmount a file system. 443 * 444 * Note: unmount takes a path to the vnode mounted on as argument, 445 * not special file (as before). 446 */ 447 /* 448 * umount_args(char *path, int flags) 449 */ 450 /* ARGSUSED */ 451 int 452 unmount(struct unmount_args *uap) 453 { 454 struct thread *td = curthread; 455 struct proc *p = td->td_proc; 456 struct vnode *vp; 457 struct mount *mp; 458 int error; 459 struct nlookupdata nd; 460 461 KKASSERT(p); 462 if (p->p_ucred->cr_prison != NULL) 463 return (EPERM); 464 if (usermount == 0 && (error = suser(td))) 465 return (error); 466 467 vp = NULL; 468 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 469 if (error == 0) 470 error = nlookup(&nd); 471 if (error == 0) 472 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 473 nlookup_done(&nd); 474 if (error) 475 return (error); 476 477 mp = vp->v_mount; 478 479 /* 480 * Only root, or the user that did the original mount is 481 * permitted to unmount this filesystem. 
482 */ 483 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && 484 (error = suser(td))) { 485 vput(vp); 486 return (error); 487 } 488 489 /* 490 * Don't allow unmounting the root file system. 491 */ 492 if (mp->mnt_flag & MNT_ROOTFS) { 493 vput(vp); 494 return (EINVAL); 495 } 496 497 /* 498 * Must be the root of the filesystem 499 */ 500 if ((vp->v_flag & VROOT) == 0) { 501 vput(vp); 502 return (EINVAL); 503 } 504 vput(vp); 505 return (dounmount(mp, uap->flags, td)); 506 } 507 508 /* 509 * Do the actual file system unmount. 510 */ 511 static int 512 dounmount_interlock(struct mount *mp) 513 { 514 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 515 return (EBUSY); 516 mp->mnt_kern_flag |= MNTK_UNMOUNT; 517 return(0); 518 } 519 520 int 521 dounmount(struct mount *mp, int flags, struct thread *td) 522 { 523 struct vnode *coveredvp; 524 int error; 525 int async_flag; 526 527 /* 528 * Exclusive access for unmounting purposes 529 */ 530 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 531 return (error); 532 533 /* 534 * Allow filesystems to detect that a forced unmount is in progress. 535 */ 536 if (flags & MNT_FORCE) 537 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 538 error = lockmgr(&mp->mnt_lock, LK_DRAIN | 539 ((flags & MNT_FORCE) ? 
0 : LK_NOWAIT), NULL, td); 540 if (error) { 541 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 542 if (mp->mnt_kern_flag & MNTK_MWAIT) 543 wakeup(mp); 544 return (error); 545 } 546 547 if (mp->mnt_flag & MNT_EXPUBLIC) 548 vfs_setpublicfs(NULL, NULL, NULL); 549 550 vfs_msync(mp, MNT_WAIT); 551 async_flag = mp->mnt_flag & MNT_ASYNC; 552 mp->mnt_flag &=~ MNT_ASYNC; 553 cache_purgevfs(mp); /* remove cache entries for this file sys */ 554 if (mp->mnt_syncer != NULL) 555 vrele(mp->mnt_syncer); 556 if (((mp->mnt_flag & MNT_RDONLY) || 557 (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) || 558 (flags & MNT_FORCE)) 559 error = VFS_UNMOUNT(mp, flags, td); 560 if (error) { 561 if (mp->mnt_syncer == NULL) 562 vfs_allocate_syncvnode(mp); 563 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 564 mp->mnt_flag |= async_flag; 565 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_REENABLE, NULL, td); 566 if (mp->mnt_kern_flag & MNTK_MWAIT) 567 wakeup(mp); 568 return (error); 569 } 570 /* 571 * Clean up any journals still associated with the mount after 572 * filesystem activity has ceased. 573 */ 574 journal_remove_all_journals(mp, 575 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 576 577 mountlist_remove(mp); 578 579 /* 580 * Remove any installed vnode ops here so the individual VFSs don't 581 * have to. 
582 */ 583 vfs_rm_vnodeops(&mp->mnt_vn_coherency_ops); 584 vfs_rm_vnodeops(&mp->mnt_vn_journal_ops); 585 vfs_rm_vnodeops(&mp->mnt_vn_norm_ops); 586 vfs_rm_vnodeops(&mp->mnt_vn_spec_ops); 587 vfs_rm_vnodeops(&mp->mnt_vn_fifo_ops); 588 589 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 590 coveredvp->v_mountedhere = NULL; 591 vrele(coveredvp); 592 cache_drop(mp->mnt_ncp); 593 mp->mnt_ncp = NULL; 594 } 595 mp->mnt_vfc->vfc_refcount--; 596 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 597 panic("unmount: dangling vnode"); 598 lockmgr(&mp->mnt_lock, LK_RELEASE, NULL, td); 599 if (mp->mnt_kern_flag & MNTK_MWAIT) 600 wakeup(mp); 601 free(mp, M_MOUNT); 602 return (0); 603 } 604 605 /* 606 * Sync each mounted filesystem. 607 */ 608 609 #ifdef DEBUG 610 static int syncprt = 0; 611 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 612 #endif /* DEBUG */ 613 614 static int sync_callback(struct mount *mp, void *data); 615 616 /* ARGSUSED */ 617 int 618 sync(struct sync_args *uap) 619 { 620 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 621 #ifdef DEBUG 622 /* 623 * print out buffer pool stat information on each sync() call. 624 */ 625 if (syncprt) 626 vfs_bufstats(); 627 #endif /* DEBUG */ 628 return (0); 629 } 630 631 static 632 int 633 sync_callback(struct mount *mp, void *data __unused) 634 { 635 int asyncflag; 636 637 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 638 asyncflag = mp->mnt_flag & MNT_ASYNC; 639 mp->mnt_flag &= ~MNT_ASYNC; 640 vfs_msync(mp, MNT_NOWAIT); 641 VFS_SYNC(mp, MNT_NOWAIT, curthread); 642 mp->mnt_flag |= asyncflag; 643 } 644 return(0); 645 } 646 647 /* XXX PRISON: could be per prison flag */ 648 static int prison_quotas; 649 #if 0 650 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 651 #endif 652 653 /* 654 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 655 * 656 * Change filesystem quotas. 
657 */ 658 /* ARGSUSED */ 659 int 660 quotactl(struct quotactl_args *uap) 661 { 662 struct nlookupdata nd; 663 struct thread *td; 664 struct proc *p; 665 struct mount *mp; 666 int error; 667 668 td = curthread; 669 p = td->td_proc; 670 if (p->p_ucred->cr_prison && !prison_quotas) 671 return (EPERM); 672 673 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 674 if (error == 0) 675 error = nlookup(&nd); 676 if (error == 0) { 677 mp = nd.nl_ncp->nc_mount; 678 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 679 uap->arg, nd.nl_td); 680 } 681 nlookup_done(&nd); 682 return (error); 683 } 684 685 /* 686 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 687 * void *buf, int buflen) 688 * 689 * This function operates on a mount point and executes the specified 690 * operation using the specified control data, and possibly returns data. 691 * 692 * The actual number of bytes stored in the result buffer is returned, 0 693 * if none, otherwise an error is returned. 694 */ 695 /* ARGSUSED */ 696 int 697 mountctl(struct mountctl_args *uap) 698 { 699 struct thread *td = curthread; 700 struct proc *p = td->td_proc; 701 struct filedesc *fdp = p->p_fd; 702 struct file *fp; 703 void *ctl = NULL; 704 void *buf = NULL; 705 char *path = NULL; 706 int error; 707 708 /* 709 * Sanity and permissions checks. We must be root. 
710 */ 711 KKASSERT(p); 712 if (p->p_ucred->cr_prison != NULL) 713 return (EPERM); 714 if ((error = suser(td)) != 0) 715 return (error); 716 717 /* 718 * Argument length checks 719 */ 720 if (uap->ctllen < 0 || uap->ctllen > 1024) 721 return (EINVAL); 722 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 723 return (EINVAL); 724 if (uap->path == NULL) 725 return (EINVAL); 726 727 /* 728 * Allocate the necessary buffers and copyin data 729 */ 730 path = zalloc(namei_zone); 731 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 732 if (error) 733 goto done; 734 735 if (uap->ctllen) { 736 ctl = malloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 737 error = copyin(uap->ctl, ctl, uap->ctllen); 738 if (error) 739 goto done; 740 } 741 if (uap->buflen) 742 buf = malloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 743 744 /* 745 * Validate the descriptor 746 */ 747 if (uap->fd == -1) { 748 fp = NULL; 749 } else if ((u_int)uap->fd >= fdp->fd_nfiles || 750 (fp = fdp->fd_files[uap->fd].fp) == NULL) { 751 error = EBADF; 752 goto done; 753 } 754 if (fp) 755 fhold(fp); 756 757 /* 758 * Execute the internal kernel function and clean up. 759 */ 760 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, buf, uap->buflen, &uap->sysmsg_result); 761 if (fp) 762 fdrop(fp, td); 763 if (error == 0 && uap->sysmsg_result > 0) 764 error = copyout(buf, uap->buf, uap->sysmsg_result); 765 done: 766 if (path) 767 zfree(namei_zone, path); 768 if (ctl) 769 free(ctl, M_TEMP); 770 if (buf) 771 free(buf, M_TEMP); 772 return (error); 773 } 774 775 /* 776 * Execute a mount control operation by resolving the path to a mount point 777 * and calling vop_mountctl(). 
778 */ 779 int 780 kern_mountctl(const char *path, int op, struct file *fp, 781 const void *ctl, int ctllen, 782 void *buf, int buflen, int *res) 783 { 784 struct vnode *vp; 785 struct mount *mp; 786 struct nlookupdata nd; 787 int error; 788 789 *res = 0; 790 vp = NULL; 791 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 792 if (error == 0) 793 error = nlookup(&nd); 794 if (error == 0) 795 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 796 nlookup_done(&nd); 797 if (error) 798 return (error); 799 800 mp = vp->v_mount; 801 802 /* 803 * Must be the root of the filesystem 804 */ 805 if ((vp->v_flag & VROOT) == 0) { 806 vput(vp); 807 return (EINVAL); 808 } 809 error = vop_mountctl(mp->mnt_vn_use_ops, op, fp, ctl, ctllen, 810 buf, buflen, res); 811 vput(vp); 812 return (error); 813 } 814 815 int 816 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 817 { 818 struct thread *td = curthread; 819 struct proc *p = td->td_proc; 820 struct mount *mp; 821 struct statfs *sp; 822 char *fullpath, *freepath; 823 int error; 824 825 if ((error = nlookup(nd)) != 0) 826 return (error); 827 mp = nd->nl_ncp->nc_mount; 828 sp = &mp->mnt_stat; 829 if ((error = VFS_STATFS(mp, sp, td)) != 0) 830 return (error); 831 832 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 833 if (error) 834 return(error); 835 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 836 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 837 free(freepath, M_TEMP); 838 839 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 840 bcopy(sp, buf, sizeof(*buf)); 841 /* Only root should have access to the fsid's. */ 842 if (suser(td)) 843 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 844 return (0); 845 } 846 847 /* 848 * statfs_args(char *path, struct statfs *buf) 849 * 850 * Get filesystem statistics. 
851 */ 852 int 853 statfs(struct statfs_args *uap) 854 { 855 struct nlookupdata nd; 856 struct statfs buf; 857 int error; 858 859 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 860 if (error == 0) 861 error = kern_statfs(&nd, &buf); 862 nlookup_done(&nd); 863 if (error == 0) 864 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 865 return (error); 866 } 867 868 int 869 kern_fstatfs(int fd, struct statfs *buf) 870 { 871 struct thread *td = curthread; 872 struct proc *p = td->td_proc; 873 struct file *fp; 874 struct mount *mp; 875 struct statfs *sp; 876 char *fullpath, *freepath; 877 int error; 878 879 KKASSERT(p); 880 error = getvnode(p->p_fd, fd, &fp); 881 if (error) 882 return (error); 883 mp = ((struct vnode *)fp->f_data)->v_mount; 884 if (mp == NULL) 885 return (EBADF); 886 sp = &mp->mnt_stat; 887 error = VFS_STATFS(mp, sp, td); 888 if (error) 889 return (error); 890 891 error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath); 892 if (error) 893 return(error); 894 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 895 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 896 free(freepath, M_TEMP); 897 898 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 899 bcopy(sp, buf, sizeof(*buf)); 900 901 /* Only root should have access to the fsid's. */ 902 if (suser(td)) 903 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 904 return (0); 905 } 906 907 /* 908 * fstatfs_args(int fd, struct statfs *buf) 909 * 910 * Get filesystem statistics. 911 */ 912 int 913 fstatfs(struct fstatfs_args *uap) 914 { 915 struct statfs buf; 916 int error; 917 918 error = kern_fstatfs(uap->fd, &buf); 919 920 if (error == 0) 921 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 922 return (error); 923 } 924 925 /* 926 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 927 * 928 * Get statistics on all filesystems. 
929 */ 930 931 struct getfsstat_info { 932 struct statfs *sfsp; 933 long count; 934 long maxcount; 935 int error; 936 int flags; 937 int is_chrooted; 938 struct thread *td; 939 struct proc *p; 940 }; 941 942 static int getfsstat_callback(struct mount *, void *); 943 944 /* ARGSUSED */ 945 int 946 getfsstat(struct getfsstat_args *uap) 947 { 948 struct thread *td = curthread; 949 struct proc *p = td->td_proc; 950 struct getfsstat_info info; 951 952 bzero(&info, sizeof(info)); 953 if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0) 954 info.is_chrooted = 1; 955 else 956 info.is_chrooted = 0; 957 958 info.maxcount = uap->bufsize / sizeof(struct statfs); 959 info.sfsp = uap->buf; 960 info.count = 0; 961 info.flags = uap->flags; 962 info.td = td; 963 info.p = p; 964 965 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 966 if (info.sfsp && info.count > info.maxcount) 967 uap->sysmsg_result = info.maxcount; 968 else 969 uap->sysmsg_result = info.count; 970 return (info.error); 971 } 972 973 static int 974 getfsstat_callback(struct mount *mp, void *data) 975 { 976 struct getfsstat_info *info = data; 977 struct statfs *sp; 978 char *freepath; 979 char *fullpath; 980 int error; 981 982 if (info->sfsp && info->count < info->maxcount) { 983 if (info->is_chrooted && !chroot_visible_mnt(mp, info->p)) 984 return(0); 985 sp = &mp->mnt_stat; 986 987 /* 988 * If MNT_NOWAIT or MNT_LAZY is specified, do not 989 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 990 * overrides MNT_WAIT. 
991 */ 992 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 993 (info->flags & MNT_WAIT)) && 994 (error = VFS_STATFS(mp, sp, info->td))) { 995 return(0); 996 } 997 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 998 999 error = cache_fullpath(info->p, mp->mnt_ncp, 1000 &fullpath, &freepath); 1001 if (error) { 1002 info->error = error; 1003 return(-1); 1004 } 1005 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1006 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1007 free(freepath, M_TEMP); 1008 1009 error = copyout(sp, info->sfsp, sizeof(*sp)); 1010 if (error) { 1011 info->error = error; 1012 return (-1); 1013 } 1014 ++info->sfsp; 1015 } 1016 info->count++; 1017 return(0); 1018 } 1019 1020 /* 1021 * fchdir_args(int fd) 1022 * 1023 * Change current working directory to a given file descriptor. 1024 */ 1025 /* ARGSUSED */ 1026 int 1027 fchdir(struct fchdir_args *uap) 1028 { 1029 struct thread *td = curthread; 1030 struct proc *p = td->td_proc; 1031 struct filedesc *fdp = p->p_fd; 1032 struct vnode *vp, *ovp; 1033 struct mount *mp; 1034 struct file *fp; 1035 struct namecache *ncp, *oncp; 1036 struct namecache *nct; 1037 int error; 1038 1039 if ((error = getvnode(fdp, uap->fd, &fp)) != 0) 1040 return (error); 1041 vp = (struct vnode *)fp->f_data; 1042 vref(vp); 1043 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 1044 if (vp->v_type != VDIR || fp->f_ncp == NULL) 1045 error = ENOTDIR; 1046 else 1047 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, td); 1048 if (error) { 1049 vput(vp); 1050 return (error); 1051 } 1052 ncp = cache_hold(fp->f_ncp); 1053 while (!error && (mp = vp->v_mountedhere) != NULL) { 1054 error = nlookup_mp(mp, &nct); 1055 if (error == 0) { 1056 cache_unlock(nct); /* leave ref intact */ 1057 vput(vp); 1058 vp = nct->nc_vp; 1059 error = vget(vp, LK_SHARED, td); 1060 KKASSERT(error == 0); 1061 cache_drop(ncp); 1062 ncp = nct; 1063 } 1064 } 1065 if (error == 0) { 1066 ovp = fdp->fd_cdir; 1067 oncp = fdp->fd_ncdir; 1068 VOP_UNLOCK(vp, 0, td); /* leave 
ref intact */ 1069 fdp->fd_cdir = vp; 1070 fdp->fd_ncdir = ncp; 1071 cache_drop(oncp); 1072 vrele(ovp); 1073 } else { 1074 cache_drop(ncp); 1075 vput(vp); 1076 } 1077 return (error); 1078 } 1079 1080 int 1081 kern_chdir(struct nlookupdata *nd) 1082 { 1083 struct thread *td = curthread; 1084 struct proc *p = td->td_proc; 1085 struct filedesc *fdp = p->p_fd; 1086 struct vnode *vp, *ovp; 1087 struct namecache *oncp; 1088 int error; 1089 1090 if ((error = nlookup(nd)) != 0) 1091 return (error); 1092 if ((vp = nd->nl_ncp->nc_vp) == NULL) 1093 return (ENOENT); 1094 if ((error = vget(vp, LK_SHARED, td)) != 0) 1095 return (error); 1096 1097 error = checkvp_chdir(vp, td); 1098 VOP_UNLOCK(vp, 0, td); 1099 if (error == 0) { 1100 ovp = fdp->fd_cdir; 1101 oncp = fdp->fd_ncdir; 1102 cache_unlock(nd->nl_ncp); /* leave reference intact */ 1103 fdp->fd_ncdir = nd->nl_ncp; 1104 fdp->fd_cdir = vp; 1105 cache_drop(oncp); 1106 vrele(ovp); 1107 nd->nl_ncp = NULL; 1108 } else { 1109 vrele(vp); 1110 } 1111 return (error); 1112 } 1113 1114 /* 1115 * chdir_args(char *path) 1116 * 1117 * Change current working directory (``.''). 1118 */ 1119 int 1120 chdir(struct chdir_args *uap) 1121 { 1122 struct nlookupdata nd; 1123 int error; 1124 1125 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1126 if (error == 0) 1127 error = kern_chdir(&nd); 1128 nlookup_done(&nd); 1129 return (error); 1130 } 1131 1132 /* 1133 * Helper function for raised chroot(2) security function: Refuse if 1134 * any filedescriptors are open directories. 
1135 */ 1136 static int 1137 chroot_refuse_vdir_fds(fdp) 1138 struct filedesc *fdp; 1139 { 1140 struct vnode *vp; 1141 struct file *fp; 1142 int error; 1143 int fd; 1144 1145 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1146 error = getvnode(fdp, fd, &fp); 1147 if (error) 1148 continue; 1149 vp = (struct vnode *)fp->f_data; 1150 if (vp->v_type != VDIR) 1151 continue; 1152 return(EPERM); 1153 } 1154 return (0); 1155 } 1156 1157 /* 1158 * This sysctl determines if we will allow a process to chroot(2) if it 1159 * has a directory open: 1160 * 0: disallowed for all processes. 1161 * 1: allowed for processes that were not already chroot(2)'ed. 1162 * 2: allowed for all processes. 1163 */ 1164 1165 static int chroot_allow_open_directories = 1; 1166 1167 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1168 &chroot_allow_open_directories, 0, ""); 1169 1170 /* 1171 * chroot to the specified namecache entry. We obtain the vp from the 1172 * namecache data. The passed ncp must be locked and referenced and will 1173 * remain locked and referenced on return. 1174 */ 1175 int 1176 kern_chroot(struct namecache *ncp) 1177 { 1178 struct thread *td = curthread; 1179 struct proc *p = td->td_proc; 1180 struct filedesc *fdp = p->p_fd; 1181 struct vnode *vp; 1182 int error; 1183 1184 /* 1185 * Only root can chroot 1186 */ 1187 if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0) 1188 return (error); 1189 1190 /* 1191 * Disallow open directory descriptors (fchdir() breakouts). 1192 */ 1193 if (chroot_allow_open_directories == 0 || 1194 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1195 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 1196 return (error); 1197 } 1198 if ((vp = ncp->nc_vp) == NULL) 1199 return (ENOENT); 1200 1201 if ((error = vget(vp, LK_SHARED, td)) != 0) 1202 return (error); 1203 1204 /* 1205 * Check the validity of vp as a directory to change to and 1206 * associate it with rdir/jdir. 
1207 */ 1208 error = checkvp_chdir(vp, td); 1209 VOP_UNLOCK(vp, 0, td); /* leave reference intact */ 1210 if (error == 0) { 1211 vrele(fdp->fd_rdir); 1212 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1213 cache_drop(fdp->fd_nrdir); 1214 fdp->fd_nrdir = cache_hold(ncp); 1215 if (fdp->fd_jdir == NULL) { 1216 fdp->fd_jdir = vp; 1217 vref(fdp->fd_jdir); 1218 fdp->fd_njdir = cache_hold(ncp); 1219 } 1220 } else { 1221 vrele(vp); 1222 } 1223 return (error); 1224 } 1225 1226 /* 1227 * chroot_args(char *path) 1228 * 1229 * Change notion of root (``/'') directory. 1230 */ 1231 /* ARGSUSED */ 1232 int 1233 chroot(struct chroot_args *uap) 1234 { 1235 struct thread *td = curthread; 1236 struct nlookupdata nd; 1237 int error; 1238 1239 KKASSERT(td->td_proc); 1240 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1241 if (error) { 1242 nlookup_done(&nd); 1243 return(error); 1244 } 1245 error = nlookup(&nd); 1246 if (error == 0) 1247 error = kern_chroot(nd.nl_ncp); 1248 nlookup_done(&nd); 1249 return(error); 1250 } 1251 1252 /* 1253 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1254 * determine whether it is legal to chdir to the vnode. The vnode's state 1255 * is not changed by this call. 
1256 */ 1257 int 1258 checkvp_chdir(struct vnode *vp, struct thread *td) 1259 { 1260 int error; 1261 1262 if (vp->v_type != VDIR) 1263 error = ENOTDIR; 1264 else 1265 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td); 1266 return (error); 1267 } 1268 1269 int 1270 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1271 { 1272 struct thread *td = curthread; 1273 struct proc *p = td->td_proc; 1274 struct filedesc *fdp = p->p_fd; 1275 int cmode, flags; 1276 struct file *nfp; 1277 struct file *fp; 1278 struct vnode *vp; 1279 int type, indx, error; 1280 struct flock lf; 1281 1282 if ((oflags & O_ACCMODE) == O_ACCMODE) 1283 return (EINVAL); 1284 flags = FFLAGS(oflags); 1285 error = falloc(p, &nfp, NULL); 1286 if (error) 1287 return (error); 1288 fp = nfp; 1289 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 1290 1291 /* 1292 * XXX p_dupfd is a real mess. It allows a device to return a 1293 * file descriptor to be duplicated rather then doing the open 1294 * itself. 1295 */ 1296 p->p_dupfd = -1; 1297 1298 /* 1299 * Call vn_open() to do the lookup and assign the vnode to the 1300 * file pointer. vn_open() does not change the ref count on fp 1301 * and the vnode, on success, will be inherited by the file pointer 1302 * and unlocked. 1303 */ 1304 nd->nl_flags |= NLC_LOCKVP; 1305 error = vn_open(nd, fp, flags, cmode); 1306 nlookup_done(nd); 1307 if (error) { 1308 /* 1309 * handle special fdopen() case. bleh. dupfdopen() is 1310 * responsible for dropping the old contents of ofiles[indx] 1311 * if it succeeds. 1312 * 1313 * Note that if fsetfd() succeeds it will add a ref to fp 1314 * which represents the fd_files[] assignment. We must still 1315 * drop our reference. 
1316 */ 1317 if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0) { 1318 if (fsetfd(p, fp, &indx) == 0) { 1319 error = dupfdopen(fdp, indx, p->p_dupfd, flags, error); 1320 if (error == 0) { 1321 *res = indx; 1322 fdrop(fp, td); /* our ref */ 1323 return (0); 1324 } 1325 if (fdp->fd_files[indx].fp == fp) { 1326 funsetfd(fdp, indx); 1327 fdrop(fp, td); /* fd_files[] ref */ 1328 } 1329 } 1330 } 1331 fdrop(fp, td); /* our ref */ 1332 if (error == ERESTART) 1333 error = EINTR; 1334 return (error); 1335 } 1336 1337 /* 1338 * ref the vnode for ourselves so it can't be ripped out from under 1339 * is. XXX need an ND flag to request that the vnode be returned 1340 * anyway. 1341 */ 1342 vp = (struct vnode *)fp->f_data; 1343 vref(vp); 1344 if ((error = fsetfd(p, fp, &indx)) != 0) { 1345 fdrop(fp, td); 1346 vrele(vp); 1347 return (error); 1348 } 1349 1350 /* 1351 * If no error occurs the vp will have been assigned to the file 1352 * pointer. 1353 */ 1354 p->p_dupfd = 0; 1355 1356 /* 1357 * There should be 2 references on the file, one from the descriptor 1358 * table, and one for us. 1359 * 1360 * Handle the case where someone closed the file (via its file 1361 * descriptor) while we were blocked. The end result should look 1362 * like opening the file succeeded but it was immediately closed. 1363 */ 1364 if (fp->f_count == 1) { 1365 KASSERT(fdp->fd_files[indx].fp != fp, 1366 ("Open file descriptor lost all refs")); 1367 vrele(vp); 1368 fo_close(fp, td); 1369 fdrop(fp, td); 1370 *res = indx; 1371 return 0; 1372 } 1373 1374 if (flags & (O_EXLOCK | O_SHLOCK)) { 1375 lf.l_whence = SEEK_SET; 1376 lf.l_start = 0; 1377 lf.l_len = 0; 1378 if (flags & O_EXLOCK) 1379 lf.l_type = F_WRLCK; 1380 else 1381 lf.l_type = F_RDLCK; 1382 type = F_FLOCK; 1383 if ((flags & FNONBLOCK) == 0) 1384 type |= F_WAIT; 1385 1386 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1387 /* 1388 * lock request failed. 
Normally close the descriptor 1389 * but handle the case where someone might have dup()d 1390 * it when we weren't looking. One reference is 1391 * owned by the descriptor array, the other by us. 1392 */ 1393 vrele(vp); 1394 if (fdp->fd_files[indx].fp == fp) { 1395 funsetfd(fdp, indx); 1396 fdrop(fp, td); 1397 } 1398 fdrop(fp, td); 1399 return (error); 1400 } 1401 fp->f_flag |= FHASLOCK; 1402 } 1403 /* assert that vn_open created a backing object if one is needed */ 1404 KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, 1405 ("open: vmio vnode has no backing object after vn_open")); 1406 1407 vrele(vp); 1408 1409 /* 1410 * release our private reference, leaving the one associated with the 1411 * descriptor table intact. 1412 */ 1413 fdrop(fp, td); 1414 *res = indx; 1415 return (0); 1416 } 1417 1418 /* 1419 * open_args(char *path, int flags, int mode) 1420 * 1421 * Check permissions, allocate an open file structure, 1422 * and call the device open routine if any. 1423 */ 1424 int 1425 open(struct open_args *uap) 1426 { 1427 struct nlookupdata nd; 1428 int error; 1429 1430 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1431 if (error == 0) { 1432 error = kern_open(&nd, uap->flags, 1433 uap->mode, &uap->sysmsg_result); 1434 } 1435 nlookup_done(&nd); 1436 return (error); 1437 } 1438 1439 int 1440 kern_mknod(struct nlookupdata *nd, int mode, int dev) 1441 { 1442 struct namecache *ncp; 1443 struct thread *td = curthread; 1444 struct proc *p = td->td_proc; 1445 struct vnode *vp; 1446 struct vattr vattr; 1447 int error; 1448 int whiteout = 0; 1449 1450 KKASSERT(p); 1451 1452 switch (mode & S_IFMT) { 1453 case S_IFCHR: 1454 case S_IFBLK: 1455 error = suser(td); 1456 break; 1457 default: 1458 error = suser_cred(p->p_ucred, PRISON_ROOT); 1459 break; 1460 } 1461 if (error) 1462 return (error); 1463 1464 bwillwrite(); 1465 nd->nl_flags |= NLC_CREATE; 1466 if ((error = nlookup(nd)) != 0) 1467 return (error); 1468 ncp = nd->nl_ncp; 1469 if (ncp->nc_vp) 
1470 return (EEXIST); 1471 1472 VATTR_NULL(&vattr); 1473 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1474 vattr.va_rdev = dev; 1475 whiteout = 0; 1476 1477 switch (mode & S_IFMT) { 1478 case S_IFMT: /* used by badsect to flag bad sectors */ 1479 vattr.va_type = VBAD; 1480 break; 1481 case S_IFCHR: 1482 vattr.va_type = VCHR; 1483 break; 1484 case S_IFBLK: 1485 vattr.va_type = VBLK; 1486 break; 1487 case S_IFWHT: 1488 whiteout = 1; 1489 break; 1490 default: 1491 error = EINVAL; 1492 break; 1493 } 1494 if (error == 0) { 1495 if (whiteout) { 1496 error = VOP_NWHITEOUT(ncp, nd->nl_cred, NAMEI_CREATE); 1497 } else { 1498 vp = NULL; 1499 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr); 1500 if (error == 0) 1501 vput(vp); 1502 } 1503 } 1504 return (error); 1505 } 1506 1507 /* 1508 * mknod_args(char *path, int mode, int dev) 1509 * 1510 * Create a special file. 1511 */ 1512 int 1513 mknod(struct mknod_args *uap) 1514 { 1515 struct nlookupdata nd; 1516 int error; 1517 1518 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1519 if (error == 0) 1520 error = kern_mknod(&nd, uap->mode, uap->dev); 1521 nlookup_done(&nd); 1522 return (error); 1523 } 1524 1525 int 1526 kern_mkfifo(struct nlookupdata *nd, int mode) 1527 { 1528 struct namecache *ncp; 1529 struct thread *td = curthread; 1530 struct proc *p = td->td_proc; 1531 struct vattr vattr; 1532 struct vnode *vp; 1533 int error; 1534 1535 bwillwrite(); 1536 1537 nd->nl_flags |= NLC_CREATE; 1538 if ((error = nlookup(nd)) != 0) 1539 return (error); 1540 ncp = nd->nl_ncp; 1541 if (ncp->nc_vp) 1542 return (EEXIST); 1543 1544 VATTR_NULL(&vattr); 1545 vattr.va_type = VFIFO; 1546 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1547 vp = NULL; 1548 error = VOP_NMKNOD(ncp, &vp, nd->nl_cred, &vattr); 1549 if (error == 0) 1550 vput(vp); 1551 return (error); 1552 } 1553 1554 /* 1555 * mkfifo_args(char *path, int mode) 1556 * 1557 * Create a named pipe. 
1558 */ 1559 int 1560 mkfifo(struct mkfifo_args *uap) 1561 { 1562 struct nlookupdata nd; 1563 int error; 1564 1565 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1566 if (error == 0) 1567 error = kern_mkfifo(&nd, uap->mode); 1568 nlookup_done(&nd); 1569 return (error); 1570 } 1571 1572 int 1573 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 1574 { 1575 struct thread *td = curthread; 1576 struct vnode *vp; 1577 int error; 1578 1579 /* 1580 * Lookup the source and obtained a locked vnode. 1581 * 1582 * XXX relookup on vget failure / race ? 1583 */ 1584 bwillwrite(); 1585 if ((error = nlookup(nd)) != 0) 1586 return (error); 1587 vp = nd->nl_ncp->nc_vp; 1588 KKASSERT(vp != NULL); 1589 if (vp->v_type == VDIR) 1590 return (EPERM); /* POSIX */ 1591 if ((error = vget(vp, LK_EXCLUSIVE, td)) != 0) 1592 return (error); 1593 1594 /* 1595 * Unlock the source so we can lookup the target without deadlocking 1596 * (XXX vp is locked already, possible other deadlock?). The target 1597 * must not exist. 1598 */ 1599 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 1600 nd->nl_flags &= ~NLC_NCPISLOCKED; 1601 cache_unlock(nd->nl_ncp); 1602 1603 linknd->nl_flags |= NLC_CREATE; 1604 if ((error = nlookup(linknd)) != 0) { 1605 vput(vp); 1606 return (error); 1607 } 1608 if (linknd->nl_ncp->nc_vp) { 1609 vput(vp); 1610 return (EEXIST); 1611 } 1612 1613 /* 1614 * Finally run the new API VOP. 1615 */ 1616 error = VOP_NLINK(linknd->nl_ncp, vp, linknd->nl_cred); 1617 vput(vp); 1618 return (error); 1619 } 1620 1621 /* 1622 * link_args(char *path, char *link) 1623 * 1624 * Make a hard file link. 
1625 */ 1626 int 1627 link(struct link_args *uap) 1628 { 1629 struct nlookupdata nd, linknd; 1630 int error; 1631 1632 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1633 if (error == 0) { 1634 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 1635 if (error == 0) 1636 error = kern_link(&nd, &linknd); 1637 nlookup_done(&linknd); 1638 } 1639 nlookup_done(&nd); 1640 return (error); 1641 } 1642 1643 int 1644 kern_symlink(struct nlookupdata *nd, char *path, int mode) 1645 { 1646 struct namecache *ncp; 1647 struct vattr vattr; 1648 struct vnode *vp; 1649 int error; 1650 1651 bwillwrite(); 1652 nd->nl_flags |= NLC_CREATE; 1653 if ((error = nlookup(nd)) != 0) 1654 return (error); 1655 ncp = nd->nl_ncp; 1656 if (ncp->nc_vp) 1657 return (EEXIST); 1658 1659 VATTR_NULL(&vattr); 1660 vattr.va_mode = mode; 1661 error = VOP_NSYMLINK(ncp, &vp, nd->nl_cred, &vattr, path); 1662 if (error == 0) 1663 vput(vp); 1664 return (error); 1665 } 1666 1667 /* 1668 * symlink(char *path, char *link) 1669 * 1670 * Make a symbolic link. 1671 */ 1672 int 1673 symlink(struct symlink_args *uap) 1674 { 1675 struct thread *td = curthread; 1676 struct nlookupdata nd; 1677 char *path; 1678 int error; 1679 int mode; 1680 1681 path = zalloc(namei_zone); 1682 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1683 if (error == 0) { 1684 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 1685 if (error == 0) { 1686 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 1687 error = kern_symlink(&nd, path, mode); 1688 } 1689 nlookup_done(&nd); 1690 } 1691 zfree(namei_zone, path); 1692 return (error); 1693 } 1694 1695 /* 1696 * undelete_args(char *path) 1697 * 1698 * Delete a whiteout from the filesystem. 
1699 */ 1700 /* ARGSUSED */ 1701 int 1702 undelete(struct undelete_args *uap) 1703 { 1704 struct nlookupdata nd; 1705 int error; 1706 1707 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1708 bwillwrite(); 1709 nd.nl_flags |= NLC_DELETE; 1710 if (error == 0) 1711 error = nlookup(&nd); 1712 if (error == 0) 1713 error = VOP_NWHITEOUT(nd.nl_ncp, nd.nl_cred, NAMEI_DELETE); 1714 nlookup_done(&nd); 1715 return (error); 1716 } 1717 1718 int 1719 kern_unlink(struct nlookupdata *nd) 1720 { 1721 struct namecache *ncp; 1722 int error; 1723 1724 bwillwrite(); 1725 nd->nl_flags |= NLC_DELETE; 1726 if ((error = nlookup(nd)) != 0) 1727 return (error); 1728 ncp = nd->nl_ncp; 1729 error = VOP_NREMOVE(ncp, nd->nl_cred); 1730 return (error); 1731 } 1732 1733 /* 1734 * unlink_args(char *path) 1735 * 1736 * Delete a name from the filesystem. 1737 */ 1738 int 1739 unlink(struct unlink_args *uap) 1740 { 1741 struct nlookupdata nd; 1742 int error; 1743 1744 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1745 if (error == 0) 1746 error = kern_unlink(&nd); 1747 nlookup_done(&nd); 1748 return (error); 1749 } 1750 1751 int 1752 kern_lseek(int fd, off_t offset, int whence, off_t *res) 1753 { 1754 struct thread *td = curthread; 1755 struct proc *p = td->td_proc; 1756 struct filedesc *fdp = p->p_fd; 1757 struct file *fp; 1758 struct vattr vattr; 1759 int error; 1760 1761 if ((u_int)fd >= fdp->fd_nfiles || 1762 (fp = fdp->fd_files[fd].fp) == NULL) 1763 return (EBADF); 1764 if (fp->f_type != DTYPE_VNODE) 1765 return (ESPIPE); 1766 switch (whence) { 1767 case L_INCR: 1768 fp->f_offset += offset; 1769 break; 1770 case L_XTND: 1771 error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, td); 1772 if (error) 1773 return (error); 1774 fp->f_offset = offset + vattr.va_size; 1775 break; 1776 case L_SET: 1777 fp->f_offset = offset; 1778 break; 1779 default: 1780 return (EINVAL); 1781 } 1782 *res = fp->f_offset; 1783 return (0); 1784 } 1785 1786 /* 1787 * lseek_args(int fd, int pad, off_t 
offset, int whence) 1788 * 1789 * Reposition read/write file offset. 1790 */ 1791 int 1792 lseek(struct lseek_args *uap) 1793 { 1794 int error; 1795 1796 error = kern_lseek(uap->fd, uap->offset, uap->whence, 1797 &uap->sysmsg_offset); 1798 1799 return (error); 1800 } 1801 1802 int 1803 kern_access(struct nlookupdata *nd, int aflags) 1804 { 1805 struct thread *td = curthread; 1806 struct vnode *vp; 1807 int error, flags; 1808 1809 if ((error = nlookup(nd)) != 0) 1810 return (error); 1811 retry: 1812 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp); 1813 if (error) 1814 return (error); 1815 1816 /* Flags == 0 means only check for existence. */ 1817 if (aflags) { 1818 flags = 0; 1819 if (aflags & R_OK) 1820 flags |= VREAD; 1821 if (aflags & W_OK) 1822 flags |= VWRITE; 1823 if (aflags & X_OK) 1824 flags |= VEXEC; 1825 if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 1826 error = VOP_ACCESS(vp, flags, nd->nl_cred, td); 1827 1828 /* 1829 * If the file handle is stale we have to re-resolve the 1830 * entry. This is a hack at the moment. 1831 */ 1832 if (error == ESTALE) { 1833 cache_setunresolved(nd->nl_ncp); 1834 error = cache_resolve(nd->nl_ncp, nd->nl_cred); 1835 if (error == 0) { 1836 vput(vp); 1837 vp = NULL; 1838 goto retry; 1839 } 1840 } 1841 } 1842 vput(vp); 1843 return (error); 1844 } 1845 1846 /* 1847 * access_args(char *path, int flags) 1848 * 1849 * Check access permissions. 
1850 */ 1851 int 1852 access(struct access_args *uap) 1853 { 1854 struct nlookupdata nd; 1855 int error; 1856 1857 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1858 if (error == 0) 1859 error = kern_access(&nd, uap->flags); 1860 nlookup_done(&nd); 1861 return (error); 1862 } 1863 1864 int 1865 kern_stat(struct nlookupdata *nd, struct stat *st) 1866 { 1867 int error; 1868 struct vnode *vp; 1869 thread_t td; 1870 1871 if ((error = nlookup(nd)) != 0) 1872 return (error); 1873 again: 1874 if ((vp = nd->nl_ncp->nc_vp) == NULL) 1875 return (ENOENT); 1876 1877 td = curthread; 1878 if ((error = vget(vp, LK_SHARED, td)) != 0) 1879 return (error); 1880 error = vn_stat(vp, st, td); 1881 1882 /* 1883 * If the file handle is stale we have to re-resolve the entry. This 1884 * is a hack at the moment. 1885 */ 1886 if (error == ESTALE) { 1887 cache_setunresolved(nd->nl_ncp); 1888 error = cache_resolve(nd->nl_ncp, nd->nl_cred); 1889 if (error == 0) { 1890 vput(vp); 1891 goto again; 1892 } 1893 } 1894 vput(vp); 1895 return (error); 1896 } 1897 1898 /* 1899 * stat_args(char *path, struct stat *ub) 1900 * 1901 * Get file status; this version follows links. 1902 */ 1903 int 1904 stat(struct stat_args *uap) 1905 { 1906 struct nlookupdata nd; 1907 struct stat st; 1908 int error; 1909 1910 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1911 if (error == 0) { 1912 error = kern_stat(&nd, &st); 1913 if (error == 0) 1914 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 1915 } 1916 nlookup_done(&nd); 1917 return (error); 1918 } 1919 1920 /* 1921 * lstat_args(char *path, struct stat *ub) 1922 * 1923 * Get file status; this version does not follow links. 
1924 */ 1925 int 1926 lstat(struct lstat_args *uap) 1927 { 1928 struct nlookupdata nd; 1929 struct stat st; 1930 int error; 1931 1932 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 1933 if (error == 0) { 1934 error = kern_stat(&nd, &st); 1935 if (error == 0) 1936 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 1937 } 1938 nlookup_done(&nd); 1939 return (error); 1940 } 1941 1942 /* 1943 * pathconf_Args(char *path, int name) 1944 * 1945 * Get configurable pathname variables. 1946 */ 1947 /* ARGSUSED */ 1948 int 1949 pathconf(struct pathconf_args *uap) 1950 { 1951 struct nlookupdata nd; 1952 struct vnode *vp; 1953 int error; 1954 1955 vp = NULL; 1956 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1957 if (error == 0) 1958 error = nlookup(&nd); 1959 if (error == 0) 1960 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 1961 nlookup_done(&nd); 1962 if (error == 0) { 1963 error = VOP_PATHCONF(vp, uap->name, uap->sysmsg_fds); 1964 vput(vp); 1965 } 1966 return (error); 1967 } 1968 1969 /* 1970 * XXX: daver 1971 * kern_readlink isn't properly split yet. There is a copyin burried 1972 * in VOP_READLINK(). 
1973 */ 1974 int 1975 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 1976 { 1977 struct thread *td = curthread; 1978 struct proc *p = td->td_proc; 1979 struct vnode *vp; 1980 struct iovec aiov; 1981 struct uio auio; 1982 int error; 1983 1984 if ((error = nlookup(nd)) != 0) 1985 return (error); 1986 error = cache_vget(nd->nl_ncp, nd->nl_cred, LK_EXCLUSIVE, &vp); 1987 if (error) 1988 return (error); 1989 if (vp->v_type != VLNK) { 1990 error = EINVAL; 1991 } else { 1992 aiov.iov_base = buf; 1993 aiov.iov_len = count; 1994 auio.uio_iov = &aiov; 1995 auio.uio_iovcnt = 1; 1996 auio.uio_offset = 0; 1997 auio.uio_rw = UIO_READ; 1998 auio.uio_segflg = UIO_USERSPACE; 1999 auio.uio_td = td; 2000 auio.uio_resid = count; 2001 error = VOP_READLINK(vp, &auio, p->p_ucred); 2002 } 2003 vput(vp); 2004 *res = count - auio.uio_resid; 2005 return (error); 2006 } 2007 2008 /* 2009 * readlink_args(char *path, char *buf, int count) 2010 * 2011 * Return target name of a symbolic link. 2012 */ 2013 int 2014 readlink(struct readlink_args *uap) 2015 { 2016 struct nlookupdata nd; 2017 int error; 2018 2019 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2020 if (error == 0) { 2021 error = kern_readlink(&nd, uap->buf, uap->count, 2022 &uap->sysmsg_result); 2023 } 2024 nlookup_done(&nd); 2025 return (error); 2026 } 2027 2028 static int 2029 setfflags(struct vnode *vp, int flags) 2030 { 2031 struct thread *td = curthread; 2032 struct proc *p = td->td_proc; 2033 int error; 2034 struct vattr vattr; 2035 2036 /* 2037 * Prevent non-root users from setting flags on devices. When 2038 * a device is reused, users can retain ownership of the device 2039 * if they are allowed to set flags and programs assume that 2040 * chown can't fail when done as root. 
2041 */ 2042 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 2043 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 2044 return (error); 2045 2046 /* 2047 * note: vget is required for any operation that might mod the vnode 2048 * so VINACTIVE is properly cleared. 2049 */ 2050 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2051 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2052 VATTR_NULL(&vattr); 2053 vattr.va_flags = flags; 2054 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2055 vput(vp); 2056 } 2057 return (error); 2058 } 2059 2060 /* 2061 * chflags(char *path, int flags) 2062 * 2063 * Change flags of a file given a path name. 2064 */ 2065 /* ARGSUSED */ 2066 int 2067 chflags(struct chflags_args *uap) 2068 { 2069 struct nlookupdata nd; 2070 struct vnode *vp; 2071 int error; 2072 2073 vp = NULL; 2074 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2075 /* XXX Add NLC flag indicating modifying operation? */ 2076 if (error == 0) 2077 error = nlookup(&nd); 2078 if (error == 0) 2079 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp); 2080 nlookup_done(&nd); 2081 if (error == 0) { 2082 error = setfflags(vp, uap->flags); 2083 vrele(vp); 2084 } 2085 return (error); 2086 } 2087 2088 /* 2089 * fchflags_args(int fd, int flags) 2090 * 2091 * Change flags of a file given a file descriptor. 2092 */ 2093 /* ARGSUSED */ 2094 int 2095 fchflags(struct fchflags_args *uap) 2096 { 2097 struct thread *td = curthread; 2098 struct proc *p = td->td_proc; 2099 struct file *fp; 2100 int error; 2101 2102 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2103 return (error); 2104 return setfflags((struct vnode *) fp->f_data, uap->flags); 2105 } 2106 2107 static int 2108 setfmode(struct vnode *vp, int mode) 2109 { 2110 struct thread *td = curthread; 2111 struct proc *p = td->td_proc; 2112 int error; 2113 struct vattr vattr; 2114 2115 /* 2116 * note: vget is required for any operation that might mod the vnode 2117 * so VINACTIVE is properly cleared. 
2118 */ 2119 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2120 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2121 VATTR_NULL(&vattr); 2122 vattr.va_mode = mode & ALLPERMS; 2123 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2124 vput(vp); 2125 } 2126 return error; 2127 } 2128 2129 int 2130 kern_chmod(struct nlookupdata *nd, int mode) 2131 { 2132 struct vnode *vp; 2133 int error; 2134 2135 /* XXX Add NLC flag indicating modifying operation? */ 2136 if ((error = nlookup(nd)) != 0) 2137 return (error); 2138 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2139 return (error); 2140 error = setfmode(vp, mode); 2141 vrele(vp); 2142 return (error); 2143 } 2144 2145 /* 2146 * chmod_args(char *path, int mode) 2147 * 2148 * Change mode of a file given path name. 2149 */ 2150 /* ARGSUSED */ 2151 int 2152 chmod(struct chmod_args *uap) 2153 { 2154 struct nlookupdata nd; 2155 int error; 2156 2157 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2158 if (error == 0) 2159 error = kern_chmod(&nd, uap->mode); 2160 nlookup_done(&nd); 2161 return (error); 2162 } 2163 2164 /* 2165 * lchmod_args(char *path, int mode) 2166 * 2167 * Change mode of a file given path name (don't follow links.) 2168 */ 2169 /* ARGSUSED */ 2170 int 2171 lchmod(struct lchmod_args *uap) 2172 { 2173 struct nlookupdata nd; 2174 int error; 2175 2176 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2177 if (error == 0) 2178 error = kern_chmod(&nd, uap->mode); 2179 nlookup_done(&nd); 2180 return (error); 2181 } 2182 2183 /* 2184 * fchmod_args(int fd, int mode) 2185 * 2186 * Change mode of a file given a file descriptor. 
2187 */ 2188 /* ARGSUSED */ 2189 int 2190 fchmod(struct fchmod_args *uap) 2191 { 2192 struct thread *td = curthread; 2193 struct proc *p = td->td_proc; 2194 struct file *fp; 2195 int error; 2196 2197 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2198 return (error); 2199 return setfmode((struct vnode *)fp->f_data, uap->mode); 2200 } 2201 2202 static int 2203 setfown(struct vnode *vp, uid_t uid, gid_t gid) 2204 { 2205 struct thread *td = curthread; 2206 struct proc *p = td->td_proc; 2207 int error; 2208 struct vattr vattr; 2209 2210 /* 2211 * note: vget is required for any operation that might mod the vnode 2212 * so VINACTIVE is properly cleared. 2213 */ 2214 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2215 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2216 VATTR_NULL(&vattr); 2217 vattr.va_uid = uid; 2218 vattr.va_gid = gid; 2219 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2220 vput(vp); 2221 } 2222 return error; 2223 } 2224 2225 int 2226 kern_chown(struct nlookupdata *nd, int uid, int gid) 2227 { 2228 struct vnode *vp; 2229 int error; 2230 2231 /* XXX Add NLC flag indicating modifying operation? */ 2232 if ((error = nlookup(nd)) != 0) 2233 return (error); 2234 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2235 return (error); 2236 error = setfown(vp, uid, gid); 2237 vrele(vp); 2238 return (error); 2239 } 2240 2241 /* 2242 * chown(char *path, int uid, int gid) 2243 * 2244 * Set ownership given a path name. 2245 */ 2246 int 2247 chown(struct chown_args *uap) 2248 { 2249 struct nlookupdata nd; 2250 int error; 2251 2252 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2253 if (error == 0) 2254 error = kern_chown(&nd, uap->uid, uap->gid); 2255 nlookup_done(&nd); 2256 return (error); 2257 } 2258 2259 /* 2260 * lchown_args(char *path, int uid, int gid) 2261 * 2262 * Set ownership given a path name, do not cross symlinks. 
2263 */ 2264 int 2265 lchown(struct lchown_args *uap) 2266 { 2267 struct nlookupdata nd; 2268 int error; 2269 2270 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2271 if (error == 0) 2272 error = kern_chown(&nd, uap->uid, uap->gid); 2273 nlookup_done(&nd); 2274 return (error); 2275 } 2276 2277 /* 2278 * fchown_args(int fd, int uid, int gid) 2279 * 2280 * Set ownership given a file descriptor. 2281 */ 2282 /* ARGSUSED */ 2283 int 2284 fchown(struct fchown_args *uap) 2285 { 2286 struct thread *td = curthread; 2287 struct proc *p = td->td_proc; 2288 struct file *fp; 2289 int error; 2290 2291 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2292 return (error); 2293 return setfown((struct vnode *)fp->f_data, 2294 uap->uid, uap->gid); 2295 } 2296 2297 static int 2298 getutimes(const struct timeval *tvp, struct timespec *tsp) 2299 { 2300 struct timeval tv[2]; 2301 2302 if (tvp == NULL) { 2303 microtime(&tv[0]); 2304 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 2305 tsp[1] = tsp[0]; 2306 } else { 2307 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 2308 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 2309 } 2310 return 0; 2311 } 2312 2313 static int 2314 setutimes(struct vnode *vp, const struct timespec *ts, int nullflag) 2315 { 2316 struct thread *td = curthread; 2317 struct proc *p = td->td_proc; 2318 int error; 2319 struct vattr vattr; 2320 2321 /* 2322 * note: vget is required for any operation that might mod the vnode 2323 * so VINACTIVE is properly cleared. 
2324 */ 2325 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2326 if ((error = vget(vp, LK_EXCLUSIVE, td)) == 0) { 2327 VATTR_NULL(&vattr); 2328 vattr.va_atime = ts[0]; 2329 vattr.va_mtime = ts[1]; 2330 if (nullflag) 2331 vattr.va_vaflags |= VA_UTIMES_NULL; 2332 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2333 vput(vp); 2334 } 2335 return error; 2336 } 2337 2338 int 2339 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 2340 { 2341 struct timespec ts[2]; 2342 struct vnode *vp; 2343 int error; 2344 2345 if ((error = getutimes(tptr, ts)) != 0) 2346 return (error); 2347 /* XXX Add NLC flag indicating modifying operation? */ 2348 if ((error = nlookup(nd)) != 0) 2349 return (error); 2350 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2351 return (error); 2352 error = setutimes(vp, ts, tptr == NULL); 2353 vrele(vp); 2354 return (error); 2355 } 2356 2357 /* 2358 * utimes_args(char *path, struct timeval *tptr) 2359 * 2360 * Set the access and modification times of a file. 2361 */ 2362 int 2363 utimes(struct utimes_args *uap) 2364 { 2365 struct timeval tv[2]; 2366 struct nlookupdata nd; 2367 int error; 2368 2369 if (uap->tptr) { 2370 error = copyin(uap->tptr, tv, sizeof(tv)); 2371 if (error) 2372 return (error); 2373 } 2374 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2375 if (error == 0) 2376 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 2377 nlookup_done(&nd); 2378 return (error); 2379 } 2380 2381 /* 2382 * lutimes_args(char *path, struct timeval *tptr) 2383 * 2384 * Set the access and modification times of a file. 2385 */ 2386 int 2387 lutimes(struct lutimes_args *uap) 2388 { 2389 struct timeval tv[2]; 2390 struct nlookupdata nd; 2391 int error; 2392 2393 if (uap->tptr) { 2394 error = copyin(uap->tptr, tv, sizeof(tv)); 2395 if (error) 2396 return (error); 2397 } 2398 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2399 if (error == 0) 2400 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 2401 nlookup_done(&nd); 2402 return (error); 2403 } 2404 2405 int 2406 kern_futimes(int fd, struct timeval *tptr) 2407 { 2408 struct thread *td = curthread; 2409 struct proc *p = td->td_proc; 2410 struct timespec ts[2]; 2411 struct file *fp; 2412 int error; 2413 2414 error = getutimes(tptr, ts); 2415 if (error) 2416 return (error); 2417 error = getvnode(p->p_fd, fd, &fp); 2418 if (error) 2419 return (error); 2420 error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL); 2421 return (error); 2422 } 2423 2424 /* 2425 * futimes_args(int fd, struct timeval *tptr) 2426 * 2427 * Set the access and modification times of a file. 2428 */ 2429 int 2430 futimes(struct futimes_args *uap) 2431 { 2432 struct timeval tv[2]; 2433 int error; 2434 2435 if (uap->tptr) { 2436 error = copyin(uap->tptr, tv, sizeof(tv)); 2437 if (error) 2438 return (error); 2439 } 2440 2441 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 2442 2443 return (error); 2444 } 2445 2446 int 2447 kern_truncate(struct nlookupdata *nd, off_t length) 2448 { 2449 struct vnode *vp; 2450 struct vattr vattr; 2451 int error; 2452 2453 if (length < 0) 2454 return(EINVAL); 2455 /* XXX Add NLC flag indicating modifying operation? */ 2456 if ((error = nlookup(nd)) != 0) 2457 return (error); 2458 if ((error = cache_vref(nd->nl_ncp, nd->nl_cred, &vp)) != 0) 2459 return (error); 2460 VOP_LEASE(vp, nd->nl_td, nd->nl_cred, LEASE_WRITE); 2461 if ((error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, nd->nl_td)) != 0) { 2462 vrele(vp); 2463 return (error); 2464 } 2465 if (vp->v_type == VDIR) { 2466 error = EISDIR; 2467 } else if ((error = vn_writechk(vp)) == 0 && 2468 (error = VOP_ACCESS(vp, VWRITE, nd->nl_cred, nd->nl_td)) == 0) { 2469 VATTR_NULL(&vattr); 2470 vattr.va_size = length; 2471 error = VOP_SETATTR(vp, &vattr, nd->nl_cred, nd->nl_td); 2472 } 2473 vput(vp); 2474 return (error); 2475 } 2476 2477 /* 2478 * truncate(char *path, int pad, off_t length) 2479 * 2480 * Truncate a file given its path name. 
2481 */ 2482 int 2483 truncate(struct truncate_args *uap) 2484 { 2485 struct nlookupdata nd; 2486 int error; 2487 2488 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2489 if (error == 0) 2490 error = kern_truncate(&nd, uap->length); 2491 nlookup_done(&nd); 2492 return error; 2493 } 2494 2495 int 2496 kern_ftruncate(int fd, off_t length) 2497 { 2498 struct thread *td = curthread; 2499 struct proc *p = td->td_proc; 2500 struct vattr vattr; 2501 struct vnode *vp; 2502 struct file *fp; 2503 int error; 2504 2505 if (length < 0) 2506 return(EINVAL); 2507 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 2508 return (error); 2509 if ((fp->f_flag & FWRITE) == 0) 2510 return (EINVAL); 2511 vp = (struct vnode *)fp->f_data; 2512 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2513 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2514 if (vp->v_type == VDIR) 2515 error = EISDIR; 2516 else if ((error = vn_writechk(vp)) == 0) { 2517 VATTR_NULL(&vattr); 2518 vattr.va_size = length; 2519 error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); 2520 } 2521 VOP_UNLOCK(vp, 0, td); 2522 return (error); 2523 } 2524 2525 /* 2526 * ftruncate_args(int fd, int pad, off_t length) 2527 * 2528 * Truncate a file given a file descriptor. 2529 */ 2530 int 2531 ftruncate(struct ftruncate_args *uap) 2532 { 2533 int error; 2534 2535 error = kern_ftruncate(uap->fd, uap->length); 2536 2537 return (error); 2538 } 2539 2540 /* 2541 * fsync(int fd) 2542 * 2543 * Sync an open file. 
2544 */ 2545 /* ARGSUSED */ 2546 int 2547 fsync(struct fsync_args *uap) 2548 { 2549 struct thread *td = curthread; 2550 struct proc *p = td->td_proc; 2551 struct vnode *vp; 2552 struct file *fp; 2553 vm_object_t obj; 2554 int error; 2555 2556 if ((error = getvnode(p->p_fd, uap->fd, &fp)) != 0) 2557 return (error); 2558 vp = (struct vnode *)fp->f_data; 2559 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2560 if (VOP_GETVOBJECT(vp, &obj) == 0) 2561 vm_object_page_clean(obj, 0, 0, 0); 2562 if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) == 0 && 2563 vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) && 2564 bioops.io_fsync) 2565 error = (*bioops.io_fsync)(vp); 2566 VOP_UNLOCK(vp, 0, td); 2567 return (error); 2568 } 2569 2570 int 2571 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 2572 { 2573 struct namecache *fncpd; 2574 struct namecache *tncpd; 2575 struct namecache *ncp; 2576 struct mount *mp; 2577 int error; 2578 2579 bwillwrite(); 2580 if ((error = nlookup(fromnd)) != 0) 2581 return (error); 2582 if ((fncpd = fromnd->nl_ncp->nc_parent) == NULL) 2583 return (ENOENT); 2584 cache_hold(fncpd); 2585 2586 /* 2587 * unlock the source ncp so we can lookup the target ncp without 2588 * deadlocking. The target may or may not exist so we do not check 2589 * for a target vp like kern_mkdir() and other creation functions do. 2590 * 2591 * The source and target directories are ref'd and rechecked after 2592 * everything is relocked to determine if the source or target file 2593 * has been renamed. 
2594 */ 2595 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 2596 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 2597 cache_unlock(fromnd->nl_ncp); 2598 2599 tond->nl_flags |= NLC_CREATE; 2600 if ((error = nlookup(tond)) != 0) { 2601 cache_drop(fncpd); 2602 return (error); 2603 } 2604 if ((tncpd = tond->nl_ncp->nc_parent) == NULL) { 2605 cache_drop(fncpd); 2606 return (ENOENT); 2607 } 2608 cache_hold(tncpd); 2609 2610 /* 2611 * If the source and target are the same there is nothing to do 2612 */ 2613 if (fromnd->nl_ncp == tond->nl_ncp) { 2614 cache_drop(fncpd); 2615 cache_drop(tncpd); 2616 return (0); 2617 } 2618 2619 /* 2620 * relock the source ncp 2621 */ 2622 if (cache_lock_nonblock(fromnd->nl_ncp) == 0) { 2623 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2624 } else if (fromnd->nl_ncp > tond->nl_ncp) { 2625 cache_lock(fromnd->nl_ncp); 2626 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2627 } else { 2628 cache_unlock(tond->nl_ncp); 2629 cache_lock(fromnd->nl_ncp); 2630 cache_resolve(fromnd->nl_ncp, fromnd->nl_cred); 2631 cache_lock(tond->nl_ncp); 2632 cache_resolve(tond->nl_ncp, tond->nl_cred); 2633 } 2634 fromnd->nl_flags |= NLC_NCPISLOCKED; 2635 2636 /* 2637 * make sure the parent directories linkages are the same 2638 */ 2639 if (fncpd != fromnd->nl_ncp->nc_parent || 2640 tncpd != tond->nl_ncp->nc_parent) { 2641 cache_drop(fncpd); 2642 cache_drop(tncpd); 2643 return (ENOENT); 2644 } 2645 2646 /* 2647 * Both the source and target must be within the same filesystem and 2648 * in the same filesystem as their parent directories within the 2649 * namecache topology. 2650 */ 2651 mp = fncpd->nc_mount; 2652 if (mp != tncpd->nc_mount || mp != fromnd->nl_ncp->nc_mount || 2653 mp != tond->nl_ncp->nc_mount) { 2654 cache_drop(fncpd); 2655 cache_drop(tncpd); 2656 return (EXDEV); 2657 } 2658 2659 /* 2660 * If the target exists and either the source or target is a directory, 2661 * then both must be directories. 
2662 */ 2663 if (tond->nl_ncp->nc_vp) { 2664 if (fromnd->nl_ncp->nc_vp->v_type == VDIR) { 2665 if (tond->nl_ncp->nc_vp->v_type != VDIR) 2666 error = ENOTDIR; 2667 } else if (tond->nl_ncp->nc_vp->v_type == VDIR) { 2668 error = EISDIR; 2669 } 2670 } 2671 2672 /* 2673 * You cannot rename a source into itself or a subdirectory of itself. 2674 * We check this by travsersing the target directory upwards looking 2675 * for a match against the source. 2676 */ 2677 if (error == 0) { 2678 for (ncp = tncpd; ncp; ncp = ncp->nc_parent) { 2679 if (fromnd->nl_ncp == ncp) { 2680 error = EINVAL; 2681 break; 2682 } 2683 } 2684 } 2685 2686 cache_drop(fncpd); 2687 cache_drop(tncpd); 2688 if (error) 2689 return (error); 2690 error = VOP_NRENAME(fromnd->nl_ncp, tond->nl_ncp, tond->nl_cred); 2691 return (error); 2692 } 2693 2694 /* 2695 * rename_args(char *from, char *to) 2696 * 2697 * Rename files. Source and destination must either both be directories, 2698 * or both not be directories. If target is a directory, it must be empty. 
2699 */ 2700 int 2701 rename(struct rename_args *uap) 2702 { 2703 struct nlookupdata fromnd, tond; 2704 int error; 2705 2706 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 2707 if (error == 0) { 2708 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 2709 if (error == 0) 2710 error = kern_rename(&fromnd, &tond); 2711 nlookup_done(&tond); 2712 } 2713 nlookup_done(&fromnd); 2714 return (error); 2715 } 2716 2717 int 2718 kern_mkdir(struct nlookupdata *nd, int mode) 2719 { 2720 struct thread *td = curthread; 2721 struct proc *p = td->td_proc; 2722 struct namecache *ncp; 2723 struct vnode *vp; 2724 struct vattr vattr; 2725 int error; 2726 2727 bwillwrite(); 2728 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE; 2729 if ((error = nlookup(nd)) != 0) 2730 return (error); 2731 2732 ncp = nd->nl_ncp; 2733 if (ncp->nc_vp) 2734 return (EEXIST); 2735 2736 VATTR_NULL(&vattr); 2737 vattr.va_type = VDIR; 2738 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 2739 2740 vp = NULL; 2741 error = VOP_NMKDIR(ncp, &vp, p->p_ucred, &vattr); 2742 if (error == 0) 2743 vput(vp); 2744 return (error); 2745 } 2746 2747 /* 2748 * mkdir_args(char *path, int mode) 2749 * 2750 * Make a directory file. 2751 */ 2752 /* ARGSUSED */ 2753 int 2754 mkdir(struct mkdir_args *uap) 2755 { 2756 struct nlookupdata nd; 2757 int error; 2758 2759 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2760 if (error == 0) 2761 error = kern_mkdir(&nd, uap->mode); 2762 nlookup_done(&nd); 2763 return (error); 2764 } 2765 2766 int 2767 kern_rmdir(struct nlookupdata *nd) 2768 { 2769 struct namecache *ncp; 2770 int error; 2771 2772 bwillwrite(); 2773 nd->nl_flags |= NLC_DELETE; 2774 if ((error = nlookup(nd)) != 0) 2775 return (error); 2776 2777 ncp = nd->nl_ncp; 2778 error = VOP_NRMDIR(ncp, nd->nl_cred); 2779 return (error); 2780 } 2781 2782 /* 2783 * rmdir_args(char *path) 2784 * 2785 * Remove a directory file. 
2786 */ 2787 /* ARGSUSED */ 2788 int 2789 rmdir(struct rmdir_args *uap) 2790 { 2791 struct nlookupdata nd; 2792 int error; 2793 2794 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2795 if (error == 0) 2796 error = kern_rmdir(&nd); 2797 nlookup_done(&nd); 2798 return (error); 2799 } 2800 2801 int 2802 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res) 2803 { 2804 struct thread *td = curthread; 2805 struct proc *p = td->td_proc; 2806 struct vnode *vp; 2807 struct file *fp; 2808 struct uio auio; 2809 struct iovec aiov; 2810 long loff; 2811 int error, eofflag; 2812 2813 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 2814 return (error); 2815 if ((fp->f_flag & FREAD) == 0) 2816 return (EBADF); 2817 vp = (struct vnode *)fp->f_data; 2818 unionread: 2819 if (vp->v_type != VDIR) 2820 return (EINVAL); 2821 aiov.iov_base = buf; 2822 aiov.iov_len = count; 2823 auio.uio_iov = &aiov; 2824 auio.uio_iovcnt = 1; 2825 auio.uio_rw = UIO_READ; 2826 auio.uio_segflg = UIO_USERSPACE; 2827 auio.uio_td = td; 2828 auio.uio_resid = count; 2829 /* vn_lock(vp, LK_SHARED | LK_RETRY, td); */ 2830 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); 2831 loff = auio.uio_offset = fp->f_offset; 2832 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 2833 fp->f_offset = auio.uio_offset; 2834 VOP_UNLOCK(vp, 0, td); 2835 if (error) 2836 return (error); 2837 if (count == auio.uio_resid) { 2838 if (union_dircheckp) { 2839 error = union_dircheckp(td, &vp, fp); 2840 if (error == -1) 2841 goto unionread; 2842 if (error) 2843 return (error); 2844 } 2845 if ((vp->v_flag & VROOT) && 2846 (vp->v_mount->mnt_flag & MNT_UNION)) { 2847 struct vnode *tvp = vp; 2848 vp = vp->v_mount->mnt_vnodecovered; 2849 vref(vp); 2850 fp->f_data = (caddr_t)vp; 2851 fp->f_offset = 0; 2852 vrele(tvp); 2853 goto unionread; 2854 } 2855 } 2856 if (basep) { 2857 *basep = loff; 2858 } 2859 *res = count - auio.uio_resid; 2860 return (error); 2861 } 2862 2863 /* 2864 * getdirentries_args(int fd, char 
*buf, u_int conut, long *basep) 2865 * 2866 * Read a block of directory entries in a file system independent format. 2867 */ 2868 int 2869 getdirentries(struct getdirentries_args *uap) 2870 { 2871 long base; 2872 int error; 2873 2874 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 2875 &uap->sysmsg_result); 2876 2877 if (error == 0) 2878 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 2879 return (error); 2880 } 2881 2882 /* 2883 * getdents_args(int fd, char *buf, size_t count) 2884 */ 2885 int 2886 getdents(struct getdents_args *uap) 2887 { 2888 int error; 2889 2890 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 2891 &uap->sysmsg_result); 2892 2893 return (error); 2894 } 2895 2896 /* 2897 * umask(int newmask) 2898 * 2899 * Set the mode mask for creation of filesystem nodes. 2900 * 2901 * MP SAFE 2902 */ 2903 int 2904 umask(struct umask_args *uap) 2905 { 2906 struct thread *td = curthread; 2907 struct proc *p = td->td_proc; 2908 struct filedesc *fdp; 2909 2910 fdp = p->p_fd; 2911 uap->sysmsg_result = fdp->fd_cmask; 2912 fdp->fd_cmask = uap->newmask & ALLPERMS; 2913 return (0); 2914 } 2915 2916 /* 2917 * revoke(char *path) 2918 * 2919 * Void all references to file by ripping underlying filesystem 2920 * away from vnode. 
2921 */ 2922 /* ARGSUSED */ 2923 int 2924 revoke(struct revoke_args *uap) 2925 { 2926 struct thread *td = curthread; 2927 struct nlookupdata nd; 2928 struct vattr vattr; 2929 struct vnode *vp; 2930 struct ucred *cred; 2931 int error; 2932 2933 vp = NULL; 2934 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2935 if (error == 0) 2936 error = nlookup(&nd); 2937 if (error == 0) 2938 error = cache_vref(nd.nl_ncp, nd.nl_cred, &vp); 2939 cred = crhold(nd.nl_cred); 2940 nlookup_done(&nd); 2941 if (error == 0) { 2942 if (vp->v_type != VCHR && vp->v_type != VBLK) 2943 error = EINVAL; 2944 if (error == 0) 2945 error = VOP_GETATTR(vp, &vattr, td); 2946 if (error == 0 && cred->cr_uid != vattr.va_uid) 2947 error = suser_cred(cred, PRISON_ROOT); 2948 if (error == 0 && count_udev(vp->v_udev) > 0) { 2949 if ((error = vx_lock(vp)) == 0) { 2950 VOP_REVOKE(vp, REVOKEALL); 2951 vx_unlock(vp); 2952 } 2953 } 2954 vrele(vp); 2955 } 2956 crfree(cred); 2957 return (error); 2958 } 2959 2960 /* 2961 * Convert a user file descriptor to a kernel file entry. 
2962 */ 2963 int 2964 getvnode(struct filedesc *fdp, int fd, struct file **fpp) 2965 { 2966 struct file *fp; 2967 2968 if ((u_int)fd >= fdp->fd_nfiles || 2969 (fp = fdp->fd_files[fd].fp) == NULL) 2970 return (EBADF); 2971 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) 2972 return (EINVAL); 2973 *fpp = fp; 2974 return (0); 2975 } 2976 /* 2977 * getfh_args(char *fname, fhandle_t *fhp) 2978 * 2979 * Get (NFS) file handle 2980 */ 2981 int 2982 getfh(struct getfh_args *uap) 2983 { 2984 struct thread *td = curthread; 2985 struct nlookupdata nd; 2986 fhandle_t fh; 2987 struct vnode *vp; 2988 int error; 2989 2990 /* 2991 * Must be super user 2992 */ 2993 if ((error = suser(td)) != 0) 2994 return (error); 2995 2996 vp = NULL; 2997 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 2998 if (error == 0) 2999 error = nlookup(&nd); 3000 if (error == 0) 3001 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3002 nlookup_done(&nd); 3003 if (error == 0) { 3004 bzero(&fh, sizeof(fh)); 3005 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 3006 error = VFS_VPTOFH(vp, &fh.fh_fid); 3007 vput(vp); 3008 if (error == 0) 3009 error = copyout(&fh, uap->fhp, sizeof(fh)); 3010 } 3011 return (error); 3012 } 3013 3014 /* 3015 * fhopen_args(const struct fhandle *u_fhp, int flags) 3016 * 3017 * syscall for the rpc.lockd to use to translate a NFS file handle into 3018 * an open descriptor. 3019 * 3020 * warning: do not remove the suser() call or this becomes one giant 3021 * security hole. 
*/
int
fhopen(struct fhopen_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	struct filedesc *fdp = p->p_fd;
	int fmode, mode, error, type;
	struct file *nfp;
	struct file *fp;
	int indx;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	fmode = FFLAGS(uap->flags);
	/* why not allow a non-read/write open for our lockd? */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);
	/* find the mount point */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL)
		return (ESTALE);
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
	if (error)
		return (error);
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	/* build the VREAD/VWRITE access mask that matches the open flags */
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, p->p_ucred, td);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		/* the vnode is unlocked around VOP_LEASE and then relocked */
		VOP_UNLOCK(vp, 0, td);			/* XXX */
		VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, p->p_ucred, td);
		if (error)
			goto bad;
	}

	/*
	 * VOP_OPEN needs the file pointer so it can potentially override
	 * it.
	 *
	 * WARNING! no f_ncp will be associated when fhopen()ing a directory.
	 * XXX
	 */
	if ((error = falloc(p, &nfp, NULL)) != 0)
		goto bad;
	fp = nfp;

	fp->f_data = (caddr_t)vp;
	fp->f_flag = fmode & FMASK;
	fp->f_ops = &vnode_fileops;
	fp->f_type = DTYPE_VNODE;

	error = VOP_OPEN(vp, fmode, p->p_ucred, fp, td);
	if (error) {
		/*
		 * setting f_ops this way prevents VOP_CLOSE from being
		 * called or fdrop() releasing the vp from v_data.   Since
		 * the VOP_OPEN failed we don't want to VOP_CLOSE.
		 */
		fp->f_ops = &badfileops;
		fp->f_data = NULL;
		fdrop(fp, td);
		goto bad;
	}
	if (fmode & FWRITE)
		vp->v_writecount++;

	/*
	 * The fp now owns a reference on the vnode.  We still have our own
	 * ref+lock.
	 */
	vref(vp);

	/*
	 * Make sure that a VM object is created for VMIO support.  If this
	 * fails just fdrop() normally to clean up.
	 */
	if (vn_canvmio(vp) == TRUE) {
		if ((error = vfs_object_create(vp, td)) != 0) {
			fdrop(fp, td);
			goto bad;
		}
	}

	/*
	 * The open was successful, associate it with a file descriptor.
	 *
	 * NOTE(review): on failure v_writecount is decremented here and
	 * fp is then fdrop()ped with vnode_fileops still installed.  If
	 * the close path reached from fdrop() also drops v_writecount for
	 * FWRITE files this decrements twice - confirm against the
	 * vnode_fileops close routine.
	 */
	if ((error = fsetfd(p, fp, &indx)) != 0) {
		if (fmode & FWRITE)
			vp->v_writecount--;
		fdrop(fp, td);
		goto bad;
	}

	/* O_EXLOCK / O_SHLOCK: take a whole-file flock-style lock */
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		type = F_FLOCK;
		if ((fmode & FNONBLOCK) == 0)
			type |= F_WAIT;
		/* the vnode lock is not held across VOP_ADVLOCK */
		VOP_UNLOCK(vp, 0, td);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Normally close the descriptor
			 * but handle the case where someone might have dup()d
			 * or close()d it when we weren't looking.
			 */
			if (fdp->fd_files[indx].fp == fp) {
				funsetfd(fdp, indx);
				fdrop(fp, td);
			}

			/*
			 * release our private reference.
			 */
			fdrop(fp, td);
			/* vp was unlocked above, so vrele() rather than vput() */
			vrele(vp);
			return (error);
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
		fp->f_flag |= FHASLOCK;
	}
	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
		vfs_object_create(vp, td);

	/* success: release our vnode ref+lock and our private fp reference */
	vput(vp);
	fdrop(fp, td);
	uap->sysmsg_result = indx;
	return (0);

bad:
	vput(vp);
	return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 *
 * stat the file identified by a file handle and copy the result out to
 * sb.  Restricted by suser().
 */
int
fhstat(struct fhstat_args *uap)
{
	struct thread *td = curthread;
	struct stat sb;
	fhandle_t fh;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	error = suser(td);
	if (error)
		return (error);

	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	if (error)
		return (error);

	/* an fsid that matches no mount is reported as a stale handle */
	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);
	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
		return (error);
	error = vn_stat(vp, &sb, td);
	vput(vp);
	if (error)
		return (error);
	error = copyout(&sb, uap->sb, sizeof(sb));
	return (error);
}

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 *
 * statfs the filesystem holding the file identified by a file handle
 * and copy the result out to buf.  Restricted by suser().
 */
int
fhstatfs(struct fhstatfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statfs *sp;
	struct mount *mp;
	struct vnode *vp;
	struct statfs sb;
	char *fullpath, *freepath;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = suser(td)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		return (ESTALE);

	/*
	 * When the process root lacks NCF_ROOT, mounts for which
	 * chroot_visible_mnt() fails are reported as stale.
	 */
	if (p != NULL && (p->p_fd->fd_nrdir->nc_flag & NCF_ROOT) == 0 &&
	    !chroot_visible_mnt(mp, p))
		return (ESTALE);

	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
		return (error);
	/* from here on use the mount the vnode actually lives on */
	mp = vp->v_mount;
	sp = &mp->mnt_stat;
	vput(vp);
	if ((error = VFS_STATFS(mp, sp, td)) != 0)
		return (error);

	/* rewrite f_mntonname with the path built by cache_fullpath() */
	error = cache_fullpath(p, mp->mnt_ncp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	free(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	/* when suser() fails, copy out a private copy with the fsid zeroed */
	if (suser(td)) {
		bcopy(sp, &sb, sizeof(sb));
		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
		sp = &sb;
	}
	return (copyout(sp, uap->buf, sizeof(*sp)));
}

/*
 * Syscall to push extended attribute configuration information into the
 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 */
int
extattrctl(struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/* vp is assigned but not otherwise used in this function */
	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		/* the operation targets the mount of the looked-up entry */
		mp = nd.nl_ncp->nc_mount;
		error = VFS_EXTATTRCTL(mp, uap->cmd, 
				uap->attrname, uap->arg, 
				nd.nl_td);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
 * Accepts attribute name, and a uio structure pointing to the data to set.
 * The uio is consumed in the style of writev().
The real work happens 3342 * in VOP_SETEXTATTR(). 3343 */ 3344 int 3345 extattr_set_file(struct extattr_set_file_args *uap) 3346 { 3347 char attrname[EXTATTR_MAXNAMELEN]; 3348 struct iovec aiov[UIO_SMALLIOV]; 3349 struct iovec *needfree; 3350 struct nlookupdata nd; 3351 struct iovec *iov; 3352 struct vnode *vp; 3353 struct uio auio; 3354 u_int iovlen; 3355 u_int cnt; 3356 int error; 3357 int i; 3358 3359 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3360 if (error) 3361 return (error); 3362 3363 vp = NULL; 3364 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3365 if (error == 0) 3366 error = nlookup(&nd); 3367 if (error == 0) 3368 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3369 if (error) { 3370 nlookup_done(&nd); 3371 return (error); 3372 } 3373 3374 needfree = NULL; 3375 iovlen = uap->iovcnt * sizeof(struct iovec); 3376 if (uap->iovcnt > UIO_SMALLIOV) { 3377 if (uap->iovcnt > UIO_MAXIOV) { 3378 error = EINVAL; 3379 goto done; 3380 } 3381 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3382 needfree = iov; 3383 } else { 3384 iov = aiov; 3385 } 3386 auio.uio_iov = iov; 3387 auio.uio_iovcnt = uap->iovcnt; 3388 auio.uio_rw = UIO_WRITE; 3389 auio.uio_segflg = UIO_USERSPACE; 3390 auio.uio_td = nd.nl_td; 3391 auio.uio_offset = 0; 3392 if ((error = copyin(uap->iovp, iov, iovlen))) 3393 goto done; 3394 auio.uio_resid = 0; 3395 for (i = 0; i < uap->iovcnt; i++) { 3396 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3397 error = EINVAL; 3398 goto done; 3399 } 3400 auio.uio_resid += iov->iov_len; 3401 iov++; 3402 } 3403 cnt = auio.uio_resid; 3404 error = VOP_SETEXTATTR(vp, attrname, &auio, nd.nl_cred, nd.nl_td); 3405 cnt -= auio.uio_resid; 3406 uap->sysmsg_result = cnt; 3407 done: 3408 vput(vp); 3409 nlookup_done(&nd); 3410 if (needfree) 3411 FREE(needfree, M_IOV); 3412 return (error); 3413 } 3414 3415 /* 3416 * Syscall to get a named extended attribute on a file or directory. 
3417 * Accepts attribute name, and a uio structure pointing to a buffer for the 3418 * data. The uio is consumed in the style of readv(). The real work 3419 * happens in VOP_GETEXTATTR(); 3420 */ 3421 int 3422 extattr_get_file(struct extattr_get_file_args *uap) 3423 { 3424 char attrname[EXTATTR_MAXNAMELEN]; 3425 struct iovec aiov[UIO_SMALLIOV]; 3426 struct iovec *needfree; 3427 struct nlookupdata nd; 3428 struct iovec *iov; 3429 struct vnode *vp; 3430 struct uio auio; 3431 u_int iovlen; 3432 u_int cnt; 3433 int error; 3434 int i; 3435 3436 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3437 if (error) 3438 return (error); 3439 3440 vp = NULL; 3441 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3442 if (error == 0) 3443 error = nlookup(&nd); 3444 if (error == 0) 3445 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3446 if (error) { 3447 nlookup_done(&nd); 3448 return (error); 3449 } 3450 3451 iovlen = uap->iovcnt * sizeof (struct iovec); 3452 needfree = NULL; 3453 if (uap->iovcnt > UIO_SMALLIOV) { 3454 if (uap->iovcnt > UIO_MAXIOV) { 3455 error = EINVAL; 3456 goto done; 3457 } 3458 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3459 needfree = iov; 3460 } else { 3461 iov = aiov; 3462 } 3463 auio.uio_iov = iov; 3464 auio.uio_iovcnt = uap->iovcnt; 3465 auio.uio_rw = UIO_READ; 3466 auio.uio_segflg = UIO_USERSPACE; 3467 auio.uio_td = nd.nl_td; 3468 auio.uio_offset = 0; 3469 if ((error = copyin(uap->iovp, iov, iovlen))) 3470 goto done; 3471 auio.uio_resid = 0; 3472 for (i = 0; i < uap->iovcnt; i++) { 3473 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3474 error = EINVAL; 3475 goto done; 3476 } 3477 auio.uio_resid += iov->iov_len; 3478 iov++; 3479 } 3480 cnt = auio.uio_resid; 3481 error = VOP_GETEXTATTR(vp, attrname, &auio, nd.nl_cred, nd.nl_td); 3482 cnt -= auio.uio_resid; 3483 uap->sysmsg_result = cnt; 3484 done: 3485 vput(vp); 3486 nlookup_done(&nd); 3487 if (needfree) 3488 FREE(needfree, M_IOV); 3489 return(error); 
3490 } 3491 3492 /* 3493 * Syscall to delete a named extended attribute from a file or directory. 3494 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 3495 */ 3496 int 3497 extattr_delete_file(struct extattr_delete_file_args *uap) 3498 { 3499 char attrname[EXTATTR_MAXNAMELEN]; 3500 struct nlookupdata nd; 3501 struct vnode *vp; 3502 int error; 3503 3504 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 3505 if (error) 3506 return(error); 3507 3508 vp = NULL; 3509 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3510 if (error == 0) 3511 error = nlookup(&nd); 3512 if (error == 0) 3513 error = cache_vget(nd.nl_ncp, nd.nl_cred, LK_EXCLUSIVE, &vp); 3514 if (error) { 3515 nlookup_done(&nd); 3516 return (error); 3517 } 3518 3519 error = VOP_SETEXTATTR(vp, attrname, NULL, nd.nl_cred, nd.nl_td); 3520 vput(vp); 3521 nlookup_done(&nd); 3522 return(error); 3523 } 3524 3525 /* 3526 * print out statistics from the current status of the buffer pool 3527 * this can be toggeled by the system control option debug.syncprt 3528 */ 3529 #ifdef DEBUG 3530 void 3531 vfs_bufstats(void) 3532 { 3533 int i, j, count; 3534 struct buf *bp; 3535 struct bqueues *dp; 3536 int counts[(MAXBSIZE / PAGE_SIZE) + 1]; 3537 static char *bname[3] = { "LOCKED", "LRU", "AGE" }; 3538 3539 for (dp = bufqueues, i = 0; dp < &bufqueues[3]; dp++, i++) { 3540 count = 0; 3541 for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++) 3542 counts[j] = 0; 3543 crit_enter(); 3544 TAILQ_FOREACH(bp, dp, b_freelist) { 3545 counts[bp->b_bufsize/PAGE_SIZE]++; 3546 count++; 3547 } 3548 crit_exit(); 3549 printf("%s: total-%d", bname[i], count); 3550 for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++) 3551 if (counts[j] != 0) 3552 printf(", %d-%d", j * PAGE_SIZE, counts[j]); 3553 printf("\n"); 3554 } 3555 } 3556 #endif 3557 3558 static int 3559 chroot_visible_mnt(struct mount *mp, struct proc *p) 3560 { 3561 struct namecache *ncp; 3562 /* 3563 * First check if this file system is below 3564 * the chroot 
path. 3565 */ 3566 ncp = mp->mnt_ncp; 3567 while (ncp != NULL && ncp != p->p_fd->fd_nrdir) 3568 ncp = ncp->nc_parent; 3569 if (ncp == NULL) { 3570 /* 3571 * This is not below the chroot path. 3572 * 3573 * Check if the chroot path is on the same filesystem, 3574 * by determing if we have to cross a mount point 3575 * before reaching mp->mnt_ncp. 3576 */ 3577 ncp = p->p_fd->fd_nrdir; 3578 while (ncp != NULL && ncp != mp->mnt_ncp) { 3579 if (ncp->nc_flag & NCF_MOUNTPT) { 3580 ncp = NULL; 3581 break; 3582 } 3583 ncp = ncp->nc_parent; 3584 } 3585 } 3586 return(ncp != NULL); 3587 } 3588