1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 39 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 40 * $DragonFly: src/sys/kern/vfs_syscalls.c,v 1.37 2004/05/26 19:09:04 dillon Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/buf.h> 46 #include <sys/conf.h> 47 #include <sys/sysent.h> 48 #include <sys/malloc.h> 49 #include <sys/mount.h> 50 #include <sys/sysproto.h> 51 #include <sys/filedesc.h> 52 #include <sys/kernel.h> 53 #include <sys/fcntl.h> 54 #include <sys/file.h> 55 #include <sys/linker.h> 56 #include <sys/stat.h> 57 #include <sys/unistd.h> 58 #include <sys/vnode.h> 59 #include <sys/proc.h> 60 #include <sys/namei.h> 61 #include <sys/dirent.h> 62 #include <sys/extattr.h> 63 #include <sys/kern_syscall.h> 64 65 #include <machine/limits.h> 66 #include <vfs/union/union.h> 67 #include <sys/sysctl.h> 68 #include <vm/vm.h> 69 #include <vm/vm_object.h> 70 #include <vm/vm_zone.h> 71 #include <vm/vm_page.h> 72 73 #include <sys/file2.h> 74 75 static int checkvp_chdir (struct vnode *vn, struct thread *td); 76 static void checkdirs (struct vnode *olddp); 77 static int chroot_refuse_vdir_fds (struct filedesc *fdp); 78 static int getutimes (const struct timeval *, struct timespec *); 79 static int setfown (struct vnode *, uid_t, gid_t); 80 static int setfmode (struct vnode *, int); 81 static int setfflags (struct vnode *, int); 82 static int setutimes (struct vnode *, 
const struct timespec *, int); 83 static int usermount = 0; /* if 1, non-root can mount fs. */ 84 85 int (*union_dircheckp) (struct thread *, struct vnode **, struct file *); 86 87 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); 88 89 /* 90 * Virtual File System System Calls 91 */ 92 93 /* 94 * Mount a file system. 95 */ 96 /* 97 * mount_args(char *type, char *path, int flags, caddr_t data) 98 */ 99 /* ARGSUSED */ 100 int 101 mount(struct mount_args *uap) 102 { 103 struct thread *td = curthread; 104 struct proc *p = td->td_proc; 105 struct vnode *vp; 106 struct mount *mp; 107 struct vfsconf *vfsp; 108 int error, flag = 0, flag2 = 0; 109 struct vattr va; 110 struct nameidata nd; 111 char fstypename[MFSNAMELEN]; 112 lwkt_tokref vlock; 113 lwkt_tokref ilock; 114 115 KKASSERT(p); 116 if (p->p_ucred->cr_prison != NULL) 117 return (EPERM); 118 if (usermount == 0 && (error = suser(td))) 119 return (error); 120 /* 121 * Do not allow NFS export by non-root users. 122 */ 123 if (SCARG(uap, flags) & MNT_EXPORTED) { 124 error = suser(td); 125 if (error) 126 return (error); 127 } 128 /* 129 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 130 */ 131 if (suser(td)) 132 SCARG(uap, flags) |= MNT_NOSUID | MNT_NODEV; 133 /* 134 * Get vnode to be covered 135 */ 136 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, 137 SCARG(uap, path), td); 138 if ((error = namei(&nd)) != 0) 139 return (error); 140 NDFREE(&nd, NDF_ONLY_PNBUF); 141 vp = nd.ni_vp; 142 if (SCARG(uap, flags) & MNT_UPDATE) { 143 if ((vp->v_flag & VROOT) == 0) { 144 vput(vp); 145 return (EINVAL); 146 } 147 mp = vp->v_mount; 148 flag = mp->mnt_flag; 149 flag2 = mp->mnt_kern_flag; 150 /* 151 * We only allow the filesystem to be reloaded if it 152 * is currently mounted read-only. 
153 */ 154 if ((SCARG(uap, flags) & MNT_RELOAD) && 155 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 156 vput(vp); 157 return (EOPNOTSUPP); /* Needs translation */ 158 } 159 /* 160 * Only root, or the user that did the original mount is 161 * permitted to update it. 162 */ 163 if (mp->mnt_stat.f_owner != p->p_ucred->cr_uid && 164 (error = suser(td))) { 165 vput(vp); 166 return (error); 167 } 168 if (vfs_busy(mp, LK_NOWAIT, NULL, td)) { 169 vput(vp); 170 return (EBUSY); 171 } 172 lwkt_gettoken(&vlock, vp->v_interlock); 173 if ((vp->v_flag & VMOUNT) != 0 || 174 vp->v_mountedhere != NULL) { 175 lwkt_reltoken(&vlock); 176 vfs_unbusy(mp, td); 177 vput(vp); 178 return (EBUSY); 179 } 180 vp->v_flag |= VMOUNT; 181 lwkt_reltoken(&vlock); 182 mp->mnt_flag |= 183 SCARG(uap, flags) & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 184 VOP_UNLOCK(vp, NULL, 0, td); 185 goto update; 186 } 187 /* 188 * If the user is not root, ensure that they own the directory 189 * onto which we are attempting to mount. 190 */ 191 if ((error = VOP_GETATTR(vp, &va, td)) || 192 (va.va_uid != p->p_ucred->cr_uid && 193 (error = suser(td)))) { 194 vput(vp); 195 return (error); 196 } 197 if ((error = vinvalbuf(vp, V_SAVE, td, 0, 0)) != 0) { 198 vput(vp); 199 return (error); 200 } 201 if (vp->v_type != VDIR) { 202 vput(vp); 203 return (ENOTDIR); 204 } 205 if ((error = copyinstr(SCARG(uap, type), fstypename, MFSNAMELEN, NULL)) != 0) { 206 vput(vp); 207 return (error); 208 } 209 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 210 if (!strcmp(vfsp->vfc_name, fstypename)) 211 break; 212 if (vfsp == NULL) { 213 linker_file_t lf; 214 215 /* Only load modules for root (very important!) 
*/ 216 if ((error = suser(td)) != 0) { 217 vput(vp); 218 return error; 219 } 220 error = linker_load_file(fstypename, &lf); 221 if (error || lf == NULL) { 222 vput(vp); 223 if (lf == NULL) 224 error = ENODEV; 225 return error; 226 } 227 lf->userrefs++; 228 /* lookup again, see if the VFS was loaded */ 229 for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) 230 if (!strcmp(vfsp->vfc_name, fstypename)) 231 break; 232 if (vfsp == NULL) { 233 lf->userrefs--; 234 linker_file_unload(lf); 235 vput(vp); 236 return (ENODEV); 237 } 238 } 239 lwkt_gettoken(&vlock, vp->v_interlock); 240 if ((vp->v_flag & VMOUNT) != 0 || 241 vp->v_mountedhere != NULL) { 242 lwkt_reltoken(&vlock); 243 vput(vp); 244 return (EBUSY); 245 } 246 vp->v_flag |= VMOUNT; 247 lwkt_reltoken(&vlock); 248 249 /* 250 * Allocate and initialize the filesystem. 251 */ 252 mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK); 253 bzero((char *)mp, (u_long)sizeof(struct mount)); 254 TAILQ_INIT(&mp->mnt_nvnodelist); 255 TAILQ_INIT(&mp->mnt_reservedvnlist); 256 mp->mnt_nvnodelistsize = 0; 257 lockinit(&mp->mnt_lock, 0, "vfslock", 0, LK_NOPAUSE); 258 vfs_busy(mp, LK_NOWAIT, NULL, td); 259 mp->mnt_op = vfsp->vfc_vfsops; 260 mp->mnt_vfc = vfsp; 261 vfsp->vfc_refcount++; 262 mp->mnt_stat.f_type = vfsp->vfc_typenum; 263 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 264 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 265 mp->mnt_vnodecovered = vp; 266 mp->mnt_stat.f_owner = p->p_ucred->cr_uid; 267 mp->mnt_iosize_max = DFLTPHYS; 268 VOP_UNLOCK(vp, NULL, 0, td); 269 update: 270 /* 271 * Set the mount level flags. 
272 */ 273 if (SCARG(uap, flags) & MNT_RDONLY) 274 mp->mnt_flag |= MNT_RDONLY; 275 else if (mp->mnt_flag & MNT_RDONLY) 276 mp->mnt_kern_flag |= MNTK_WANTRDWR; 277 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 278 MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | 279 MNT_NOSYMFOLLOW | MNT_IGNORE | 280 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 281 mp->mnt_flag |= SCARG(uap, flags) & (MNT_NOSUID | MNT_NOEXEC | 282 MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | 283 MNT_NOSYMFOLLOW | MNT_IGNORE | 284 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); 285 /* 286 * Mount the filesystem. 287 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 288 * get. No freeing of cn_pnbuf. 289 */ 290 error = VFS_MOUNT(mp, SCARG(uap, path), SCARG(uap, data), &nd, td); 291 if (mp->mnt_flag & MNT_UPDATE) { 292 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 293 mp->mnt_flag &= ~MNT_RDONLY; 294 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 295 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 296 if (error) { 297 mp->mnt_flag = flag; 298 mp->mnt_kern_flag = flag2; 299 } 300 vfs_unbusy(mp, td); 301 lwkt_gettoken(&vlock, vp->v_interlock); 302 vp->v_flag &= ~VMOUNT; 303 lwkt_reltoken(&vlock); 304 vrele(vp); 305 return (error); 306 } 307 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 308 /* 309 * Put the new filesystem on the mount list after root. 
310 */ 311 cache_purge(vp); 312 if (!error) { 313 lwkt_gettoken(&vlock, vp->v_interlock); 314 vp->v_flag &= ~VMOUNT; 315 vp->v_mountedhere = mp; 316 lwkt_reltoken(&vlock); 317 lwkt_gettoken(&ilock, &mountlist_token); 318 TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); 319 lwkt_reltoken(&ilock); 320 checkdirs(vp); 321 VOP_UNLOCK(vp, NULL, 0, td); 322 error = vfs_allocate_syncvnode(mp); 323 vfs_unbusy(mp, td); 324 if ((error = VFS_START(mp, 0, td)) != 0) 325 vrele(vp); 326 } else { 327 lwkt_gettoken(&vlock, vp->v_interlock); 328 vp->v_flag &= ~VMOUNT; 329 lwkt_reltoken(&vlock); 330 mp->mnt_vfc->vfc_refcount--; 331 vfs_unbusy(mp, td); 332 free(mp, M_MOUNT); 333 vput(vp); 334 } 335 return (error); 336 } 337 338 /* 339 * Scan all active processes to see if any of them have a current 340 * or root directory onto which the new filesystem has just been 341 * mounted. If so, replace them with the new mount point. 342 */ 343 static void 344 checkdirs(struct vnode *olddp) 345 { 346 struct filedesc *fdp; 347 struct vnode *newdp; 348 struct proc *p; 349 350 if (olddp->v_usecount == 1) 351 return; 352 if (VFS_ROOT(olddp->v_mountedhere, &newdp)) 353 panic("mount: lost mount"); 354 FOREACH_PROC_IN_SYSTEM(p) { 355 fdp = p->p_fd; 356 if (fdp->fd_cdir == olddp) { 357 vrele(fdp->fd_cdir); 358 vref(newdp); 359 fdp->fd_cdir = newdp; 360 } 361 if (fdp->fd_rdir == olddp) { 362 vrele(fdp->fd_rdir); 363 vref(newdp); 364 fdp->fd_rdir = newdp; 365 } 366 } 367 if (rootvnode == olddp) { 368 vrele(rootvnode); 369 vref(newdp); 370 rootvnode = newdp; 371 vfs_cache_setroot(rootvnode); 372 } 373 vput(newdp); 374 } 375 376 /* 377 * Unmount a file system. 378 * 379 * Note: unmount takes a path to the vnode mounted on as argument, 380 * not special file (as before). 
381 */ 382 /* 383 * umount_args(char *path, int flags) 384 */ 385 /* ARGSUSED */ 386 int 387 unmount(struct unmount_args *uap) 388 { 389 struct thread *td = curthread; 390 struct proc *p = td->td_proc; 391 struct vnode *vp; 392 struct mount *mp; 393 int error; 394 struct nameidata nd; 395 396 KKASSERT(p); 397 if (p->p_ucred->cr_prison != NULL) 398 return (EPERM); 399 if (usermount == 0 && (error = suser(td))) 400 return (error); 401 402 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, 403 SCARG(uap, path), td); 404 if ((error = namei(&nd)) != 0) 405 return (error); 406 vp = nd.ni_vp; 407 NDFREE(&nd, NDF_ONLY_PNBUF); 408 mp = vp->v_mount; 409 410 /* 411 * Only root, or the user that did the original mount is 412 * permitted to unmount this filesystem. 413 */ 414 if ((mp->mnt_stat.f_owner != p->p_ucred->cr_uid) && 415 (error = suser(td))) { 416 vput(vp); 417 return (error); 418 } 419 420 /* 421 * Don't allow unmounting the root file system. 422 */ 423 if (mp->mnt_flag & MNT_ROOTFS) { 424 vput(vp); 425 return (EINVAL); 426 } 427 428 /* 429 * Must be the root of the filesystem 430 */ 431 if ((vp->v_flag & VROOT) == 0) { 432 vput(vp); 433 return (EINVAL); 434 } 435 vput(vp); 436 return (dounmount(mp, SCARG(uap, flags), td)); 437 } 438 439 /* 440 * Do the actual file system unmount. 441 */ 442 int 443 dounmount(struct mount *mp, int flags, struct thread *td) 444 { 445 struct vnode *coveredvp; 446 int error; 447 int async_flag; 448 lwkt_tokref ilock; 449 450 lwkt_gettoken(&ilock, &mountlist_token); 451 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 452 lwkt_reltoken(&ilock); 453 return (EBUSY); 454 } 455 mp->mnt_kern_flag |= MNTK_UNMOUNT; 456 /* Allow filesystems to detect that a forced unmount is in progress. */ 457 if (flags & MNT_FORCE) 458 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 459 error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK | 460 ((flags & MNT_FORCE) ? 
0 : LK_NOWAIT), &ilock, td); 461 if (error) { 462 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 463 if (mp->mnt_kern_flag & MNTK_MWAIT) 464 wakeup(mp); 465 return (error); 466 } 467 468 if (mp->mnt_flag & MNT_EXPUBLIC) 469 vfs_setpublicfs(NULL, NULL, NULL); 470 471 vfs_msync(mp, MNT_WAIT); 472 async_flag = mp->mnt_flag & MNT_ASYNC; 473 mp->mnt_flag &=~ MNT_ASYNC; 474 cache_purgevfs(mp); /* remove cache entries for this file sys */ 475 if (mp->mnt_syncer != NULL) 476 vrele(mp->mnt_syncer); 477 if (((mp->mnt_flag & MNT_RDONLY) || 478 (error = VFS_SYNC(mp, MNT_WAIT, td)) == 0) || 479 (flags & MNT_FORCE)) 480 error = VFS_UNMOUNT(mp, flags, td); 481 lwkt_gettokref(&ilock); 482 if (error) { 483 if (mp->mnt_syncer == NULL) 484 vfs_allocate_syncvnode(mp); 485 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 486 mp->mnt_flag |= async_flag; 487 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK | LK_REENABLE, 488 &ilock, td); 489 if (mp->mnt_kern_flag & MNTK_MWAIT) 490 wakeup(mp); 491 return (error); 492 } 493 TAILQ_REMOVE(&mountlist, mp, mnt_list); 494 if ((coveredvp = mp->mnt_vnodecovered) != NULLVP) { 495 coveredvp->v_mountedhere = NULL; 496 vrele(coveredvp); 497 } 498 mp->mnt_vfc->vfc_refcount--; 499 if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) 500 panic("unmount: dangling vnode"); 501 lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &ilock, td); 502 if (mp->mnt_kern_flag & MNTK_MWAIT) 503 wakeup(mp); 504 free(mp, M_MOUNT); 505 return (0); 506 } 507 508 /* 509 * Sync each mounted filesystem. 
510 */ 511 512 #ifdef DEBUG 513 static int syncprt = 0; 514 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 515 #endif /* DEBUG */ 516 517 /* ARGSUSED */ 518 int 519 sync(struct sync_args *uap) 520 { 521 struct thread *td = curthread; 522 struct mount *mp, *nmp; 523 lwkt_tokref ilock; 524 int asyncflag; 525 526 lwkt_gettoken(&ilock, &mountlist_token); 527 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 528 if (vfs_busy(mp, LK_NOWAIT, &ilock, td)) { 529 nmp = TAILQ_NEXT(mp, mnt_list); 530 continue; 531 } 532 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 533 asyncflag = mp->mnt_flag & MNT_ASYNC; 534 mp->mnt_flag &= ~MNT_ASYNC; 535 vfs_msync(mp, MNT_NOWAIT); 536 VFS_SYNC(mp, MNT_NOWAIT, td); 537 mp->mnt_flag |= asyncflag; 538 } 539 lwkt_gettokref(&ilock); 540 nmp = TAILQ_NEXT(mp, mnt_list); 541 vfs_unbusy(mp, td); 542 } 543 lwkt_reltoken(&ilock); 544 /* 545 * print out buffer pool stat information on each sync() call. 546 */ 547 #ifdef DEBUG 548 if (syncprt) 549 vfs_bufstats(); 550 #endif /* DEBUG */ 551 return (0); 552 } 553 554 /* XXX PRISON: could be per prison flag */ 555 static int prison_quotas; 556 #if 0 557 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 558 #endif 559 560 /* 561 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 562 * 563 * Change filesystem quotas. 
564 */ 565 /* ARGSUSED */ 566 int 567 quotactl(struct quotactl_args *uap) 568 { 569 struct thread *td = curthread; 570 struct proc *p = td->td_proc; 571 struct mount *mp; 572 int error; 573 struct nameidata nd; 574 575 KKASSERT(p); 576 if (p->p_ucred->cr_prison && !prison_quotas) 577 return (EPERM); 578 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, 579 SCARG(uap, path), td); 580 if ((error = namei(&nd)) != 0) 581 return (error); 582 mp = nd.ni_vp->v_mount; 583 NDFREE(&nd, NDF_ONLY_PNBUF); 584 vrele(nd.ni_vp); 585 return (VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), 586 SCARG(uap, arg), td)); 587 } 588 589 int 590 kern_statfs(struct nameidata *nd, struct statfs *buf) 591 { 592 struct thread *td = curthread; 593 struct mount *mp; 594 struct statfs *sp; 595 int error; 596 597 error = namei(nd); 598 if (error) 599 return (error); 600 mp = nd->ni_vp->v_mount; 601 sp = &mp->mnt_stat; 602 NDFREE(nd, NDF_ONLY_PNBUF); 603 vrele(nd->ni_vp); 604 error = VFS_STATFS(mp, sp, td); 605 if (error) 606 return (error); 607 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 608 bcopy(sp, buf, sizeof(*buf)); 609 /* Only root should have access to the fsid's. */ 610 if (suser(td)) 611 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 612 return (0); 613 } 614 615 /* 616 * statfs_args(char *path, struct statfs *buf) 617 * 618 * Get filesystem statistics. 
619 */ 620 int 621 statfs(struct statfs_args *uap) 622 { 623 struct thread *td = curthread; 624 struct nameidata nd; 625 struct statfs buf; 626 int error; 627 628 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td); 629 630 error = kern_statfs(&nd, &buf); 631 632 if (error == 0) 633 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 634 return (error); 635 } 636 637 int 638 kern_fstatfs(int fd, struct statfs *buf) 639 { 640 struct thread *td = curthread; 641 struct proc *p = td->td_proc; 642 struct file *fp; 643 struct mount *mp; 644 struct statfs *sp; 645 int error; 646 647 KKASSERT(p); 648 error = getvnode(p->p_fd, fd, &fp); 649 if (error) 650 return (error); 651 mp = ((struct vnode *)fp->f_data)->v_mount; 652 if (mp == NULL) 653 return (EBADF); 654 sp = &mp->mnt_stat; 655 error = VFS_STATFS(mp, sp, td); 656 if (error) 657 return (error); 658 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 659 bcopy(sp, buf, sizeof(*buf)); 660 /* Only root should have access to the fsid's. */ 661 if (suser(td)) 662 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 663 return (0); 664 } 665 666 /* 667 * fstatfs_args(int fd, struct statfs *buf) 668 * 669 * Get filesystem statistics. 670 */ 671 int 672 fstatfs(struct fstatfs_args *uap) 673 { 674 struct statfs buf; 675 int error; 676 677 error = kern_fstatfs(uap->fd, &buf); 678 679 if (error == 0) 680 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 681 return (error); 682 } 683 684 /* 685 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 686 * 687 * Get statistics on all filesystems. 
688 */ 689 /* ARGSUSED */ 690 int 691 getfsstat(struct getfsstat_args *uap) 692 { 693 struct thread *td = curthread; 694 struct mount *mp, *nmp; 695 struct statfs *sp; 696 caddr_t sfsp; 697 lwkt_tokref ilock; 698 long count, maxcount, error; 699 700 maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); 701 sfsp = (caddr_t)SCARG(uap, buf); 702 count = 0; 703 lwkt_gettoken(&ilock, &mountlist_token); 704 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 705 if (vfs_busy(mp, LK_NOWAIT, &ilock, td)) { 706 nmp = TAILQ_NEXT(mp, mnt_list); 707 continue; 708 } 709 if (sfsp && count < maxcount) { 710 sp = &mp->mnt_stat; 711 /* 712 * If MNT_NOWAIT or MNT_LAZY is specified, do not 713 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 714 * overrides MNT_WAIT. 715 */ 716 if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || 717 (SCARG(uap, flags) & MNT_WAIT)) && 718 (error = VFS_STATFS(mp, sp, td))) { 719 lwkt_gettokref(&ilock); 720 nmp = TAILQ_NEXT(mp, mnt_list); 721 vfs_unbusy(mp, td); 722 continue; 723 } 724 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 725 error = copyout(sp, sfsp, sizeof(*sp)); 726 if (error) { 727 vfs_unbusy(mp, td); 728 return (error); 729 } 730 sfsp += sizeof(*sp); 731 } 732 count++; 733 lwkt_gettokref(&ilock); 734 nmp = TAILQ_NEXT(mp, mnt_list); 735 vfs_unbusy(mp, td); 736 } 737 lwkt_reltoken(&ilock); 738 if (sfsp && count > maxcount) 739 uap->sysmsg_result = maxcount; 740 else 741 uap->sysmsg_result = count; 742 return (0); 743 } 744 745 /* 746 * fchdir_args(int fd) 747 * 748 * Change current working directory to a given file descriptor. 
749 */ 750 /* ARGSUSED */ 751 int 752 fchdir(struct fchdir_args *uap) 753 { 754 struct thread *td = curthread; 755 struct proc *p = td->td_proc; 756 struct filedesc *fdp = p->p_fd; 757 struct vnode *vp, *tdp; 758 struct mount *mp; 759 struct file *fp; 760 int error; 761 762 if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) 763 return (error); 764 vp = (struct vnode *)fp->f_data; 765 vref(vp); 766 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 767 if (vp->v_type != VDIR) 768 error = ENOTDIR; 769 else 770 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, td); 771 while (!error && (mp = vp->v_mountedhere) != NULL) { 772 if (vfs_busy(mp, 0, NULL, td)) 773 continue; 774 error = VFS_ROOT(mp, &tdp); 775 vfs_unbusy(mp, td); 776 if (error) 777 break; 778 vput(vp); 779 vp = tdp; 780 } 781 if (error) { 782 vput(vp); 783 return (error); 784 } 785 VOP_UNLOCK(vp, NULL, 0, td); 786 vrele(fdp->fd_cdir); 787 fdp->fd_cdir = vp; 788 return (0); 789 } 790 791 int 792 kern_chdir(struct nameidata *nd) 793 { 794 struct thread *td = curthread; 795 struct proc *p = td->td_proc; 796 struct filedesc *fdp = p->p_fd; 797 int error; 798 799 if ((error = namei(nd)) != 0) 800 return (error); 801 if ((error = checkvp_chdir(nd->ni_vp, td)) == 0) { 802 vrele(fdp->fd_cdir); 803 fdp->fd_cdir = nd->ni_vp; 804 vref(fdp->fd_cdir); 805 } 806 NDFREE(nd, ~(NDF_NO_FREE_PNBUF | NDF_NO_VP_PUT)); 807 return (error); 808 } 809 810 /* 811 * chdir_args(char *path) 812 * 813 * Change current working directory (``.''). 814 */ 815 int 816 chdir(struct chdir_args *uap) 817 { 818 struct thread *td = curthread; 819 struct nameidata nd; 820 int error; 821 822 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, 823 uap->path, td); 824 825 error = kern_chdir(&nd); 826 827 return (error); 828 } 829 830 /* 831 * Helper function for raised chroot(2) security function: Refuse if 832 * any filedescriptors are open directories. 
833 */ 834 static int 835 chroot_refuse_vdir_fds(fdp) 836 struct filedesc *fdp; 837 { 838 struct vnode *vp; 839 struct file *fp; 840 int error; 841 int fd; 842 843 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 844 error = getvnode(fdp, fd, &fp); 845 if (error) 846 continue; 847 vp = (struct vnode *)fp->f_data; 848 if (vp->v_type != VDIR) 849 continue; 850 return(EPERM); 851 } 852 return (0); 853 } 854 855 /* 856 * This sysctl determines if we will allow a process to chroot(2) if it 857 * has a directory open: 858 * 0: disallowed for all processes. 859 * 1: allowed for processes that were not already chroot(2)'ed. 860 * 2: allowed for all processes. 861 */ 862 863 static int chroot_allow_open_directories = 1; 864 865 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 866 &chroot_allow_open_directories, 0, ""); 867 868 /* 869 * Chroot to the specified vnode. vp must be locked and referenced on 870 * call, and will be left locked and referenced on return. This routine 871 * may acquire additional refs on the vnode when associating it with 872 * the process's root and/or jail dirs. 873 */ 874 int 875 kern_chroot(struct vnode *vp) 876 { 877 struct thread *td = curthread; 878 struct proc *p = td->td_proc; 879 struct filedesc *fdp = p->p_fd; 880 int error; 881 882 /* 883 * Only root can chroot 884 */ 885 if ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0) 886 return (error); 887 888 /* 889 * Disallow open directory descriptors (fchdir() breakouts). 890 */ 891 if (chroot_allow_open_directories == 0 || 892 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 893 if ((error = chroot_refuse_vdir_fds(fdp)) != 0) 894 return (error); 895 } 896 897 /* 898 * Check the validity of vp as a directory to change to and 899 * associate it with rdir/jdir. 
900 */ 901 if ((error = checkvp_chdir(vp, td)) == 0) { 902 vrele(fdp->fd_rdir); 903 fdp->fd_rdir = vp; 904 vref(fdp->fd_rdir); 905 if (fdp->fd_jdir == NULL) { 906 fdp->fd_jdir = vp; 907 vref(fdp->fd_jdir); 908 } 909 } 910 return (error); 911 } 912 913 /* 914 * chroot_args(char *path) 915 * 916 * Change notion of root (``/'') directory. 917 */ 918 /* ARGSUSED */ 919 int 920 chroot(struct chroot_args *uap) 921 { 922 struct thread *td = curthread; 923 struct nameidata nd; 924 int error; 925 926 KKASSERT(td->td_proc); 927 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, 928 SCARG(uap, path), td); 929 if ((error = namei(&nd)) == 0) { 930 error = kern_chroot(nd.ni_vp); 931 NDFREE(&nd, ~(NDF_NO_FREE_PNBUF | NDF_NO_VP_PUT)); 932 } 933 return (error); 934 } 935 936 /* 937 * Common routine for chroot and chdir. Given a locked, referenced vnode, 938 * determine whether it is legal to chdir to the vnode. The vnode's state 939 * is not changed by this call. 940 */ 941 int 942 checkvp_chdir(struct vnode *vp, struct thread *td) 943 { 944 int error; 945 946 if (vp->v_type != VDIR) 947 error = ENOTDIR; 948 else 949 error = VOP_ACCESS(vp, VEXEC, td->td_proc->p_ucred, td); 950 return (error); 951 } 952 953 int 954 kern_open(struct nameidata *nd, int oflags, int mode, int *res) 955 { 956 struct thread *td = curthread; 957 struct proc *p = td->td_proc; 958 struct filedesc *fdp = p->p_fd; 959 struct file *fp; 960 struct vnode *vp; 961 int cmode, flags; 962 struct file *nfp; 963 int type, indx, error; 964 struct flock lf; 965 966 if ((oflags & O_ACCMODE) == O_ACCMODE) 967 return (EINVAL); 968 flags = FFLAGS(oflags); 969 error = falloc(p, &nfp, &indx); 970 if (error) 971 return (error); 972 fp = nfp; 973 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; 974 p->p_dupfd = -indx - 1; /* XXX check for fdopen */ 975 /* 976 * Bump the ref count to prevent another process from closing 977 * the descriptor while we are blocked in vn_open() 978 */ 979 fhold(fp); 980 error 
= vn_open(nd, flags, cmode); 981 if (error) { 982 /* 983 * release our own reference 984 */ 985 fdrop(fp, td); 986 987 /* 988 * handle special fdopen() case. bleh. dupfdopen() is 989 * responsible for dropping the old contents of ofiles[indx] 990 * if it succeeds. 991 */ 992 if ((error == ENODEV || error == ENXIO) && 993 p->p_dupfd >= 0 && /* XXX from fdopen */ 994 (error = 995 dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { 996 *res = indx; 997 return (0); 998 } 999 /* 1000 * Clean up the descriptor, but only if another thread hadn't 1001 * replaced or closed it. 1002 */ 1003 if (fdp->fd_ofiles[indx] == fp) { 1004 fdp->fd_ofiles[indx] = NULL; 1005 fdrop(fp, td); 1006 } 1007 1008 if (error == ERESTART) 1009 error = EINTR; 1010 return (error); 1011 } 1012 p->p_dupfd = 0; 1013 NDFREE(nd, NDF_ONLY_PNBUF); 1014 vp = nd->ni_vp; 1015 1016 /* 1017 * There should be 2 references on the file, one from the descriptor 1018 * table, and one for us. 1019 * 1020 * Handle the case where someone closed the file (via its file 1021 * descriptor) while we were blocked. The end result should look 1022 * like opening the file succeeded but it was immediately closed. 1023 */ 1024 if (fp->f_count == 1) { 1025 KASSERT(fdp->fd_ofiles[indx] != fp, 1026 ("Open file descriptor lost all refs")); 1027 VOP_UNLOCK(vp, NULL, 0, td); 1028 vn_close(vp, flags & FMASK, td); 1029 fdrop(fp, td); 1030 *res = indx; 1031 return 0; 1032 } 1033 1034 fp->f_data = (caddr_t)vp; 1035 fp->f_flag = flags & FMASK; 1036 fp->f_ops = &vnops; 1037 fp->f_type = (vp->v_type == VFIFO ? 
DTYPE_FIFO : DTYPE_VNODE); 1038 if (flags & (O_EXLOCK | O_SHLOCK)) { 1039 lf.l_whence = SEEK_SET; 1040 lf.l_start = 0; 1041 lf.l_len = 0; 1042 if (flags & O_EXLOCK) 1043 lf.l_type = F_WRLCK; 1044 else 1045 lf.l_type = F_RDLCK; 1046 type = F_FLOCK; 1047 if ((flags & FNONBLOCK) == 0) 1048 type |= F_WAIT; 1049 VOP_UNLOCK(vp, NULL, 0, td); 1050 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 1051 /* 1052 * lock request failed. Normally close the descriptor 1053 * but handle the case where someone might have dup()d 1054 * it when we weren't looking. One reference is 1055 * owned by the descriptor array, the other by us. 1056 */ 1057 if (fdp->fd_ofiles[indx] == fp) { 1058 fdp->fd_ofiles[indx] = NULL; 1059 fdrop(fp, td); 1060 } 1061 fdrop(fp, td); 1062 return (error); 1063 } 1064 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 1065 fp->f_flag |= FHASLOCK; 1066 } 1067 /* assert that vn_open created a backing object if one is needed */ 1068 KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, 1069 ("open: vmio vnode has no backing object after vn_open")); 1070 VOP_UNLOCK(vp, NULL, 0, td); 1071 1072 /* 1073 * release our private reference, leaving the one associated with the 1074 * descriptor table intact. 1075 */ 1076 fdrop(fp, td); 1077 *res = indx; 1078 return (0); 1079 } 1080 1081 /* 1082 * open_args(char *path, int flags, int mode) 1083 * 1084 * Check permissions, allocate an open file structure, 1085 * and call the device open routine if any. 
1086 */ 1087 int 1088 open(struct open_args *uap) 1089 { 1090 struct thread *td = curthread; 1091 struct nameidata nd; 1092 int error; 1093 1094 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td); 1095 1096 error = kern_open(&nd, uap->flags, uap->mode, &uap->sysmsg_result); 1097 1098 return (error); 1099 } 1100 1101 int 1102 kern_mknod(struct nameidata *nd, int mode, int dev) 1103 { 1104 struct thread *td = curthread; 1105 struct proc *p = td->td_proc; 1106 struct vnode *vp; 1107 struct vattr vattr; 1108 int error; 1109 int whiteout = 0; 1110 1111 KKASSERT(p); 1112 1113 switch (mode & S_IFMT) { 1114 case S_IFCHR: 1115 case S_IFBLK: 1116 error = suser(td); 1117 break; 1118 default: 1119 error = suser_cred(p->p_ucred, PRISON_ROOT); 1120 break; 1121 } 1122 if (error) 1123 return (error); 1124 bwillwrite(); 1125 error = namei(nd); 1126 if (error) 1127 return (error); 1128 vp = nd->ni_vp; 1129 if (vp != NULL) 1130 error = EEXIST; 1131 else { 1132 VATTR_NULL(&vattr); 1133 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1134 vattr.va_rdev = dev; 1135 whiteout = 0; 1136 1137 switch (mode & S_IFMT) { 1138 case S_IFMT: /* used by badsect to flag bad sectors */ 1139 vattr.va_type = VBAD; 1140 break; 1141 case S_IFCHR: 1142 vattr.va_type = VCHR; 1143 break; 1144 case S_IFBLK: 1145 vattr.va_type = VBLK; 1146 break; 1147 case S_IFWHT: 1148 whiteout = 1; 1149 break; 1150 default: 1151 error = EINVAL; 1152 break; 1153 } 1154 } 1155 if (error == 0) { 1156 VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE); 1157 if (whiteout) 1158 error = VOP_WHITEOUT(nd->ni_dvp, NCPNULL, 1159 &nd->ni_cnd, NAMEI_CREATE); 1160 else { 1161 error = VOP_MKNOD(nd->ni_dvp, NCPNULL, &nd->ni_vp, 1162 &nd->ni_cnd, &vattr); 1163 if (error == 0) 1164 vput(nd->ni_vp); 1165 } 1166 NDFREE(nd, NDF_ONLY_PNBUF); 1167 vput(nd->ni_dvp); 1168 } else { 1169 NDFREE(nd, NDF_ONLY_PNBUF); 1170 if (nd->ni_dvp == vp) 1171 vrele(nd->ni_dvp); 1172 else 1173 vput(nd->ni_dvp); 1174 if (vp) 1175 vrele(vp); 
1176 } 1177 ASSERT_VOP_UNLOCKED(nd->ni_dvp, "mknod"); 1178 ASSERT_VOP_UNLOCKED(nd->ni_vp, "mknod"); 1179 return (error); 1180 } 1181 1182 /* 1183 * mknod_args(char *path, int mode, int dev) 1184 * 1185 * Create a special file. 1186 */ 1187 int 1188 mknod(struct mknod_args *uap) 1189 { 1190 struct thread *td = curthread; 1191 struct nameidata nd; 1192 int error; 1193 1194 NDINIT(&nd, NAMEI_CREATE, CNP_LOCKPARENT, UIO_USERSPACE, uap->path, 1195 td); 1196 1197 error = kern_mknod(&nd, uap->mode, uap->dev); 1198 1199 return (error); 1200 } 1201 1202 int 1203 kern_mkfifo(struct nameidata *nd, int mode) 1204 { 1205 struct thread *td = curthread; 1206 struct proc *p = td->td_proc; 1207 struct vattr vattr; 1208 int error; 1209 1210 bwillwrite(); 1211 error = namei(nd); 1212 if (error) 1213 return (error); 1214 if (nd->ni_vp != NULL) { 1215 NDFREE(nd, NDF_ONLY_PNBUF); 1216 if (nd->ni_dvp == nd->ni_vp) 1217 vrele(nd->ni_dvp); 1218 else 1219 vput(nd->ni_dvp); 1220 vrele(nd->ni_vp); 1221 return (EEXIST); 1222 } 1223 VATTR_NULL(&vattr); 1224 vattr.va_type = VFIFO; 1225 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 1226 VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE); 1227 error = VOP_MKNOD(nd->ni_dvp, NCPNULL, &nd->ni_vp, &nd->ni_cnd, &vattr); 1228 if (error == 0) 1229 vput(nd->ni_vp); 1230 NDFREE(nd, NDF_ONLY_PNBUF); 1231 vput(nd->ni_dvp); 1232 return (error); 1233 } 1234 1235 /* 1236 * mkfifo_args(char *path, int mode) 1237 * 1238 * Create a named pipe. 
 */
int
mkfifo(struct mkfifo_args *uap)
{
	struct thread *td = curthread;
	struct nameidata nd;
	int error;

	NDINIT(&nd, NAMEI_CREATE, CNP_LOCKPARENT, UIO_USERSPACE, uap->path,
	    td);

	error = kern_mkfifo(&nd, uap->mode);

	return (error);
}

/*
 * Back end of link(): create a new name (linknd) for the existing file
 * named by nd.
 *
 * Both nameidata structures must already have been initialised by the
 * caller; link() in this file uses NAMEI_LOOKUP for nd and
 * NAMEI_CREATE with CNP_LOCKPARENT for linknd.
 *
 * Returns 0, the error from either namei() call, EPERM when the source
 * is a directory, EEXIST when the new name already resolves to a
 * vnode, or the error from VOP_LINK.
 */
int
kern_link(struct nameidata *nd, struct nameidata *linknd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	int error;

	bwillwrite();
	error = namei(nd);
	if (error)
		return (error);
	NDFREE(nd, NDF_ONLY_PNBUF);
	vp = nd->ni_vp;
	if (vp->v_type == VDIR)
		error = EPERM;		/* POSIX */
	else {
		error = namei(linknd);
		if (error == 0) {
			if (linknd->ni_vp != NULL) {
				/* The inner test repeats the outer one. */
				if (linknd->ni_vp)
					vrele(linknd->ni_vp);
				error = EEXIST;
			} else {
				VOP_LEASE(linknd->ni_dvp, td, p->p_ucred,
				    LEASE_WRITE);
				VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
				error = VOP_LINK(linknd->ni_dvp, NCPNULL, vp,
				    &linknd->ni_cnd);
			}
			/*
			 * Drop the target directory: vrele() when it is the
			 * same vnode as the existing target, vput() otherwise.
			 */
			NDFREE(linknd, NDF_ONLY_PNBUF);
			if (linknd->ni_dvp == linknd->ni_vp)
				vrele(linknd->ni_dvp);
			else
				vput(linknd->ni_dvp);
			ASSERT_VOP_UNLOCKED(linknd->ni_dvp, "link");
			ASSERT_VOP_UNLOCKED(linknd->ni_vp, "link");
		}
	}
	/* Release the source vnode obtained from the first lookup. */
	vrele(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 */
int
link(struct link_args *uap)
{
	struct thread *td = curthread;
	struct nameidata nd, linknd;
	int error;

	/* nd names the existing file, linknd the name to be created. */
	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_NOOBJ, UIO_USERSPACE,
	    uap->path, td);
	NDINIT(&linknd, NAMEI_CREATE, CNP_LOCKPARENT | CNP_NOOBJ,
	    UIO_USERSPACE, uap->link, td);

	error = kern_link(&nd, &linknd);

	return (error);
}

/*
 * Back end of symlink(): create a symbolic link at the name described
 * by nd whose contents are the string path.
 *
 * path is passed straight to VOP_SYMLINK; symlink() in this file hands
 * in a kernel buffer filled by copyinstr().  nd must already have been
 * initialised by the caller (symlink() uses NAMEI_CREATE with
 * CNP_LOCKPARENT).
 *
 * Returns 0, the namei() error, EEXIST when the name already resolves
 * to a vnode, or the error from VOP_SYMLINK.
 */
int
kern_symlink(char *path, struct nameidata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	int error;

	bwillwrite();
	error = namei(nd);
	if (error)
		return (error);
	if (nd->ni_vp) {
		/* Name exists: release parent and target, then bail out. */
		NDFREE(nd, NDF_ONLY_PNBUF);
		if (nd->ni_dvp == nd->ni_vp)
			vrele(nd->ni_dvp);
		else
			vput(nd->ni_dvp);
		vrele(nd->ni_vp);
		return (EEXIST);
	}
	VATTR_NULL(&vattr);
	/* Links get ACCESSPERMS filtered through the process umask. */
	vattr.va_mode = ACCESSPERMS &~ p->p_fd->fd_cmask;
	VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
	error = VOP_SYMLINK(nd->ni_dvp, NCPNULL, &nd->ni_vp, &nd->ni_cnd,
	    &vattr, path);
	NDFREE(nd, NDF_ONLY_PNBUF);
	/* The new vnode is not handed back to the caller. */
	if (error == 0)
		vput(nd->ni_vp);
	vput(nd->ni_dvp);
	ASSERT_VOP_UNLOCKED(nd->ni_dvp, "symlink");
	ASSERT_VOP_UNLOCKED(nd->ni_vp, "symlink");

	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
1360 */ 1361 int 1362 symlink(struct symlink_args *uap) 1363 { 1364 struct thread *td = curthread; 1365 struct nameidata nd; 1366 char *path; 1367 int error; 1368 1369 path = zalloc(namei_zone); 1370 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1371 if (error == 0) { 1372 NDINIT(&nd, NAMEI_CREATE, CNP_LOCKPARENT | CNP_NOOBJ, 1373 UIO_USERSPACE, uap->link, td); 1374 error = kern_symlink(path, &nd); 1375 } 1376 zfree(namei_zone, path); 1377 return (error); 1378 } 1379 1380 /* 1381 * undelete_args(char *path) 1382 * 1383 * Delete a whiteout from the filesystem. 1384 */ 1385 /* ARGSUSED */ 1386 int 1387 undelete(struct undelete_args *uap) 1388 { 1389 struct thread *td = curthread; 1390 struct proc *p = td->td_proc; 1391 int error; 1392 struct nameidata nd; 1393 1394 bwillwrite(); 1395 NDINIT(&nd, NAMEI_DELETE, CNP_LOCKPARENT | CNP_DOWHITEOUT, UIO_USERSPACE, 1396 SCARG(uap, path), td); 1397 error = namei(&nd); 1398 if (error) 1399 return (error); 1400 1401 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & CNP_ISWHITEOUT)) { 1402 NDFREE(&nd, NDF_ONLY_PNBUF); 1403 if (nd.ni_dvp == nd.ni_vp) 1404 vrele(nd.ni_dvp); 1405 else 1406 vput(nd.ni_dvp); 1407 if (nd.ni_vp) 1408 vrele(nd.ni_vp); 1409 return (EEXIST); 1410 } 1411 1412 VOP_LEASE(nd.ni_dvp, td, p->p_ucred, LEASE_WRITE); 1413 error = VOP_WHITEOUT(nd.ni_dvp, NCPNULL, &nd.ni_cnd, NAMEI_DELETE); 1414 NDFREE(&nd, NDF_ONLY_PNBUF); 1415 vput(nd.ni_dvp); 1416 ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); 1417 ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); 1418 return (error); 1419 } 1420 1421 int 1422 kern_unlink(struct nameidata *nd) 1423 { 1424 struct thread *td = curthread; 1425 struct proc *p = td->td_proc; 1426 struct vnode *vp; 1427 int error; 1428 1429 bwillwrite(); 1430 error = namei(nd); 1431 if (error) 1432 return (error); 1433 vp = nd->ni_vp; 1434 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 1435 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 1436 1437 if (vp->v_type == VDIR) 1438 error = EPERM; /* POSIX */ 1439 
else { 1440 /* 1441 * The root of a mounted filesystem cannot be deleted. 1442 * 1443 * XXX: can this only be a VDIR case? 1444 */ 1445 if (vp->v_flag & VROOT) 1446 error = EBUSY; 1447 } 1448 1449 if (error == 0) { 1450 VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE); 1451 error = VOP_REMOVE(nd->ni_dvp, NCPNULL, vp, &nd->ni_cnd); 1452 } 1453 NDFREE(nd, NDF_ONLY_PNBUF); 1454 if (nd->ni_dvp == vp) 1455 vrele(nd->ni_dvp); 1456 else 1457 vput(nd->ni_dvp); 1458 if (vp != NULLVP) 1459 vput(vp); 1460 ASSERT_VOP_UNLOCKED(nd->ni_dvp, "unlink"); 1461 ASSERT_VOP_UNLOCKED(nd->ni_vp, "unlink"); 1462 return (error); 1463 } 1464 1465 /* 1466 * unlink_args(char *path) 1467 * 1468 * Delete a name from the filesystem. 1469 */ 1470 int 1471 unlink(struct unlink_args *uap) 1472 { 1473 struct thread *td = curthread; 1474 struct nameidata nd; 1475 int error; 1476 1477 NDINIT(&nd, NAMEI_DELETE, CNP_LOCKPARENT, UIO_USERSPACE, uap->path, 1478 td); 1479 1480 error = kern_unlink(&nd); 1481 1482 return (error); 1483 } 1484 1485 int 1486 kern_lseek(int fd, off_t offset, int whence, off_t *res) 1487 { 1488 struct thread *td = curthread; 1489 struct proc *p = td->td_proc; 1490 struct filedesc *fdp = p->p_fd; 1491 struct file *fp; 1492 struct vattr vattr; 1493 int error; 1494 1495 if (fd >= fdp->fd_nfiles || 1496 (fp = fdp->fd_ofiles[fd]) == NULL) 1497 return (EBADF); 1498 if (fp->f_type != DTYPE_VNODE) 1499 return (ESPIPE); 1500 switch (whence) { 1501 case L_INCR: 1502 fp->f_offset += offset; 1503 break; 1504 case L_XTND: 1505 error=VOP_GETATTR((struct vnode *)fp->f_data, &vattr, td); 1506 if (error) 1507 return (error); 1508 fp->f_offset = offset + vattr.va_size; 1509 break; 1510 case L_SET: 1511 fp->f_offset = offset; 1512 break; 1513 default: 1514 return (EINVAL); 1515 } 1516 *res = fp->f_offset; 1517 return (0); 1518 } 1519 1520 /* 1521 * lseek_args(int fd, int pad, off_t offset, int whence) 1522 * 1523 * Reposition read/write file offset. 
 */
int
lseek(struct lseek_args *uap)
{
	int error;

	/* The resulting offset is stored in uap->sysmsg_offset. */
	error = kern_lseek(uap->fd, uap->offset, uap->whence,
	    &uap->sysmsg_offset);

	return (error);
}

/*
 * Back end of access(): check the file described by nd against the
 * R_OK/W_OK/X_OK bits in aflags, using the real uid and real gid of
 * the calling process in place of its current cr_uid / cr_groups[0].
 *
 * nd must already have been initialised by the caller; access() in
 * this file requests CNP_LOCKLEAF, and the vnode is released here with
 * vput().
 *
 * Returns 0, the namei() error, the vn_writechk() error (only
 * consulted when W_OK is requested) or the VOP_ACCESS error.  With
 * aflags == 0 a successful lookup is all that is required.
 */
int
kern_access(struct nameidata *nd, int aflags)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct ucred *cred, *tmpcred;
	struct vnode *vp;
	int error, flags;

	cred = p->p_ucred;
	/*
	 * Create and modify a temporary credential instead of one that
	 * is potentially shared.  This could also mess up socket
	 * buffer accounting which can run in an interrupt context.
	 */
	tmpcred = crdup(cred);
	tmpcred->cr_uid = p->p_ucred->cr_ruid;
	tmpcred->cr_groups[0] = p->p_ucred->cr_rgid;
	/* Install the temporary credential for the lookup as well. */
	p->p_ucred = tmpcred;
	nd->ni_cnd.cn_cred = tmpcred;
	error = namei(nd);
	if (error)
		goto out1;
	vp = nd->ni_vp;

	/* Flags == 0 means only check for existence. */
	if (aflags) {
		/* Translate the user-visible bits into vnode access bits. */
		flags = 0;
		if (aflags & R_OK)
			flags |= VREAD;
		if (aflags & W_OK)
			flags |= VWRITE;
		if (aflags & X_OK)
			flags |= VEXEC;
		if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0)
			error = VOP_ACCESS(vp, flags, tmpcred, td);
	}
	NDFREE(nd, NDF_ONLY_PNBUF);
	vput(vp);
out1:
	/* Restore the original credential on every path, then free ours. */
	p->p_ucred = cred;
	crfree(tmpcred);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
1585 */ 1586 int 1587 access(struct access_args *uap) 1588 { 1589 struct thread *td = curthread; 1590 struct nameidata nd; 1591 int error; 1592 1593 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF | CNP_NOOBJ, 1594 UIO_USERSPACE, uap->path, td); 1595 1596 error = kern_access(&nd, uap->flags); 1597 1598 return (error); 1599 } 1600 1601 int 1602 kern_stat(struct nameidata *nd, struct stat *st) 1603 { 1604 struct thread *td = curthread; 1605 int error; 1606 1607 error = namei(nd); 1608 if (error) 1609 return (error); 1610 error = vn_stat(nd->ni_vp, st, td); 1611 NDFREE(nd, NDF_ONLY_PNBUF); 1612 vput(nd->ni_vp); 1613 return (error); 1614 } 1615 1616 /* 1617 * stat_args(char *path, struct stat *ub) 1618 * 1619 * Get file status; this version follows links. 1620 */ 1621 int 1622 stat(struct stat_args *uap) 1623 { 1624 struct thread *td = curthread; 1625 struct nameidata nd; 1626 struct stat st; 1627 int error; 1628 1629 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF | CNP_NOOBJ, 1630 UIO_USERSPACE, uap->path, td); 1631 1632 error = kern_stat(&nd, &st); 1633 1634 if (error == 0) 1635 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 1636 return (error); 1637 } 1638 1639 /* 1640 * lstat_args(char *path, struct stat *ub) 1641 * 1642 * Get file status; this version does not follow links. 
1643 */ 1644 int 1645 lstat(struct lstat_args *uap) 1646 { 1647 struct thread *td = curthread; 1648 struct nameidata nd; 1649 struct stat st; 1650 int error; 1651 1652 NDINIT(&nd, NAMEI_LOOKUP, CNP_LOCKLEAF | CNP_NOOBJ, 1653 UIO_USERSPACE, SCARG(uap, path), td); 1654 1655 error = kern_stat(&nd, &st); 1656 1657 if (error == 0) 1658 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 1659 return (error); 1660 } 1661 1662 void 1663 cvtnstat(sb, nsb) 1664 struct stat *sb; 1665 struct nstat *nsb; 1666 { 1667 nsb->st_dev = sb->st_dev; 1668 nsb->st_ino = sb->st_ino; 1669 nsb->st_mode = sb->st_mode; 1670 nsb->st_nlink = sb->st_nlink; 1671 nsb->st_uid = sb->st_uid; 1672 nsb->st_gid = sb->st_gid; 1673 nsb->st_rdev = sb->st_rdev; 1674 nsb->st_atimespec = sb->st_atimespec; 1675 nsb->st_mtimespec = sb->st_mtimespec; 1676 nsb->st_ctimespec = sb->st_ctimespec; 1677 nsb->st_size = sb->st_size; 1678 nsb->st_blocks = sb->st_blocks; 1679 nsb->st_blksize = sb->st_blksize; 1680 nsb->st_flags = sb->st_flags; 1681 nsb->st_gen = sb->st_gen; 1682 nsb->st_qspare[0] = sb->st_qspare[0]; 1683 nsb->st_qspare[1] = sb->st_qspare[1]; 1684 } 1685 1686 /* 1687 * nstat_args(char *path, struct nstat *ub) 1688 */ 1689 /* ARGSUSED */ 1690 int 1691 nstat(struct nstat_args *uap) 1692 { 1693 struct thread *td = curthread; 1694 struct stat sb; 1695 struct nstat nsb; 1696 int error; 1697 struct nameidata nd; 1698 1699 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF | CNP_NOOBJ, 1700 UIO_USERSPACE, SCARG(uap, path), td); 1701 if ((error = namei(&nd)) != 0) 1702 return (error); 1703 NDFREE(&nd, NDF_ONLY_PNBUF); 1704 error = vn_stat(nd.ni_vp, &sb, td); 1705 vput(nd.ni_vp); 1706 if (error) 1707 return (error); 1708 cvtnstat(&sb, &nsb); 1709 error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb)); 1710 return (error); 1711 } 1712 1713 /* 1714 * lstat_args(char *path, struct stat *ub) 1715 * 1716 * Get file status; this version does not follow links. 
1717 */ 1718 /* ARGSUSED */ 1719 int 1720 nlstat(struct nlstat_args *uap) 1721 { 1722 struct thread *td = curthread; 1723 int error; 1724 struct vnode *vp; 1725 struct stat sb; 1726 struct nstat nsb; 1727 struct nameidata nd; 1728 1729 NDINIT(&nd, NAMEI_LOOKUP, CNP_LOCKLEAF | CNP_NOOBJ, 1730 UIO_USERSPACE, SCARG(uap, path), td); 1731 if ((error = namei(&nd)) != 0) 1732 return (error); 1733 vp = nd.ni_vp; 1734 NDFREE(&nd, NDF_ONLY_PNBUF); 1735 error = vn_stat(vp, &sb, td); 1736 vput(vp); 1737 if (error) 1738 return (error); 1739 cvtnstat(&sb, &nsb); 1740 error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb)); 1741 return (error); 1742 } 1743 1744 /* 1745 * pathconf_Args(char *path, int name) 1746 * 1747 * Get configurable pathname variables. 1748 */ 1749 /* ARGSUSED */ 1750 int 1751 pathconf(struct pathconf_args *uap) 1752 { 1753 struct thread *td = curthread; 1754 int error; 1755 struct nameidata nd; 1756 1757 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF | CNP_NOOBJ, 1758 UIO_USERSPACE, SCARG(uap, path), td); 1759 if ((error = namei(&nd)) != 0) 1760 return (error); 1761 NDFREE(&nd, NDF_ONLY_PNBUF); 1762 error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), uap->sysmsg_fds); 1763 vput(nd.ni_vp); 1764 return (error); 1765 } 1766 1767 /* 1768 * XXX: daver 1769 * kern_readlink isn't properly split yet. There is a copyin burried 1770 * in VOP_READLINK(). 
1771 */ 1772 int 1773 kern_readlink(struct nameidata *nd, char *buf, int count, int *res) 1774 { 1775 struct thread *td = curthread; 1776 struct proc *p = td->td_proc; 1777 struct vnode *vp; 1778 struct iovec aiov; 1779 struct uio auio; 1780 int error; 1781 1782 error = namei(nd); 1783 if (error) 1784 return (error); 1785 NDFREE(nd, NDF_ONLY_PNBUF); 1786 vp = nd->ni_vp; 1787 if (vp->v_type != VLNK) 1788 error = EINVAL; 1789 else { 1790 aiov.iov_base = buf; 1791 aiov.iov_len = count; 1792 auio.uio_iov = &aiov; 1793 auio.uio_iovcnt = 1; 1794 auio.uio_offset = 0; 1795 auio.uio_rw = UIO_READ; 1796 auio.uio_segflg = UIO_USERSPACE; 1797 auio.uio_td = td; 1798 auio.uio_resid = count; 1799 error = VOP_READLINK(vp, &auio, p->p_ucred); 1800 } 1801 vput(vp); 1802 *res = count - auio.uio_resid; 1803 return (error); 1804 } 1805 1806 /* 1807 * readlink_args(char *path, char *buf, int count) 1808 * 1809 * Return target name of a symbolic link. 1810 */ 1811 int 1812 readlink(struct readlink_args *uap) 1813 { 1814 struct thread *td = curthread; 1815 struct nameidata nd; 1816 int error; 1817 1818 NDINIT(&nd, NAMEI_LOOKUP, CNP_LOCKLEAF | CNP_NOOBJ, UIO_USERSPACE, 1819 uap->path, td); 1820 1821 error = kern_readlink(&nd, uap->buf, uap->count, 1822 &uap->sysmsg_result); 1823 1824 return (error); 1825 } 1826 1827 static int 1828 setfflags(struct vnode *vp, int flags) 1829 { 1830 struct thread *td = curthread; 1831 struct proc *p = td->td_proc; 1832 int error; 1833 struct vattr vattr; 1834 1835 /* 1836 * Prevent non-root users from setting flags on devices. When 1837 * a device is reused, users can retain ownership of the device 1838 * if they are allowed to set flags and programs assume that 1839 * chown can't fail when done as root. 
1840 */ 1841 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 1842 ((error = suser_cred(p->p_ucred, PRISON_ROOT)) != 0)) 1843 return (error); 1844 1845 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 1846 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 1847 VATTR_NULL(&vattr); 1848 vattr.va_flags = flags; 1849 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 1850 VOP_UNLOCK(vp, NULL, 0, td); 1851 return (error); 1852 } 1853 1854 /* 1855 * chflags(char *path, int flags) 1856 * 1857 * Change flags of a file given a path name. 1858 */ 1859 /* ARGSUSED */ 1860 int 1861 chflags(struct chflags_args *uap) 1862 { 1863 struct thread *td = curthread; 1864 int error; 1865 struct nameidata nd; 1866 1867 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, 1868 SCARG(uap, path), td); 1869 if ((error = namei(&nd)) != 0) 1870 return (error); 1871 NDFREE(&nd, NDF_ONLY_PNBUF); 1872 error = setfflags(nd.ni_vp, SCARG(uap, flags)); 1873 vrele(nd.ni_vp); 1874 return error; 1875 } 1876 1877 /* 1878 * fchflags_args(int fd, int flags) 1879 * 1880 * Change flags of a file given a file descriptor. 
1881 */ 1882 /* ARGSUSED */ 1883 int 1884 fchflags(struct fchflags_args *uap) 1885 { 1886 struct thread *td = curthread; 1887 struct proc *p = td->td_proc; 1888 struct file *fp; 1889 int error; 1890 1891 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 1892 return (error); 1893 return setfflags((struct vnode *) fp->f_data, SCARG(uap, flags)); 1894 } 1895 1896 static int 1897 setfmode(struct vnode *vp, int mode) 1898 { 1899 struct thread *td = curthread; 1900 struct proc *p = td->td_proc; 1901 int error; 1902 struct vattr vattr; 1903 1904 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 1905 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 1906 VATTR_NULL(&vattr); 1907 vattr.va_mode = mode & ALLPERMS; 1908 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 1909 VOP_UNLOCK(vp, NULL, 0, td); 1910 return error; 1911 } 1912 1913 int 1914 kern_chmod(struct nameidata *nd, int mode) 1915 { 1916 int error; 1917 1918 error = namei(nd); 1919 if (error) 1920 return (error); 1921 NDFREE(nd, NDF_ONLY_PNBUF); 1922 error = setfmode(nd->ni_vp, mode); 1923 vrele(nd->ni_vp); 1924 return error; 1925 } 1926 1927 /* 1928 * chmod_args(char *path, int mode) 1929 * 1930 * Change mode of a file given path name. 1931 */ 1932 /* ARGSUSED */ 1933 int 1934 chmod(struct chmod_args *uap) 1935 { 1936 struct thread *td = curthread; 1937 struct nameidata nd; 1938 int error; 1939 1940 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td); 1941 1942 error = kern_chmod(&nd, uap->mode); 1943 1944 return (error); 1945 } 1946 1947 /* 1948 * lchmod_args(char *path, int mode) 1949 * 1950 * Change mode of a file given path name (don't follow links.) 
1951 */ 1952 /* ARGSUSED */ 1953 int 1954 lchmod(struct lchmod_args *uap) 1955 { 1956 struct thread *td = curthread; 1957 int error; 1958 struct nameidata nd; 1959 1960 NDINIT(&nd, NAMEI_LOOKUP, 0, UIO_USERSPACE, SCARG(uap, path), td); 1961 if ((error = namei(&nd)) != 0) 1962 return (error); 1963 NDFREE(&nd, NDF_ONLY_PNBUF); 1964 error = setfmode(nd.ni_vp, SCARG(uap, mode)); 1965 vrele(nd.ni_vp); 1966 return error; 1967 } 1968 1969 /* 1970 * fchmod_args(int fd, int mode) 1971 * 1972 * Change mode of a file given a file descriptor. 1973 */ 1974 /* ARGSUSED */ 1975 int 1976 fchmod(struct fchmod_args *uap) 1977 { 1978 struct thread *td = curthread; 1979 struct proc *p = td->td_proc; 1980 struct file *fp; 1981 int error; 1982 1983 if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0) 1984 return (error); 1985 return setfmode((struct vnode *)fp->f_data, SCARG(uap, mode)); 1986 } 1987 1988 static int 1989 setfown(struct vnode *vp, uid_t uid, gid_t gid) 1990 { 1991 struct thread *td = curthread; 1992 struct proc *p = td->td_proc; 1993 int error; 1994 struct vattr vattr; 1995 1996 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 1997 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 1998 VATTR_NULL(&vattr); 1999 vattr.va_uid = uid; 2000 vattr.va_gid = gid; 2001 error = VOP_SETATTR(vp, &vattr, p->p_ucred, td); 2002 VOP_UNLOCK(vp, NULL, 0, td); 2003 return error; 2004 } 2005 2006 int 2007 kern_chown(struct nameidata *nd, int uid, int gid) 2008 { 2009 int error; 2010 2011 error = namei(nd); 2012 if (error) 2013 return (error); 2014 NDFREE(nd, NDF_ONLY_PNBUF); 2015 error = setfown(nd->ni_vp, uid, gid); 2016 vrele(nd->ni_vp); 2017 return (error); 2018 } 2019 2020 /* 2021 * chown(char *path, int uid, int gid) 2022 * 2023 * Set ownership given a path name. 
 */
int
chown(struct chown_args *uap)
{
	struct thread *td = curthread;
	struct nameidata nd;
	int error;

	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td);

	error = kern_chown(&nd, uap->uid, uap->gid);

	return (error);
}

/*
 * lchown_args(char *path, int uid, int gid)
 *
 * Set ownership given a path name, do not cross symlinks.
 */
int
lchown(struct lchown_args *uap)
{
	struct thread *td = curthread;
	int error;
	struct nameidata nd;

	/* Flags are 0: no CNP_FOLLOW, unlike chown(). */
	NDINIT(&nd, NAMEI_LOOKUP, 0, UIO_USERSPACE, uap->path, td);

	error = kern_chown(&nd, uap->uid, uap->gid);

	return (error);
}

/*
 * fchown_args(int fd, int uid, int gid)
 *
 * Set ownership given a file descriptor.
 */
/* ARGSUSED */
int
fchown(struct fchown_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
		return (error);
	return setfown((struct vnode *)fp->f_data,
		SCARG(uap, uid), SCARG(uap, gid));
}

/*
 * Fill tsp[0] and tsp[1] with the two timestamps to apply.
 *
 * With tvp == NULL both entries are set to the current time obtained
 * from microtime(); otherwise tvp[0] and tvp[1] are converted from
 * timeval to timespec.  Always returns 0.
 */
static int
getutimes(const struct timeval *tvp, struct timespec *tsp)
{
	struct timeval tv[2];

	if (tvp == NULL) {
		/* Only tv[0] is used: one reading serves both stamps. */
		microtime(&tv[0]);
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	}
	return 0;
}

/*
 * Set the access time (ts[0]) and modification time (ts[1]) of vp via
 * VOP_SETATTR.
 *
 * A non-zero nullflag sets VA_UTIMES_NULL in va_vaflags; the callers
 * in this file pass (tptr == NULL), i.e. "no times were supplied".
 * vp is locked here around the VOP_SETATTR call and unlocked again
 * before returning.  Returns the VOP_SETATTR result.
 */
static int
setutimes(struct vnode *vp, const struct timespec *ts, int nullflag)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	int error;
	struct vattr vattr;

	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
	VATTR_NULL(&vattr);
	vattr.va_atime = ts[0];
	vattr.va_mtime = ts[1];
	if (nullflag)
		vattr.va_vaflags |= VA_UTIMES_NULL;
	error = VOP_SETATTR(vp, &vattr, p->p_ucred, td);
	VOP_UNLOCK(vp, NULL, 0, td);
	return error;
}

/*
 * Back end of utimes() and lutimes(): look up nd and set its times.
 *
 * tptr is either NULL (use the current time) or an array of two
 * timevals; it is dereferenced directly by getutimes(), and the
 * callers in this file pass a kernel copy.  Returns 0, the namei()
 * error or the setutimes() error.
 */
int
kern_utimes(struct nameidata *nd, struct timeval *tptr)
{
	struct timespec ts[2];
	int error;

	/* The timestamps are resolved before the path is looked up. */
	error = getutimes(tptr, ts);
	if (error)
		return (error);
	error = namei(nd);
	if (error)
		return (error);
	NDFREE(nd, NDF_ONLY_PNBUF);
	error = setutimes(nd->ni_vp, ts, tptr == NULL);
	vrele(nd->ni_vp);
	return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
utimes(struct utimes_args *uap)
{
	struct thread *td = curthread;
	struct timeval tv[2];
	struct nameidata nd;
	int error;

	/* Copy in both timevals when the caller supplied them. */
	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td);

	error = kern_utimes(&nd, uap->tptr ? tv : NULL);

	return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.  The lookup is
 * done without CNP_FOLLOW, unlike utimes().
 */
int
lutimes(struct lutimes_args *uap)
{
	struct thread *td = curthread;
	struct timeval tv[2];
	struct nameidata nd;
	int error;

	/* Copy in both timevals when the caller supplied them. */
	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	NDINIT(&nd, NAMEI_LOOKUP, 0, UIO_USERSPACE, uap->path, td);

	error = kern_utimes(&nd, uap->tptr ? tv : NULL);

	return (error);
}

/*
 * Back end of futimes(): set the times of the vnode behind fd.
 *
 * tptr is either NULL (use the current time) or an array of two
 * timevals, dereferenced directly by getutimes().  Returns 0, the
 * getvnode() error or the setutimes() error.
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct timespec ts[2];
	struct file *fp;
	int error;

	error = getutimes(tptr, ts);
	if (error)
		return (error);
	error = getvnode(p->p_fd, fd, &fp);
	if (error)
		return (error);
	error = setutimes((struct vnode *)fp->f_data, ts, tptr == NULL);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
futimes(struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	/* Copy in both timevals when the caller supplied them. */
	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}

	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);

	return (error);
}

/*
 * Back end of truncate(): set the size of the file described by nd to
 * length.
 *
 * Returns EINVAL for a negative length (checked before the lookup),
 * the namei() error, EISDIR when the vnode is a directory, the
 * vn_writechk() or VOP_ACCESS(VWRITE) error, or the VOP_SETATTR
 * result.
 */
int
kern_truncate(struct nameidata* nd, off_t length)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	if (length < 0)
		return(EINVAL);
	if ((error = namei(nd)) != 0)
		return (error);
	vp = nd->ni_vp;
	NDFREE(nd, NDF_ONLY_PNBUF);
	VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
	if (vp->v_type == VDIR)
		error = EISDIR;
	else if ((error = vn_writechk(vp)) == 0 &&
	    (error = VOP_ACCESS(vp, VWRITE, p->p_ucred, td)) == 0) {
		/* Only va_size is set; every other attribute stays "null". */
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, p->p_ucred, td);
	}
	/* The vnode was locked above, hence vput() rather than vrele(). */
	vput(vp);
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
2256 */ 2257 int 2258 truncate(struct truncate_args *uap) 2259 { 2260 struct thread *td = curthread; 2261 struct nameidata nd; 2262 int error; 2263 2264 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, uap->path, td); 2265 2266 error = kern_truncate(&nd, uap->length); 2267 2268 return error; 2269 } 2270 2271 int 2272 kern_ftruncate(int fd, off_t length) 2273 { 2274 struct thread *td = curthread; 2275 struct proc *p = td->td_proc; 2276 struct vattr vattr; 2277 struct vnode *vp; 2278 struct file *fp; 2279 int error; 2280 2281 if (length < 0) 2282 return(EINVAL); 2283 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 2284 return (error); 2285 if ((fp->f_flag & FWRITE) == 0) 2286 return (EINVAL); 2287 vp = (struct vnode *)fp->f_data; 2288 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2289 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 2290 if (vp->v_type == VDIR) 2291 error = EISDIR; 2292 else if ((error = vn_writechk(vp)) == 0) { 2293 VATTR_NULL(&vattr); 2294 vattr.va_size = length; 2295 error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); 2296 } 2297 VOP_UNLOCK(vp, NULL, 0, td); 2298 return (error); 2299 } 2300 2301 /* 2302 * ftruncate_args(int fd, int pad, off_t length) 2303 * 2304 * Truncate a file given a file descriptor. 2305 */ 2306 int 2307 ftruncate(struct ftruncate_args *uap) 2308 { 2309 int error; 2310 2311 error = kern_ftruncate(uap->fd, uap->length); 2312 2313 return (error); 2314 } 2315 2316 /* 2317 * fsync(int fd) 2318 * 2319 * Sync an open file. 
 */
/* ARGSUSED */
int
fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = getvnode(p->p_fd, SCARG(uap, fd), &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td);
	/* Clean the pages of the backing VM object first, if there is one. */
	if (VOP_GETVOBJECT(vp, &obj) == 0)
		vm_object_page_clean(obj, 0, 0, 0);
	/*
	 * After a successful VOP_FSYNC, call the bioops.io_fsync hook as
	 * well when the mount has MNT_SOFTDEP set and a hook is installed.
	 */
	if ((error = VOP_FSYNC(vp, MNT_WAIT, td)) == 0 &&
	    vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP) &&
	    bioops.io_fsync)
		error = (*bioops.io_fsync)(vp);
	VOP_UNLOCK(vp, NULL, 0, td);
	return (error);
}

/*
 * Back end of rename(): rename the file described by fromnd to the
 * name described by tond.
 *
 * Both nameidata structures must already have been initialised by the
 * caller; rename() in this file uses NAMEI_DELETE with CNP_WANTPARENT
 * and CNP_SAVESTART for the source, and NAMEI_RENAME with
 * CNP_LOCKPARENT, CNP_LOCKLEAF and CNP_SAVESTART for the target.
 *
 * Returns 0 on success, and also when source and target are the same
 * vnode (nothing is done).  Otherwise returns the error from either
 * namei() call (EISDIR from the target lookup becomes EINVAL when the
 * source is a directory), ENOTDIR or EISDIR when exactly one of source
 * and existing target is a directory, EINVAL when the source is the
 * target's parent directory, or the error from VOP_RENAME.
 */
int
kern_rename(struct nameidata *fromnd, struct nameidata *tond)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *tvp, *fvp, *tdvp;
	int error;

	bwillwrite();
	error = namei(fromnd);
	if (error)
		return (error);
	fvp = fromnd->ni_vp;
	/* Tell the target lookup that a directory is being renamed. */
	if (fromnd->ni_vp->v_type == VDIR)
		tond->ni_cnd.cn_flags |= CNP_WILLBEDIR;
	error = namei(tond);
	if (error) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		/* Only the source side holds anything at this point. */
		NDFREE(fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd->ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond->ni_dvp;
	tvp = tond->ni_vp;
	if (tvp != NULL) {
		/* Source and existing target must agree on being a directory. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
	}
	if (fvp == tdvp)
		error = EINVAL;
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 * -1 is an internal marker that is turned into success at out1;
	 * note that it overrides the EINVAL set just above.
	 */
	if (fvp == tvp)
		error = -1;
out:
	if (!error) {
		VOP_LEASE(tdvp, td, p->p_ucred, LEASE_WRITE);
		if (fromnd->ni_dvp != tdvp) {
			VOP_LEASE(fromnd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
		}
		if (tvp) {
			VOP_LEASE(tvp, td, p->p_ucred, LEASE_WRITE);
		}
		/*
		 * No vnode is released here after the call.
		 * NOTE(review): presumably VOP_RENAME consumes the
		 * references and locks on all four vnodes - confirm.
		 */
		error = VOP_RENAME(fromnd->ni_dvp, NCPNULL, fromnd->ni_vp,
		    &fromnd->ni_cnd, tond->ni_dvp, NCPNULL, tond->ni_vp,
		    &tond->ni_cnd);
		NDFREE(fromnd, NDF_ONLY_PNBUF);
		NDFREE(tond, NDF_ONLY_PNBUF);
	} else {
		/*
		 * Rename not attempted: release the target side (vrele()
		 * for the parent when it is the same vnode as the target,
		 * vput() otherwise) and then the source side.
		 */
		NDFREE(fromnd, NDF_ONLY_PNBUF);
		NDFREE(tond, NDF_ONLY_PNBUF);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		if (tvp)
			vput(tvp);
		vrele(fromnd->ni_dvp);
		vrele(fvp);
	}
	vrele(tond->ni_startdir);
	ASSERT_VOP_UNLOCKED(fromnd->ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(fromnd->ni_vp, "rename");
	ASSERT_VOP_UNLOCKED(tond->ni_dvp, "rename");
	ASSERT_VOP_UNLOCKED(tond->ni_vp, "rename");
out1:
	if (fromnd->ni_startdir)
		vrele(fromnd->ni_startdir);
	/* Same-vnode rename: report success. */
	if (error == -1)
		return (0);
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
rename(struct rename_args *uap)
{
	struct thread *td = curthread;
	struct nameidata fromnd, tond;
	int error;

	/* fromnd names the existing file, tond the name it is to receive. */
	NDINIT(&fromnd, NAMEI_DELETE, CNP_WANTPARENT | CNP_SAVESTART,
	    UIO_USERSPACE, uap->from, td);
	NDINIT(&tond, NAMEI_RENAME,
	    CNP_LOCKPARENT | CNP_LOCKLEAF | CNP_NOCACHE |
	    CNP_SAVESTART | CNP_NOOBJ,
	    UIO_USERSPACE, uap->to, td);

	error = kern_rename(&fromnd, &tond);

	return (error);
}

/*
 * Back end of mkdir(): create a directory at the name described by nd
 * with permissions (mode & ACCESSPERMS) filtered through the process
 * umask.
 *
 * nd must already have been initialised by the caller; mkdir() in this
 * file uses NAMEI_CREATE with CNP_LOCKPARENT.  CNP_WILLBEDIR is added
 * to the lookup flags here.
 *
 * Returns 0, the namei() error, EEXIST when the name already resolves
 * to a vnode, or the error from VOP_MKDIR.
 */
int
kern_mkdir(struct nameidata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	bwillwrite();
	nd->ni_cnd.cn_flags |= CNP_WILLBEDIR;
	error = namei(nd);
	if (error)
		return (error);
	vp = nd->ni_vp;
	if (vp) {
		/* Name exists: release parent and target, then bail out. */
		NDFREE(nd, NDF_ONLY_PNBUF);
		if (nd->ni_dvp == vp)
			vrele(nd->ni_dvp);
		else
			vput(nd->ni_dvp);
		vrele(vp);
		return (EEXIST);
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;
	VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
	error = VOP_MKDIR(nd->ni_dvp, NCPNULL, &nd->ni_vp, &nd->ni_cnd,
	    &vattr);
	NDFREE(nd, NDF_ONLY_PNBUF);
	vput(nd->ni_dvp);
	/* The new directory vnode is not handed back to the caller. */
	if (error == 0)
		vput(nd->ni_vp);
	ASSERT_VOP_UNLOCKED(nd->ni_dvp, "mkdir");
	ASSERT_VOP_UNLOCKED(nd->ni_vp, "mkdir");
	return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 */
/* ARGSUSED */
int
mkdir(struct mkdir_args *uap)
{
	struct thread *td = curthread;
	struct nameidata nd;
	int error;

	NDINIT(&nd, NAMEI_CREATE, CNP_LOCKPARENT, UIO_USERSPACE, uap->path,
	    td);

	error = kern_mkdir(&nd, uap->mode);

	return (error);
}

/*
 * Back end of rmdir(): remove the directory described by nd.
 *
 * nd must already have been initialised by the caller; rmdir() in this
 * file uses NAMEI_DELETE with CNP_LOCKPARENT and CNP_LOCKLEAF.
 *
 * Returns 0, the namei() error, ENOTDIR when the target is not a
 * directory, EINVAL when the target is its own parent (rmdir "."),
 * EBUSY when it carries VROOT, or the error from VOP_RMDIR.
 */
int
kern_rmdir(struct nameidata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	int error;

	bwillwrite();
	error = namei(nd);
	if (error)
		return (error);
	vp = nd->ni_vp;
	if (vp->v_type != VDIR) {
		error = ENOTDIR;
		goto out;
	}
	/*
	 * No rmdir "." please.
	 */
	if (nd->ni_dvp == vp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * The root of a mounted filesystem cannot be deleted.
	 */
	if (vp->v_flag & VROOT)
		error = EBUSY;
	else {
		VOP_LEASE(nd->ni_dvp, td, p->p_ucred, LEASE_WRITE);
		VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE);
		error = VOP_RMDIR(nd->ni_dvp, NCPNULL, nd->ni_vp,
		    &nd->ni_cnd);
	}
out:
	/*
	 * Common exit: drop the parent (vrele() when it is the same vnode
	 * as vp, vput() otherwise) and then the target.
	 */
	NDFREE(nd, NDF_ONLY_PNBUF);
	if (nd->ni_dvp == vp)
		vrele(nd->ni_dvp);
	else
		vput(nd->ni_dvp);
	if (vp != NULLVP)
		vput(vp);
	ASSERT_VOP_UNLOCKED(nd->ni_dvp, "rmdir");
	ASSERT_VOP_UNLOCKED(nd->ni_vp, "rmdir");
	return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
2566 */ 2567 /* ARGSUSED */ 2568 int 2569 rmdir(struct rmdir_args *uap) 2570 { 2571 struct thread *td = curthread; 2572 struct nameidata nd; 2573 int error; 2574 2575 NDINIT(&nd, NAMEI_DELETE, CNP_LOCKPARENT | CNP_LOCKLEAF, 2576 UIO_USERSPACE, uap->path, td); 2577 2578 error = kern_rmdir(&nd); 2579 2580 return (error); 2581 } 2582 2583 int 2584 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res) 2585 { 2586 struct thread *td = curthread; 2587 struct proc *p = td->td_proc; 2588 struct vnode *vp; 2589 struct file *fp; 2590 struct uio auio; 2591 struct iovec aiov; 2592 long loff; 2593 int error, eofflag; 2594 2595 if ((error = getvnode(p->p_fd, fd, &fp)) != 0) 2596 return (error); 2597 if ((fp->f_flag & FREAD) == 0) 2598 return (EBADF); 2599 vp = (struct vnode *)fp->f_data; 2600 unionread: 2601 if (vp->v_type != VDIR) 2602 return (EINVAL); 2603 aiov.iov_base = buf; 2604 aiov.iov_len = count; 2605 auio.uio_iov = &aiov; 2606 auio.uio_iovcnt = 1; 2607 auio.uio_rw = UIO_READ; 2608 auio.uio_segflg = UIO_USERSPACE; 2609 auio.uio_td = td; 2610 auio.uio_resid = count; 2611 /* vn_lock(vp, NULL, LK_SHARED | LK_RETRY, td); */ 2612 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 2613 loff = auio.uio_offset = fp->f_offset; 2614 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 2615 fp->f_offset = auio.uio_offset; 2616 VOP_UNLOCK(vp, NULL, 0, td); 2617 if (error) 2618 return (error); 2619 if (count == auio.uio_resid) { 2620 if (union_dircheckp) { 2621 error = union_dircheckp(td, &vp, fp); 2622 if (error == -1) 2623 goto unionread; 2624 if (error) 2625 return (error); 2626 } 2627 if ((vp->v_flag & VROOT) && 2628 (vp->v_mount->mnt_flag & MNT_UNION)) { 2629 struct vnode *tvp = vp; 2630 vp = vp->v_mount->mnt_vnodecovered; 2631 vref(vp); 2632 fp->f_data = (caddr_t)vp; 2633 fp->f_offset = 0; 2634 vrele(tvp); 2635 goto unionread; 2636 } 2637 } 2638 if (basep) { 2639 *basep = loff; 2640 } 2641 *res = count - auio.uio_resid; 2642 return (error); 2643 } 
2644 2645 /* 2646 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 2647 * 2648 * Read a block of directory entries in a file system independent format. 2649 */ 2650 int 2651 getdirentries(struct getdirentries_args *uap) 2652 { 2653 long base; 2654 int error; 2655 2656 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 2657 &uap->sysmsg_result); 2658 2659 if (error == 0) 2660 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 2661 return (error); 2662 } 2663 2664 /* 2665 * getdents_args(int fd, char *buf, size_t count) 2666 */ 2667 int 2668 getdents(struct getdents_args *uap) 2669 { 2670 int error; 2671 2672 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 2673 &uap->sysmsg_result); 2674 2675 return (error); 2676 } 2677 2678 /* 2679 * umask(int newmask) 2680 * 2681 * Set the mode mask for creation of filesystem nodes. 2682 * 2683 * MP SAFE 2684 */ 2685 int 2686 umask(struct umask_args *uap) 2687 { 2688 struct thread *td = curthread; 2689 struct proc *p = td->td_proc; 2690 struct filedesc *fdp; 2691 2692 fdp = p->p_fd; 2693 uap->sysmsg_result = fdp->fd_cmask; 2694 fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; 2695 return (0); 2696 } 2697 2698 /* 2699 * revoke(char *path) 2700 * 2701 * Void all references to file by ripping underlying filesystem 2702 * away from vnode. 
2703 */ 2704 /* ARGSUSED */ 2705 int 2706 revoke(struct revoke_args *uap) 2707 { 2708 struct thread *td = curthread; 2709 struct proc *p = td->td_proc; 2710 struct vnode *vp; 2711 struct vattr vattr; 2712 int error; 2713 struct nameidata nd; 2714 2715 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); 2716 if ((error = namei(&nd)) != 0) 2717 return (error); 2718 vp = nd.ni_vp; 2719 NDFREE(&nd, NDF_ONLY_PNBUF); 2720 if (vp->v_type != VCHR && vp->v_type != VBLK) { 2721 error = EINVAL; 2722 goto out; 2723 } 2724 if ((error = VOP_GETATTR(vp, &vattr, td)) != 0) 2725 goto out; 2726 if (p->p_ucred->cr_uid != vattr.va_uid && 2727 (error = suser_cred(p->p_ucred, PRISON_ROOT))) 2728 goto out; 2729 if (count_udev(vp->v_udev) > 0) 2730 VOP_REVOKE(vp, REVOKEALL); 2731 out: 2732 vrele(vp); 2733 return (error); 2734 } 2735 2736 /* 2737 * Convert a user file descriptor to a kernel file entry. 2738 */ 2739 int 2740 getvnode(struct filedesc *fdp, int fd, struct file **fpp) 2741 { 2742 struct file *fp; 2743 2744 if ((u_int)fd >= fdp->fd_nfiles || 2745 (fp = fdp->fd_ofiles[fd]) == NULL) 2746 return (EBADF); 2747 if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) 2748 return (EINVAL); 2749 *fpp = fp; 2750 return (0); 2751 } 2752 /* 2753 * getfh_args(char *fname, fhandle_t *fhp) 2754 * 2755 * Get (NFS) file handle 2756 */ 2757 int 2758 getfh(struct getfh_args *uap) 2759 { 2760 struct thread *td = curthread; 2761 struct nameidata nd; 2762 fhandle_t fh; 2763 struct vnode *vp; 2764 int error; 2765 2766 /* 2767 * Must be super user 2768 */ 2769 error = suser(td); 2770 if (error) 2771 return (error); 2772 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, uap->fname, td); 2773 error = namei(&nd); 2774 if (error) 2775 return (error); 2776 NDFREE(&nd, NDF_ONLY_PNBUF); 2777 vp = nd.ni_vp; 2778 bzero(&fh, sizeof(fh)); 2779 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 2780 error = VFS_VPTOFH(vp, &fh.fh_fid); 2781 vput(vp); 2782 if (error) 2783 
return (error); 2784 error = copyout(&fh, uap->fhp, sizeof (fh)); 2785 return (error); 2786 } 2787 2788 /* 2789 * fhopen_args(const struct fhandle *u_fhp, int flags) 2790 * 2791 * syscall for the rpc.lockd to use to translate a NFS file handle into 2792 * an open descriptor. 2793 * 2794 * warning: do not remove the suser() call or this becomes one giant 2795 * security hole. 2796 */ 2797 int 2798 fhopen(struct fhopen_args *uap) 2799 { 2800 struct thread *td = curthread; 2801 struct proc *p = td->td_proc; 2802 struct mount *mp; 2803 struct vnode *vp; 2804 struct fhandle fhp; 2805 struct vattr vat; 2806 struct vattr *vap = &vat; 2807 struct flock lf; 2808 struct file *fp; 2809 struct filedesc *fdp = p->p_fd; 2810 int fmode, mode, error, type; 2811 struct file *nfp; 2812 int indx; 2813 2814 /* 2815 * Must be super user 2816 */ 2817 error = suser(td); 2818 if (error) 2819 return (error); 2820 2821 fmode = FFLAGS(SCARG(uap, flags)); 2822 /* why not allow a non-read/write open for our lockd? */ 2823 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 2824 return (EINVAL); 2825 error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp)); 2826 if (error) 2827 return(error); 2828 /* find the mount point */ 2829 mp = vfs_getvfs(&fhp.fh_fsid); 2830 if (mp == NULL) 2831 return (ESTALE); 2832 /* now give me my vnode, it gets returned to me locked */ 2833 error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp); 2834 if (error) 2835 return (error); 2836 /* 2837 * from now on we have to make sure not 2838 * to forget about the vnode 2839 * any error that causes an abort must vput(vp) 2840 * just set error = err and 'goto bad;'. 
2841 */ 2842 2843 /* 2844 * from vn_open 2845 */ 2846 if (vp->v_type == VLNK) { 2847 error = EMLINK; 2848 goto bad; 2849 } 2850 if (vp->v_type == VSOCK) { 2851 error = EOPNOTSUPP; 2852 goto bad; 2853 } 2854 mode = 0; 2855 if (fmode & (FWRITE | O_TRUNC)) { 2856 if (vp->v_type == VDIR) { 2857 error = EISDIR; 2858 goto bad; 2859 } 2860 error = vn_writechk(vp); 2861 if (error) 2862 goto bad; 2863 mode |= VWRITE; 2864 } 2865 if (fmode & FREAD) 2866 mode |= VREAD; 2867 if (mode) { 2868 error = VOP_ACCESS(vp, mode, p->p_ucred, td); 2869 if (error) 2870 goto bad; 2871 } 2872 if (fmode & O_TRUNC) { 2873 VOP_UNLOCK(vp, NULL, 0, td); /* XXX */ 2874 VOP_LEASE(vp, td, p->p_ucred, LEASE_WRITE); 2875 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); /* XXX */ 2876 VATTR_NULL(vap); 2877 vap->va_size = 0; 2878 error = VOP_SETATTR(vp, vap, p->p_ucred, td); 2879 if (error) 2880 goto bad; 2881 } 2882 error = VOP_OPEN(vp, fmode, p->p_ucred, td); 2883 if (error) 2884 goto bad; 2885 /* 2886 * Make sure that a VM object is created for VMIO support. 2887 */ 2888 if (vn_canvmio(vp) == TRUE) { 2889 if ((error = vfs_object_create(vp, td)) != 0) 2890 goto bad; 2891 } 2892 if (fmode & FWRITE) 2893 vp->v_writecount++; 2894 2895 /* 2896 * end of vn_open code 2897 */ 2898 2899 if ((error = falloc(p, &nfp, &indx)) != 0) { 2900 if (fmode & FWRITE) 2901 vp->v_writecount--; 2902 goto bad; 2903 } 2904 fp = nfp; 2905 2906 /* 2907 * hold an extra reference to avoid having fp ripped out 2908 * from under us while we block in the lock op. 
2909 */ 2910 fhold(fp); 2911 nfp->f_data = (caddr_t)vp; 2912 nfp->f_flag = fmode & FMASK; 2913 nfp->f_ops = &vnops; 2914 nfp->f_type = DTYPE_VNODE; 2915 if (fmode & (O_EXLOCK | O_SHLOCK)) { 2916 lf.l_whence = SEEK_SET; 2917 lf.l_start = 0; 2918 lf.l_len = 0; 2919 if (fmode & O_EXLOCK) 2920 lf.l_type = F_WRLCK; 2921 else 2922 lf.l_type = F_RDLCK; 2923 type = F_FLOCK; 2924 if ((fmode & FNONBLOCK) == 0) 2925 type |= F_WAIT; 2926 VOP_UNLOCK(vp, NULL, 0, td); 2927 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2928 /* 2929 * lock request failed. Normally close the descriptor 2930 * but handle the case where someone might have dup()d 2931 * or close()d it when we weren't looking. 2932 */ 2933 if (fdp->fd_ofiles[indx] == fp) { 2934 fdp->fd_ofiles[indx] = NULL; 2935 fdrop(fp, td); 2936 } 2937 2938 /* 2939 * release our private reference. 2940 */ 2941 fdrop(fp, td); 2942 return (error); 2943 } 2944 vn_lock(vp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 2945 fp->f_flag |= FHASLOCK; 2946 } 2947 if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0)) 2948 vfs_object_create(vp, td); 2949 2950 VOP_UNLOCK(vp, NULL, 0, td); 2951 fdrop(fp, td); 2952 uap->sysmsg_result = indx; 2953 return (0); 2954 2955 bad: 2956 vput(vp); 2957 return (error); 2958 } 2959 2960 /* 2961 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 2962 */ 2963 int 2964 fhstat(struct fhstat_args *uap) 2965 { 2966 struct thread *td = curthread; 2967 struct stat sb; 2968 fhandle_t fh; 2969 struct mount *mp; 2970 struct vnode *vp; 2971 int error; 2972 2973 /* 2974 * Must be super user 2975 */ 2976 error = suser(td); 2977 if (error) 2978 return (error); 2979 2980 error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t)); 2981 if (error) 2982 return (error); 2983 2984 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 2985 return (ESTALE); 2986 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) 2987 return (error); 2988 error = vn_stat(vp, &sb, td); 2989 vput(vp); 2990 if (error) 2991 return (error); 
2992 error = copyout(&sb, SCARG(uap, sb), sizeof(sb)); 2993 return (error); 2994 } 2995 2996 /* 2997 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 2998 */ 2999 int 3000 fhstatfs(struct fhstatfs_args *uap) 3001 { 3002 struct thread *td = curthread; 3003 struct statfs *sp; 3004 struct mount *mp; 3005 struct vnode *vp; 3006 struct statfs sb; 3007 fhandle_t fh; 3008 int error; 3009 3010 /* 3011 * Must be super user 3012 */ 3013 if ((error = suser(td))) 3014 return (error); 3015 3016 if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0) 3017 return (error); 3018 3019 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 3020 return (ESTALE); 3021 if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) 3022 return (error); 3023 mp = vp->v_mount; 3024 sp = &mp->mnt_stat; 3025 vput(vp); 3026 if ((error = VFS_STATFS(mp, sp, td)) != 0) 3027 return (error); 3028 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 3029 if (suser(td)) { 3030 bcopy(sp, &sb, sizeof(sb)); 3031 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 3032 sp = &sb; 3033 } 3034 return (copyout(sp, SCARG(uap, buf), sizeof(*sp))); 3035 } 3036 3037 /* 3038 * Syscall to push extended attribute configuration information into the 3039 * VFS. Accepts a path, which it converts to a mountpoint, as well as 3040 * a command (int cmd), and attribute name and misc data. For now, the 3041 * attribute name is left in userspace for consumption by the VFS_op. 3042 * It will probably be changed to be copied into sysspace by the 3043 * syscall in the future, once issues with various consumers of the 3044 * attribute code have raised their hands. 3045 * 3046 * Currently this is used only by UFS Extended Attributes. 
3047 */ 3048 int 3049 extattrctl(struct extattrctl_args *uap) 3050 { 3051 struct thread *td = curthread; 3052 struct nameidata nd; 3053 struct mount *mp; 3054 int error; 3055 3056 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); 3057 if ((error = namei(&nd)) != 0) 3058 return (error); 3059 mp = nd.ni_vp->v_mount; 3060 NDFREE(&nd, 0); 3061 return (VFS_EXTATTRCTL(mp, SCARG(uap, cmd), SCARG(uap, attrname), 3062 SCARG(uap, arg), td)); 3063 } 3064 3065 /* 3066 * Syscall to set a named extended attribute on a file or directory. 3067 * Accepts attribute name, and a uio structure pointing to the data to set. 3068 * The uio is consumed in the style of writev(). The real work happens 3069 * in VOP_SETEXTATTR(). 3070 */ 3071 int 3072 extattr_set_file(struct extattr_set_file_args *uap) 3073 { 3074 struct thread *td = curthread; 3075 struct proc *p = td->td_proc; 3076 struct nameidata nd; 3077 struct uio auio; 3078 struct iovec *iov, *needfree = NULL, aiov[UIO_SMALLIOV]; 3079 char attrname[EXTATTR_MAXNAMELEN]; 3080 u_int iovlen, cnt; 3081 int error, i; 3082 3083 error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); 3084 if (error) 3085 return (error); 3086 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, 3087 SCARG(uap, path), td); 3088 if ((error = namei(&nd)) != 0) 3089 return(error); 3090 iovlen = uap->iovcnt * sizeof(struct iovec); 3091 if (uap->iovcnt > UIO_SMALLIOV) { 3092 if (uap->iovcnt > UIO_MAXIOV) { 3093 error = EINVAL; 3094 goto done; 3095 } 3096 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3097 needfree = iov; 3098 } else 3099 iov = aiov; 3100 auio.uio_iov = iov; 3101 auio.uio_iovcnt = uap->iovcnt; 3102 auio.uio_rw = UIO_WRITE; 3103 auio.uio_segflg = UIO_USERSPACE; 3104 auio.uio_td = td; 3105 auio.uio_offset = 0; 3106 if ((error = copyin(uap->iovp, iov, iovlen))) 3107 goto done; 3108 auio.uio_resid = 0; 3109 for (i = 0; i < uap->iovcnt; i++) { 3110 if (iov->iov_len > INT_MAX - auio.uio_resid) { 
3111 error = EINVAL; 3112 goto done; 3113 } 3114 auio.uio_resid += iov->iov_len; 3115 iov++; 3116 } 3117 cnt = auio.uio_resid; 3118 error = VOP_SETEXTATTR(nd.ni_vp, attrname, &auio, p->p_ucred, td); 3119 cnt -= auio.uio_resid; 3120 uap->sysmsg_result = cnt; 3121 done: 3122 if (needfree) 3123 FREE(needfree, M_IOV); 3124 NDFREE(&nd, 0); 3125 return (error); 3126 } 3127 3128 /* 3129 * Syscall to get a named extended attribute on a file or directory. 3130 * Accepts attribute name, and a uio structure pointing to a buffer for the 3131 * data. The uio is consumed in the style of readv(). The real work 3132 * happens in VOP_GETEXTATTR(); 3133 */ 3134 int 3135 extattr_get_file(struct extattr_get_file_args *uap) 3136 { 3137 struct thread *td = curthread; 3138 struct proc *p = td->td_proc; 3139 struct nameidata nd; 3140 struct uio auio; 3141 struct iovec *iov, *needfree, aiov[UIO_SMALLIOV]; 3142 char attrname[EXTATTR_MAXNAMELEN]; 3143 u_int iovlen, cnt; 3144 int error, i; 3145 3146 error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); 3147 if (error) 3148 return (error); 3149 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, 3150 SCARG(uap, path), td); 3151 if ((error = namei(&nd)) != 0) 3152 return (error); 3153 iovlen = uap->iovcnt * sizeof (struct iovec); 3154 if (uap->iovcnt > UIO_SMALLIOV) { 3155 if (uap->iovcnt > UIO_MAXIOV) { 3156 NDFREE(&nd, 0); 3157 return (EINVAL); 3158 } 3159 MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); 3160 needfree = iov; 3161 } else { 3162 iov = aiov; 3163 needfree = NULL; 3164 } 3165 auio.uio_iov = iov; 3166 auio.uio_iovcnt = uap->iovcnt; 3167 auio.uio_rw = UIO_READ; 3168 auio.uio_segflg = UIO_USERSPACE; 3169 auio.uio_td = td; 3170 auio.uio_offset = 0; 3171 if ((error = copyin(uap->iovp, iov, iovlen))) 3172 goto done; 3173 auio.uio_resid = 0; 3174 for (i = 0; i < uap->iovcnt; i++) { 3175 if (iov->iov_len > INT_MAX - auio.uio_resid) { 3176 error = EINVAL; 3177 goto done; 3178 } 3179 auio.uio_resid += 
iov->iov_len; 3180 iov++; 3181 } 3182 cnt = auio.uio_resid; 3183 error = VOP_GETEXTATTR(nd.ni_vp, attrname, &auio, p->p_ucred, td); 3184 cnt -= auio.uio_resid; 3185 uap->sysmsg_result = cnt; 3186 done: 3187 if (needfree) 3188 FREE(needfree, M_IOV); 3189 NDFREE(&nd, 0); 3190 return(error); 3191 } 3192 3193 /* 3194 * Syscall to delete a named extended attribute from a file or directory. 3195 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 3196 */ 3197 int 3198 extattr_delete_file(struct extattr_delete_file_args *uap) 3199 { 3200 struct thread *td = curthread; 3201 struct proc *p = td->td_proc; 3202 struct nameidata nd; 3203 char attrname[EXTATTR_MAXNAMELEN]; 3204 int error; 3205 3206 error = copyin(SCARG(uap, attrname), attrname, EXTATTR_MAXNAMELEN); 3207 if (error) 3208 return(error); 3209 NDINIT(&nd, NAMEI_LOOKUP, CNP_FOLLOW | CNP_LOCKLEAF, UIO_USERSPACE, 3210 SCARG(uap, path), td); 3211 if ((error = namei(&nd)) != 0) 3212 return(error); 3213 error = VOP_SETEXTATTR(nd.ni_vp, attrname, NULL, p->p_ucred, td); 3214 NDFREE(&nd, 0); 3215 return(error); 3216 } 3217 3218 /* 3219 * print out statistics from the current status of the buffer pool 3220 * this can be toggeled by the system control option debug.syncprt 3221 */ 3222 #ifdef DEBUG 3223 void 3224 vfs_bufstats(void) 3225 { 3226 int s, i, j, count; 3227 struct buf *bp; 3228 struct bqueues *dp; 3229 int counts[(MAXBSIZE / PAGE_SIZE) + 1]; 3230 static char *bname[3] = { "LOCKED", "LRU", "AGE" }; 3231 3232 for (dp = bufqueues, i = 0; dp < &bufqueues[3]; dp++, i++) { 3233 count = 0; 3234 for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++) 3235 counts[j] = 0; 3236 s = splbio(); 3237 TAILQ_FOREACH(bp, dp, b_freelist) { 3238 counts[bp->b_bufsize/PAGE_SIZE]++; 3239 count++; 3240 } 3241 splx(s); 3242 printf("%s: total-%d", bname[i], count); 3243 for (j = 0; j <= MAXBSIZE/PAGE_SIZE; j++) 3244 if (counts[j] != 0) 3245 printf(", %d-%d", j * PAGE_SIZE, counts[j]); 3246 printf("\n"); 3247 } 3248 } 3249 #endif 3250