1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Resolves uap->path and then either updates the mount already rooted at
 * that vnode (MNT_UPDATE) or allocates a new struct mount, calls
 * VFS_MOUNT() on it and hooks it into the namecache and the mount list.
 *
 * Jailed callers are always refused.  Callers failing
 * priv_check(PRIV_ROOT) are refused unless the vfs.usermount sysctl is
 * non-zero; they may not request MNT_EXPORTED, get MNT_NOSUID|MNT_NODEV
 * forced on, must own the directory being mounted on (or the existing
 * mount when updating) and cannot cause a filesystem module to be loaded.
 *
 * Returns 0 on success or an errno value.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2: saved mount flags */
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether a mount is already stacked on this namecache
	 * entry; both the update and the new-mount path return EBUSY
	 * further down when it is.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* An update must target the root vnode of a mount. */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For a "null" type update operate on the mount recorded
		 * from the lookup rather than the vnode's mount.
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Saved so a failed VFS_MOUNT() can be rolled back. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		/*
		 * Mark the request on the mount itself; the MNT_UPDATE bit
		 * is what selects the update branch after VFS_MOUNT().
		 */
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* The underlying mount refuses to have anything stacked on it. */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			/* Module loaded but registered no such VFS. */
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update case: clear the transient request flags and, if
		 * VFS_MOUNT() failed, restore both flag words saved above.
		 */
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		mp->mnt_ncmounton = nch;		/* inherits ref */
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the value stored here is overwritten by
		 * the VFS_START() result below before it is examined.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed on a new mount: undo the setup done
		 * above and free the mount structure.
		 */
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
462 */ 463 struct checkdirs_info { 464 struct nchandle old_nch; 465 struct nchandle new_nch; 466 struct vnode *old_vp; 467 struct vnode *new_vp; 468 }; 469 470 static int checkdirs_callback(struct proc *p, void *data); 471 472 static void 473 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 474 { 475 struct checkdirs_info info; 476 struct vnode *olddp; 477 struct vnode *newdp; 478 struct mount *mp; 479 480 /* 481 * If the old mount point's vnode has a usecount of 1, it is not 482 * being held as a descriptor anywhere. 483 */ 484 olddp = old_nch->ncp->nc_vp; 485 if (olddp == NULL || VREFCNT(olddp) == 1) 486 return; 487 488 /* 489 * Force the root vnode of the new mount point to be resolved 490 * so we can update any matching processes. 491 */ 492 mp = new_nch->mount; 493 if (VFS_ROOT(mp, &newdp)) 494 panic("mount: lost mount"); 495 vn_unlock(newdp); 496 cache_lock(new_nch); 497 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 498 cache_setunresolved(new_nch); 499 cache_setvp(new_nch, newdp); 500 cache_unlock(new_nch); 501 502 /* 503 * Special handling of the root node 504 */ 505 if (rootvnode == olddp) { 506 vref(newdp); 507 vfs_cache_setroot(newdp, cache_hold(new_nch)); 508 } 509 510 /* 511 * Pass newdp separately so the callback does not have to access 512 * it via new_nch->ncp->nc_vp. 513 */ 514 info.old_nch = *old_nch; 515 info.new_nch = *new_nch; 516 info.new_vp = newdp; 517 allproc_scan(checkdirs_callback, &info, 0); 518 vput(newdp); 519 } 520 521 /* 522 * NOTE: callback is not MP safe because the scanned process's filedesc 523 * structure can be ripped out from under us, amoung other things. 
524 */ 525 static int 526 checkdirs_callback(struct proc *p, void *data) 527 { 528 struct checkdirs_info *info = data; 529 struct filedesc *fdp; 530 struct nchandle ncdrop1; 531 struct nchandle ncdrop2; 532 struct vnode *vprele1; 533 struct vnode *vprele2; 534 535 if ((fdp = p->p_fd) != NULL) { 536 cache_zero(&ncdrop1); 537 cache_zero(&ncdrop2); 538 vprele1 = NULL; 539 vprele2 = NULL; 540 541 /* 542 * MPUNSAFE - XXX fdp can be pulled out from under a 543 * foreign process. 544 * 545 * A shared filedesc is ok, we don't have to copy it 546 * because we are making this change globally. 547 */ 548 spin_lock(&fdp->fd_spin); 549 if (fdp->fd_ncdir.mount == info->old_nch.mount && 550 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 551 vprele1 = fdp->fd_cdir; 552 vref(info->new_vp); 553 fdp->fd_cdir = info->new_vp; 554 ncdrop1 = fdp->fd_ncdir; 555 cache_copy(&info->new_nch, &fdp->fd_ncdir); 556 } 557 if (fdp->fd_nrdir.mount == info->old_nch.mount && 558 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 559 vprele2 = fdp->fd_rdir; 560 vref(info->new_vp); 561 fdp->fd_rdir = info->new_vp; 562 ncdrop2 = fdp->fd_nrdir; 563 cache_copy(&info->new_nch, &fdp->fd_nrdir); 564 } 565 spin_unlock(&fdp->fd_spin); 566 if (ncdrop1.ncp) 567 cache_drop(&ncdrop1); 568 if (ncdrop2.ncp) 569 cache_drop(&ncdrop2); 570 if (vprele1) 571 vrele(vprele1); 572 if (vprele2) 573 vrele(vprele2); 574 } 575 return(0); 576 } 577 578 /* 579 * Unmount a file system. 580 * 581 * Note: unmount takes a path to the vnode mounted on as argument, 582 * not special file (as before). 
583 * 584 * umount_args(char *path, int flags) 585 * 586 * MPALMOSTSAFE 587 */ 588 int 589 sys_unmount(struct unmount_args *uap) 590 { 591 struct thread *td = curthread; 592 struct proc *p __debugvar = td->td_proc; 593 struct mount *mp = NULL; 594 struct nlookupdata nd; 595 int error; 596 597 KKASSERT(p); 598 if (td->td_ucred->cr_prison != NULL) { 599 error = EPERM; 600 goto done; 601 } 602 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 603 goto done; 604 605 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 606 if (error == 0) 607 error = nlookup(&nd); 608 if (error) 609 goto out; 610 611 mp = nd.nl_nch.mount; 612 613 /* 614 * Only root, or the user that did the original mount is 615 * permitted to unmount this filesystem. 616 */ 617 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 618 (error = priv_check(td, PRIV_ROOT))) 619 goto out; 620 621 /* 622 * Don't allow unmounting the root file system. 623 */ 624 if (mp->mnt_flag & MNT_ROOTFS) { 625 error = EINVAL; 626 goto out; 627 } 628 629 /* 630 * Must be the root of the filesystem 631 */ 632 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 633 error = EINVAL; 634 goto out; 635 } 636 637 /* 638 * If no error try to issue the unmount. We lose our cache 639 * ref when we call nlookup_done so we must hold the mount point 640 * to prevent use-after-free races. 641 */ 642 out: 643 if (error == 0) { 644 mount_hold(mp); 645 nlookup_done(&nd); 646 error = dounmount(mp, uap->flags, 0); 647 mount_drop(mp); 648 } else { 649 nlookup_done(&nd); 650 } 651 done: 652 return (error); 653 } 654 655 /* 656 * Do the actual file system unmount (interlocked against the mountlist 657 * token and mp->mnt_token). 
*/
/*
 * Callback handed to mountlist_interlock() by dounmount().  Claims the
 * unmount by setting MNTK_UNMOUNT; returns EBUSY if the flag was already
 * set, 0 otherwise.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback used by dounmount().  arg is the mount being
 * unmounted; a process whose p_textnch refers to that mount has the
 * handle dropped.  Always returns 0.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * The guts of the unmount code.  The mount owns one ref and one hold
 * count.  If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT().  They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 *
 * mp      - mount to take down
 * flags   - unmount flags; MNT_FORCE lets the unmount proceed even when
 *           references remain
 * halting - non-zero enables the MNTK_QUICKHALT shortcut
 *
 * Returns 0 on success or an errno value; on failure the syncer vnode
 * and the flags modified here are restored.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;		/* cleared when mp must not be dropped here */
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}


	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	/* A non-forced unmount adds LK_TIMELOCK to the lock request. */
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* Could not get the lock: back out the interlock flags. */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	/* MNT_ASYNC is masked for the unmount; restored on failure. */
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 *
	 * We own the last mnt_refs by owning mnt_ncmountpt.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		/*
		 * First pass: if references remain, have every process
		 * drop a p_textnch handle that points into this mount.
		 */
		cache_clearmntcache();
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp, 0);
		}

		/*
		 * Second pass: anything still referenced either fails the
		 * unmount (EBUSY) or, when forced, prevents the mount
		 * structure from being dropped below.
		 */
		cache_clearmntcache();
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
	}

	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	if (mp->mnt_refs != 1)
		cache_clearmntcache();
	/* Give temporary refs up to 10 short sleeps to drain. */
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
		cache_clearmntcache();
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs - 1);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs - 1);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if ((error == 0) ||
			    (error == EOPNOTSUPP) || /* No sync */
			    (flags & MNT_FORCE)) {
				error = VFS_UNMOUNT(mp, flags);
			}
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	/*
	 * Detach both namecache handles from the mount structure before
	 * clearing their mount-point state and dropping them.
	 */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache();
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache();
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		/* Token already released; skip the release at out:. */
		mp = NULL;
	} else {
		cache_clearmntcache();
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Print a console warning about mp.  ctl and the following arguments
 * form a printf-style message.  The message is prefixed with
 * "unmount(<path>): " where the path comes from cache_fullpath() on
 * mnt_ncmounton; if that fails the mount pointer (and the nc_name of
 * mnt_ncmounton, when available) is printed instead.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		/* buf was handed back by cache_fullpath(); release it. */
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.
In this case if the root mount matches the 992 * process root directory's mount we have to specify the process's root 993 * directory instead of the mount point, because the mount point might 994 * be above the root directory. 995 */ 996 static 997 int 998 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 999 { 1000 struct nchandle *nch; 1001 1002 if (p && p->p_fd->fd_nrdir.mount == mp) 1003 nch = &p->p_fd->fd_nrdir; 1004 else 1005 nch = &mp->mnt_ncmountpt; 1006 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1007 } 1008 1009 /* 1010 * Sync each mounted filesystem. 1011 */ 1012 1013 #ifdef DEBUG 1014 static int syncprt = 0; 1015 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1016 #endif /* DEBUG */ 1017 1018 static int sync_callback(struct mount *mp, void *data); 1019 1020 int 1021 sys_sync(struct sync_args *uap) 1022 { 1023 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1024 return (0); 1025 } 1026 1027 static 1028 int 1029 sync_callback(struct mount *mp, void *data __unused) 1030 { 1031 int asyncflag; 1032 1033 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1034 lwkt_gettoken(&mp->mnt_token); 1035 asyncflag = mp->mnt_flag & MNT_ASYNC; 1036 mp->mnt_flag &= ~MNT_ASYNC; 1037 lwkt_reltoken(&mp->mnt_token); 1038 vfs_msync(mp, MNT_NOWAIT); 1039 VFS_SYNC(mp, MNT_NOWAIT); 1040 lwkt_gettoken(&mp->mnt_token); 1041 mp->mnt_flag |= asyncflag; 1042 lwkt_reltoken(&mp->mnt_token); 1043 } 1044 return(0); 1045 } 1046 1047 /* XXX PRISON: could be per prison flag */ 1048 static int prison_quotas; 1049 #if 0 1050 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1051 #endif 1052 1053 /* 1054 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1055 * 1056 * Change filesystem quotas. 
1057 * 1058 * MPALMOSTSAFE 1059 */ 1060 int 1061 sys_quotactl(struct quotactl_args *uap) 1062 { 1063 struct nlookupdata nd; 1064 struct thread *td; 1065 struct mount *mp; 1066 int error; 1067 1068 td = curthread; 1069 if (td->td_ucred->cr_prison && !prison_quotas) { 1070 error = EPERM; 1071 goto done; 1072 } 1073 1074 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1075 if (error == 0) 1076 error = nlookup(&nd); 1077 if (error == 0) { 1078 mp = nd.nl_nch.mount; 1079 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 1080 uap->arg, nd.nl_cred); 1081 } 1082 nlookup_done(&nd); 1083 done: 1084 return (error); 1085 } 1086 1087 /* 1088 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1089 * void *buf, int buflen) 1090 * 1091 * This function operates on a mount point and executes the specified 1092 * operation using the specified control data, and possibly returns data. 1093 * 1094 * The actual number of bytes stored in the result buffer is returned, 0 1095 * if none, otherwise an error is returned. 1096 * 1097 * MPALMOSTSAFE 1098 */ 1099 int 1100 sys_mountctl(struct mountctl_args *uap) 1101 { 1102 struct thread *td = curthread; 1103 struct file *fp; 1104 void *ctl = NULL; 1105 void *buf = NULL; 1106 char *path = NULL; 1107 int error; 1108 1109 /* 1110 * Sanity and permissions checks. We must be root. 
1111 */ 1112 if (td->td_ucred->cr_prison != NULL) 1113 return (EPERM); 1114 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1115 (error = priv_check(td, PRIV_ROOT)) != 0) 1116 return (error); 1117 1118 /* 1119 * Argument length checks 1120 */ 1121 if (uap->ctllen < 0 || uap->ctllen > 1024) 1122 return (EINVAL); 1123 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1124 return (EINVAL); 1125 if (uap->path == NULL) 1126 return (EINVAL); 1127 1128 /* 1129 * Allocate the necessary buffers and copyin data 1130 */ 1131 path = objcache_get(namei_oc, M_WAITOK); 1132 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1133 if (error) 1134 goto done; 1135 1136 if (uap->ctllen) { 1137 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1138 error = copyin(uap->ctl, ctl, uap->ctllen); 1139 if (error) 1140 goto done; 1141 } 1142 if (uap->buflen) 1143 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1144 1145 /* 1146 * Validate the descriptor 1147 */ 1148 if (uap->fd >= 0) { 1149 fp = holdfp(td, uap->fd, -1); 1150 if (fp == NULL) { 1151 error = EBADF; 1152 goto done; 1153 } 1154 } else { 1155 fp = NULL; 1156 } 1157 1158 /* 1159 * Execute the internal kernel function and clean up. 1160 */ 1161 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 1162 buf, uap->buflen, &uap->sysmsg_result); 1163 if (fp) 1164 dropfp(td, uap->fd, fp); 1165 if (error == 0 && uap->sysmsg_result > 0) 1166 error = copyout(buf, uap->buf, uap->sysmsg_result); 1167 done: 1168 if (path) 1169 objcache_put(namei_oc, path); 1170 if (ctl) 1171 kfree(ctl, M_TEMP); 1172 if (buf) 1173 kfree(buf, M_TEMP); 1174 return (error); 1175 } 1176 1177 /* 1178 * Execute a mount control operation by resolving the path to a mount point 1179 * and calling vop_mountctl(). 1180 * 1181 * Use the mount point from the nch instead of the vnode so nullfs mounts 1182 * can properly spike the VOP. 
1183 */ 1184 int 1185 kern_mountctl(const char *path, int op, struct file *fp, 1186 const void *ctl, int ctllen, 1187 void *buf, int buflen, int *res) 1188 { 1189 struct vnode *vp; 1190 struct nlookupdata nd; 1191 struct nchandle nch; 1192 struct mount *mp; 1193 int error; 1194 1195 *res = 0; 1196 vp = NULL; 1197 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1198 if (error) 1199 return (error); 1200 error = nlookup(&nd); 1201 if (error) { 1202 nlookup_done(&nd); 1203 return (error); 1204 } 1205 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1206 if (error) { 1207 nlookup_done(&nd); 1208 return (error); 1209 } 1210 1211 /* 1212 * Yes, all this is needed to use the nch.mount below, because 1213 * we must maintain a ref on the mount to avoid ripouts (e.g. 1214 * due to heavy mount/unmount use by synth or poudriere). 1215 */ 1216 nch = nd.nl_nch; 1217 cache_zero(&nd.nl_nch); 1218 cache_unlock(&nch); 1219 nlookup_done(&nd); 1220 vn_unlock(vp); 1221 1222 mp = nch.mount; 1223 1224 /* 1225 * Must be the root of the filesystem 1226 */ 1227 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1228 cache_drop(&nch); 1229 vrele(vp); 1230 return (EINVAL); 1231 } 1232 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1233 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1234 path); 1235 cache_drop(&nch); 1236 vrele(vp); 1237 return (EINVAL); 1238 } 1239 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1240 buf, buflen, res); 1241 vrele(vp); 1242 cache_drop(&nch); 1243 1244 return (error); 1245 } 1246 1247 int 1248 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1249 { 1250 struct thread *td = curthread; 1251 struct proc *p = td->td_proc; 1252 struct mount *mp; 1253 struct statfs *sp; 1254 char *fullpath, *freepath; 1255 int error; 1256 1257 if ((error = nlookup(nd)) != 0) 1258 return (error); 1259 mp = nd->nl_nch.mount; 1260 sp = &mp->mnt_stat; 1261 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0) 1262 return (error); 
1263 1264 error = mount_path(p, mp, &fullpath, &freepath); 1265 if (error) 1266 return(error); 1267 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1268 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1269 kfree(freepath, M_TEMP); 1270 1271 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1272 bcopy(sp, buf, sizeof(*buf)); 1273 /* Only root should have access to the fsid's. */ 1274 if (priv_check(td, PRIV_ROOT)) 1275 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1276 return (0); 1277 } 1278 1279 /* 1280 * statfs_args(char *path, struct statfs *buf) 1281 * 1282 * Get filesystem statistics. 1283 */ 1284 int 1285 sys_statfs(struct statfs_args *uap) 1286 { 1287 struct nlookupdata nd; 1288 struct statfs buf; 1289 int error; 1290 1291 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1292 if (error == 0) 1293 error = kern_statfs(&nd, &buf); 1294 nlookup_done(&nd); 1295 if (error == 0) 1296 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1297 return (error); 1298 } 1299 1300 int 1301 kern_fstatfs(int fd, struct statfs *buf) 1302 { 1303 struct thread *td = curthread; 1304 struct proc *p = td->td_proc; 1305 struct file *fp; 1306 struct mount *mp; 1307 struct statfs *sp; 1308 char *fullpath, *freepath; 1309 int error; 1310 1311 KKASSERT(p); 1312 if ((error = holdvnode(td, fd, &fp)) != 0) 1313 return (error); 1314 1315 /* 1316 * Try to use mount info from any overlays rather than the 1317 * mount info for the underlying vnode, otherwise we will 1318 * fail when operating on null-mounted paths inside a chroot. 
1319 */ 1320 if ((mp = fp->f_nchandle.mount) == NULL) 1321 mp = ((struct vnode *)fp->f_data)->v_mount; 1322 if (mp == NULL) { 1323 error = EBADF; 1324 goto done; 1325 } 1326 if (fp->f_cred == NULL) { 1327 error = EINVAL; 1328 goto done; 1329 } 1330 sp = &mp->mnt_stat; 1331 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1332 goto done; 1333 1334 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1335 goto done; 1336 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1337 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1338 kfree(freepath, M_TEMP); 1339 1340 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1341 bcopy(sp, buf, sizeof(*buf)); 1342 1343 /* Only root should have access to the fsid's. */ 1344 if (priv_check(td, PRIV_ROOT)) 1345 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1346 error = 0; 1347 done: 1348 fdrop(fp); 1349 return (error); 1350 } 1351 1352 /* 1353 * fstatfs_args(int fd, struct statfs *buf) 1354 * 1355 * Get filesystem statistics. 1356 */ 1357 int 1358 sys_fstatfs(struct fstatfs_args *uap) 1359 { 1360 struct statfs buf; 1361 int error; 1362 1363 error = kern_fstatfs(uap->fd, &buf); 1364 1365 if (error == 0) 1366 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1367 return (error); 1368 } 1369 1370 int 1371 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1372 { 1373 struct mount *mp; 1374 struct statvfs *sp; 1375 int error; 1376 1377 if ((error = nlookup(nd)) != 0) 1378 return (error); 1379 mp = nd->nl_nch.mount; 1380 sp = &mp->mnt_vstat; 1381 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1382 return (error); 1383 1384 sp->f_flag = 0; 1385 if (mp->mnt_flag & MNT_RDONLY) 1386 sp->f_flag |= ST_RDONLY; 1387 if (mp->mnt_flag & MNT_NOSUID) 1388 sp->f_flag |= ST_NOSUID; 1389 bcopy(sp, buf, sizeof(*buf)); 1390 return (0); 1391 } 1392 1393 /* 1394 * statfs_args(char *path, struct statfs *buf) 1395 * 1396 * Get filesystem statistics. 
1397 */ 1398 int 1399 sys_statvfs(struct statvfs_args *uap) 1400 { 1401 struct nlookupdata nd; 1402 struct statvfs buf; 1403 int error; 1404 1405 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1406 if (error == 0) 1407 error = kern_statvfs(&nd, &buf); 1408 nlookup_done(&nd); 1409 if (error == 0) 1410 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1411 return (error); 1412 } 1413 1414 int 1415 kern_fstatvfs(int fd, struct statvfs *buf) 1416 { 1417 struct thread *td = curthread; 1418 struct file *fp; 1419 struct mount *mp; 1420 struct statvfs *sp; 1421 int error; 1422 1423 if ((error = holdvnode(td, fd, &fp)) != 0) 1424 return (error); 1425 if ((mp = fp->f_nchandle.mount) == NULL) 1426 mp = ((struct vnode *)fp->f_data)->v_mount; 1427 if (mp == NULL) { 1428 error = EBADF; 1429 goto done; 1430 } 1431 if (fp->f_cred == NULL) { 1432 error = EINVAL; 1433 goto done; 1434 } 1435 sp = &mp->mnt_vstat; 1436 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1437 goto done; 1438 1439 sp->f_flag = 0; 1440 if (mp->mnt_flag & MNT_RDONLY) 1441 sp->f_flag |= ST_RDONLY; 1442 if (mp->mnt_flag & MNT_NOSUID) 1443 sp->f_flag |= ST_NOSUID; 1444 1445 bcopy(sp, buf, sizeof(*buf)); 1446 error = 0; 1447 done: 1448 fdrop(fp); 1449 return (error); 1450 } 1451 1452 /* 1453 * fstatfs_args(int fd, struct statfs *buf) 1454 * 1455 * Get filesystem statistics. 1456 */ 1457 int 1458 sys_fstatvfs(struct fstatvfs_args *uap) 1459 { 1460 struct statvfs buf; 1461 int error; 1462 1463 error = kern_fstatvfs(uap->fd, &buf); 1464 1465 if (error == 0) 1466 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1467 return (error); 1468 } 1469 1470 /* 1471 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1472 * 1473 * Get statistics on all filesystems. 
1474 */ 1475 1476 struct getfsstat_info { 1477 struct statfs *sfsp; 1478 long count; 1479 long maxcount; 1480 int error; 1481 int flags; 1482 struct thread *td; 1483 }; 1484 1485 static int getfsstat_callback(struct mount *, void *); 1486 1487 int 1488 sys_getfsstat(struct getfsstat_args *uap) 1489 { 1490 struct thread *td = curthread; 1491 struct getfsstat_info info; 1492 1493 bzero(&info, sizeof(info)); 1494 1495 info.maxcount = uap->bufsize / sizeof(struct statfs); 1496 info.sfsp = uap->buf; 1497 info.count = 0; 1498 info.flags = uap->flags; 1499 info.td = td; 1500 1501 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1502 if (info.sfsp && info.count > info.maxcount) 1503 uap->sysmsg_result = info.maxcount; 1504 else 1505 uap->sysmsg_result = info.count; 1506 return (info.error); 1507 } 1508 1509 static int 1510 getfsstat_callback(struct mount *mp, void *data) 1511 { 1512 struct getfsstat_info *info = data; 1513 struct statfs *sp; 1514 char *freepath; 1515 char *fullpath; 1516 int error; 1517 1518 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1519 return(0); 1520 1521 if (info->sfsp && info->count < info->maxcount) { 1522 sp = &mp->mnt_stat; 1523 1524 /* 1525 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1526 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1527 * overrides MNT_WAIT. 
1528 */ 1529 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1530 (info->flags & MNT_WAIT)) && 1531 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1532 return(0); 1533 } 1534 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1535 1536 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1537 if (error) { 1538 info->error = error; 1539 return(-1); 1540 } 1541 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1542 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1543 kfree(freepath, M_TEMP); 1544 1545 error = copyout(sp, info->sfsp, sizeof(*sp)); 1546 if (error) { 1547 info->error = error; 1548 return (-1); 1549 } 1550 ++info->sfsp; 1551 } 1552 info->count++; 1553 return(0); 1554 } 1555 1556 /* 1557 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1558 long bufsize, int flags) 1559 * 1560 * Get statistics on all filesystems. 1561 */ 1562 1563 struct getvfsstat_info { 1564 struct statfs *sfsp; 1565 struct statvfs *vsfsp; 1566 long count; 1567 long maxcount; 1568 int error; 1569 int flags; 1570 struct thread *td; 1571 }; 1572 1573 static int getvfsstat_callback(struct mount *, void *); 1574 1575 int 1576 sys_getvfsstat(struct getvfsstat_args *uap) 1577 { 1578 struct thread *td = curthread; 1579 struct getvfsstat_info info; 1580 1581 bzero(&info, sizeof(info)); 1582 1583 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1584 info.sfsp = uap->buf; 1585 info.vsfsp = uap->vbuf; 1586 info.count = 0; 1587 info.flags = uap->flags; 1588 info.td = td; 1589 1590 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1591 if (info.vsfsp && info.count > info.maxcount) 1592 uap->sysmsg_result = info.maxcount; 1593 else 1594 uap->sysmsg_result = info.count; 1595 return (info.error); 1596 } 1597 1598 static int 1599 getvfsstat_callback(struct mount *mp, void *data) 1600 { 1601 struct getvfsstat_info *info = data; 1602 struct statfs *sp; 1603 struct statvfs *vsp; 1604 char *freepath; 1605 char *fullpath; 1606 int error; 1607 1608 if 
(info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1609 return(0); 1610 1611 if (info->vsfsp && info->count < info->maxcount) { 1612 sp = &mp->mnt_stat; 1613 vsp = &mp->mnt_vstat; 1614 1615 /* 1616 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1617 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1618 * overrides MNT_WAIT. 1619 */ 1620 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1621 (info->flags & MNT_WAIT)) && 1622 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1623 return(0); 1624 } 1625 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1626 1627 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1628 (info->flags & MNT_WAIT)) && 1629 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1630 return(0); 1631 } 1632 vsp->f_flag = 0; 1633 if (mp->mnt_flag & MNT_RDONLY) 1634 vsp->f_flag |= ST_RDONLY; 1635 if (mp->mnt_flag & MNT_NOSUID) 1636 vsp->f_flag |= ST_NOSUID; 1637 1638 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1639 if (error) { 1640 info->error = error; 1641 return(-1); 1642 } 1643 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1644 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1645 kfree(freepath, M_TEMP); 1646 1647 error = copyout(sp, info->sfsp, sizeof(*sp)); 1648 if (error == 0) 1649 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1650 if (error) { 1651 info->error = error; 1652 return (-1); 1653 } 1654 ++info->sfsp; 1655 ++info->vsfsp; 1656 } 1657 info->count++; 1658 return(0); 1659 } 1660 1661 1662 /* 1663 * fchdir_args(int fd) 1664 * 1665 * Change current working directory to a given file descriptor. 
1666 */ 1667 int 1668 sys_fchdir(struct fchdir_args *uap) 1669 { 1670 struct thread *td = curthread; 1671 struct proc *p = td->td_proc; 1672 struct filedesc *fdp = p->p_fd; 1673 struct vnode *vp, *ovp; 1674 struct mount *mp; 1675 struct file *fp; 1676 struct nchandle nch, onch, tnch; 1677 int error; 1678 1679 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1680 return (error); 1681 lwkt_gettoken(&p->p_token); 1682 vp = (struct vnode *)fp->f_data; 1683 vref(vp); 1684 vn_lock(vp, LK_SHARED | LK_RETRY); 1685 if (fp->f_nchandle.ncp == NULL) 1686 error = ENOTDIR; 1687 else 1688 error = checkvp_chdir(vp, td); 1689 if (error) { 1690 vput(vp); 1691 goto done; 1692 } 1693 cache_copy(&fp->f_nchandle, &nch); 1694 1695 /* 1696 * If the ncp has become a mount point, traverse through 1697 * the mount point. 1698 */ 1699 1700 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1701 (mp = cache_findmount(&nch)) != NULL 1702 ) { 1703 error = nlookup_mp(mp, &tnch); 1704 if (error == 0) { 1705 cache_unlock(&tnch); /* leave ref intact */ 1706 vput(vp); 1707 vp = tnch.ncp->nc_vp; 1708 error = vget(vp, LK_SHARED); 1709 KKASSERT(error == 0); 1710 cache_drop(&nch); 1711 nch = tnch; 1712 } 1713 cache_dropmount(mp); 1714 } 1715 if (error == 0) { 1716 spin_lock(&fdp->fd_spin); 1717 ovp = fdp->fd_cdir; 1718 onch = fdp->fd_ncdir; 1719 fdp->fd_cdir = vp; 1720 fdp->fd_ncdir = nch; 1721 spin_unlock(&fdp->fd_spin); 1722 vn_unlock(vp); /* leave ref intact */ 1723 cache_drop(&onch); 1724 vrele(ovp); 1725 } else { 1726 cache_drop(&nch); 1727 vput(vp); 1728 } 1729 fdrop(fp); 1730 done: 1731 lwkt_reltoken(&p->p_token); 1732 return (error); 1733 } 1734 1735 int 1736 kern_chdir(struct nlookupdata *nd) 1737 { 1738 struct thread *td = curthread; 1739 struct proc *p = td->td_proc; 1740 struct filedesc *fdp = p->p_fd; 1741 struct vnode *vp, *ovp; 1742 struct nchandle onch; 1743 int error; 1744 1745 nd->nl_flags |= NLC_SHAREDLOCK; 1746 if ((error = nlookup(nd)) != 0) 1747 return (error); 1748 if ((vp = 
nd->nl_nch.ncp->nc_vp) == NULL) 1749 return (ENOENT); 1750 if ((error = vget(vp, LK_SHARED)) != 0) 1751 return (error); 1752 1753 lwkt_gettoken(&p->p_token); 1754 error = checkvp_chdir(vp, td); 1755 vn_unlock(vp); 1756 if (error == 0) { 1757 spin_lock(&fdp->fd_spin); 1758 ovp = fdp->fd_cdir; 1759 onch = fdp->fd_ncdir; 1760 fdp->fd_ncdir = nd->nl_nch; 1761 fdp->fd_cdir = vp; 1762 spin_unlock(&fdp->fd_spin); 1763 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1764 cache_drop(&onch); 1765 vrele(ovp); 1766 cache_zero(&nd->nl_nch); 1767 } else { 1768 vrele(vp); 1769 } 1770 lwkt_reltoken(&p->p_token); 1771 return (error); 1772 } 1773 1774 /* 1775 * chdir_args(char *path) 1776 * 1777 * Change current working directory (``.''). 1778 */ 1779 int 1780 sys_chdir(struct chdir_args *uap) 1781 { 1782 struct nlookupdata nd; 1783 int error; 1784 1785 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1786 if (error == 0) 1787 error = kern_chdir(&nd); 1788 nlookup_done(&nd); 1789 return (error); 1790 } 1791 1792 /* 1793 * Helper function for raised chroot(2) security function: Refuse if 1794 * any filedescriptors are open directories. 1795 */ 1796 static int 1797 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1798 { 1799 struct vnode *vp; 1800 struct file *fp; 1801 int error; 1802 int fd; 1803 1804 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1805 if ((error = holdvnode(td, fd, &fp)) != 0) 1806 continue; 1807 vp = (struct vnode *)fp->f_data; 1808 if (vp->v_type != VDIR) { 1809 fdrop(fp); 1810 continue; 1811 } 1812 fdrop(fp); 1813 return(EPERM); 1814 } 1815 return (0); 1816 } 1817 1818 /* 1819 * This sysctl determines if we will allow a process to chroot(2) if it 1820 * has a directory open: 1821 * 0: disallowed for all processes. 1822 * 1: allowed for processes that were not already chroot(2)'ed. 1823 * 2: allowed for all processes. 
1824 */ 1825 1826 static int chroot_allow_open_directories = 1; 1827 1828 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1829 &chroot_allow_open_directories, 0, ""); 1830 1831 /* 1832 * chroot to the specified namecache entry. We obtain the vp from the 1833 * namecache data. The passed ncp must be locked and referenced and will 1834 * remain locked and referenced on return. 1835 */ 1836 int 1837 kern_chroot(struct nchandle *nch) 1838 { 1839 struct thread *td = curthread; 1840 struct proc *p = td->td_proc; 1841 struct filedesc *fdp = p->p_fd; 1842 struct vnode *vp; 1843 int error; 1844 1845 /* 1846 * Only privileged user can chroot 1847 */ 1848 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1849 if (error) 1850 return (error); 1851 1852 /* 1853 * Disallow open directory descriptors (fchdir() breakouts). 1854 */ 1855 if (chroot_allow_open_directories == 0 || 1856 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1857 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 1858 return (error); 1859 } 1860 if ((vp = nch->ncp->nc_vp) == NULL) 1861 return (ENOENT); 1862 1863 if ((error = vget(vp, LK_SHARED)) != 0) 1864 return (error); 1865 1866 /* 1867 * Check the validity of vp as a directory to change to and 1868 * associate it with rdir/jdir. 
1869 */ 1870 error = checkvp_chdir(vp, td); 1871 vn_unlock(vp); /* leave reference intact */ 1872 if (error == 0) { 1873 lwkt_gettoken(&p->p_token); 1874 vrele(fdp->fd_rdir); 1875 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1876 cache_drop(&fdp->fd_nrdir); 1877 cache_copy(nch, &fdp->fd_nrdir); 1878 if (fdp->fd_jdir == NULL) { 1879 fdp->fd_jdir = vp; 1880 vref(fdp->fd_jdir); 1881 cache_copy(nch, &fdp->fd_njdir); 1882 } 1883 if ((p->p_flags & P_DIDCHROOT) == 0) { 1884 p->p_flags |= P_DIDCHROOT; 1885 if (p->p_depth <= 65535 - 32) 1886 p->p_depth += 32; 1887 } 1888 lwkt_reltoken(&p->p_token); 1889 } else { 1890 vrele(vp); 1891 } 1892 return (error); 1893 } 1894 1895 /* 1896 * chroot_args(char *path) 1897 * 1898 * Change notion of root (``/'') directory. 1899 */ 1900 int 1901 sys_chroot(struct chroot_args *uap) 1902 { 1903 struct thread *td __debugvar = curthread; 1904 struct nlookupdata nd; 1905 int error; 1906 1907 KKASSERT(td->td_proc); 1908 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1909 if (error == 0) { 1910 nd.nl_flags |= NLC_EXEC; 1911 error = nlookup(&nd); 1912 if (error == 0) 1913 error = kern_chroot(&nd.nl_nch); 1914 } 1915 nlookup_done(&nd); 1916 return(error); 1917 } 1918 1919 int 1920 sys_chroot_kernel(struct chroot_kernel_args *uap) 1921 { 1922 struct thread *td = curthread; 1923 struct nlookupdata nd; 1924 struct nchandle *nch; 1925 struct vnode *vp; 1926 int error; 1927 1928 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1929 if (error) 1930 goto error_nond; 1931 1932 error = nlookup(&nd); 1933 if (error) 1934 goto error_out; 1935 1936 nch = &nd.nl_nch; 1937 1938 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1939 if (error) 1940 goto error_out; 1941 1942 if ((vp = nch->ncp->nc_vp) == NULL) { 1943 error = ENOENT; 1944 goto error_out; 1945 } 1946 1947 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1948 goto error_out; 1949 1950 kprintf("chroot_kernel: set new rootnch/rootvnode to 
%s\n", uap->path); 1951 vfs_cache_setroot(vp, cache_hold(nch)); 1952 1953 error_out: 1954 nlookup_done(&nd); 1955 error_nond: 1956 return(error); 1957 } 1958 1959 /* 1960 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1961 * determine whether it is legal to chdir to the vnode. The vnode's state 1962 * is not changed by this call. 1963 */ 1964 static int 1965 checkvp_chdir(struct vnode *vp, struct thread *td) 1966 { 1967 int error; 1968 1969 if (vp->v_type != VDIR) 1970 error = ENOTDIR; 1971 else 1972 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 1973 return (error); 1974 } 1975 1976 int 1977 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1978 { 1979 struct thread *td = curthread; 1980 struct proc *p = td->td_proc; 1981 struct lwp *lp = td->td_lwp; 1982 struct filedesc *fdp = p->p_fd; 1983 int cmode, flags; 1984 struct file *nfp; 1985 struct file *fp; 1986 struct vnode *vp; 1987 int type, indx, error = 0; 1988 struct flock lf; 1989 1990 if ((oflags & O_ACCMODE) == O_ACCMODE) 1991 return (EINVAL); 1992 flags = FFLAGS(oflags); 1993 error = falloc(lp, &nfp, NULL); 1994 if (error) 1995 return (error); 1996 fp = nfp; 1997 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1998 1999 /* 2000 * XXX p_dupfd is a real mess. It allows a device to return a 2001 * file descriptor to be duplicated rather then doing the open 2002 * itself. 2003 */ 2004 lp->lwp_dupfd = -1; 2005 2006 /* 2007 * Call vn_open() to do the lookup and assign the vnode to the 2008 * file pointer. vn_open() does not change the ref count on fp 2009 * and the vnode, on success, will be inherited by the file pointer 2010 * and unlocked. 2011 * 2012 * Request a shared lock on the vnode if possible. 2013 * 2014 * Executable binaries can race VTEXT against O_RDWR opens, so 2015 * use an exclusive lock for O_RDWR opens as well. 2016 * 2017 * NOTE: We need a flag to separate terminal vnode locking from 2018 * parent locking. 
O_CREAT needs parent locking, but O_TRUNC 2019 * and O_RDWR only need to lock the terminal vnode exclusively. 2020 */ 2021 nd->nl_flags |= NLC_LOCKVP; 2022 if ((flags & (O_CREAT|O_TRUNC|O_RDWR)) == 0) 2023 nd->nl_flags |= NLC_SHAREDLOCK; 2024 2025 error = vn_open(nd, fp, flags, cmode); 2026 nlookup_done(nd); 2027 2028 if (error) { 2029 /* 2030 * handle special fdopen() case. bleh. dupfdopen() is 2031 * responsible for dropping the old contents of ofiles[indx] 2032 * if it succeeds. 2033 * 2034 * Note that fsetfd() will add a ref to fp which represents 2035 * the fd_files[] assignment. We must still drop our 2036 * reference. 2037 */ 2038 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 2039 if (fdalloc(p, 0, &indx) == 0) { 2040 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error); 2041 if (error == 0) { 2042 *res = indx; 2043 fdrop(fp); /* our ref */ 2044 return (0); 2045 } 2046 fsetfd(fdp, NULL, indx); 2047 } 2048 } 2049 fdrop(fp); /* our ref */ 2050 if (error == ERESTART) 2051 error = EINTR; 2052 return (error); 2053 } 2054 2055 /* 2056 * ref the vnode for ourselves so it can't be ripped out from under 2057 * is. XXX need an ND flag to request that the vnode be returned 2058 * anyway. 2059 * 2060 * Reserve a file descriptor but do not assign it until the open 2061 * succeeds. 2062 */ 2063 vp = (struct vnode *)fp->f_data; 2064 vref(vp); 2065 if ((error = fdalloc(p, 0, &indx)) != 0) { 2066 fdrop(fp); 2067 vrele(vp); 2068 return (error); 2069 } 2070 2071 /* 2072 * If no error occurs the vp will have been assigned to the file 2073 * pointer. 
2074 */ 2075 lp->lwp_dupfd = 0; 2076 2077 if (flags & (O_EXLOCK | O_SHLOCK)) { 2078 lf.l_whence = SEEK_SET; 2079 lf.l_start = 0; 2080 lf.l_len = 0; 2081 if (flags & O_EXLOCK) 2082 lf.l_type = F_WRLCK; 2083 else 2084 lf.l_type = F_RDLCK; 2085 if (flags & FNONBLOCK) 2086 type = 0; 2087 else 2088 type = F_WAIT; 2089 2090 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2091 /* 2092 * lock request failed. Clean up the reserved 2093 * descriptor. 2094 */ 2095 vrele(vp); 2096 fsetfd(fdp, NULL, indx); 2097 fdrop(fp); 2098 return (error); 2099 } 2100 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2101 } 2102 #if 0 2103 /* 2104 * Assert that all regular file vnodes were created with a object. 2105 */ 2106 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2107 ("open: regular file has no backing object after vn_open")); 2108 #endif 2109 2110 vrele(vp); 2111 2112 /* 2113 * release our private reference, leaving the one associated with the 2114 * descriptor table intact. 2115 */ 2116 if (oflags & O_CLOEXEC) 2117 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2118 fsetfd(fdp, fp, indx); 2119 fdrop(fp); 2120 *res = indx; 2121 2122 return (error); 2123 } 2124 2125 /* 2126 * open_args(char *path, int flags, int mode) 2127 * 2128 * Check permissions, allocate an open file structure, 2129 * and call the device open routine if any. 
2130 */ 2131 int 2132 sys_open(struct open_args *uap) 2133 { 2134 struct nlookupdata nd; 2135 int error; 2136 2137 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2138 if (error == 0) { 2139 error = kern_open(&nd, uap->flags, 2140 uap->mode, &uap->sysmsg_result); 2141 } 2142 nlookup_done(&nd); 2143 return (error); 2144 } 2145 2146 /* 2147 * openat_args(int fd, char *path, int flags, int mode) 2148 */ 2149 int 2150 sys_openat(struct openat_args *uap) 2151 { 2152 struct nlookupdata nd; 2153 int error; 2154 struct file *fp; 2155 2156 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2157 if (error == 0) { 2158 error = kern_open(&nd, uap->flags, uap->mode, 2159 &uap->sysmsg_result); 2160 } 2161 nlookup_done_at(&nd, fp); 2162 return (error); 2163 } 2164 2165 int 2166 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2167 { 2168 struct thread *td = curthread; 2169 struct proc *p = td->td_proc; 2170 struct vnode *vp; 2171 struct vattr vattr; 2172 int error; 2173 int whiteout = 0; 2174 2175 KKASSERT(p); 2176 2177 VATTR_NULL(&vattr); 2178 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2179 vattr.va_rmajor = rmajor; 2180 vattr.va_rminor = rminor; 2181 2182 switch (mode & S_IFMT) { 2183 case S_IFMT: /* used by badsect to flag bad sectors */ 2184 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2185 vattr.va_type = VBAD; 2186 break; 2187 case S_IFCHR: 2188 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2189 vattr.va_type = VCHR; 2190 break; 2191 case S_IFBLK: 2192 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2193 vattr.va_type = VBLK; 2194 break; 2195 case S_IFWHT: 2196 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2197 whiteout = 1; 2198 break; 2199 case S_IFDIR: /* special directories support for HAMMER */ 2200 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2201 vattr.va_type = VDIR; 2202 break; 2203 default: 2204 error = EINVAL; 2205 break; 2206 } 2207 2208 if (error) 2209 
return (error); 2210 2211 bwillinode(1); 2212 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2213 if ((error = nlookup(nd)) != 0) 2214 return (error); 2215 if (nd->nl_nch.ncp->nc_vp) 2216 return (EEXIST); 2217 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2218 return (error); 2219 2220 if (whiteout) { 2221 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2222 nd->nl_cred, NAMEI_CREATE); 2223 } else { 2224 vp = NULL; 2225 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2226 &vp, nd->nl_cred, &vattr); 2227 if (error == 0) 2228 vput(vp); 2229 } 2230 return (error); 2231 } 2232 2233 /* 2234 * mknod_args(char *path, int mode, int dev) 2235 * 2236 * Create a special file. 2237 */ 2238 int 2239 sys_mknod(struct mknod_args *uap) 2240 { 2241 struct nlookupdata nd; 2242 int error; 2243 2244 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2245 if (error == 0) { 2246 error = kern_mknod(&nd, uap->mode, 2247 umajor(uap->dev), uminor(uap->dev)); 2248 } 2249 nlookup_done(&nd); 2250 return (error); 2251 } 2252 2253 /* 2254 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2255 * 2256 * Create a special file. The path is relative to the directory associated 2257 * with fd. 
2258 */ 2259 int 2260 sys_mknodat(struct mknodat_args *uap) 2261 { 2262 struct nlookupdata nd; 2263 struct file *fp; 2264 int error; 2265 2266 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2267 if (error == 0) { 2268 error = kern_mknod(&nd, uap->mode, 2269 umajor(uap->dev), uminor(uap->dev)); 2270 } 2271 nlookup_done_at(&nd, fp); 2272 return (error); 2273 } 2274 2275 int 2276 kern_mkfifo(struct nlookupdata *nd, int mode) 2277 { 2278 struct thread *td = curthread; 2279 struct proc *p = td->td_proc; 2280 struct vattr vattr; 2281 struct vnode *vp; 2282 int error; 2283 2284 bwillinode(1); 2285 2286 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2287 if ((error = nlookup(nd)) != 0) 2288 return (error); 2289 if (nd->nl_nch.ncp->nc_vp) 2290 return (EEXIST); 2291 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2292 return (error); 2293 2294 VATTR_NULL(&vattr); 2295 vattr.va_type = VFIFO; 2296 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2297 vp = NULL; 2298 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2299 if (error == 0) 2300 vput(vp); 2301 return (error); 2302 } 2303 2304 /* 2305 * mkfifo_args(char *path, int mode) 2306 * 2307 * Create a named pipe. 2308 */ 2309 int 2310 sys_mkfifo(struct mkfifo_args *uap) 2311 { 2312 struct nlookupdata nd; 2313 int error; 2314 2315 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2316 if (error == 0) 2317 error = kern_mkfifo(&nd, uap->mode); 2318 nlookup_done(&nd); 2319 return (error); 2320 } 2321 2322 /* 2323 * mkfifoat_args(int fd, char *path, mode_t mode) 2324 * 2325 * Create a named pipe. The path is relative to the directory associated 2326 * with fd. 
2327 */ 2328 int 2329 sys_mkfifoat(struct mkfifoat_args *uap) 2330 { 2331 struct nlookupdata nd; 2332 struct file *fp; 2333 int error; 2334 2335 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2336 if (error == 0) 2337 error = kern_mkfifo(&nd, uap->mode); 2338 nlookup_done_at(&nd, fp); 2339 return (error); 2340 } 2341 2342 static int hardlink_check_uid = 0; 2343 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2344 &hardlink_check_uid, 0, 2345 "Unprivileged processes cannot create hard links to files owned by other " 2346 "users"); 2347 static int hardlink_check_gid = 0; 2348 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2349 &hardlink_check_gid, 0, 2350 "Unprivileged processes cannot create hard links to files owned by other " 2351 "groups"); 2352 2353 static int 2354 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2355 { 2356 struct vattr va; 2357 int error; 2358 2359 /* 2360 * Shortcut if disabled 2361 */ 2362 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2363 return (0); 2364 2365 /* 2366 * Privileged user can always hardlink 2367 */ 2368 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2369 return (0); 2370 2371 /* 2372 * Otherwise only if the originating file is owned by the 2373 * same user or group. Note that any group is allowed if 2374 * the file is owned by the caller. 2375 */ 2376 error = VOP_GETATTR(vp, &va); 2377 if (error != 0) 2378 return (error); 2379 2380 if (hardlink_check_uid) { 2381 if (cred->cr_uid != va.va_uid) 2382 return (EPERM); 2383 } 2384 2385 if (hardlink_check_gid) { 2386 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2387 return (EPERM); 2388 } 2389 2390 return (0); 2391 } 2392 2393 int 2394 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2395 { 2396 struct thread *td = curthread; 2397 struct vnode *vp; 2398 int error; 2399 2400 /* 2401 * Lookup the source and obtained a locked vnode. 
2402 * 2403 * You may only hardlink a file which you have write permission 2404 * on or which you own. 2405 * 2406 * XXX relookup on vget failure / race ? 2407 */ 2408 bwillinode(1); 2409 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2410 if ((error = nlookup(nd)) != 0) 2411 return (error); 2412 vp = nd->nl_nch.ncp->nc_vp; 2413 KKASSERT(vp != NULL); 2414 if (vp->v_type == VDIR) 2415 return (EPERM); /* POSIX */ 2416 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2417 return (error); 2418 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2419 return (error); 2420 2421 /* 2422 * Unlock the source so we can lookup the target without deadlocking 2423 * (XXX vp is locked already, possible other deadlock?). The target 2424 * must not exist. 2425 */ 2426 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2427 nd->nl_flags &= ~NLC_NCPISLOCKED; 2428 cache_unlock(&nd->nl_nch); 2429 vn_unlock(vp); 2430 2431 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2432 if ((error = nlookup(linknd)) != 0) { 2433 vrele(vp); 2434 return (error); 2435 } 2436 if (linknd->nl_nch.ncp->nc_vp) { 2437 vrele(vp); 2438 return (EEXIST); 2439 } 2440 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2441 if (error) { 2442 vrele(vp); 2443 return (error); 2444 } 2445 2446 /* 2447 * Finally run the new API VOP. 2448 */ 2449 error = can_hardlink(vp, td, td->td_ucred); 2450 if (error == 0) { 2451 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2452 vp, linknd->nl_cred); 2453 } 2454 vput(vp); 2455 return (error); 2456 } 2457 2458 /* 2459 * link_args(char *path, char *link) 2460 * 2461 * Make a hard file link. 
2462 */ 2463 int 2464 sys_link(struct link_args *uap) 2465 { 2466 struct nlookupdata nd, linknd; 2467 int error; 2468 2469 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2470 if (error == 0) { 2471 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2472 if (error == 0) 2473 error = kern_link(&nd, &linknd); 2474 nlookup_done(&linknd); 2475 } 2476 nlookup_done(&nd); 2477 return (error); 2478 } 2479 2480 /* 2481 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2482 * 2483 * Make a hard file link. The path1 argument is relative to the directory 2484 * associated with fd1, and similarly the path2 argument is relative to 2485 * the directory associated with fd2. 2486 */ 2487 int 2488 sys_linkat(struct linkat_args *uap) 2489 { 2490 struct nlookupdata nd, linknd; 2491 struct file *fp1, *fp2; 2492 int error; 2493 2494 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2495 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2496 if (error == 0) { 2497 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2498 uap->path2, UIO_USERSPACE, 0); 2499 if (error == 0) 2500 error = kern_link(&nd, &linknd); 2501 nlookup_done_at(&linknd, fp2); 2502 } 2503 nlookup_done_at(&nd, fp1); 2504 return (error); 2505 } 2506 2507 int 2508 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2509 { 2510 struct vattr vattr; 2511 struct vnode *vp; 2512 struct vnode *dvp; 2513 int error; 2514 2515 bwillinode(1); 2516 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2517 if ((error = nlookup(nd)) != 0) 2518 return (error); 2519 if (nd->nl_nch.ncp->nc_vp) 2520 return (EEXIST); 2521 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2522 return (error); 2523 dvp = nd->nl_dvp; 2524 VATTR_NULL(&vattr); 2525 vattr.va_mode = mode; 2526 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2527 if (error == 0) 2528 vput(vp); 2529 return (error); 2530 } 2531 2532 /* 2533 * symlink(char *path, char *link) 2534 * 2535 * Make a symbolic 
link. 2536 */ 2537 int 2538 sys_symlink(struct symlink_args *uap) 2539 { 2540 struct thread *td = curthread; 2541 struct nlookupdata nd; 2542 char *path; 2543 int error; 2544 int mode; 2545 2546 path = objcache_get(namei_oc, M_WAITOK); 2547 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2548 if (error == 0) { 2549 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2550 if (error == 0) { 2551 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2552 error = kern_symlink(&nd, path, mode); 2553 } 2554 nlookup_done(&nd); 2555 } 2556 objcache_put(namei_oc, path); 2557 return (error); 2558 } 2559 2560 /* 2561 * symlinkat_args(char *path1, int fd, char *path2) 2562 * 2563 * Make a symbolic link. The path2 argument is relative to the directory 2564 * associated with fd. 2565 */ 2566 int 2567 sys_symlinkat(struct symlinkat_args *uap) 2568 { 2569 struct thread *td = curthread; 2570 struct nlookupdata nd; 2571 struct file *fp; 2572 char *path1; 2573 int error; 2574 int mode; 2575 2576 path1 = objcache_get(namei_oc, M_WAITOK); 2577 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2578 if (error == 0) { 2579 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2580 UIO_USERSPACE, 0); 2581 if (error == 0) { 2582 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2583 error = kern_symlink(&nd, path1, mode); 2584 } 2585 nlookup_done_at(&nd, fp); 2586 } 2587 objcache_put(namei_oc, path1); 2588 return (error); 2589 } 2590 2591 /* 2592 * undelete_args(char *path) 2593 * 2594 * Delete a whiteout from the filesystem. 
2595 */ 2596 int 2597 sys_undelete(struct undelete_args *uap) 2598 { 2599 struct nlookupdata nd; 2600 int error; 2601 2602 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2603 bwillinode(1); 2604 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2605 if (error == 0) 2606 error = nlookup(&nd); 2607 if (error == 0) 2608 error = ncp_writechk(&nd.nl_nch); 2609 if (error == 0) { 2610 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2611 NAMEI_DELETE); 2612 } 2613 nlookup_done(&nd); 2614 return (error); 2615 } 2616 2617 int 2618 kern_unlink(struct nlookupdata *nd) 2619 { 2620 int error; 2621 2622 bwillinode(1); 2623 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2624 if ((error = nlookup(nd)) != 0) 2625 return (error); 2626 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2627 return (error); 2628 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2629 return (error); 2630 } 2631 2632 /* 2633 * unlink_args(char *path) 2634 * 2635 * Delete a name from the filesystem. 2636 */ 2637 int 2638 sys_unlink(struct unlink_args *uap) 2639 { 2640 struct nlookupdata nd; 2641 int error; 2642 2643 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2644 if (error == 0) 2645 error = kern_unlink(&nd); 2646 nlookup_done(&nd); 2647 return (error); 2648 } 2649 2650 2651 /* 2652 * unlinkat_args(int fd, char *path, int flags) 2653 * 2654 * Delete the file or directory entry pointed to by fd/path. 
2655 */ 2656 int 2657 sys_unlinkat(struct unlinkat_args *uap) 2658 { 2659 struct nlookupdata nd; 2660 struct file *fp; 2661 int error; 2662 2663 if (uap->flags & ~AT_REMOVEDIR) 2664 return (EINVAL); 2665 2666 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2667 if (error == 0) { 2668 if (uap->flags & AT_REMOVEDIR) 2669 error = kern_rmdir(&nd); 2670 else 2671 error = kern_unlink(&nd); 2672 } 2673 nlookup_done_at(&nd, fp); 2674 return (error); 2675 } 2676 2677 int 2678 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2679 { 2680 struct thread *td = curthread; 2681 struct file *fp; 2682 struct vnode *vp; 2683 struct vattr vattr; 2684 off_t new_offset; 2685 int error; 2686 2687 fp = holdfp(td, fd, -1); 2688 if (fp == NULL) 2689 return (EBADF); 2690 if (fp->f_type != DTYPE_VNODE) { 2691 error = ESPIPE; 2692 goto done; 2693 } 2694 vp = (struct vnode *)fp->f_data; 2695 2696 switch (whence) { 2697 case L_INCR: 2698 spin_lock(&fp->f_spin); 2699 new_offset = fp->f_offset + offset; 2700 error = 0; 2701 break; 2702 case L_XTND: 2703 error = VOP_GETATTR(vp, &vattr); 2704 spin_lock(&fp->f_spin); 2705 new_offset = offset + vattr.va_size; 2706 break; 2707 case L_SET: 2708 new_offset = offset; 2709 error = 0; 2710 spin_lock(&fp->f_spin); 2711 break; 2712 default: 2713 new_offset = 0; 2714 error = EINVAL; 2715 spin_lock(&fp->f_spin); 2716 break; 2717 } 2718 2719 /* 2720 * Validate the seek position. Negative offsets are not allowed 2721 * for regular files or directories. 2722 * 2723 * Normally we would also not want to allow negative offsets for 2724 * character and block-special devices. However kvm addresses 2725 * on 64 bit architectures might appear to be negative and must 2726 * be allowed. 
2727 */ 2728 if (error == 0) { 2729 if (new_offset < 0 && 2730 (vp->v_type == VREG || vp->v_type == VDIR)) { 2731 error = EINVAL; 2732 } else { 2733 fp->f_offset = new_offset; 2734 } 2735 } 2736 *res = fp->f_offset; 2737 spin_unlock(&fp->f_spin); 2738 done: 2739 dropfp(td, fd, fp); 2740 2741 return (error); 2742 } 2743 2744 /* 2745 * lseek_args(int fd, int pad, off_t offset, int whence) 2746 * 2747 * Reposition read/write file offset. 2748 */ 2749 int 2750 sys_lseek(struct lseek_args *uap) 2751 { 2752 int error; 2753 2754 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2755 &uap->sysmsg_offset); 2756 2757 return (error); 2758 } 2759 2760 /* 2761 * Check if current process can access given file. amode is a bitmask of *_OK 2762 * access bits. flags is a bitmask of AT_* flags. 2763 */ 2764 int 2765 kern_access(struct nlookupdata *nd, int amode, int flags) 2766 { 2767 struct vnode *vp; 2768 int error, mode; 2769 2770 if (flags & ~AT_EACCESS) 2771 return (EINVAL); 2772 nd->nl_flags |= NLC_SHAREDLOCK; 2773 if ((error = nlookup(nd)) != 0) 2774 return (error); 2775 retry: 2776 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2777 if (error) 2778 return (error); 2779 2780 /* Flags == 0 means only check for existence. */ 2781 if (amode) { 2782 mode = 0; 2783 if (amode & R_OK) 2784 mode |= VREAD; 2785 if (amode & W_OK) 2786 mode |= VWRITE; 2787 if (amode & X_OK) 2788 mode |= VEXEC; 2789 if ((mode & VWRITE) == 0 || 2790 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2791 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2792 2793 /* 2794 * If the file handle is stale we have to re-resolve the 2795 * entry with the ncp held exclusively. This is a hack 2796 * at the moment. 
2797 */ 2798 if (error == ESTALE) { 2799 vput(vp); 2800 cache_unlock(&nd->nl_nch); 2801 cache_lock(&nd->nl_nch); 2802 cache_setunresolved(&nd->nl_nch); 2803 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2804 if (error == 0) { 2805 vp = NULL; 2806 goto retry; 2807 } 2808 return(error); 2809 } 2810 } 2811 vput(vp); 2812 return (error); 2813 } 2814 2815 /* 2816 * access_args(char *path, int flags) 2817 * 2818 * Check access permissions. 2819 */ 2820 int 2821 sys_access(struct access_args *uap) 2822 { 2823 struct nlookupdata nd; 2824 int error; 2825 2826 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2827 if (error == 0) 2828 error = kern_access(&nd, uap->flags, 0); 2829 nlookup_done(&nd); 2830 return (error); 2831 } 2832 2833 2834 /* 2835 * eaccess_args(char *path, int flags) 2836 * 2837 * Check access permissions. 2838 */ 2839 int 2840 sys_eaccess(struct eaccess_args *uap) 2841 { 2842 struct nlookupdata nd; 2843 int error; 2844 2845 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2846 if (error == 0) 2847 error = kern_access(&nd, uap->flags, AT_EACCESS); 2848 nlookup_done(&nd); 2849 return (error); 2850 } 2851 2852 2853 /* 2854 * faccessat_args(int fd, char *path, int amode, int flags) 2855 * 2856 * Check access permissions. 
2857 */ 2858 int 2859 sys_faccessat(struct faccessat_args *uap) 2860 { 2861 struct nlookupdata nd; 2862 struct file *fp; 2863 int error; 2864 2865 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2866 NLC_FOLLOW); 2867 if (error == 0) 2868 error = kern_access(&nd, uap->amode, uap->flags); 2869 nlookup_done_at(&nd, fp); 2870 return (error); 2871 } 2872 2873 int 2874 kern_stat(struct nlookupdata *nd, struct stat *st) 2875 { 2876 int error; 2877 struct vnode *vp; 2878 2879 nd->nl_flags |= NLC_SHAREDLOCK; 2880 if ((error = nlookup(nd)) != 0) 2881 return (error); 2882 again: 2883 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2884 return (ENOENT); 2885 2886 if ((error = vget(vp, LK_SHARED)) != 0) 2887 return (error); 2888 error = vn_stat(vp, st, nd->nl_cred); 2889 2890 /* 2891 * If the file handle is stale we have to re-resolve the 2892 * entry with the ncp held exclusively. This is a hack 2893 * at the moment. 2894 */ 2895 if (error == ESTALE) { 2896 vput(vp); 2897 cache_unlock(&nd->nl_nch); 2898 cache_lock(&nd->nl_nch); 2899 cache_setunresolved(&nd->nl_nch); 2900 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2901 if (error == 0) 2902 goto again; 2903 } else { 2904 vput(vp); 2905 } 2906 return (error); 2907 } 2908 2909 /* 2910 * stat_args(char *path, struct stat *ub) 2911 * 2912 * Get file status; this version follows links. 2913 */ 2914 int 2915 sys_stat(struct stat_args *uap) 2916 { 2917 struct nlookupdata nd; 2918 struct stat st; 2919 int error; 2920 2921 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2922 if (error == 0) { 2923 error = kern_stat(&nd, &st); 2924 if (error == 0) 2925 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2926 } 2927 nlookup_done(&nd); 2928 return (error); 2929 } 2930 2931 /* 2932 * lstat_args(char *path, struct stat *ub) 2933 * 2934 * Get file status; this version does not follow links. 
2935 */ 2936 int 2937 sys_lstat(struct lstat_args *uap) 2938 { 2939 struct nlookupdata nd; 2940 struct stat st; 2941 int error; 2942 2943 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2944 if (error == 0) { 2945 error = kern_stat(&nd, &st); 2946 if (error == 0) 2947 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2948 } 2949 nlookup_done(&nd); 2950 return (error); 2951 } 2952 2953 /* 2954 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2955 * 2956 * Get status of file pointed to by fd/path. 2957 */ 2958 int 2959 sys_fstatat(struct fstatat_args *uap) 2960 { 2961 struct nlookupdata nd; 2962 struct stat st; 2963 int error; 2964 int flags; 2965 struct file *fp; 2966 2967 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2968 return (EINVAL); 2969 2970 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2971 2972 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2973 UIO_USERSPACE, flags); 2974 if (error == 0) { 2975 error = kern_stat(&nd, &st); 2976 if (error == 0) 2977 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2978 } 2979 nlookup_done_at(&nd, fp); 2980 return (error); 2981 } 2982 2983 static int 2984 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 2985 { 2986 struct nlookupdata nd; 2987 struct vnode *vp; 2988 int error; 2989 2990 vp = NULL; 2991 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 2992 if (error == 0) 2993 error = nlookup(&nd); 2994 if (error == 0) 2995 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2996 nlookup_done(&nd); 2997 if (error == 0) { 2998 error = VOP_PATHCONF(vp, name, sysmsg_regp); 2999 vput(vp); 3000 } 3001 return (error); 3002 } 3003 3004 /* 3005 * pathconf_Args(char *path, int name) 3006 * 3007 * Get configurable pathname variables. 
3008 */ 3009 int 3010 sys_pathconf(struct pathconf_args *uap) 3011 { 3012 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3013 &uap->sysmsg_reg)); 3014 } 3015 3016 /* 3017 * lpathconf_Args(char *path, int name) 3018 * 3019 * Get configurable pathname variables, but don't follow symlinks. 3020 */ 3021 int 3022 sys_lpathconf(struct lpathconf_args *uap) 3023 { 3024 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3025 } 3026 3027 /* 3028 * XXX: daver 3029 * kern_readlink isn't properly split yet. There is a copyin burried 3030 * in VOP_READLINK(). 3031 */ 3032 int 3033 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3034 { 3035 struct thread *td = curthread; 3036 struct vnode *vp; 3037 struct iovec aiov; 3038 struct uio auio; 3039 int error; 3040 3041 nd->nl_flags |= NLC_SHAREDLOCK; 3042 if ((error = nlookup(nd)) != 0) 3043 return (error); 3044 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3045 if (error) 3046 return (error); 3047 if (vp->v_type != VLNK) { 3048 error = EINVAL; 3049 } else { 3050 aiov.iov_base = buf; 3051 aiov.iov_len = count; 3052 auio.uio_iov = &aiov; 3053 auio.uio_iovcnt = 1; 3054 auio.uio_offset = 0; 3055 auio.uio_rw = UIO_READ; 3056 auio.uio_segflg = UIO_USERSPACE; 3057 auio.uio_td = td; 3058 auio.uio_resid = count; 3059 error = VOP_READLINK(vp, &auio, td->td_ucred); 3060 } 3061 vput(vp); 3062 *res = count - auio.uio_resid; 3063 return (error); 3064 } 3065 3066 /* 3067 * readlink_args(char *path, char *buf, int count) 3068 * 3069 * Return target name of a symbolic link. 
3070 */ 3071 int 3072 sys_readlink(struct readlink_args *uap) 3073 { 3074 struct nlookupdata nd; 3075 int error; 3076 3077 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3078 if (error == 0) { 3079 error = kern_readlink(&nd, uap->buf, uap->count, 3080 &uap->sysmsg_result); 3081 } 3082 nlookup_done(&nd); 3083 return (error); 3084 } 3085 3086 /* 3087 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3088 * 3089 * Return target name of a symbolic link. The path is relative to the 3090 * directory associated with fd. 3091 */ 3092 int 3093 sys_readlinkat(struct readlinkat_args *uap) 3094 { 3095 struct nlookupdata nd; 3096 struct file *fp; 3097 int error; 3098 3099 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3100 if (error == 0) { 3101 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3102 &uap->sysmsg_result); 3103 } 3104 nlookup_done_at(&nd, fp); 3105 return (error); 3106 } 3107 3108 static int 3109 setfflags(struct vnode *vp, int flags) 3110 { 3111 struct thread *td = curthread; 3112 int error; 3113 struct vattr vattr; 3114 3115 /* 3116 * Prevent non-root users from setting flags on devices. When 3117 * a device is reused, users can retain ownership of the device 3118 * if they are allowed to set flags and programs assume that 3119 * chown can't fail when done as root. 3120 */ 3121 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3122 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3123 return (error); 3124 3125 /* 3126 * note: vget is required for any operation that might mod the vnode 3127 * so VINACTIVE is properly cleared. 3128 */ 3129 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3130 VATTR_NULL(&vattr); 3131 vattr.va_flags = flags; 3132 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3133 vput(vp); 3134 } 3135 return (error); 3136 } 3137 3138 /* 3139 * chflags(char *path, int flags) 3140 * 3141 * Change flags of a file given a path name. 
3142 */ 3143 int 3144 sys_chflags(struct chflags_args *uap) 3145 { 3146 struct nlookupdata nd; 3147 struct vnode *vp; 3148 int error; 3149 3150 vp = NULL; 3151 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3152 if (error == 0) 3153 error = nlookup(&nd); 3154 if (error == 0) 3155 error = ncp_writechk(&nd.nl_nch); 3156 if (error == 0) 3157 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3158 nlookup_done(&nd); 3159 if (error == 0) { 3160 error = setfflags(vp, uap->flags); 3161 vrele(vp); 3162 } 3163 return (error); 3164 } 3165 3166 /* 3167 * lchflags(char *path, int flags) 3168 * 3169 * Change flags of a file given a path name, but don't follow symlinks. 3170 */ 3171 int 3172 sys_lchflags(struct lchflags_args *uap) 3173 { 3174 struct nlookupdata nd; 3175 struct vnode *vp; 3176 int error; 3177 3178 vp = NULL; 3179 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3180 if (error == 0) 3181 error = nlookup(&nd); 3182 if (error == 0) 3183 error = ncp_writechk(&nd.nl_nch); 3184 if (error == 0) 3185 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3186 nlookup_done(&nd); 3187 if (error == 0) { 3188 error = setfflags(vp, uap->flags); 3189 vrele(vp); 3190 } 3191 return (error); 3192 } 3193 3194 /* 3195 * fchflags_args(int fd, int flags) 3196 * 3197 * Change flags of a file given a file descriptor. 
3198 */ 3199 int 3200 sys_fchflags(struct fchflags_args *uap) 3201 { 3202 struct thread *td = curthread; 3203 struct file *fp; 3204 int error; 3205 3206 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3207 return (error); 3208 if (fp->f_nchandle.ncp) 3209 error = ncp_writechk(&fp->f_nchandle); 3210 if (error == 0) 3211 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3212 fdrop(fp); 3213 return (error); 3214 } 3215 3216 /* 3217 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3218 * change flags given a pathname relative to a filedescriptor 3219 */ 3220 int sys_chflagsat(struct chflagsat_args *uap) 3221 { 3222 struct nlookupdata nd; 3223 struct vnode *vp; 3224 struct file *fp; 3225 int error; 3226 int lookupflags; 3227 3228 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3229 return (EINVAL); 3230 3231 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3232 3233 vp = NULL; 3234 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3235 if (error == 0) 3236 error = nlookup(&nd); 3237 if (error == 0) 3238 error = ncp_writechk(&nd.nl_nch); 3239 if (error == 0) 3240 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3241 nlookup_done_at(&nd, fp); 3242 if (error == 0) { 3243 error = setfflags(vp, uap->flags); 3244 vrele(vp); 3245 } 3246 return (error); 3247 } 3248 3249 3250 static int 3251 setfmode(struct vnode *vp, int mode) 3252 { 3253 struct thread *td = curthread; 3254 int error; 3255 struct vattr vattr; 3256 3257 /* 3258 * note: vget is required for any operation that might mod the vnode 3259 * so VINACTIVE is properly cleared. 
3260 */ 3261 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3262 VATTR_NULL(&vattr); 3263 vattr.va_mode = mode & ALLPERMS; 3264 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3265 cache_inval_wxok(vp); 3266 vput(vp); 3267 } 3268 return error; 3269 } 3270 3271 int 3272 kern_chmod(struct nlookupdata *nd, int mode) 3273 { 3274 struct vnode *vp; 3275 int error; 3276 3277 if ((error = nlookup(nd)) != 0) 3278 return (error); 3279 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3280 return (error); 3281 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3282 error = setfmode(vp, mode); 3283 vrele(vp); 3284 return (error); 3285 } 3286 3287 /* 3288 * chmod_args(char *path, int mode) 3289 * 3290 * Change mode of a file given path name. 3291 */ 3292 int 3293 sys_chmod(struct chmod_args *uap) 3294 { 3295 struct nlookupdata nd; 3296 int error; 3297 3298 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3299 if (error == 0) 3300 error = kern_chmod(&nd, uap->mode); 3301 nlookup_done(&nd); 3302 return (error); 3303 } 3304 3305 /* 3306 * lchmod_args(char *path, int mode) 3307 * 3308 * Change mode of a file given path name (don't follow links.) 3309 */ 3310 int 3311 sys_lchmod(struct lchmod_args *uap) 3312 { 3313 struct nlookupdata nd; 3314 int error; 3315 3316 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3317 if (error == 0) 3318 error = kern_chmod(&nd, uap->mode); 3319 nlookup_done(&nd); 3320 return (error); 3321 } 3322 3323 /* 3324 * fchmod_args(int fd, int mode) 3325 * 3326 * Change mode of a file given a file descriptor. 
3327 */ 3328 int 3329 sys_fchmod(struct fchmod_args *uap) 3330 { 3331 struct thread *td = curthread; 3332 struct file *fp; 3333 int error; 3334 3335 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3336 return (error); 3337 if (fp->f_nchandle.ncp) 3338 error = ncp_writechk(&fp->f_nchandle); 3339 if (error == 0) 3340 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3341 fdrop(fp); 3342 return (error); 3343 } 3344 3345 /* 3346 * fchmodat_args(char *path, int mode) 3347 * 3348 * Change mode of a file pointed to by fd/path. 3349 */ 3350 int 3351 sys_fchmodat(struct fchmodat_args *uap) 3352 { 3353 struct nlookupdata nd; 3354 struct file *fp; 3355 int error; 3356 int flags; 3357 3358 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3359 return (EINVAL); 3360 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3361 3362 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3363 UIO_USERSPACE, flags); 3364 if (error == 0) 3365 error = kern_chmod(&nd, uap->mode); 3366 nlookup_done_at(&nd, fp); 3367 return (error); 3368 } 3369 3370 static int 3371 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3372 { 3373 struct thread *td = curthread; 3374 int error; 3375 struct vattr vattr; 3376 uid_t o_uid; 3377 gid_t o_gid; 3378 uint64_t size; 3379 3380 /* 3381 * note: vget is required for any operation that might mod the vnode 3382 * so VINACTIVE is properly cleared. 
3383 */ 3384 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3385 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3386 return error; 3387 o_uid = vattr.va_uid; 3388 o_gid = vattr.va_gid; 3389 size = vattr.va_size; 3390 3391 VATTR_NULL(&vattr); 3392 vattr.va_uid = uid; 3393 vattr.va_gid = gid; 3394 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3395 vput(vp); 3396 } 3397 3398 if (error == 0) { 3399 if (uid == -1) 3400 uid = o_uid; 3401 if (gid == -1) 3402 gid = o_gid; 3403 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3404 VFS_ACCOUNT(mp, uid, gid, size); 3405 } 3406 3407 return error; 3408 } 3409 3410 int 3411 kern_chown(struct nlookupdata *nd, int uid, int gid) 3412 { 3413 struct vnode *vp; 3414 int error; 3415 3416 if ((error = nlookup(nd)) != 0) 3417 return (error); 3418 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3419 return (error); 3420 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3421 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3422 vrele(vp); 3423 return (error); 3424 } 3425 3426 /* 3427 * chown(char *path, int uid, int gid) 3428 * 3429 * Set ownership given a path name. 3430 */ 3431 int 3432 sys_chown(struct chown_args *uap) 3433 { 3434 struct nlookupdata nd; 3435 int error; 3436 3437 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3438 if (error == 0) 3439 error = kern_chown(&nd, uap->uid, uap->gid); 3440 nlookup_done(&nd); 3441 return (error); 3442 } 3443 3444 /* 3445 * lchown_args(char *path, int uid, int gid) 3446 * 3447 * Set ownership given a path name, do not cross symlinks. 3448 */ 3449 int 3450 sys_lchown(struct lchown_args *uap) 3451 { 3452 struct nlookupdata nd; 3453 int error; 3454 3455 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3456 if (error == 0) 3457 error = kern_chown(&nd, uap->uid, uap->gid); 3458 nlookup_done(&nd); 3459 return (error); 3460 } 3461 3462 /* 3463 * fchown_args(int fd, int uid, int gid) 3464 * 3465 * Set ownership given a file descriptor. 
3466 */ 3467 int 3468 sys_fchown(struct fchown_args *uap) 3469 { 3470 struct thread *td = curthread; 3471 struct proc *p = td->td_proc; 3472 struct file *fp; 3473 int error; 3474 3475 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3476 return (error); 3477 if (fp->f_nchandle.ncp) 3478 error = ncp_writechk(&fp->f_nchandle); 3479 if (error == 0) 3480 error = setfown(p->p_fd->fd_ncdir.mount, 3481 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3482 fdrop(fp); 3483 return (error); 3484 } 3485 3486 /* 3487 * fchownat(int fd, char *path, int uid, int gid, int flags) 3488 * 3489 * Set ownership of file pointed to by fd/path. 3490 */ 3491 int 3492 sys_fchownat(struct fchownat_args *uap) 3493 { 3494 struct nlookupdata nd; 3495 struct file *fp; 3496 int error; 3497 int flags; 3498 3499 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3500 return (EINVAL); 3501 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3502 3503 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3504 UIO_USERSPACE, flags); 3505 if (error == 0) 3506 error = kern_chown(&nd, uap->uid, uap->gid); 3507 nlookup_done_at(&nd, fp); 3508 return (error); 3509 } 3510 3511 3512 static int 3513 getutimes(struct timeval *tvp, struct timespec *tsp) 3514 { 3515 struct timeval tv[2]; 3516 int error; 3517 3518 if (tvp == NULL) { 3519 microtime(&tv[0]); 3520 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3521 tsp[1] = tsp[0]; 3522 } else { 3523 if ((error = itimerfix(tvp)) != 0) 3524 return (error); 3525 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3526 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3527 } 3528 return 0; 3529 } 3530 3531 static int 3532 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3533 { 3534 struct timespec tsnow; 3535 int error; 3536 3537 *nullflag = 0; 3538 nanotime(&tsnow); 3539 if (ts == NULL) { 3540 newts[0] = tsnow; 3541 newts[1] = tsnow; 3542 *nullflag = 1; 3543 return (0); 3544 } 3545 3546 newts[0] = ts[0]; 3547 newts[1] = ts[1]; 3548 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT) 3549 return (0); 3550 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3551 *nullflag = 1; 3552 3553 if (newts[0].tv_nsec == UTIME_OMIT) 3554 newts[0].tv_sec = VNOVAL; 3555 else if (newts[0].tv_nsec == UTIME_NOW) 3556 newts[0] = tsnow; 3557 else if ((error = itimespecfix(&newts[0])) != 0) 3558 return (error); 3559 3560 if (newts[1].tv_nsec == UTIME_OMIT) 3561 newts[1].tv_sec = VNOVAL; 3562 else if (newts[1].tv_nsec == UTIME_NOW) 3563 newts[1] = tsnow; 3564 else if ((error = itimespecfix(&newts[1])) != 0) 3565 return (error); 3566 3567 return (0); 3568 } 3569 3570 static int 3571 setutimes(struct vnode *vp, struct vattr *vattr, 3572 const struct timespec *ts, int nullflag) 3573 { 3574 struct thread *td = curthread; 3575 int error; 3576 3577 VATTR_NULL(vattr); 3578 vattr->va_atime = ts[0]; 3579 vattr->va_mtime = ts[1]; 3580 if (nullflag) 3581 vattr->va_vaflags |= VA_UTIMES_NULL; 3582 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3583 3584 return error; 3585 } 3586 3587 int 3588 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3589 { 3590 struct timespec ts[2]; 3591 int error; 3592 3593 if (tptr) { 3594 if ((error = getutimes(tptr, ts)) != 0) 3595 return (error); 3596 } 3597 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3598 return (error); 3599 } 3600 3601 /* 3602 * utimes_args(char *path, struct timeval *tptr) 3603 * 3604 * Set the access and modification times of a file. 3605 */ 3606 int 3607 sys_utimes(struct utimes_args *uap) 3608 { 3609 struct timeval tv[2]; 3610 struct nlookupdata nd; 3611 int error; 3612 3613 if (uap->tptr) { 3614 error = copyin(uap->tptr, tv, sizeof(tv)); 3615 if (error) 3616 return (error); 3617 } 3618 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3619 if (error == 0) 3620 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3621 nlookup_done(&nd); 3622 return (error); 3623 } 3624 3625 /* 3626 * lutimes_args(char *path, struct timeval *tptr) 3627 * 3628 * Set the access and modification times of a file. 3629 */ 3630 int 3631 sys_lutimes(struct lutimes_args *uap) 3632 { 3633 struct timeval tv[2]; 3634 struct nlookupdata nd; 3635 int error; 3636 3637 if (uap->tptr) { 3638 error = copyin(uap->tptr, tv, sizeof(tv)); 3639 if (error) 3640 return (error); 3641 } 3642 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3643 if (error == 0) 3644 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3645 nlookup_done(&nd); 3646 return (error); 3647 } 3648 3649 /* 3650 * Set utimes on a file descriptor. The creds used to open the 3651 * file are used to determine whether the operation is allowed 3652 * or not. 3653 */ 3654 int 3655 kern_futimens(int fd, struct timespec *ts) 3656 { 3657 struct thread *td = curthread; 3658 struct timespec newts[2]; 3659 struct file *fp; 3660 struct vnode *vp; 3661 struct vattr vattr; 3662 int nullflag; 3663 int error; 3664 3665 error = getutimens(ts, newts, &nullflag); 3666 if (error) 3667 return (error); 3668 if ((error = holdvnode(td, fd, &fp)) != 0) 3669 return (error); 3670 if (fp->f_nchandle.ncp) 3671 error = ncp_writechk(&fp->f_nchandle); 3672 if (error == 0) { 3673 vp = fp->f_data; 3674 error = vget(vp, LK_EXCLUSIVE); 3675 if (error == 0) { 3676 error = VOP_GETATTR(vp, &vattr); 3677 if (error == 0) { 3678 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3679 fp->f_cred); 3680 } 3681 if (error == 0) { 3682 error = setutimes(vp, &vattr, newts, nullflag); 3683 } 3684 vput(vp); 3685 } 3686 } 3687 fdrop(fp); 3688 return (error); 3689 } 3690 3691 /* 3692 * futimens_args(int fd, struct timespec *ts) 3693 * 3694 * Set the access and modification times of a file. 
3695 */ 3696 int 3697 sys_futimens(struct futimens_args *uap) 3698 { 3699 struct timespec ts[2]; 3700 int error; 3701 3702 if (uap->ts) { 3703 error = copyin(uap->ts, ts, sizeof(ts)); 3704 if (error) 3705 return (error); 3706 } 3707 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3708 return (error); 3709 } 3710 3711 int 3712 kern_futimes(int fd, struct timeval *tptr) 3713 { 3714 struct timespec ts[2]; 3715 int error; 3716 3717 if (tptr) { 3718 if ((error = getutimes(tptr, ts)) != 0) 3719 return (error); 3720 } 3721 error = kern_futimens(fd, tptr ? ts : NULL); 3722 return (error); 3723 } 3724 3725 /* 3726 * futimes_args(int fd, struct timeval *tptr) 3727 * 3728 * Set the access and modification times of a file. 3729 */ 3730 int 3731 sys_futimes(struct futimes_args *uap) 3732 { 3733 struct timeval tv[2]; 3734 int error; 3735 3736 if (uap->tptr) { 3737 error = copyin(uap->tptr, tv, sizeof(tv)); 3738 if (error) 3739 return (error); 3740 } 3741 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3742 return (error); 3743 } 3744 3745 int 3746 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3747 { 3748 struct timespec newts[2]; 3749 struct vnode *vp; 3750 struct vattr vattr; 3751 int nullflag; 3752 int error; 3753 3754 if (flags & ~AT_SYMLINK_NOFOLLOW) 3755 return (EINVAL); 3756 3757 error = getutimens(ts, newts, &nullflag); 3758 if (error) 3759 return (error); 3760 3761 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3762 if ((error = nlookup(nd)) != 0) 3763 return (error); 3764 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3765 return (error); 3766 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3767 return (error); 3768 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3769 error = vget(vp, LK_EXCLUSIVE); 3770 if (error == 0) { 3771 error = setutimes(vp, &vattr, newts, nullflag); 3772 vput(vp); 3773 } 3774 } 3775 vrele(vp); 3776 return (error); 3777 } 3778 3779 /* 3780 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3781 * 3782 * Set file access and modification times of a file. 3783 */ 3784 int 3785 sys_utimensat(struct utimensat_args *uap) 3786 { 3787 struct timespec ts[2]; 3788 struct nlookupdata nd; 3789 struct file *fp; 3790 int error; 3791 int flags; 3792 3793 if (uap->ts) { 3794 error = copyin(uap->ts, ts, sizeof(ts)); 3795 if (error) 3796 return (error); 3797 } 3798 3799 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3800 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3801 UIO_USERSPACE, flags); 3802 if (error == 0) 3803 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3804 nlookup_done_at(&nd, fp); 3805 return (error); 3806 } 3807 3808 int 3809 kern_truncate(struct nlookupdata *nd, off_t length) 3810 { 3811 struct vnode *vp; 3812 struct vattr vattr; 3813 int error; 3814 uid_t uid = 0; 3815 gid_t gid = 0; 3816 uint64_t old_size = 0; 3817 3818 if (length < 0) 3819 return(EINVAL); 3820 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3821 if ((error = nlookup(nd)) != 0) 3822 return (error); 3823 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3824 return (error); 3825 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3826 return (error); 3827 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3828 if (error) { 3829 vrele(vp); 3830 return (error); 3831 } 3832 if (vp->v_type == VDIR) { 3833 error = EISDIR; 3834 goto done; 3835 } 3836 if (vfs_quota_enabled) { 3837 error = VOP_GETATTR(vp, &vattr); 3838 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3839 uid = vattr.va_uid; 3840 gid = vattr.va_gid; 3841 old_size = vattr.va_size; 3842 } 3843 3844 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3845 VATTR_NULL(&vattr); 3846 vattr.va_size = length; 3847 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3848 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3849 } 3850 done: 3851 vput(vp); 3852 return (error); 3853 } 3854 3855 /* 3856 * truncate(char *path, int pad, off_t length) 3857 * 3858 * Truncate a file given its path name. 
3859 */ 3860 int 3861 sys_truncate(struct truncate_args *uap) 3862 { 3863 struct nlookupdata nd; 3864 int error; 3865 3866 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3867 if (error == 0) 3868 error = kern_truncate(&nd, uap->length); 3869 nlookup_done(&nd); 3870 return error; 3871 } 3872 3873 int 3874 kern_ftruncate(int fd, off_t length) 3875 { 3876 struct thread *td = curthread; 3877 struct vattr vattr; 3878 struct vnode *vp; 3879 struct file *fp; 3880 int error; 3881 uid_t uid = 0; 3882 gid_t gid = 0; 3883 uint64_t old_size = 0; 3884 struct mount *mp; 3885 3886 if (length < 0) 3887 return(EINVAL); 3888 if ((error = holdvnode(td, fd, &fp)) != 0) 3889 return (error); 3890 if (fp->f_nchandle.ncp) { 3891 error = ncp_writechk(&fp->f_nchandle); 3892 if (error) 3893 goto done; 3894 } 3895 if ((fp->f_flag & FWRITE) == 0) { 3896 error = EINVAL; 3897 goto done; 3898 } 3899 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3900 error = EINVAL; 3901 goto done; 3902 } 3903 vp = (struct vnode *)fp->f_data; 3904 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3905 if (vp->v_type == VDIR) { 3906 error = EISDIR; 3907 vn_unlock(vp); 3908 goto done; 3909 } 3910 3911 if (vfs_quota_enabled) { 3912 error = VOP_GETATTR(vp, &vattr); 3913 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3914 uid = vattr.va_uid; 3915 gid = vattr.va_gid; 3916 old_size = vattr.va_size; 3917 } 3918 3919 if ((error = vn_writechk(vp, NULL)) == 0) { 3920 VATTR_NULL(&vattr); 3921 vattr.va_size = length; 3922 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3923 mp = vq_vptomp(vp); 3924 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3925 } 3926 vn_unlock(vp); 3927 done: 3928 fdrop(fp); 3929 return (error); 3930 } 3931 3932 /* 3933 * ftruncate_args(int fd, int pad, off_t length) 3934 * 3935 * Truncate a file given a file descriptor. 
3936 */ 3937 int 3938 sys_ftruncate(struct ftruncate_args *uap) 3939 { 3940 int error; 3941 3942 error = kern_ftruncate(uap->fd, uap->length); 3943 3944 return (error); 3945 } 3946 3947 /* 3948 * fsync(int fd) 3949 * 3950 * Sync an open file. 3951 */ 3952 int 3953 sys_fsync(struct fsync_args *uap) 3954 { 3955 struct thread *td = curthread; 3956 struct vnode *vp; 3957 struct file *fp; 3958 vm_object_t obj; 3959 int error; 3960 3961 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3962 return (error); 3963 vp = (struct vnode *)fp->f_data; 3964 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3965 if ((obj = vp->v_object) != NULL) { 3966 if (vp->v_mount == NULL || 3967 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 3968 vm_object_page_clean(obj, 0, 0, 0); 3969 } 3970 } 3971 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3972 if (error == 0 && vp->v_mount) 3973 error = buf_fsync(vp); 3974 vn_unlock(vp); 3975 fdrop(fp); 3976 3977 return (error); 3978 } 3979 3980 int 3981 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3982 { 3983 struct nchandle fnchd; 3984 struct nchandle tnchd; 3985 struct namecache *ncp; 3986 struct vnode *fdvp; 3987 struct vnode *tdvp; 3988 struct mount *mp; 3989 int error; 3990 u_int fncp_gen; 3991 u_int tncp_gen; 3992 3993 bwillinode(1); 3994 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3995 if ((error = nlookup(fromnd)) != 0) 3996 return (error); 3997 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 3998 return (ENOENT); 3999 fnchd.mount = fromnd->nl_nch.mount; 4000 cache_hold(&fnchd); 4001 4002 /* 4003 * unlock the source nch so we can lookup the target nch without 4004 * deadlocking. The target may or may not exist so we do not check 4005 * for a target vp like kern_mkdir() and other creation functions do. 4006 * 4007 * The source and target directories are ref'd and rechecked after 4008 * everything is relocked to determine if the source or target file 4009 * has been renamed. 
4010 */ 4011 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4012 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4013 4014 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4015 4016 cache_unlock(&fromnd->nl_nch); 4017 4018 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4019 if ((error = nlookup(tond)) != 0) { 4020 cache_drop(&fnchd); 4021 return (error); 4022 } 4023 tncp_gen = tond->nl_nch.ncp->nc_generation; 4024 4025 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4026 cache_drop(&fnchd); 4027 return (ENOENT); 4028 } 4029 tnchd.mount = tond->nl_nch.mount; 4030 cache_hold(&tnchd); 4031 4032 /* 4033 * If the source and target are the same there is nothing to do 4034 */ 4035 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4036 cache_drop(&fnchd); 4037 cache_drop(&tnchd); 4038 return (0); 4039 } 4040 4041 /* 4042 * Mount points cannot be renamed or overwritten 4043 */ 4044 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4045 NCF_ISMOUNTPT 4046 ) { 4047 cache_drop(&fnchd); 4048 cache_drop(&tnchd); 4049 return (EINVAL); 4050 } 4051 4052 /* 4053 * Relock the source ncp. cache_relock() will deal with any 4054 * deadlocks against the already-locked tond and will also 4055 * make sure both are resolved. 4056 * 4057 * NOTE AFTER RELOCKING: The source or target ncp may have become 4058 * invalid while they were unlocked, nc_vp and nc_mount could 4059 * be NULL. 4060 */ 4061 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 4062 &tond->nl_nch, tond->nl_cred); 4063 fromnd->nl_flags |= NLC_NCPISLOCKED; 4064 4065 /* 4066 * If the namecache generation changed for either fromnd or tond, 4067 * we must retry. 
4068 */ 4069 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 4070 tond->nl_nch.ncp->nc_generation != tncp_gen) { 4071 kprintf("kern_rename: retry due to gen on: " 4072 "\"%s\" -> \"%s\"\n", 4073 fromnd->nl_nch.ncp->nc_name, 4074 tond->nl_nch.ncp->nc_name); 4075 cache_drop(&fnchd); 4076 cache_drop(&tnchd); 4077 return (EAGAIN); 4078 } 4079 4080 /* 4081 * If either fromnd or tond are marked destroyed a ripout occured 4082 * out from under us and we must retry. 4083 */ 4084 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 4085 fromnd->nl_nch.ncp->nc_vp == NULL || 4086 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 4087 kprintf("kern_rename: retry due to ripout on: " 4088 "\"%s\" -> \"%s\"\n", 4089 fromnd->nl_nch.ncp->nc_name, 4090 tond->nl_nch.ncp->nc_name); 4091 cache_drop(&fnchd); 4092 cache_drop(&tnchd); 4093 return (EAGAIN); 4094 } 4095 4096 /* 4097 * Make sure the parent directories linkages are the same. 4098 * XXX shouldn't be needed any more w/ generation check above. 4099 */ 4100 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4101 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4102 cache_drop(&fnchd); 4103 cache_drop(&tnchd); 4104 return (ENOENT); 4105 } 4106 4107 /* 4108 * Both the source and target must be within the same filesystem and 4109 * in the same filesystem as their parent directories within the 4110 * namecache topology. 4111 * 4112 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 4113 */ 4114 mp = fnchd.mount; 4115 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4116 mp != tond->nl_nch.mount) { 4117 cache_drop(&fnchd); 4118 cache_drop(&tnchd); 4119 return (EXDEV); 4120 } 4121 4122 /* 4123 * Make sure the mount point is writable 4124 */ 4125 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4126 cache_drop(&fnchd); 4127 cache_drop(&tnchd); 4128 return (error); 4129 } 4130 4131 /* 4132 * If the target exists and either the source or target is a directory, 4133 * then both must be directories. 
4134 * 4135 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4136 * have become NULL. 4137 */ 4138 if (tond->nl_nch.ncp->nc_vp) { 4139 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4140 error = ENOENT; 4141 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4142 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4143 error = ENOTDIR; 4144 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4145 error = EISDIR; 4146 } 4147 } 4148 4149 /* 4150 * You cannot rename a source into itself or a subdirectory of itself. 4151 * We check this by travsersing the target directory upwards looking 4152 * for a match against the source. 4153 * 4154 * XXX MPSAFE 4155 */ 4156 if (error == 0) { 4157 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4158 if (fromnd->nl_nch.ncp == ncp) { 4159 error = EINVAL; 4160 break; 4161 } 4162 } 4163 } 4164 4165 cache_drop(&fnchd); 4166 cache_drop(&tnchd); 4167 4168 /* 4169 * Even though the namespaces are different, they may still represent 4170 * hardlinks to the same file. The filesystem might have a hard time 4171 * with this so we issue a NREMOVE of the source instead of a NRENAME 4172 * when we detect the situation. 4173 */ 4174 if (error == 0) { 4175 fdvp = fromnd->nl_dvp; 4176 tdvp = tond->nl_dvp; 4177 if (fdvp == NULL || tdvp == NULL) { 4178 error = EPERM; 4179 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4180 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4181 fromnd->nl_cred); 4182 } else { 4183 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4184 fdvp, tdvp, tond->nl_cred); 4185 } 4186 } 4187 return (error); 4188 } 4189 4190 /* 4191 * rename_args(char *from, char *to) 4192 * 4193 * Rename files. Source and destination must either both be directories, 4194 * or both not be directories. If target is a directory, it must be empty. 
4195 */ 4196 int 4197 sys_rename(struct rename_args *uap) 4198 { 4199 struct nlookupdata fromnd, tond; 4200 int error; 4201 4202 do { 4203 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4204 if (error == 0) { 4205 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4206 if (error == 0) 4207 error = kern_rename(&fromnd, &tond); 4208 nlookup_done(&tond); 4209 } 4210 nlookup_done(&fromnd); 4211 } while (error == EAGAIN); 4212 return (error); 4213 } 4214 4215 /* 4216 * renameat_args(int oldfd, char *old, int newfd, char *new) 4217 * 4218 * Rename files using paths relative to the directories associated with 4219 * oldfd and newfd. Source and destination must either both be directories, 4220 * or both not be directories. If target is a directory, it must be empty. 4221 */ 4222 int 4223 sys_renameat(struct renameat_args *uap) 4224 { 4225 struct nlookupdata oldnd, newnd; 4226 struct file *oldfp, *newfp; 4227 int error; 4228 4229 do { 4230 error = nlookup_init_at(&oldnd, &oldfp, 4231 uap->oldfd, uap->old, 4232 UIO_USERSPACE, 0); 4233 if (error == 0) { 4234 error = nlookup_init_at(&newnd, &newfp, 4235 uap->newfd, uap->new, 4236 UIO_USERSPACE, 0); 4237 if (error == 0) 4238 error = kern_rename(&oldnd, &newnd); 4239 nlookup_done_at(&newnd, newfp); 4240 } 4241 nlookup_done_at(&oldnd, oldfp); 4242 } while (error == EAGAIN); 4243 return (error); 4244 } 4245 4246 int 4247 kern_mkdir(struct nlookupdata *nd, int mode) 4248 { 4249 struct thread *td = curthread; 4250 struct proc *p = td->td_proc; 4251 struct vnode *vp; 4252 struct vattr vattr; 4253 int error; 4254 4255 bwillinode(1); 4256 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4257 if ((error = nlookup(nd)) != 0) 4258 return (error); 4259 4260 if (nd->nl_nch.ncp->nc_vp) 4261 return (EEXIST); 4262 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4263 return (error); 4264 VATTR_NULL(&vattr); 4265 vattr.va_type = VDIR; 4266 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4267 4268 vp = NULL; 
4269 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4270 if (error == 0) 4271 vput(vp); 4272 return (error); 4273 } 4274 4275 /* 4276 * mkdir_args(char *path, int mode) 4277 * 4278 * Make a directory file. 4279 */ 4280 int 4281 sys_mkdir(struct mkdir_args *uap) 4282 { 4283 struct nlookupdata nd; 4284 int error; 4285 4286 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4287 if (error == 0) 4288 error = kern_mkdir(&nd, uap->mode); 4289 nlookup_done(&nd); 4290 return (error); 4291 } 4292 4293 /* 4294 * mkdirat_args(int fd, char *path, mode_t mode) 4295 * 4296 * Make a directory file. The path is relative to the directory associated 4297 * with fd. 4298 */ 4299 int 4300 sys_mkdirat(struct mkdirat_args *uap) 4301 { 4302 struct nlookupdata nd; 4303 struct file *fp; 4304 int error; 4305 4306 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4307 if (error == 0) 4308 error = kern_mkdir(&nd, uap->mode); 4309 nlookup_done_at(&nd, fp); 4310 return (error); 4311 } 4312 4313 int 4314 kern_rmdir(struct nlookupdata *nd) 4315 { 4316 int error; 4317 4318 bwillinode(1); 4319 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4320 if ((error = nlookup(nd)) != 0) 4321 return (error); 4322 4323 /* 4324 * Do not allow directories representing mount points to be 4325 * deleted, even if empty. Check write perms on mount point 4326 * in case the vnode is aliased (aka nullfs). 4327 */ 4328 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4329 return (EBUSY); 4330 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4331 return (error); 4332 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4333 return (error); 4334 } 4335 4336 /* 4337 * rmdir_args(char *path) 4338 * 4339 * Remove a directory file. 
4340 */ 4341 int 4342 sys_rmdir(struct rmdir_args *uap) 4343 { 4344 struct nlookupdata nd; 4345 int error; 4346 4347 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4348 if (error == 0) 4349 error = kern_rmdir(&nd); 4350 nlookup_done(&nd); 4351 return (error); 4352 } 4353 4354 int 4355 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4356 enum uio_seg direction) 4357 { 4358 struct thread *td = curthread; 4359 struct vnode *vp; 4360 struct file *fp; 4361 struct uio auio; 4362 struct iovec aiov; 4363 off_t loff; 4364 int error, eofflag; 4365 4366 if ((error = holdvnode(td, fd, &fp)) != 0) 4367 return (error); 4368 if ((fp->f_flag & FREAD) == 0) { 4369 error = EBADF; 4370 goto done; 4371 } 4372 vp = (struct vnode *)fp->f_data; 4373 if (vp->v_type != VDIR) { 4374 error = EINVAL; 4375 goto done; 4376 } 4377 aiov.iov_base = buf; 4378 aiov.iov_len = count; 4379 auio.uio_iov = &aiov; 4380 auio.uio_iovcnt = 1; 4381 auio.uio_rw = UIO_READ; 4382 auio.uio_segflg = direction; 4383 auio.uio_td = td; 4384 auio.uio_resid = count; 4385 loff = auio.uio_offset = fp->f_offset; 4386 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4387 fp->f_offset = auio.uio_offset; 4388 if (error) 4389 goto done; 4390 4391 /* 4392 * WARNING! *basep may not be wide enough to accomodate the 4393 * seek offset. XXX should we hack this to return the upper 32 bits 4394 * for offsets greater then 4G? 4395 */ 4396 if (basep) { 4397 *basep = (long)loff; 4398 } 4399 *res = count - auio.uio_resid; 4400 done: 4401 fdrop(fp); 4402 return (error); 4403 } 4404 4405 /* 4406 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4407 * 4408 * Read a block of directory entries in a file system independent format. 
4409 */ 4410 int 4411 sys_getdirentries(struct getdirentries_args *uap) 4412 { 4413 long base; 4414 int error; 4415 4416 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4417 &uap->sysmsg_result, UIO_USERSPACE); 4418 4419 if (error == 0 && uap->basep) 4420 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4421 return (error); 4422 } 4423 4424 /* 4425 * getdents_args(int fd, char *buf, size_t count) 4426 */ 4427 int 4428 sys_getdents(struct getdents_args *uap) 4429 { 4430 int error; 4431 4432 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4433 &uap->sysmsg_result, UIO_USERSPACE); 4434 4435 return (error); 4436 } 4437 4438 /* 4439 * Set the mode mask for creation of filesystem nodes. 4440 * 4441 * umask(int newmask) 4442 */ 4443 int 4444 sys_umask(struct umask_args *uap) 4445 { 4446 struct thread *td = curthread; 4447 struct proc *p = td->td_proc; 4448 struct filedesc *fdp; 4449 4450 fdp = p->p_fd; 4451 uap->sysmsg_result = fdp->fd_cmask; 4452 fdp->fd_cmask = uap->newmask & ALLPERMS; 4453 return (0); 4454 } 4455 4456 /* 4457 * revoke(char *path) 4458 * 4459 * Void all references to file by ripping underlying filesystem 4460 * away from vnode. 
4461 */ 4462 int 4463 sys_revoke(struct revoke_args *uap) 4464 { 4465 struct nlookupdata nd; 4466 struct vattr vattr; 4467 struct vnode *vp; 4468 struct ucred *cred; 4469 int error; 4470 4471 vp = NULL; 4472 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4473 if (error == 0) 4474 error = nlookup(&nd); 4475 if (error == 0) 4476 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4477 cred = crhold(nd.nl_cred); 4478 nlookup_done(&nd); 4479 if (error == 0) { 4480 if (error == 0) 4481 error = VOP_GETATTR(vp, &vattr); 4482 if (error == 0 && cred->cr_uid != vattr.va_uid) 4483 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4484 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4485 if (vcount(vp) > 0) 4486 error = vrevoke(vp, cred); 4487 } else if (error == 0) { 4488 error = vrevoke(vp, cred); 4489 } 4490 vrele(vp); 4491 } 4492 if (cred) 4493 crfree(cred); 4494 return (error); 4495 } 4496 4497 /* 4498 * getfh_args(char *fname, fhandle_t *fhp) 4499 * 4500 * Get (NFS) file handle 4501 * 4502 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4503 * mount. This allows nullfs mounts to be explicitly exported. 4504 * 4505 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4506 * 4507 * nullfs mounts of subdirectories are not safe. That is, it will 4508 * work, but you do not really have protection against access to 4509 * the related parent directories. 
4510 */ 4511 int 4512 sys_getfh(struct getfh_args *uap) 4513 { 4514 struct thread *td = curthread; 4515 struct nlookupdata nd; 4516 fhandle_t fh; 4517 struct vnode *vp; 4518 struct mount *mp; 4519 int error; 4520 4521 /* 4522 * Must be super user 4523 */ 4524 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4525 return (error); 4526 4527 vp = NULL; 4528 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4529 if (error == 0) 4530 error = nlookup(&nd); 4531 if (error == 0) 4532 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4533 mp = nd.nl_nch.mount; 4534 nlookup_done(&nd); 4535 if (error == 0) { 4536 bzero(&fh, sizeof(fh)); 4537 fh.fh_fsid = mp->mnt_stat.f_fsid; 4538 error = VFS_VPTOFH(vp, &fh.fh_fid); 4539 vput(vp); 4540 if (error == 0) 4541 error = copyout(&fh, uap->fhp, sizeof(fh)); 4542 } 4543 return (error); 4544 } 4545 4546 /* 4547 * fhopen_args(const struct fhandle *u_fhp, int flags) 4548 * 4549 * syscall for the rpc.lockd to use to translate a NFS file handle into 4550 * an open descriptor. 4551 * 4552 * warning: do not remove the priv_check() call or this becomes one giant 4553 * security hole. 4554 */ 4555 int 4556 sys_fhopen(struct fhopen_args *uap) 4557 { 4558 struct thread *td = curthread; 4559 struct filedesc *fdp = td->td_proc->p_fd; 4560 struct mount *mp; 4561 struct vnode *vp; 4562 struct fhandle fhp; 4563 struct vattr vat; 4564 struct vattr *vap = &vat; 4565 struct flock lf; 4566 int fmode, mode, error = 0, type; 4567 struct file *nfp; 4568 struct file *fp; 4569 int indx; 4570 4571 /* 4572 * Must be super user 4573 */ 4574 error = priv_check(td, PRIV_ROOT); 4575 if (error) 4576 return (error); 4577 4578 fmode = FFLAGS(uap->flags); 4579 4580 /* 4581 * Why not allow a non-read/write open for our lockd? 
4582 */ 4583 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4584 return (EINVAL); 4585 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4586 if (error) 4587 return(error); 4588 4589 /* 4590 * Find the mount point 4591 */ 4592 mp = vfs_getvfs(&fhp.fh_fsid); 4593 if (mp == NULL) { 4594 error = ESTALE; 4595 goto done2; 4596 } 4597 /* now give me my vnode, it gets returned to me locked */ 4598 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4599 if (error) 4600 goto done; 4601 /* 4602 * from now on we have to make sure not 4603 * to forget about the vnode 4604 * any error that causes an abort must vput(vp) 4605 * just set error = err and 'goto bad;'. 4606 */ 4607 4608 /* 4609 * from vn_open 4610 */ 4611 if (vp->v_type == VLNK) { 4612 error = EMLINK; 4613 goto bad; 4614 } 4615 if (vp->v_type == VSOCK) { 4616 error = EOPNOTSUPP; 4617 goto bad; 4618 } 4619 mode = 0; 4620 if (fmode & (FWRITE | O_TRUNC)) { 4621 if (vp->v_type == VDIR) { 4622 error = EISDIR; 4623 goto bad; 4624 } 4625 error = vn_writechk(vp, NULL); 4626 if (error) 4627 goto bad; 4628 mode |= VWRITE; 4629 } 4630 if (fmode & FREAD) 4631 mode |= VREAD; 4632 if (mode) { 4633 error = VOP_ACCESS(vp, mode, td->td_ucred); 4634 if (error) 4635 goto bad; 4636 } 4637 if (fmode & O_TRUNC) { 4638 vn_unlock(vp); /* XXX */ 4639 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4640 VATTR_NULL(vap); 4641 vap->va_size = 0; 4642 error = VOP_SETATTR(vp, vap, td->td_ucred); 4643 if (error) 4644 goto bad; 4645 } 4646 4647 /* 4648 * VOP_OPEN needs the file pointer so it can potentially override 4649 * it. 4650 * 4651 * WARNING! no f_nchandle will be associated when fhopen()ing a 4652 * directory. XXX 4653 */ 4654 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4655 goto bad; 4656 fp = nfp; 4657 4658 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4659 if (error) { 4660 /* 4661 * setting f_ops this way prevents VOP_CLOSE from being 4662 * called or fdrop() releasing the vp from v_data. 
Since 4663 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4664 */ 4665 fp->f_ops = &badfileops; 4666 fp->f_data = NULL; 4667 goto bad_drop; 4668 } 4669 4670 /* 4671 * The fp is given its own reference, we still have our ref and lock. 4672 * 4673 * Assert that all regular files must be created with a VM object. 4674 */ 4675 if (vp->v_type == VREG && vp->v_object == NULL) { 4676 kprintf("fhopen: regular file did not " 4677 "have VM object: %p\n", 4678 vp); 4679 goto bad_drop; 4680 } 4681 4682 /* 4683 * The open was successful. Handle any locking requirements. 4684 */ 4685 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4686 lf.l_whence = SEEK_SET; 4687 lf.l_start = 0; 4688 lf.l_len = 0; 4689 if (fmode & O_EXLOCK) 4690 lf.l_type = F_WRLCK; 4691 else 4692 lf.l_type = F_RDLCK; 4693 if (fmode & FNONBLOCK) 4694 type = 0; 4695 else 4696 type = F_WAIT; 4697 vn_unlock(vp); 4698 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4699 &lf, type)) != 0) { 4700 /* 4701 * release our private reference. 4702 */ 4703 fsetfd(fdp, NULL, indx); 4704 fdrop(fp); 4705 vrele(vp); 4706 goto done; 4707 } 4708 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4709 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4710 } 4711 4712 /* 4713 * Clean up. Associate the file pointer with the previously 4714 * reserved descriptor and return it. 
4715 */ 4716 vput(vp); 4717 if (uap->flags & O_CLOEXEC) 4718 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4719 fsetfd(fdp, fp, indx); 4720 fdrop(fp); 4721 uap->sysmsg_result = indx; 4722 mount_drop(mp); 4723 4724 return (error); 4725 4726 bad_drop: 4727 fsetfd(fdp, NULL, indx); 4728 fdrop(fp); 4729 bad: 4730 vput(vp); 4731 done: 4732 mount_drop(mp); 4733 done2: 4734 return (error); 4735 } 4736 4737 /* 4738 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4739 */ 4740 int 4741 sys_fhstat(struct fhstat_args *uap) 4742 { 4743 struct thread *td = curthread; 4744 struct stat sb; 4745 fhandle_t fh; 4746 struct mount *mp; 4747 struct vnode *vp; 4748 int error; 4749 4750 /* 4751 * Must be super user 4752 */ 4753 error = priv_check(td, PRIV_ROOT); 4754 if (error) 4755 return (error); 4756 4757 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4758 if (error) 4759 return (error); 4760 4761 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4762 error = ESTALE; 4763 if (error == 0) { 4764 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4765 error = vn_stat(vp, &sb, td->td_ucred); 4766 vput(vp); 4767 } 4768 } 4769 if (error == 0) 4770 error = copyout(&sb, uap->sb, sizeof(sb)); 4771 if (mp) 4772 mount_drop(mp); 4773 4774 return (error); 4775 } 4776 4777 /* 4778 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4779 */ 4780 int 4781 sys_fhstatfs(struct fhstatfs_args *uap) 4782 { 4783 struct thread *td = curthread; 4784 struct proc *p = td->td_proc; 4785 struct statfs *sp; 4786 struct mount *mp; 4787 struct vnode *vp; 4788 struct statfs sb; 4789 char *fullpath, *freepath; 4790 fhandle_t fh; 4791 int error; 4792 4793 /* 4794 * Must be super user 4795 */ 4796 if ((error = priv_check(td, PRIV_ROOT))) 4797 return (error); 4798 4799 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4800 return (error); 4801 4802 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4803 error = ESTALE; 4804 goto done; 4805 } 4806 if (p != NULL && !chroot_visible_mnt(mp, p)) { 
4807 error = ESTALE; 4808 goto done; 4809 } 4810 4811 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4812 goto done; 4813 mp = vp->v_mount; 4814 sp = &mp->mnt_stat; 4815 vput(vp); 4816 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4817 goto done; 4818 4819 error = mount_path(p, mp, &fullpath, &freepath); 4820 if (error) 4821 goto done; 4822 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4823 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4824 kfree(freepath, M_TEMP); 4825 4826 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4827 if (priv_check(td, PRIV_ROOT)) { 4828 bcopy(sp, &sb, sizeof(sb)); 4829 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4830 sp = &sb; 4831 } 4832 error = copyout(sp, uap->buf, sizeof(*sp)); 4833 done: 4834 if (mp) 4835 mount_drop(mp); 4836 4837 return (error); 4838 } 4839 4840 /* 4841 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4842 */ 4843 int 4844 sys_fhstatvfs(struct fhstatvfs_args *uap) 4845 { 4846 struct thread *td = curthread; 4847 struct proc *p = td->td_proc; 4848 struct statvfs *sp; 4849 struct mount *mp; 4850 struct vnode *vp; 4851 fhandle_t fh; 4852 int error; 4853 4854 /* 4855 * Must be super user 4856 */ 4857 if ((error = priv_check(td, PRIV_ROOT))) 4858 return (error); 4859 4860 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4861 return (error); 4862 4863 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4864 error = ESTALE; 4865 goto done; 4866 } 4867 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4868 error = ESTALE; 4869 goto done; 4870 } 4871 4872 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4873 goto done; 4874 mp = vp->v_mount; 4875 sp = &mp->mnt_vstat; 4876 vput(vp); 4877 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4878 goto done; 4879 4880 sp->f_flag = 0; 4881 if (mp->mnt_flag & MNT_RDONLY) 4882 sp->f_flag |= ST_RDONLY; 4883 if (mp->mnt_flag & MNT_NOSUID) 4884 sp->f_flag |= ST_NOSUID; 4885 error = copyout(sp, uap->buf, sizeof(*sp)); 4886 done: 4887 
if (mp) 4888 mount_drop(mp); 4889 return (error); 4890 } 4891 4892 4893 /* 4894 * Syscall to push extended attribute configuration information into the 4895 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4896 * a command (int cmd), and attribute name and misc data. For now, the 4897 * attribute name is left in userspace for consumption by the VFS_op. 4898 * It will probably be changed to be copied into sysspace by the 4899 * syscall in the future, once issues with various consumers of the 4900 * attribute code have raised their hands. 4901 * 4902 * Currently this is used only by UFS Extended Attributes. 4903 */ 4904 int 4905 sys_extattrctl(struct extattrctl_args *uap) 4906 { 4907 struct nlookupdata nd; 4908 struct vnode *vp; 4909 char attrname[EXTATTR_MAXNAMELEN]; 4910 int error; 4911 size_t size; 4912 4913 attrname[0] = 0; 4914 vp = NULL; 4915 error = 0; 4916 4917 if (error == 0 && uap->filename) { 4918 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4919 NLC_FOLLOW); 4920 if (error == 0) 4921 error = nlookup(&nd); 4922 if (error == 0) 4923 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4924 nlookup_done(&nd); 4925 } 4926 4927 if (error == 0 && uap->attrname) { 4928 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4929 &size); 4930 } 4931 4932 if (error == 0) { 4933 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4934 if (error == 0) 4935 error = nlookup(&nd); 4936 if (error == 0) 4937 error = ncp_writechk(&nd.nl_nch); 4938 if (error == 0) { 4939 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4940 uap->attrnamespace, 4941 uap->attrname, nd.nl_cred); 4942 } 4943 nlookup_done(&nd); 4944 } 4945 4946 return (error); 4947 } 4948 4949 /* 4950 * Syscall to get a named extended attribute on a file or directory. 
4951 */ 4952 int 4953 sys_extattr_set_file(struct extattr_set_file_args *uap) 4954 { 4955 char attrname[EXTATTR_MAXNAMELEN]; 4956 struct nlookupdata nd; 4957 struct vnode *vp; 4958 struct uio auio; 4959 struct iovec aiov; 4960 int error; 4961 4962 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4963 if (error) 4964 return (error); 4965 4966 vp = NULL; 4967 4968 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4969 if (error == 0) 4970 error = nlookup(&nd); 4971 if (error == 0) 4972 error = ncp_writechk(&nd.nl_nch); 4973 if (error == 0) 4974 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4975 if (error) { 4976 nlookup_done(&nd); 4977 return (error); 4978 } 4979 4980 bzero(&auio, sizeof(auio)); 4981 aiov.iov_base = uap->data; 4982 aiov.iov_len = uap->nbytes; 4983 auio.uio_iov = &aiov; 4984 auio.uio_iovcnt = 1; 4985 auio.uio_offset = 0; 4986 auio.uio_resid = uap->nbytes; 4987 auio.uio_rw = UIO_WRITE; 4988 auio.uio_td = curthread; 4989 4990 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4991 &auio, nd.nl_cred); 4992 4993 vput(vp); 4994 nlookup_done(&nd); 4995 return (error); 4996 } 4997 4998 /* 4999 * Syscall to get a named extended attribute on a file or directory. 
5000 */ 5001 int 5002 sys_extattr_get_file(struct extattr_get_file_args *uap) 5003 { 5004 char attrname[EXTATTR_MAXNAMELEN]; 5005 struct nlookupdata nd; 5006 struct uio auio; 5007 struct iovec aiov; 5008 struct vnode *vp; 5009 int error; 5010 5011 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5012 if (error) 5013 return (error); 5014 5015 vp = NULL; 5016 5017 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5018 if (error == 0) 5019 error = nlookup(&nd); 5020 if (error == 0) 5021 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5022 if (error) { 5023 nlookup_done(&nd); 5024 return (error); 5025 } 5026 5027 bzero(&auio, sizeof(auio)); 5028 aiov.iov_base = uap->data; 5029 aiov.iov_len = uap->nbytes; 5030 auio.uio_iov = &aiov; 5031 auio.uio_iovcnt = 1; 5032 auio.uio_offset = 0; 5033 auio.uio_resid = uap->nbytes; 5034 auio.uio_rw = UIO_READ; 5035 auio.uio_td = curthread; 5036 5037 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5038 &auio, nd.nl_cred); 5039 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 5040 5041 vput(vp); 5042 nlookup_done(&nd); 5043 return(error); 5044 } 5045 5046 /* 5047 * Syscall to delete a named extended attribute from a file or directory. 5048 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
5049 */ 5050 int 5051 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 5052 { 5053 char attrname[EXTATTR_MAXNAMELEN]; 5054 struct nlookupdata nd; 5055 struct vnode *vp; 5056 int error; 5057 5058 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5059 if (error) 5060 return(error); 5061 5062 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5063 if (error == 0) 5064 error = nlookup(&nd); 5065 if (error == 0) 5066 error = ncp_writechk(&nd.nl_nch); 5067 if (error == 0) { 5068 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5069 if (error == 0) { 5070 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5071 attrname, NULL, nd.nl_cred); 5072 vput(vp); 5073 } 5074 } 5075 nlookup_done(&nd); 5076 return(error); 5077 } 5078 5079 /* 5080 * Determine if the mount is visible to the process. 5081 */ 5082 static int 5083 chroot_visible_mnt(struct mount *mp, struct proc *p) 5084 { 5085 struct nchandle nch; 5086 5087 /* 5088 * Traverse from the mount point upwards. If we hit the process 5089 * root then the mount point is visible to the process. 5090 */ 5091 nch = mp->mnt_ncmountpt; 5092 while (nch.ncp) { 5093 if (nch.mount == p->p_fd->fd_nrdir.mount && 5094 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5095 return(1); 5096 } 5097 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5098 nch = nch.mount->mnt_ncmounton; 5099 } else { 5100 nch.ncp = nch.ncp->nc_parent; 5101 } 5102 } 5103 5104 /* 5105 * If the mount point is not visible to the process, but the 5106 * process root is in a subdirectory of the mount, return 5107 * TRUE anyway. 5108 */ 5109 if (p->p_fd->fd_nrdir.mount == mp) 5110 return(1); 5111 5112 return(0); 5113 } 5114 5115