1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 79 __printflike(2, 3); 80 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 81 static int checkvp_chdir (struct vnode *vn, struct thread *td); 82 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 83 static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp); 84 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 85 static int getutimes (struct timeval *, struct timespec *); 86 static int getutimens (const struct timespec *, struct timespec *, int *); 87 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 88 static int setfmode (struct vnode *, int); 89 static int setfflags (struct vnode *, int); 90 static int setutimes (struct vnode *, struct vattr *, 91 const struct timespec *, int); 92 93 static int usermount = 0; /* if 1, non-root can mount fs. */ 94 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 95 "Allow non-root users to mount filesystems"); 96 97 static int debug_unmount = 0; /* if 1 loop until unmount success */ 98 SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0, 99 "Stall failed unmounts in loop"); 100 /* 101 * Virtual File System System Calls 102 */ 103 104 /* 105 * Mount a file system. 106 * 107 * mount_args(char *type, char *path, int flags, caddr_t data) 108 * 109 * MPALMOSTSAFE 110 */ 111 int 112 sys_mount(struct mount_args *uap) 113 { 114 struct thread *td = curthread; 115 struct vnode *vp; 116 struct nchandle nch; 117 struct mount *mp, *nullmp; 118 struct vfsconf *vfsp; 119 int error, flag = 0, flag2 = 0; 120 int hasmount; 121 struct vattr va; 122 struct nlookupdata nd; 123 char fstypename[MFSNAMELEN]; 124 struct ucred *cred; 125 126 cred = td->td_ucred; 127 if (jailed(cred)) { 128 error = EPERM; 129 goto done; 130 } 131 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 132 goto done; 133 134 /* 135 * Do not allow NFS export by non-root users. 136 */ 137 if (uap->flags & MNT_EXPORTED) { 138 error = priv_check(td, PRIV_ROOT); 139 if (error) 140 goto done; 141 } 142 /* 143 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 144 */ 145 if (priv_check(td, PRIV_ROOT)) 146 uap->flags |= MNT_NOSUID | MNT_NODEV; 147 148 /* 149 * Lookup the requested path and extract the nch and vnode. 150 */ 151 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 152 if (error == 0) { 153 if ((error = nlookup(&nd)) == 0) { 154 if (nd.nl_nch.ncp->nc_vp == NULL) 155 error = ENOENT; 156 } 157 } 158 if (error) { 159 nlookup_done(&nd); 160 goto done; 161 } 162 163 /* 164 * If the target filesystem is resolved via a nullfs mount, then 165 * nd.nl_nch.mount will be pointing to the nullfs mount structure 166 * instead of the target file system. We need it in case we are 167 * doing an update. 168 */ 169 nullmp = nd.nl_nch.mount; 170 171 /* 172 * Extract the locked+refd ncp and cleanup the nd structure 173 */ 174 nch = nd.nl_nch; 175 cache_zero(&nd.nl_nch); 176 nlookup_done(&nd); 177 178 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 179 (mp = cache_findmount(&nch)) != NULL) { 180 cache_dropmount(mp); 181 hasmount = 1; 182 } else { 183 hasmount = 0; 184 } 185 186 187 /* 188 * now we have the locked ref'd nch and unreferenced vnode. 189 */ 190 vp = nch.ncp->nc_vp; 191 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 192 cache_put(&nch); 193 goto done; 194 } 195 cache_unlock(&nch); 196 197 /* 198 * Extract the file system type. We need to know this early, to take 199 * appropriate actions if we are dealing with a nullfs. 200 */ 201 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 202 cache_drop(&nch); 203 vput(vp); 204 goto done; 205 } 206 207 /* 208 * Now we have an unlocked ref'd nch and a locked ref'd vp 209 */ 210 if (uap->flags & MNT_UPDATE) { 211 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 212 cache_drop(&nch); 213 vput(vp); 214 error = EINVAL; 215 goto done; 216 } 217 218 if (strncmp(fstypename, "null", 5) == 0) { 219 KKASSERT(nullmp); 220 mp = nullmp; 221 } else { 222 mp = vp->v_mount; 223 } 224 225 flag = mp->mnt_flag; 226 flag2 = mp->mnt_kern_flag; 227 /* 228 * We only allow the filesystem to be reloaded if it 229 * is currently mounted read-only. 230 */ 231 if ((uap->flags & MNT_RELOAD) && 232 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 233 cache_drop(&nch); 234 vput(vp); 235 error = EOPNOTSUPP; /* Needs translation */ 236 goto done; 237 } 238 /* 239 * Only root, or the user that did the original mount is 240 * permitted to update it. 241 */ 242 if (mp->mnt_stat.f_owner != cred->cr_uid && 243 (error = priv_check(td, PRIV_ROOT))) { 244 cache_drop(&nch); 245 vput(vp); 246 goto done; 247 } 248 if (vfs_busy(mp, LK_NOWAIT)) { 249 cache_drop(&nch); 250 vput(vp); 251 error = EBUSY; 252 goto done; 253 } 254 if (hasmount) { 255 cache_drop(&nch); 256 vfs_unbusy(mp); 257 vput(vp); 258 error = EBUSY; 259 goto done; 260 } 261 mp->mnt_flag |= 262 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 263 lwkt_gettoken(&mp->mnt_token); 264 vn_unlock(vp); 265 vfsp = mp->mnt_vfc; 266 goto update; 267 } 268 269 /* 270 * If the user is not root, ensure that they own the directory 271 * onto which we are attempting to mount. 272 */ 273 if ((error = VOP_GETATTR(vp, &va)) || 274 (va.va_uid != cred->cr_uid && 275 (error = priv_check(td, PRIV_ROOT)))) { 276 cache_drop(&nch); 277 vput(vp); 278 goto done; 279 } 280 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 281 cache_drop(&nch); 282 vput(vp); 283 goto done; 284 } 285 if (vp->v_type != VDIR) { 286 cache_drop(&nch); 287 vput(vp); 288 error = ENOTDIR; 289 goto done; 290 } 291 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 292 cache_drop(&nch); 293 vput(vp); 294 error = EPERM; 295 goto done; 296 } 297 vfsp = vfsconf_find_by_name(fstypename); 298 if (vfsp == NULL) { 299 linker_file_t lf; 300 301 /* Only load modules for root (very important!) */ 302 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 303 cache_drop(&nch); 304 vput(vp); 305 goto done; 306 } 307 error = linker_load_file(fstypename, &lf); 308 if (error || lf == NULL) { 309 cache_drop(&nch); 310 vput(vp); 311 if (lf == NULL) 312 error = ENODEV; 313 goto done; 314 } 315 lf->userrefs++; 316 /* lookup again, see if the VFS was loaded */ 317 vfsp = vfsconf_find_by_name(fstypename); 318 if (vfsp == NULL) { 319 lf->userrefs--; 320 linker_file_unload(lf); 321 cache_drop(&nch); 322 vput(vp); 323 error = ENODEV; 324 goto done; 325 } 326 } 327 if (hasmount) { 328 cache_drop(&nch); 329 vput(vp); 330 error = EBUSY; 331 goto done; 332 } 333 334 /* 335 * Allocate and initialize the filesystem. 336 */ 337 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 338 mount_init(mp); 339 vfs_busy(mp, LK_NOWAIT); 340 mp->mnt_op = vfsp->vfc_vfsops; 341 mp->mnt_vfc = vfsp; 342 mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT; 343 vfsp->vfc_refcount++; 344 mp->mnt_stat.f_type = vfsp->vfc_typenum; 345 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 346 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 347 mp->mnt_stat.f_owner = cred->cr_uid; 348 lwkt_gettoken(&mp->mnt_token); 349 vn_unlock(vp); 350 update: 351 /* 352 * (per-mount token acquired at this point) 353 * 354 * Set the mount level flags. 355 */ 356 if (uap->flags & MNT_RDONLY) 357 mp->mnt_flag |= MNT_RDONLY; 358 else if (mp->mnt_flag & MNT_RDONLY) 359 mp->mnt_kern_flag |= MNTK_WANTRDWR; 360 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 361 MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | 362 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 363 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 364 MNT_AUTOMOUNTED); 365 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 366 MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE | 367 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 368 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 369 MNT_AUTOMOUNTED); 370 371 /* 372 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf. 373 * This way the initial VFS_MOUNT() call will also be MPSAFE. 374 */ 375 if (vfsp->vfc_flags & VFCF_MPSAFE) 376 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 377 378 /* 379 * Mount the filesystem. 380 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 381 * get. 382 */ 383 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 384 if (mp->mnt_flag & MNT_UPDATE) { 385 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 386 mp->mnt_flag &= ~MNT_RDONLY; 387 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 388 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 389 if (error) { 390 mp->mnt_flag = flag; 391 mp->mnt_kern_flag = flag2; 392 } 393 lwkt_reltoken(&mp->mnt_token); 394 vfs_unbusy(mp); 395 vrele(vp); 396 cache_drop(&nch); 397 goto done; 398 } 399 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 400 401 /* 402 * Put the new filesystem on the mount list after root. The mount 403 * point gets its own mnt_ncmountpt (unless the VFS already set one 404 * up) which represents the root of the mount. The lookup code 405 * detects the mount point going forward and checks the root of 406 * the mount going backwards. 407 * 408 * It is not necessary to invalidate or purge the vnode underneath 409 * because elements under the mount will be given their own glue 410 * namecache record. 411 */ 412 if (!error) { 413 if (mp->mnt_ncmountpt.ncp == NULL) { 414 /* 415 * Allocate, then unlock, but leave the ref intact. 416 * This is the mnt_refs (1) that we will retain 417 * through to the unmount. 418 */ 419 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 420 cache_unlock(&mp->mnt_ncmountpt); 421 } 422 vn_unlock(vp); 423 mp->mnt_ncmounton = nch; /* inherits ref */ 424 cache_lock(&nch); 425 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 426 cache_unlock(&nch); 427 cache_ismounting(mp); 428 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 429 430 mountlist_insert(mp, MNTINS_LAST); 431 vn_unlock(vp); 432 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 433 error = vfs_allocate_syncvnode(mp); 434 lwkt_reltoken(&mp->mnt_token); 435 vfs_unbusy(mp); 436 error = VFS_START(mp, 0); 437 vrele(vp); 438 KNOTE(&fs_klist, VQ_MOUNT); 439 } else { 440 vn_syncer_thr_stop(mp); 441 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 442 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 443 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 444 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 445 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 446 mp->mnt_vfc->vfc_refcount--; 447 lwkt_reltoken(&mp->mnt_token); 448 vfs_unbusy(mp); 449 kfree(mp, M_MOUNT); 450 cache_drop(&nch); 451 vput(vp); 452 } 453 done: 454 return (error); 455 } 456 457 /* 458 * Scan all active processes to see if any of them have a current 459 * or root directory onto which the new filesystem has just been 460 * mounted. If so, replace them with the new mount point. 461 * 462 * Both old_nch and new_nch are ref'd on call but not locked. 463 * new_nch must be temporarily locked so it can be associated with the 464 * vnode representing the root of the mount point. 465 */ 466 struct checkdirs_info { 467 struct nchandle old_nch; 468 struct nchandle new_nch; 469 struct vnode *old_vp; 470 struct vnode *new_vp; 471 }; 472 473 static int checkdirs_callback(struct proc *p, void *data); 474 475 static void 476 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 477 { 478 struct checkdirs_info info; 479 struct vnode *olddp; 480 struct vnode *newdp; 481 struct mount *mp; 482 483 /* 484 * If the old mount point's vnode has a usecount of 1, it is not 485 * being held as a descriptor anywhere. 486 */ 487 olddp = old_nch->ncp->nc_vp; 488 if (olddp == NULL || VREFCNT(olddp) == 1) 489 return; 490 491 /* 492 * Force the root vnode of the new mount point to be resolved 493 * so we can update any matching processes. 494 */ 495 mp = new_nch->mount; 496 if (VFS_ROOT(mp, &newdp)) 497 panic("mount: lost mount"); 498 vn_unlock(newdp); 499 cache_lock(new_nch); 500 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 501 cache_setunresolved(new_nch); 502 cache_setvp(new_nch, newdp); 503 cache_unlock(new_nch); 504 505 /* 506 * Special handling of the root node 507 */ 508 if (rootvnode == olddp) { 509 vref(newdp); 510 vfs_cache_setroot(newdp, cache_hold(new_nch)); 511 } 512 513 /* 514 * Pass newdp separately so the callback does not have to access 515 * it via new_nch->ncp->nc_vp. 516 */ 517 info.old_nch = *old_nch; 518 info.new_nch = *new_nch; 519 info.new_vp = newdp; 520 allproc_scan(checkdirs_callback, &info, 0); 521 vput(newdp); 522 } 523 524 /* 525 * NOTE: callback is not MP safe because the scanned process's filedesc 526 * structure can be ripped out from under us, amoung other things. 527 */ 528 static int 529 checkdirs_callback(struct proc *p, void *data) 530 { 531 struct checkdirs_info *info = data; 532 struct filedesc *fdp; 533 struct nchandle ncdrop1; 534 struct nchandle ncdrop2; 535 struct vnode *vprele1; 536 struct vnode *vprele2; 537 538 if ((fdp = p->p_fd) != NULL) { 539 cache_zero(&ncdrop1); 540 cache_zero(&ncdrop2); 541 vprele1 = NULL; 542 vprele2 = NULL; 543 544 /* 545 * MPUNSAFE - XXX fdp can be pulled out from under a 546 * foreign process. 547 * 548 * A shared filedesc is ok, we don't have to copy it 549 * because we are making this change globally. 550 */ 551 spin_lock(&fdp->fd_spin); 552 if (fdp->fd_ncdir.mount == info->old_nch.mount && 553 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 554 vprele1 = fdp->fd_cdir; 555 vref(info->new_vp); 556 fdp->fd_cdir = info->new_vp; 557 ncdrop1 = fdp->fd_ncdir; 558 cache_copy(&info->new_nch, &fdp->fd_ncdir); 559 } 560 if (fdp->fd_nrdir.mount == info->old_nch.mount && 561 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 562 vprele2 = fdp->fd_rdir; 563 vref(info->new_vp); 564 fdp->fd_rdir = info->new_vp; 565 ncdrop2 = fdp->fd_nrdir; 566 cache_copy(&info->new_nch, &fdp->fd_nrdir); 567 } 568 spin_unlock(&fdp->fd_spin); 569 if (ncdrop1.ncp) 570 cache_drop(&ncdrop1); 571 if (ncdrop2.ncp) 572 cache_drop(&ncdrop2); 573 if (vprele1) 574 vrele(vprele1); 575 if (vprele2) 576 vrele(vprele2); 577 } 578 return(0); 579 } 580 581 /* 582 * Unmount a file system. 583 * 584 * Note: unmount takes a path to the vnode mounted on as argument, 585 * not special file (as before). 586 * 587 * umount_args(char *path, int flags) 588 * 589 * MPALMOSTSAFE 590 */ 591 int 592 sys_unmount(struct unmount_args *uap) 593 { 594 struct thread *td = curthread; 595 struct proc *p __debugvar = td->td_proc; 596 struct mount *mp = NULL; 597 struct nlookupdata nd; 598 int error; 599 600 KKASSERT(p); 601 if (td->td_ucred->cr_prison != NULL) { 602 error = EPERM; 603 goto done; 604 } 605 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 606 goto done; 607 608 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 609 NLC_FOLLOW | NLC_IGNBADDIR); 610 if (error == 0) 611 error = nlookup(&nd); 612 if (error) 613 goto out; 614 615 mp = nd.nl_nch.mount; 616 617 /* 618 * Only root, or the user that did the original mount is 619 * permitted to unmount this filesystem. 620 */ 621 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 622 (error = priv_check(td, PRIV_ROOT))) 623 goto out; 624 625 /* 626 * Don't allow unmounting the root file system. 627 */ 628 if (mp->mnt_flag & MNT_ROOTFS) { 629 error = EINVAL; 630 goto out; 631 } 632 633 /* 634 * Must be the root of the filesystem 635 */ 636 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 637 error = EINVAL; 638 goto out; 639 } 640 641 /* 642 * If no error try to issue the unmount. We lose our cache 643 * ref when we call nlookup_done so we must hold the mount point 644 * to prevent use-after-free races. 645 */ 646 out: 647 if (error == 0) { 648 mount_hold(mp); 649 nlookup_done(&nd); 650 error = dounmount(mp, uap->flags, 0); 651 mount_drop(mp); 652 } else { 653 nlookup_done(&nd); 654 } 655 done: 656 return (error); 657 } 658 659 /* 660 * Do the actual file system unmount (interlocked against the mountlist 661 * token and mp->mnt_token). 662 */ 663 static int 664 dounmount_interlock(struct mount *mp) 665 { 666 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 667 return (EBUSY); 668 mp->mnt_kern_flag |= MNTK_UNMOUNT; 669 return(0); 670 } 671 672 static int 673 unmount_allproc_cb(struct proc *p, void *arg) 674 { 675 struct mount *mp; 676 677 if (p->p_textnch.ncp == NULL) 678 return 0; 679 680 mp = (struct mount *)arg; 681 if (p->p_textnch.mount == mp) 682 cache_drop(&p->p_textnch); 683 684 return 0; 685 } 686 687 /* 688 * The guts of the unmount code. The mount owns one ref and one hold 689 * count. If we successfully interlock the unmount, those refs are ours. 690 * (The ref is from mnt_ncmountpt). 691 * 692 * When halting we shortcut certain mount types such as devfs by not actually 693 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected 694 * from the mountlist so higher-level filesytems can unmount cleanly. 695 * 696 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs. 697 */ 698 int 699 dounmount(struct mount *mp, int flags, int halting) 700 { 701 struct namecache *ncp; 702 struct nchandle nch; 703 struct vnode *vp; 704 int error; 705 int async_flag; 706 int lflags; 707 int freeok = 1; 708 int hadsyncer = 0; 709 int retry; 710 int quickhalt; 711 712 lwkt_gettoken(&mp->mnt_token); 713 714 /* 715 * When halting, certain mount points can essentially just 716 * be unhooked and otherwise ignored. 717 */ 718 if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) { 719 quickhalt = 1; 720 freeok = 0; 721 } else { 722 quickhalt = 0; 723 } 724 725 726 /* 727 * Exclusive access for unmounting purposes. 728 */ 729 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 730 goto out; 731 732 /* 733 * We now 'own' the last mp->mnt_refs 734 * 735 * Allow filesystems to detect that a forced unmount is in progress. 736 */ 737 if (flags & MNT_FORCE) 738 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 739 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK); 740 error = lockmgr(&mp->mnt_lock, lflags); 741 if (error) { 742 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 743 if (mp->mnt_kern_flag & MNTK_MWAIT) { 744 mp->mnt_kern_flag &= ~MNTK_MWAIT; 745 wakeup(mp); 746 } 747 goto out; 748 } 749 750 if (mp->mnt_flag & MNT_EXPUBLIC) 751 vfs_setpublicfs(NULL, NULL, NULL); 752 753 vfs_msync(mp, MNT_WAIT); 754 async_flag = mp->mnt_flag & MNT_ASYNC; 755 mp->mnt_flag &=~ MNT_ASYNC; 756 757 /* 758 * Decomission our special mnt_syncer vnode. This also stops 759 * the vnlru code. If we are unable to unmount we recommission 760 * the vnode. 761 * 762 * Then sync the filesystem. 763 */ 764 if ((vp = mp->mnt_syncer) != NULL) { 765 mp->mnt_syncer = NULL; 766 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 767 vrele(vp); 768 hadsyncer = 1; 769 } 770 771 /* 772 * Sync normally-mounted filesystem. 773 */ 774 if (quickhalt == 0) { 775 if ((mp->mnt_flag & MNT_RDONLY) == 0) 776 VFS_SYNC(mp, MNT_WAIT); 777 } 778 779 /* 780 * nchandle records ref the mount structure. Expect a count of 1 781 * (our mount->mnt_ncmountpt). 782 * 783 * Scans can get temporary refs on a mountpoint (thought really 784 * heavy duty stuff like cache_findmount() do not). 785 */ 786 for (retry = 0; (retry < 10 || debug_unmount); ++retry) { 787 /* 788 * Invalidate the namecache topology under the mount. 789 * nullfs mounts alias a real mount's namecache topology 790 * and it should not be invalidated in that case. 791 */ 792 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 793 cache_lock(&mp->mnt_ncmountpt); 794 cache_inval(&mp->mnt_ncmountpt, 795 CINV_DESTROY|CINV_CHILDREN); 796 cache_unlock(&mp->mnt_ncmountpt); 797 } 798 799 /* 800 * Clear pcpu caches 801 */ 802 cache_unmounting(mp); 803 if (mp->mnt_refs != 1) 804 cache_clearmntcache(); 805 806 /* 807 * Break out if we are good. Don't count ncp refs if the 808 * mount is aliased. 809 */ 810 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 811 NULL : mp->mnt_ncmountpt.ncp; 812 if (mp->mnt_refs == 1 && 813 (ncp == NULL || (ncp->nc_refs == 1 && 814 TAILQ_FIRST(&ncp->nc_list) == NULL))) { 815 break; 816 } 817 818 /* 819 * If forcing the unmount, clean out any p->p_textnch 820 * nchandles that match this mount. 821 */ 822 if (flags & MNT_FORCE) 823 allproc_scan(&unmount_allproc_cb, mp, 0); 824 825 /* 826 * Sleep and retry. 827 */ 828 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1); 829 if ((retry & 15) == 15) { 830 mount_warning(mp, 831 "(%p) debug - retry %d, " 832 "%d namecache refs, %d mount refs", 833 mp, retry, 834 (ncp ? ncp->nc_refs - 1 : 0), 835 mp->mnt_refs - 1); 836 } 837 } 838 839 error = 0; 840 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 841 NULL : mp->mnt_ncmountpt.ncp; 842 if (mp->mnt_refs != 1 || 843 (ncp != NULL && (ncp->nc_refs != 1 || 844 TAILQ_FIRST(&ncp->nc_list)))) { 845 mount_warning(mp, 846 "(%p): %d namecache refs, %d mount refs " 847 "still present", 848 mp, 849 (ncp ? ncp->nc_refs - 1 : 0), 850 mp->mnt_refs - 1); 851 if (flags & MNT_FORCE) { 852 freeok = 0; 853 mount_warning(mp, "forcing unmount\n"); 854 } else { 855 error = EBUSY; 856 } 857 } 858 859 /* 860 * So far so good, sync the filesystem once more and 861 * call the VFS unmount code if the sync succeeds. 862 */ 863 if (error == 0 && quickhalt == 0) { 864 if (mp->mnt_flag & MNT_RDONLY) { 865 error = VFS_UNMOUNT(mp, flags); 866 } else { 867 error = VFS_SYNC(mp, MNT_WAIT); 868 if (error == 0 || /* no error */ 869 error == EOPNOTSUPP || /* no sync avail */ 870 (flags & MNT_FORCE)) { /* force anyway */ 871 error = VFS_UNMOUNT(mp, flags); 872 } 873 } 874 if (error) { 875 mount_warning(mp, 876 "(%p) unmount: vfs refused to unmount, " 877 "error %d", 878 mp, error); 879 } 880 } 881 882 /* 883 * If an error occurred we can still recover, restoring the 884 * syncer vnode and misc flags. 885 */ 886 if (error) { 887 if (mp->mnt_syncer == NULL && hadsyncer) 888 vfs_allocate_syncvnode(mp); 889 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 890 mp->mnt_flag |= async_flag; 891 lockmgr(&mp->mnt_lock, LK_RELEASE); 892 if (mp->mnt_kern_flag & MNTK_MWAIT) { 893 mp->mnt_kern_flag &= ~MNTK_MWAIT; 894 wakeup(mp); 895 } 896 goto out; 897 } 898 /* 899 * Clean up any journals still associated with the mount after 900 * filesystem activity has ceased. 901 */ 902 journal_remove_all_journals(mp, 903 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 904 905 mountlist_remove(mp); 906 907 /* 908 * Remove any installed vnode ops here so the individual VFSs don't 909 * have to. 910 * 911 * mnt_refs should go to zero when we scrap mnt_ncmountpt. 912 * 913 * When quickhalting we have to keep these intact because the 914 * underlying vnodes have not been destroyed, and some might be 915 * dirty. 916 */ 917 if (quickhalt == 0) { 918 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 919 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 920 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 921 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 922 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 923 } 924 925 if (mp->mnt_ncmountpt.ncp != NULL) { 926 nch = mp->mnt_ncmountpt; 927 cache_zero(&mp->mnt_ncmountpt); 928 cache_clrmountpt(&nch); 929 cache_drop(&nch); 930 } 931 if (mp->mnt_ncmounton.ncp != NULL) { 932 cache_unmounting(mp); 933 nch = mp->mnt_ncmounton; 934 cache_zero(&mp->mnt_ncmounton); 935 cache_clrmountpt(&nch); 936 cache_drop(&nch); 937 } 938 939 mp->mnt_vfc->vfc_refcount--; 940 941 /* 942 * If not quickhalting the mount, we expect there to be no 943 * vnodes left. 944 */ 945 if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist)) 946 panic("unmount: dangling vnode"); 947 948 /* 949 * Release the lock 950 */ 951 lockmgr(&mp->mnt_lock, LK_RELEASE); 952 if (mp->mnt_kern_flag & MNTK_MWAIT) { 953 mp->mnt_kern_flag &= ~MNTK_MWAIT; 954 wakeup(mp); 955 } 956 957 /* 958 * If we reach here and freeok != 0 we must free the mount. 959 * mnt_refs should already have dropped to 0, so if it is not 960 * zero we must cycle the caches and wait. 961 * 962 * When we are satisfied that the mount has disconnected we can 963 * drop the hold on the mp that represented the mount (though the 964 * caller might actually have another, so the caller's drop may 965 * do the actual free). 966 */ 967 if (freeok) { 968 if (mp->mnt_refs > 0) 969 cache_clearmntcache(); 970 while (mp->mnt_refs > 0) { 971 cache_unmounting(mp); 972 wakeup(mp); 973 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 974 cache_clearmntcache(); 975 } 976 lwkt_reltoken(&mp->mnt_token); 977 mount_drop(mp); 978 mp = NULL; 979 } else { 980 cache_clearmntcache(); 981 } 982 error = 0; 983 KNOTE(&fs_klist, VQ_UNMOUNT); 984 out: 985 if (mp) 986 lwkt_reltoken(&mp->mnt_token); 987 return (error); 988 } 989 990 static 991 void 992 mount_warning(struct mount *mp, const char *ctl, ...) 993 { 994 char *ptr; 995 char *buf; 996 __va_list va; 997 998 __va_start(va, ctl); 999 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 1000 &ptr, &buf, 0) == 0) { 1001 kprintf("unmount(%s): ", ptr); 1002 kvprintf(ctl, va); 1003 kprintf("\n"); 1004 kfree(buf, M_TEMP); 1005 } else { 1006 kprintf("unmount(%p", mp); 1007 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 1008 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 1009 kprintf("): "); 1010 kvprintf(ctl, va); 1011 kprintf("\n"); 1012 } 1013 __va_end(va); 1014 } 1015 1016 /* 1017 * Shim cache_fullpath() to handle the case where a process is chrooted into 1018 * a subdirectory of a mount. In this case if the root mount matches the 1019 * process root directory's mount we have to specify the process's root 1020 * directory instead of the mount point, because the mount point might 1021 * be above the root directory. 1022 */ 1023 static 1024 int 1025 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 1026 { 1027 struct nchandle *nch; 1028 1029 if (p && p->p_fd->fd_nrdir.mount == mp) 1030 nch = &p->p_fd->fd_nrdir; 1031 else 1032 nch = &mp->mnt_ncmountpt; 1033 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1034 } 1035 1036 /* 1037 * Sync each mounted filesystem. 1038 */ 1039 1040 #ifdef DEBUG 1041 static int syncprt = 0; 1042 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1043 #endif /* DEBUG */ 1044 1045 static int sync_callback(struct mount *mp, void *data); 1046 1047 int 1048 sys_sync(struct sync_args *uap) 1049 { 1050 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1051 return (0); 1052 } 1053 1054 static 1055 int 1056 sync_callback(struct mount *mp, void *data __unused) 1057 { 1058 int asyncflag; 1059 1060 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1061 lwkt_gettoken(&mp->mnt_token); 1062 asyncflag = mp->mnt_flag & MNT_ASYNC; 1063 mp->mnt_flag &= ~MNT_ASYNC; 1064 lwkt_reltoken(&mp->mnt_token); 1065 vfs_msync(mp, MNT_NOWAIT); 1066 VFS_SYNC(mp, MNT_NOWAIT); 1067 lwkt_gettoken(&mp->mnt_token); 1068 mp->mnt_flag |= asyncflag; 1069 lwkt_reltoken(&mp->mnt_token); 1070 } 1071 return(0); 1072 } 1073 1074 /* XXX PRISON: could be per prison flag */ 1075 static int prison_quotas; 1076 #if 0 1077 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1078 #endif 1079 1080 /* 1081 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1082 * 1083 * Change filesystem quotas. 1084 * 1085 * MPALMOSTSAFE 1086 */ 1087 int 1088 sys_quotactl(struct quotactl_args *uap) 1089 { 1090 struct nlookupdata nd; 1091 struct thread *td; 1092 struct mount *mp; 1093 int error; 1094 1095 td = curthread; 1096 if (td->td_ucred->cr_prison && !prison_quotas) { 1097 error = EPERM; 1098 goto done; 1099 } 1100 1101 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1102 if (error == 0) 1103 error = nlookup(&nd); 1104 if (error == 0) { 1105 mp = nd.nl_nch.mount; 1106 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 1107 uap->arg, nd.nl_cred); 1108 } 1109 nlookup_done(&nd); 1110 done: 1111 return (error); 1112 } 1113 1114 /* 1115 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1116 * void *buf, int buflen) 1117 * 1118 * This function operates on a mount point and executes the specified 1119 * operation using the specified control data, and possibly returns data. 1120 * 1121 * The actual number of bytes stored in the result buffer is returned, 0 1122 * if none, otherwise an error is returned. 1123 * 1124 * MPALMOSTSAFE 1125 */ 1126 int 1127 sys_mountctl(struct mountctl_args *uap) 1128 { 1129 struct thread *td = curthread; 1130 struct file *fp; 1131 void *ctl = NULL; 1132 void *buf = NULL; 1133 char *path = NULL; 1134 int error; 1135 1136 /* 1137 * Sanity and permissions checks. We must be root. 1138 */ 1139 if (td->td_ucred->cr_prison != NULL) 1140 return (EPERM); 1141 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1142 (error = priv_check(td, PRIV_ROOT)) != 0) 1143 return (error); 1144 1145 /* 1146 * Argument length checks 1147 */ 1148 if (uap->ctllen < 0 || uap->ctllen > 1024) 1149 return (EINVAL); 1150 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1151 return (EINVAL); 1152 if (uap->path == NULL) 1153 return (EINVAL); 1154 1155 /* 1156 * Allocate the necessary buffers and copyin data 1157 */ 1158 path = objcache_get(namei_oc, M_WAITOK); 1159 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1160 if (error) 1161 goto done; 1162 1163 if (uap->ctllen) { 1164 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1165 error = copyin(uap->ctl, ctl, uap->ctllen); 1166 if (error) 1167 goto done; 1168 } 1169 if (uap->buflen) 1170 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1171 1172 /* 1173 * Validate the descriptor 1174 */ 1175 if (uap->fd >= 0) { 1176 fp = holdfp(td, uap->fd, -1); 1177 if (fp == NULL) { 1178 error = EBADF; 1179 goto done; 1180 } 1181 } else { 1182 fp = NULL; 1183 } 1184 1185 /* 1186 * Execute the internal kernel function and clean up. 1187 */ 1188 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 1189 buf, uap->buflen, &uap->sysmsg_result); 1190 if (fp) 1191 dropfp(td, uap->fd, fp); 1192 if (error == 0 && uap->sysmsg_result > 0) 1193 error = copyout(buf, uap->buf, uap->sysmsg_result); 1194 done: 1195 if (path) 1196 objcache_put(namei_oc, path); 1197 if (ctl) 1198 kfree(ctl, M_TEMP); 1199 if (buf) 1200 kfree(buf, M_TEMP); 1201 return (error); 1202 } 1203 1204 /* 1205 * Execute a mount control operation by resolving the path to a mount point 1206 * and calling vop_mountctl(). 1207 * 1208 * Use the mount point from the nch instead of the vnode so nullfs mounts 1209 * can properly spike the VOP. 1210 */ 1211 int 1212 kern_mountctl(const char *path, int op, struct file *fp, 1213 const void *ctl, int ctllen, 1214 void *buf, int buflen, int *res) 1215 { 1216 struct vnode *vp; 1217 struct nlookupdata nd; 1218 struct nchandle nch; 1219 struct mount *mp; 1220 int error; 1221 1222 *res = 0; 1223 vp = NULL; 1224 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1225 if (error) 1226 return (error); 1227 error = nlookup(&nd); 1228 if (error) { 1229 nlookup_done(&nd); 1230 return (error); 1231 } 1232 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1233 if (error) { 1234 nlookup_done(&nd); 1235 return (error); 1236 } 1237 1238 /* 1239 * Yes, all this is needed to use the nch.mount below, because 1240 * we must maintain a ref on the mount to avoid ripouts (e.g. 1241 * due to heavy mount/unmount use by synth or poudriere). 1242 */ 1243 nch = nd.nl_nch; 1244 cache_zero(&nd.nl_nch); 1245 cache_unlock(&nch); 1246 nlookup_done(&nd); 1247 vn_unlock(vp); 1248 1249 mp = nch.mount; 1250 1251 /* 1252 * Must be the root of the filesystem 1253 */ 1254 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1255 cache_drop(&nch); 1256 vrele(vp); 1257 return (EINVAL); 1258 } 1259 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1260 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1261 path); 1262 cache_drop(&nch); 1263 vrele(vp); 1264 return (EINVAL); 1265 } 1266 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1267 buf, buflen, res); 1268 vrele(vp); 1269 cache_drop(&nch); 1270 1271 return (error); 1272 } 1273 1274 int 1275 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1276 { 1277 struct thread *td = curthread; 1278 struct proc *p = td->td_proc; 1279 struct mount *mp; 1280 struct statfs *sp; 1281 char *fullpath, *freepath; 1282 int error; 1283 1284 if ((error = nlookup(nd)) != 0) 1285 return (error); 1286 mp = nd->nl_nch.mount; 1287 sp = &mp->mnt_stat; 1288 1289 /* 1290 * Ignore refresh error, user should have visibility. 1291 * This can happen if a NFS mount goes bad (e.g. server 1292 * revokes perms or goes down). 1293 */ 1294 error = VFS_STATFS(mp, sp, nd->nl_cred); 1295 /* ignore error */ 1296 1297 error = mount_path(p, mp, &fullpath, &freepath); 1298 if (error) 1299 return(error); 1300 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1301 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1302 kfree(freepath, M_TEMP); 1303 1304 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1305 bcopy(sp, buf, sizeof(*buf)); 1306 /* Only root should have access to the fsid's. */ 1307 if (priv_check(td, PRIV_ROOT)) 1308 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1309 return (0); 1310 } 1311 1312 /* 1313 * statfs_args(char *path, struct statfs *buf) 1314 * 1315 * Get filesystem statistics. 1316 */ 1317 int 1318 sys_statfs(struct statfs_args *uap) 1319 { 1320 struct nlookupdata nd; 1321 struct statfs buf; 1322 int error; 1323 1324 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1325 if (error == 0) 1326 error = kern_statfs(&nd, &buf); 1327 nlookup_done(&nd); 1328 if (error == 0) 1329 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1330 return (error); 1331 } 1332 1333 int 1334 kern_fstatfs(int fd, struct statfs *buf) 1335 { 1336 struct thread *td = curthread; 1337 struct proc *p = td->td_proc; 1338 struct file *fp; 1339 struct mount *mp; 1340 struct statfs *sp; 1341 char *fullpath, *freepath; 1342 int error; 1343 1344 KKASSERT(p); 1345 if ((error = holdvnode(td, fd, &fp)) != 0) 1346 return (error); 1347 1348 /* 1349 * Try to use mount info from any overlays rather than the 1350 * mount info for the underlying vnode, otherwise we will 1351 * fail when operating on null-mounted paths inside a chroot. 1352 */ 1353 if ((mp = fp->f_nchandle.mount) == NULL) 1354 mp = ((struct vnode *)fp->f_data)->v_mount; 1355 if (mp == NULL) { 1356 error = EBADF; 1357 goto done; 1358 } 1359 if (fp->f_cred == NULL) { 1360 error = EINVAL; 1361 goto done; 1362 } 1363 1364 /* 1365 * Ignore refresh error, user should have visibility. 1366 * This can happen if a NFS mount goes bad (e.g. server 1367 * revokes perms or goes down). 1368 */ 1369 sp = &mp->mnt_stat; 1370 error = VFS_STATFS(mp, sp, fp->f_cred); 1371 1372 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1373 goto done; 1374 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1375 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1376 kfree(freepath, M_TEMP); 1377 1378 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1379 bcopy(sp, buf, sizeof(*buf)); 1380 1381 /* Only root should have access to the fsid's. */ 1382 if (priv_check(td, PRIV_ROOT)) 1383 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1384 error = 0; 1385 done: 1386 fdrop(fp); 1387 return (error); 1388 } 1389 1390 /* 1391 * fstatfs_args(int fd, struct statfs *buf) 1392 * 1393 * Get filesystem statistics. 1394 */ 1395 int 1396 sys_fstatfs(struct fstatfs_args *uap) 1397 { 1398 struct statfs buf; 1399 int error; 1400 1401 error = kern_fstatfs(uap->fd, &buf); 1402 1403 if (error == 0) 1404 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1405 return (error); 1406 } 1407 1408 int 1409 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1410 { 1411 struct mount *mp; 1412 struct statvfs *sp; 1413 int error; 1414 1415 if ((error = nlookup(nd)) != 0) 1416 return (error); 1417 mp = nd->nl_nch.mount; 1418 sp = &mp->mnt_vstat; 1419 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1420 return (error); 1421 1422 sp->f_flag = 0; 1423 if (mp->mnt_flag & MNT_RDONLY) 1424 sp->f_flag |= ST_RDONLY; 1425 if (mp->mnt_flag & MNT_NOSUID) 1426 sp->f_flag |= ST_NOSUID; 1427 bcopy(sp, buf, sizeof(*buf)); 1428 return (0); 1429 } 1430 1431 /* 1432 * statfs_args(char *path, struct statfs *buf) 1433 * 1434 * Get filesystem statistics. 1435 */ 1436 int 1437 sys_statvfs(struct statvfs_args *uap) 1438 { 1439 struct nlookupdata nd; 1440 struct statvfs buf; 1441 int error; 1442 1443 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1444 if (error == 0) 1445 error = kern_statvfs(&nd, &buf); 1446 nlookup_done(&nd); 1447 if (error == 0) 1448 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1449 return (error); 1450 } 1451 1452 int 1453 kern_fstatvfs(int fd, struct statvfs *buf) 1454 { 1455 struct thread *td = curthread; 1456 struct file *fp; 1457 struct mount *mp; 1458 struct statvfs *sp; 1459 int error; 1460 1461 if ((error = holdvnode(td, fd, &fp)) != 0) 1462 return (error); 1463 if ((mp = fp->f_nchandle.mount) == NULL) 1464 mp = ((struct vnode *)fp->f_data)->v_mount; 1465 if (mp == NULL) { 1466 error = EBADF; 1467 goto done; 1468 } 1469 if (fp->f_cred == NULL) { 1470 error = EINVAL; 1471 goto done; 1472 } 1473 sp = &mp->mnt_vstat; 1474 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1475 goto done; 1476 1477 sp->f_flag = 0; 1478 if (mp->mnt_flag & MNT_RDONLY) 1479 sp->f_flag |= ST_RDONLY; 1480 if (mp->mnt_flag & MNT_NOSUID) 1481 sp->f_flag |= ST_NOSUID; 1482 1483 bcopy(sp, buf, sizeof(*buf)); 1484 error = 0; 1485 done: 1486 fdrop(fp); 1487 return (error); 1488 } 1489 1490 /* 1491 * fstatfs_args(int fd, struct statfs *buf) 1492 * 1493 * Get filesystem statistics. 1494 */ 1495 int 1496 sys_fstatvfs(struct fstatvfs_args *uap) 1497 { 1498 struct statvfs buf; 1499 int error; 1500 1501 error = kern_fstatvfs(uap->fd, &buf); 1502 1503 if (error == 0) 1504 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1505 return (error); 1506 } 1507 1508 /* 1509 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1510 * 1511 * Get statistics on all filesystems. 1512 */ 1513 1514 struct getfsstat_info { 1515 struct statfs *sfsp; 1516 long count; 1517 long maxcount; 1518 int error; 1519 int flags; 1520 struct thread *td; 1521 }; 1522 1523 static int getfsstat_callback(struct mount *, void *); 1524 1525 int 1526 sys_getfsstat(struct getfsstat_args *uap) 1527 { 1528 struct thread *td = curthread; 1529 struct getfsstat_info info; 1530 1531 bzero(&info, sizeof(info)); 1532 1533 info.maxcount = uap->bufsize / sizeof(struct statfs); 1534 info.sfsp = uap->buf; 1535 info.count = 0; 1536 info.flags = uap->flags; 1537 info.td = td; 1538 1539 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1540 if (info.sfsp && info.count > info.maxcount) 1541 uap->sysmsg_result = info.maxcount; 1542 else 1543 uap->sysmsg_result = info.count; 1544 return (info.error); 1545 } 1546 1547 static int 1548 getfsstat_callback(struct mount *mp, void *data) 1549 { 1550 struct getfsstat_info *info = data; 1551 struct statfs *sp; 1552 char *freepath; 1553 char *fullpath; 1554 int error; 1555 1556 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1557 return(0); 1558 1559 if (info->sfsp && info->count < info->maxcount) { 1560 sp = &mp->mnt_stat; 1561 1562 /* 1563 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1564 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1565 * overrides MNT_WAIT. 1566 * 1567 * Ignore refresh error, user should have visibility. 1568 * This can happen if a NFS mount goes bad (e.g. server 1569 * revokes perms or goes down). 1570 */ 1571 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1572 (info->flags & MNT_WAIT)) && 1573 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1574 /* ignore error */ 1575 } 1576 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1577 1578 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1579 if (error) { 1580 info->error = error; 1581 return(-1); 1582 } 1583 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1584 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1585 kfree(freepath, M_TEMP); 1586 1587 error = copyout(sp, info->sfsp, sizeof(*sp)); 1588 if (error) { 1589 info->error = error; 1590 return (-1); 1591 } 1592 ++info->sfsp; 1593 } 1594 info->count++; 1595 return(0); 1596 } 1597 1598 /* 1599 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1600 long bufsize, int flags) 1601 * 1602 * Get statistics on all filesystems. 1603 */ 1604 1605 struct getvfsstat_info { 1606 struct statfs *sfsp; 1607 struct statvfs *vsfsp; 1608 long count; 1609 long maxcount; 1610 int error; 1611 int flags; 1612 struct thread *td; 1613 }; 1614 1615 static int getvfsstat_callback(struct mount *, void *); 1616 1617 int 1618 sys_getvfsstat(struct getvfsstat_args *uap) 1619 { 1620 struct thread *td = curthread; 1621 struct getvfsstat_info info; 1622 1623 bzero(&info, sizeof(info)); 1624 1625 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1626 info.sfsp = uap->buf; 1627 info.vsfsp = uap->vbuf; 1628 info.count = 0; 1629 info.flags = uap->flags; 1630 info.td = td; 1631 1632 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1633 if (info.vsfsp && info.count > info.maxcount) 1634 uap->sysmsg_result = info.maxcount; 1635 else 1636 uap->sysmsg_result = info.count; 1637 return (info.error); 1638 } 1639 1640 static int 1641 getvfsstat_callback(struct mount *mp, void *data) 1642 { 1643 struct getvfsstat_info *info = data; 1644 struct statfs *sp; 1645 struct statvfs *vsp; 1646 char *freepath; 1647 char *fullpath; 1648 int error; 1649 1650 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1651 return(0); 1652 1653 if (info->vsfsp && info->count < info->maxcount) { 1654 sp = &mp->mnt_stat; 1655 vsp = &mp->mnt_vstat; 1656 1657 /* 1658 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1659 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1660 * overrides MNT_WAIT. 1661 * 1662 * Ignore refresh error, user should have visibility. 1663 * This can happen if a NFS mount goes bad (e.g. server 1664 * revokes perms or goes down). 1665 */ 1666 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1667 (info->flags & MNT_WAIT)) && 1668 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1669 /* ignore error */ 1670 } 1671 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1672 1673 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1674 (info->flags & MNT_WAIT)) && 1675 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1676 /* ignore error */ 1677 } 1678 vsp->f_flag = 0; 1679 if (mp->mnt_flag & MNT_RDONLY) 1680 vsp->f_flag |= ST_RDONLY; 1681 if (mp->mnt_flag & MNT_NOSUID) 1682 vsp->f_flag |= ST_NOSUID; 1683 1684 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1685 if (error) { 1686 info->error = error; 1687 return(-1); 1688 } 1689 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1690 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1691 kfree(freepath, M_TEMP); 1692 1693 error = copyout(sp, info->sfsp, sizeof(*sp)); 1694 if (error == 0) 1695 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1696 if (error) { 1697 info->error = error; 1698 return (-1); 1699 } 1700 ++info->sfsp; 1701 ++info->vsfsp; 1702 } 1703 info->count++; 1704 return(0); 1705 } 1706 1707 1708 /* 1709 * fchdir_args(int fd) 1710 * 1711 * Change current working directory to a given file descriptor. 1712 */ 1713 int 1714 sys_fchdir(struct fchdir_args *uap) 1715 { 1716 struct thread *td = curthread; 1717 struct proc *p = td->td_proc; 1718 struct filedesc *fdp = p->p_fd; 1719 struct vnode *vp, *ovp; 1720 struct mount *mp; 1721 struct file *fp; 1722 struct nchandle nch, onch, tnch; 1723 int error; 1724 1725 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1726 return (error); 1727 lwkt_gettoken(&p->p_token); 1728 vp = (struct vnode *)fp->f_data; 1729 vref(vp); 1730 vn_lock(vp, LK_SHARED | LK_RETRY); 1731 if (fp->f_nchandle.ncp == NULL) 1732 error = ENOTDIR; 1733 else 1734 error = checkvp_chdir(vp, td); 1735 if (error) { 1736 vput(vp); 1737 goto done; 1738 } 1739 cache_copy(&fp->f_nchandle, &nch); 1740 1741 /* 1742 * If the ncp has become a mount point, traverse through 1743 * the mount point. 1744 */ 1745 1746 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1747 (mp = cache_findmount(&nch)) != NULL 1748 ) { 1749 error = nlookup_mp(mp, &tnch); 1750 if (error == 0) { 1751 cache_unlock(&tnch); /* leave ref intact */ 1752 vput(vp); 1753 vp = tnch.ncp->nc_vp; 1754 error = vget(vp, LK_SHARED); 1755 KKASSERT(error == 0); 1756 cache_drop(&nch); 1757 nch = tnch; 1758 } 1759 cache_dropmount(mp); 1760 } 1761 if (error == 0) { 1762 spin_lock(&fdp->fd_spin); 1763 ovp = fdp->fd_cdir; 1764 onch = fdp->fd_ncdir; 1765 fdp->fd_cdir = vp; 1766 fdp->fd_ncdir = nch; 1767 spin_unlock(&fdp->fd_spin); 1768 vn_unlock(vp); /* leave ref intact */ 1769 cache_drop(&onch); 1770 vrele(ovp); 1771 } else { 1772 cache_drop(&nch); 1773 vput(vp); 1774 } 1775 fdrop(fp); 1776 done: 1777 lwkt_reltoken(&p->p_token); 1778 return (error); 1779 } 1780 1781 int 1782 kern_chdir(struct nlookupdata *nd) 1783 { 1784 struct thread *td = curthread; 1785 struct proc *p = td->td_proc; 1786 struct filedesc *fdp = p->p_fd; 1787 struct vnode *vp, *ovp; 1788 struct nchandle onch; 1789 int error; 1790 1791 nd->nl_flags |= NLC_SHAREDLOCK; 1792 if ((error = nlookup(nd)) != 0) 1793 return (error); 1794 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1795 return (ENOENT); 1796 if ((error = vget(vp, LK_SHARED)) != 0) 1797 return (error); 1798 1799 lwkt_gettoken(&p->p_token); 1800 error = checkvp_chdir(vp, td); 1801 vn_unlock(vp); 1802 if (error == 0) { 1803 spin_lock(&fdp->fd_spin); 1804 ovp = fdp->fd_cdir; 1805 onch = fdp->fd_ncdir; 1806 fdp->fd_ncdir = nd->nl_nch; 1807 fdp->fd_cdir = vp; 1808 spin_unlock(&fdp->fd_spin); 1809 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1810 cache_drop(&onch); 1811 vrele(ovp); 1812 cache_zero(&nd->nl_nch); 1813 } else { 1814 vrele(vp); 1815 } 1816 lwkt_reltoken(&p->p_token); 1817 return (error); 1818 } 1819 1820 /* 1821 * chdir_args(char *path) 1822 * 1823 * Change current working directory (``.''). 1824 */ 1825 int 1826 sys_chdir(struct chdir_args *uap) 1827 { 1828 struct nlookupdata nd; 1829 int error; 1830 1831 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1832 if (error == 0) 1833 error = kern_chdir(&nd); 1834 nlookup_done(&nd); 1835 return (error); 1836 } 1837 1838 /* 1839 * Helper function for raised chroot(2) security function: Refuse if 1840 * any filedescriptors are open directories. 1841 */ 1842 static int 1843 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1844 { 1845 struct vnode *vp; 1846 struct file *fp; 1847 int error; 1848 int fd; 1849 1850 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1851 if ((error = holdvnode(td, fd, &fp)) != 0) 1852 continue; 1853 vp = (struct vnode *)fp->f_data; 1854 if (vp->v_type != VDIR) { 1855 fdrop(fp); 1856 continue; 1857 } 1858 fdrop(fp); 1859 return(EPERM); 1860 } 1861 return (0); 1862 } 1863 1864 /* 1865 * This sysctl determines if we will allow a process to chroot(2) if it 1866 * has a directory open: 1867 * 0: disallowed for all processes. 1868 * 1: allowed for processes that were not already chroot(2)'ed. 1869 * 2: allowed for all processes. 1870 */ 1871 1872 static int chroot_allow_open_directories = 1; 1873 1874 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1875 &chroot_allow_open_directories, 0, ""); 1876 1877 /* 1878 * chroot to the specified namecache entry. We obtain the vp from the 1879 * namecache data. The passed ncp must be locked and referenced and will 1880 * remain locked and referenced on return. 1881 */ 1882 int 1883 kern_chroot(struct nchandle *nch) 1884 { 1885 struct thread *td = curthread; 1886 struct proc *p = td->td_proc; 1887 struct filedesc *fdp = p->p_fd; 1888 struct vnode *vp; 1889 int error; 1890 1891 /* 1892 * Only privileged user can chroot 1893 */ 1894 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1895 if (error) 1896 return (error); 1897 1898 /* 1899 * Disallow open directory descriptors (fchdir() breakouts). 1900 */ 1901 if (chroot_allow_open_directories == 0 || 1902 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1903 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 1904 return (error); 1905 } 1906 if ((vp = nch->ncp->nc_vp) == NULL) 1907 return (ENOENT); 1908 1909 if ((error = vget(vp, LK_SHARED)) != 0) 1910 return (error); 1911 1912 /* 1913 * Check the validity of vp as a directory to change to and 1914 * associate it with rdir/jdir. 1915 */ 1916 error = checkvp_chdir(vp, td); 1917 vn_unlock(vp); /* leave reference intact */ 1918 if (error == 0) { 1919 lwkt_gettoken(&p->p_token); 1920 vrele(fdp->fd_rdir); 1921 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1922 cache_drop(&fdp->fd_nrdir); 1923 cache_copy(nch, &fdp->fd_nrdir); 1924 if (fdp->fd_jdir == NULL) { 1925 fdp->fd_jdir = vp; 1926 vref(fdp->fd_jdir); 1927 cache_copy(nch, &fdp->fd_njdir); 1928 } 1929 if ((p->p_flags & P_DIDCHROOT) == 0) { 1930 p->p_flags |= P_DIDCHROOT; 1931 if (p->p_depth <= 65535 - 32) 1932 p->p_depth += 32; 1933 } 1934 lwkt_reltoken(&p->p_token); 1935 } else { 1936 vrele(vp); 1937 } 1938 return (error); 1939 } 1940 1941 /* 1942 * chroot_args(char *path) 1943 * 1944 * Change notion of root (``/'') directory. 1945 */ 1946 int 1947 sys_chroot(struct chroot_args *uap) 1948 { 1949 struct thread *td __debugvar = curthread; 1950 struct nlookupdata nd; 1951 int error; 1952 1953 KKASSERT(td->td_proc); 1954 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1955 if (error == 0) { 1956 nd.nl_flags |= NLC_EXEC; 1957 error = nlookup(&nd); 1958 if (error == 0) 1959 error = kern_chroot(&nd.nl_nch); 1960 } 1961 nlookup_done(&nd); 1962 return(error); 1963 } 1964 1965 int 1966 sys_chroot_kernel(struct chroot_kernel_args *uap) 1967 { 1968 struct thread *td = curthread; 1969 struct nlookupdata nd; 1970 struct nchandle *nch; 1971 struct vnode *vp; 1972 int error; 1973 1974 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1975 if (error) 1976 goto error_nond; 1977 1978 error = nlookup(&nd); 1979 if (error) 1980 goto error_out; 1981 1982 nch = &nd.nl_nch; 1983 1984 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1985 if (error) 1986 goto error_out; 1987 1988 if ((vp = nch->ncp->nc_vp) == NULL) { 1989 error = ENOENT; 1990 goto error_out; 1991 } 1992 1993 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1994 goto error_out; 1995 1996 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1997 vfs_cache_setroot(vp, cache_hold(nch)); 1998 1999 error_out: 2000 nlookup_done(&nd); 2001 error_nond: 2002 return(error); 2003 } 2004 2005 /* 2006 * Common routine for chroot and chdir. Given a locked, referenced vnode, 2007 * determine whether it is legal to chdir to the vnode. The vnode's state 2008 * is not changed by this call. 2009 */ 2010 static int 2011 checkvp_chdir(struct vnode *vp, struct thread *td) 2012 { 2013 int error; 2014 2015 if (vp->v_type != VDIR) 2016 error = ENOTDIR; 2017 else 2018 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 2019 return (error); 2020 } 2021 2022 int 2023 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 2024 { 2025 struct thread *td = curthread; 2026 struct proc *p = td->td_proc; 2027 struct lwp *lp = td->td_lwp; 2028 struct filedesc *fdp = p->p_fd; 2029 int cmode, flags; 2030 struct file *nfp; 2031 struct file *fp; 2032 struct vnode *vp; 2033 int type, indx, error = 0; 2034 struct flock lf; 2035 2036 if ((oflags & O_ACCMODE) == O_ACCMODE) 2037 return (EINVAL); 2038 flags = FFLAGS(oflags); 2039 error = falloc(lp, &nfp, NULL); 2040 if (error) 2041 return (error); 2042 fp = nfp; 2043 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 2044 2045 /* 2046 * XXX p_dupfd is a real mess. It allows a device to return a 2047 * file descriptor to be duplicated rather then doing the open 2048 * itself. 2049 */ 2050 lp->lwp_dupfd = -1; 2051 2052 /* 2053 * Call vn_open() to do the lookup and assign the vnode to the 2054 * file pointer. vn_open() does not change the ref count on fp 2055 * and the vnode, on success, will be inherited by the file pointer 2056 * and unlocked. 2057 * 2058 * Request a shared lock on the vnode if possible. 2059 * 2060 * Executable binaries can race VTEXT against O_RDWR opens, so 2061 * use an exclusive lock for O_RDWR opens as well. 2062 * 2063 * NOTE: We need a flag to separate terminal vnode locking from 2064 * parent locking. O_CREAT needs parent locking, but O_TRUNC 2065 * and O_RDWR only need to lock the terminal vnode exclusively. 2066 */ 2067 nd->nl_flags |= NLC_LOCKVP; 2068 if ((flags & (O_CREAT|O_TRUNC|O_RDWR)) == 0) 2069 nd->nl_flags |= NLC_SHAREDLOCK; 2070 2071 error = vn_open(nd, fp, flags, cmode); 2072 nlookup_done(nd); 2073 2074 if (error) { 2075 /* 2076 * handle special fdopen() case. bleh. dupfdopen() is 2077 * responsible for dropping the old contents of ofiles[indx] 2078 * if it succeeds. 2079 * 2080 * Note that fsetfd() will add a ref to fp which represents 2081 * the fd_files[] assignment. We must still drop our 2082 * reference. 2083 */ 2084 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 2085 if (fdalloc(p, 0, &indx) == 0) { 2086 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error); 2087 if (error == 0) { 2088 *res = indx; 2089 fdrop(fp); /* our ref */ 2090 return (0); 2091 } 2092 fsetfd(fdp, NULL, indx); 2093 } 2094 } 2095 fdrop(fp); /* our ref */ 2096 if (error == ERESTART) 2097 error = EINTR; 2098 return (error); 2099 } 2100 2101 /* 2102 * ref the vnode for ourselves so it can't be ripped out from under 2103 * is. XXX need an ND flag to request that the vnode be returned 2104 * anyway. 2105 * 2106 * Reserve a file descriptor but do not assign it until the open 2107 * succeeds. 2108 */ 2109 vp = (struct vnode *)fp->f_data; 2110 vref(vp); 2111 if ((error = fdalloc(p, 0, &indx)) != 0) { 2112 fdrop(fp); 2113 vrele(vp); 2114 return (error); 2115 } 2116 2117 /* 2118 * If no error occurs the vp will have been assigned to the file 2119 * pointer. 2120 */ 2121 lp->lwp_dupfd = 0; 2122 2123 if (flags & (O_EXLOCK | O_SHLOCK)) { 2124 lf.l_whence = SEEK_SET; 2125 lf.l_start = 0; 2126 lf.l_len = 0; 2127 if (flags & O_EXLOCK) 2128 lf.l_type = F_WRLCK; 2129 else 2130 lf.l_type = F_RDLCK; 2131 if (flags & FNONBLOCK) 2132 type = 0; 2133 else 2134 type = F_WAIT; 2135 2136 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2137 /* 2138 * lock request failed. Clean up the reserved 2139 * descriptor. 2140 */ 2141 vrele(vp); 2142 fsetfd(fdp, NULL, indx); 2143 fdrop(fp); 2144 return (error); 2145 } 2146 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2147 } 2148 #if 0 2149 /* 2150 * Assert that all regular file vnodes were created with a object. 2151 */ 2152 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2153 ("open: regular file has no backing object after vn_open")); 2154 #endif 2155 2156 vrele(vp); 2157 2158 /* 2159 * release our private reference, leaving the one associated with the 2160 * descriptor table intact. 2161 */ 2162 if (oflags & O_CLOEXEC) 2163 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2164 fsetfd(fdp, fp, indx); 2165 fdrop(fp); 2166 *res = indx; 2167 2168 return (error); 2169 } 2170 2171 /* 2172 * open_args(char *path, int flags, int mode) 2173 * 2174 * Check permissions, allocate an open file structure, 2175 * and call the device open routine if any. 2176 */ 2177 int 2178 sys_open(struct open_args *uap) 2179 { 2180 struct nlookupdata nd; 2181 int error; 2182 2183 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2184 if (error == 0) { 2185 error = kern_open(&nd, uap->flags, 2186 uap->mode, &uap->sysmsg_result); 2187 } 2188 nlookup_done(&nd); 2189 return (error); 2190 } 2191 2192 /* 2193 * openat_args(int fd, char *path, int flags, int mode) 2194 */ 2195 int 2196 sys_openat(struct openat_args *uap) 2197 { 2198 struct nlookupdata nd; 2199 int error; 2200 struct file *fp; 2201 2202 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2203 if (error == 0) { 2204 error = kern_open(&nd, uap->flags, uap->mode, 2205 &uap->sysmsg_result); 2206 } 2207 nlookup_done_at(&nd, fp); 2208 return (error); 2209 } 2210 2211 int 2212 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2213 { 2214 struct thread *td = curthread; 2215 struct proc *p = td->td_proc; 2216 struct vnode *vp; 2217 struct vattr vattr; 2218 int error; 2219 int whiteout = 0; 2220 2221 KKASSERT(p); 2222 2223 VATTR_NULL(&vattr); 2224 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2225 vattr.va_rmajor = rmajor; 2226 vattr.va_rminor = rminor; 2227 2228 switch (mode & S_IFMT) { 2229 case S_IFMT: /* used by badsect to flag bad sectors */ 2230 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2231 vattr.va_type = VBAD; 2232 break; 2233 case S_IFCHR: 2234 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2235 vattr.va_type = VCHR; 2236 break; 2237 case S_IFBLK: 2238 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2239 vattr.va_type = VBLK; 2240 break; 2241 case S_IFWHT: 2242 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2243 whiteout = 1; 2244 break; 2245 case S_IFDIR: /* special directories support for HAMMER */ 2246 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2247 vattr.va_type = VDIR; 2248 break; 2249 default: 2250 error = EINVAL; 2251 break; 2252 } 2253 2254 if (error) 2255 return (error); 2256 2257 bwillinode(1); 2258 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2259 if ((error = nlookup(nd)) != 0) 2260 return (error); 2261 if (nd->nl_nch.ncp->nc_vp) 2262 return (EEXIST); 2263 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2264 return (error); 2265 2266 if (whiteout) { 2267 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2268 nd->nl_cred, NAMEI_CREATE); 2269 } else { 2270 vp = NULL; 2271 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2272 &vp, nd->nl_cred, &vattr); 2273 if (error == 0) 2274 vput(vp); 2275 } 2276 return (error); 2277 } 2278 2279 /* 2280 * mknod_args(char *path, int mode, int dev) 2281 * 2282 * Create a special file. 2283 */ 2284 int 2285 sys_mknod(struct mknod_args *uap) 2286 { 2287 struct nlookupdata nd; 2288 int error; 2289 2290 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2291 if (error == 0) { 2292 error = kern_mknod(&nd, uap->mode, 2293 umajor(uap->dev), uminor(uap->dev)); 2294 } 2295 nlookup_done(&nd); 2296 return (error); 2297 } 2298 2299 /* 2300 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2301 * 2302 * Create a special file. The path is relative to the directory associated 2303 * with fd. 2304 */ 2305 int 2306 sys_mknodat(struct mknodat_args *uap) 2307 { 2308 struct nlookupdata nd; 2309 struct file *fp; 2310 int error; 2311 2312 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2313 if (error == 0) { 2314 error = kern_mknod(&nd, uap->mode, 2315 umajor(uap->dev), uminor(uap->dev)); 2316 } 2317 nlookup_done_at(&nd, fp); 2318 return (error); 2319 } 2320 2321 int 2322 kern_mkfifo(struct nlookupdata *nd, int mode) 2323 { 2324 struct thread *td = curthread; 2325 struct proc *p = td->td_proc; 2326 struct vattr vattr; 2327 struct vnode *vp; 2328 int error; 2329 2330 bwillinode(1); 2331 2332 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2333 if ((error = nlookup(nd)) != 0) 2334 return (error); 2335 if (nd->nl_nch.ncp->nc_vp) 2336 return (EEXIST); 2337 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2338 return (error); 2339 2340 VATTR_NULL(&vattr); 2341 vattr.va_type = VFIFO; 2342 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2343 vp = NULL; 2344 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2345 if (error == 0) 2346 vput(vp); 2347 return (error); 2348 } 2349 2350 /* 2351 * mkfifo_args(char *path, int mode) 2352 * 2353 * Create a named pipe. 2354 */ 2355 int 2356 sys_mkfifo(struct mkfifo_args *uap) 2357 { 2358 struct nlookupdata nd; 2359 int error; 2360 2361 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2362 if (error == 0) 2363 error = kern_mkfifo(&nd, uap->mode); 2364 nlookup_done(&nd); 2365 return (error); 2366 } 2367 2368 /* 2369 * mkfifoat_args(int fd, char *path, mode_t mode) 2370 * 2371 * Create a named pipe. The path is relative to the directory associated 2372 * with fd. 2373 */ 2374 int 2375 sys_mkfifoat(struct mkfifoat_args *uap) 2376 { 2377 struct nlookupdata nd; 2378 struct file *fp; 2379 int error; 2380 2381 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2382 if (error == 0) 2383 error = kern_mkfifo(&nd, uap->mode); 2384 nlookup_done_at(&nd, fp); 2385 return (error); 2386 } 2387 2388 static int hardlink_check_uid = 0; 2389 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2390 &hardlink_check_uid, 0, 2391 "Unprivileged processes cannot create hard links to files owned by other " 2392 "users"); 2393 static int hardlink_check_gid = 0; 2394 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2395 &hardlink_check_gid, 0, 2396 "Unprivileged processes cannot create hard links to files owned by other " 2397 "groups"); 2398 2399 static int 2400 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2401 { 2402 struct vattr va; 2403 int error; 2404 2405 /* 2406 * Shortcut if disabled 2407 */ 2408 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2409 return (0); 2410 2411 /* 2412 * Privileged user can always hardlink 2413 */ 2414 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2415 return (0); 2416 2417 /* 2418 * Otherwise only if the originating file is owned by the 2419 * same user or group. Note that any group is allowed if 2420 * the file is owned by the caller. 2421 */ 2422 error = VOP_GETATTR(vp, &va); 2423 if (error != 0) 2424 return (error); 2425 2426 if (hardlink_check_uid) { 2427 if (cred->cr_uid != va.va_uid) 2428 return (EPERM); 2429 } 2430 2431 if (hardlink_check_gid) { 2432 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2433 return (EPERM); 2434 } 2435 2436 return (0); 2437 } 2438 2439 int 2440 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2441 { 2442 struct thread *td = curthread; 2443 struct vnode *vp; 2444 int error; 2445 2446 /* 2447 * Lookup the source and obtained a locked vnode. 2448 * 2449 * You may only hardlink a file which you have write permission 2450 * on or which you own. 2451 * 2452 * XXX relookup on vget failure / race ? 2453 */ 2454 bwillinode(1); 2455 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2456 if ((error = nlookup(nd)) != 0) 2457 return (error); 2458 vp = nd->nl_nch.ncp->nc_vp; 2459 KKASSERT(vp != NULL); 2460 if (vp->v_type == VDIR) 2461 return (EPERM); /* POSIX */ 2462 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2463 return (error); 2464 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2465 return (error); 2466 2467 /* 2468 * Unlock the source so we can lookup the target without deadlocking 2469 * (XXX vp is locked already, possible other deadlock?). The target 2470 * must not exist. 2471 */ 2472 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2473 nd->nl_flags &= ~NLC_NCPISLOCKED; 2474 cache_unlock(&nd->nl_nch); 2475 vn_unlock(vp); 2476 2477 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2478 if ((error = nlookup(linknd)) != 0) { 2479 vrele(vp); 2480 return (error); 2481 } 2482 if (linknd->nl_nch.ncp->nc_vp) { 2483 vrele(vp); 2484 return (EEXIST); 2485 } 2486 VFS_MODIFYING(vp->v_mount); 2487 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2488 if (error) { 2489 vrele(vp); 2490 return (error); 2491 } 2492 2493 /* 2494 * Finally run the new API VOP. 2495 */ 2496 error = can_hardlink(vp, td, td->td_ucred); 2497 if (error == 0) { 2498 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2499 vp, linknd->nl_cred); 2500 } 2501 vput(vp); 2502 return (error); 2503 } 2504 2505 /* 2506 * link_args(char *path, char *link) 2507 * 2508 * Make a hard file link. 2509 */ 2510 int 2511 sys_link(struct link_args *uap) 2512 { 2513 struct nlookupdata nd, linknd; 2514 int error; 2515 2516 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2517 if (error == 0) { 2518 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2519 if (error == 0) 2520 error = kern_link(&nd, &linknd); 2521 nlookup_done(&linknd); 2522 } 2523 nlookup_done(&nd); 2524 return (error); 2525 } 2526 2527 /* 2528 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2529 * 2530 * Make a hard file link. The path1 argument is relative to the directory 2531 * associated with fd1, and similarly the path2 argument is relative to 2532 * the directory associated with fd2. 2533 */ 2534 int 2535 sys_linkat(struct linkat_args *uap) 2536 { 2537 struct nlookupdata nd, linknd; 2538 struct file *fp1, *fp2; 2539 int error; 2540 2541 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2542 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2543 if (error == 0) { 2544 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2545 uap->path2, UIO_USERSPACE, 0); 2546 if (error == 0) 2547 error = kern_link(&nd, &linknd); 2548 nlookup_done_at(&linknd, fp2); 2549 } 2550 nlookup_done_at(&nd, fp1); 2551 return (error); 2552 } 2553 2554 int 2555 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2556 { 2557 struct vattr vattr; 2558 struct vnode *vp; 2559 struct vnode *dvp; 2560 int error; 2561 2562 bwillinode(1); 2563 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2564 if ((error = nlookup(nd)) != 0) 2565 return (error); 2566 if (nd->nl_nch.ncp->nc_vp) 2567 return (EEXIST); 2568 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2569 return (error); 2570 dvp = nd->nl_dvp; 2571 VATTR_NULL(&vattr); 2572 vattr.va_mode = mode; 2573 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2574 if (error == 0) 2575 vput(vp); 2576 return (error); 2577 } 2578 2579 /* 2580 * symlink(char *path, char *link) 2581 * 2582 * Make a symbolic link. 2583 */ 2584 int 2585 sys_symlink(struct symlink_args *uap) 2586 { 2587 struct thread *td = curthread; 2588 struct nlookupdata nd; 2589 char *path; 2590 int error; 2591 int mode; 2592 2593 path = objcache_get(namei_oc, M_WAITOK); 2594 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2595 if (error == 0) { 2596 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2597 if (error == 0) { 2598 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2599 error = kern_symlink(&nd, path, mode); 2600 } 2601 nlookup_done(&nd); 2602 } 2603 objcache_put(namei_oc, path); 2604 return (error); 2605 } 2606 2607 /* 2608 * symlinkat_args(char *path1, int fd, char *path2) 2609 * 2610 * Make a symbolic link. The path2 argument is relative to the directory 2611 * associated with fd. 2612 */ 2613 int 2614 sys_symlinkat(struct symlinkat_args *uap) 2615 { 2616 struct thread *td = curthread; 2617 struct nlookupdata nd; 2618 struct file *fp; 2619 char *path1; 2620 int error; 2621 int mode; 2622 2623 path1 = objcache_get(namei_oc, M_WAITOK); 2624 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2625 if (error == 0) { 2626 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2627 UIO_USERSPACE, 0); 2628 if (error == 0) { 2629 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2630 error = kern_symlink(&nd, path1, mode); 2631 } 2632 nlookup_done_at(&nd, fp); 2633 } 2634 objcache_put(namei_oc, path1); 2635 return (error); 2636 } 2637 2638 /* 2639 * undelete_args(char *path) 2640 * 2641 * Delete a whiteout from the filesystem. 2642 */ 2643 int 2644 sys_undelete(struct undelete_args *uap) 2645 { 2646 struct nlookupdata nd; 2647 int error; 2648 2649 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2650 bwillinode(1); 2651 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2652 if (error == 0) 2653 error = nlookup(&nd); 2654 if (error == 0) 2655 error = ncp_writechk(&nd.nl_nch); 2656 if (error == 0) { 2657 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2658 NAMEI_DELETE); 2659 } 2660 nlookup_done(&nd); 2661 return (error); 2662 } 2663 2664 int 2665 kern_unlink(struct nlookupdata *nd) 2666 { 2667 int error; 2668 2669 bwillinode(1); 2670 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2671 if ((error = nlookup(nd)) != 0) 2672 return (error); 2673 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2674 return (error); 2675 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2676 return (error); 2677 } 2678 2679 /* 2680 * unlink_args(char *path) 2681 * 2682 * Delete a name from the filesystem. 2683 */ 2684 int 2685 sys_unlink(struct unlink_args *uap) 2686 { 2687 struct nlookupdata nd; 2688 int error; 2689 2690 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2691 if (error == 0) 2692 error = kern_unlink(&nd); 2693 nlookup_done(&nd); 2694 return (error); 2695 } 2696 2697 2698 /* 2699 * unlinkat_args(int fd, char *path, int flags) 2700 * 2701 * Delete the file or directory entry pointed to by fd/path. 2702 */ 2703 int 2704 sys_unlinkat(struct unlinkat_args *uap) 2705 { 2706 struct nlookupdata nd; 2707 struct file *fp; 2708 int error; 2709 2710 if (uap->flags & ~AT_REMOVEDIR) 2711 return (EINVAL); 2712 2713 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2714 if (error == 0) { 2715 if (uap->flags & AT_REMOVEDIR) 2716 error = kern_rmdir(&nd); 2717 else 2718 error = kern_unlink(&nd); 2719 } 2720 nlookup_done_at(&nd, fp); 2721 return (error); 2722 } 2723 2724 int 2725 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2726 { 2727 struct thread *td = curthread; 2728 struct file *fp; 2729 struct vnode *vp; 2730 struct vattr vattr; 2731 off_t new_offset; 2732 int error; 2733 2734 fp = holdfp(td, fd, -1); 2735 if (fp == NULL) 2736 return (EBADF); 2737 if (fp->f_type != DTYPE_VNODE) { 2738 error = ESPIPE; 2739 goto done; 2740 } 2741 vp = (struct vnode *)fp->f_data; 2742 2743 switch (whence) { 2744 case L_INCR: 2745 spin_lock(&fp->f_spin); 2746 new_offset = fp->f_offset + offset; 2747 error = 0; 2748 break; 2749 case L_XTND: 2750 error = VOP_GETATTR(vp, &vattr); 2751 spin_lock(&fp->f_spin); 2752 new_offset = offset + vattr.va_size; 2753 break; 2754 case L_SET: 2755 new_offset = offset; 2756 error = 0; 2757 spin_lock(&fp->f_spin); 2758 break; 2759 default: 2760 new_offset = 0; 2761 error = EINVAL; 2762 spin_lock(&fp->f_spin); 2763 break; 2764 } 2765 2766 /* 2767 * Validate the seek position. Negative offsets are not allowed 2768 * for regular files or directories. 2769 * 2770 * Normally we would also not want to allow negative offsets for 2771 * character and block-special devices. However kvm addresses 2772 * on 64 bit architectures might appear to be negative and must 2773 * be allowed. 2774 */ 2775 if (error == 0) { 2776 if (new_offset < 0 && 2777 (vp->v_type == VREG || vp->v_type == VDIR)) { 2778 error = EINVAL; 2779 } else { 2780 fp->f_offset = new_offset; 2781 } 2782 } 2783 *res = fp->f_offset; 2784 spin_unlock(&fp->f_spin); 2785 done: 2786 dropfp(td, fd, fp); 2787 2788 return (error); 2789 } 2790 2791 /* 2792 * lseek_args(int fd, int pad, off_t offset, int whence) 2793 * 2794 * Reposition read/write file offset. 2795 */ 2796 int 2797 sys_lseek(struct lseek_args *uap) 2798 { 2799 int error; 2800 2801 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2802 &uap->sysmsg_offset); 2803 2804 return (error); 2805 } 2806 2807 /* 2808 * Check if current process can access given file. amode is a bitmask of *_OK 2809 * access bits. flags is a bitmask of AT_* flags. 2810 */ 2811 int 2812 kern_access(struct nlookupdata *nd, int amode, int flags) 2813 { 2814 struct vnode *vp; 2815 int error, mode; 2816 2817 if (flags & ~AT_EACCESS) 2818 return (EINVAL); 2819 nd->nl_flags |= NLC_SHAREDLOCK; 2820 if ((error = nlookup(nd)) != 0) 2821 return (error); 2822 retry: 2823 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2824 if (error) 2825 return (error); 2826 2827 /* Flags == 0 means only check for existence. */ 2828 if (amode) { 2829 mode = 0; 2830 if (amode & R_OK) 2831 mode |= VREAD; 2832 if (amode & W_OK) 2833 mode |= VWRITE; 2834 if (amode & X_OK) 2835 mode |= VEXEC; 2836 if ((mode & VWRITE) == 0 || 2837 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2838 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2839 2840 /* 2841 * If the file handle is stale we have to re-resolve the 2842 * entry with the ncp held exclusively. This is a hack 2843 * at the moment. 2844 */ 2845 if (error == ESTALE) { 2846 vput(vp); 2847 cache_unlock(&nd->nl_nch); 2848 cache_lock(&nd->nl_nch); 2849 cache_setunresolved(&nd->nl_nch); 2850 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2851 if (error == 0) { 2852 vp = NULL; 2853 goto retry; 2854 } 2855 return(error); 2856 } 2857 } 2858 vput(vp); 2859 return (error); 2860 } 2861 2862 /* 2863 * access_args(char *path, int flags) 2864 * 2865 * Check access permissions. 2866 */ 2867 int 2868 sys_access(struct access_args *uap) 2869 { 2870 struct nlookupdata nd; 2871 int error; 2872 2873 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2874 if (error == 0) 2875 error = kern_access(&nd, uap->flags, 0); 2876 nlookup_done(&nd); 2877 return (error); 2878 } 2879 2880 2881 /* 2882 * eaccess_args(char *path, int flags) 2883 * 2884 * Check access permissions. 2885 */ 2886 int 2887 sys_eaccess(struct eaccess_args *uap) 2888 { 2889 struct nlookupdata nd; 2890 int error; 2891 2892 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2893 if (error == 0) 2894 error = kern_access(&nd, uap->flags, AT_EACCESS); 2895 nlookup_done(&nd); 2896 return (error); 2897 } 2898 2899 2900 /* 2901 * faccessat_args(int fd, char *path, int amode, int flags) 2902 * 2903 * Check access permissions. 2904 */ 2905 int 2906 sys_faccessat(struct faccessat_args *uap) 2907 { 2908 struct nlookupdata nd; 2909 struct file *fp; 2910 int error; 2911 2912 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2913 NLC_FOLLOW); 2914 if (error == 0) 2915 error = kern_access(&nd, uap->amode, uap->flags); 2916 nlookup_done_at(&nd, fp); 2917 return (error); 2918 } 2919 2920 int 2921 kern_stat(struct nlookupdata *nd, struct stat *st) 2922 { 2923 int error; 2924 struct vnode *vp; 2925 2926 nd->nl_flags |= NLC_SHAREDLOCK; 2927 if ((error = nlookup(nd)) != 0) 2928 return (error); 2929 again: 2930 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2931 return (ENOENT); 2932 2933 if ((error = vget(vp, LK_SHARED)) != 0) 2934 return (error); 2935 error = vn_stat(vp, st, nd->nl_cred); 2936 2937 /* 2938 * If the file handle is stale we have to re-resolve the 2939 * entry with the ncp held exclusively. This is a hack 2940 * at the moment. 2941 */ 2942 if (error == ESTALE) { 2943 vput(vp); 2944 cache_unlock(&nd->nl_nch); 2945 cache_lock(&nd->nl_nch); 2946 cache_setunresolved(&nd->nl_nch); 2947 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2948 if (error == 0) 2949 goto again; 2950 } else { 2951 vput(vp); 2952 } 2953 return (error); 2954 } 2955 2956 /* 2957 * stat_args(char *path, struct stat *ub) 2958 * 2959 * Get file status; this version follows links. 2960 */ 2961 int 2962 sys_stat(struct stat_args *uap) 2963 { 2964 struct nlookupdata nd; 2965 struct stat st; 2966 int error; 2967 2968 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2969 if (error == 0) { 2970 error = kern_stat(&nd, &st); 2971 if (error == 0) 2972 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2973 } 2974 nlookup_done(&nd); 2975 return (error); 2976 } 2977 2978 /* 2979 * lstat_args(char *path, struct stat *ub) 2980 * 2981 * Get file status; this version does not follow links. 2982 */ 2983 int 2984 sys_lstat(struct lstat_args *uap) 2985 { 2986 struct nlookupdata nd; 2987 struct stat st; 2988 int error; 2989 2990 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2991 if (error == 0) { 2992 error = kern_stat(&nd, &st); 2993 if (error == 0) 2994 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2995 } 2996 nlookup_done(&nd); 2997 return (error); 2998 } 2999 3000 /* 3001 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3002 * 3003 * Get status of file pointed to by fd/path. 3004 */ 3005 int 3006 sys_fstatat(struct fstatat_args *uap) 3007 { 3008 struct nlookupdata nd; 3009 struct stat st; 3010 int error; 3011 int flags; 3012 struct file *fp; 3013 3014 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3015 return (EINVAL); 3016 3017 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3018 3019 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3020 UIO_USERSPACE, flags); 3021 if (error == 0) { 3022 error = kern_stat(&nd, &st); 3023 if (error == 0) 3024 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3025 } 3026 nlookup_done_at(&nd, fp); 3027 return (error); 3028 } 3029 3030 static int 3031 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3032 { 3033 struct nlookupdata nd; 3034 struct vnode *vp; 3035 int error; 3036 3037 vp = NULL; 3038 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3039 if (error == 0) 3040 error = nlookup(&nd); 3041 if (error == 0) 3042 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3043 nlookup_done(&nd); 3044 if (error == 0) { 3045 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3046 vput(vp); 3047 } 3048 return (error); 3049 } 3050 3051 /* 3052 * pathconf_Args(char *path, int name) 3053 * 3054 * Get configurable pathname variables. 3055 */ 3056 int 3057 sys_pathconf(struct pathconf_args *uap) 3058 { 3059 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3060 &uap->sysmsg_reg)); 3061 } 3062 3063 /* 3064 * lpathconf_Args(char *path, int name) 3065 * 3066 * Get configurable pathname variables, but don't follow symlinks. 3067 */ 3068 int 3069 sys_lpathconf(struct lpathconf_args *uap) 3070 { 3071 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3072 } 3073 3074 /* 3075 * XXX: daver 3076 * kern_readlink isn't properly split yet. There is a copyin burried 3077 * in VOP_READLINK(). 3078 */ 3079 int 3080 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3081 { 3082 struct thread *td = curthread; 3083 struct vnode *vp; 3084 struct iovec aiov; 3085 struct uio auio; 3086 int error; 3087 3088 nd->nl_flags |= NLC_SHAREDLOCK; 3089 if ((error = nlookup(nd)) != 0) 3090 return (error); 3091 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3092 if (error) 3093 return (error); 3094 if (vp->v_type != VLNK) { 3095 error = EINVAL; 3096 } else { 3097 aiov.iov_base = buf; 3098 aiov.iov_len = count; 3099 auio.uio_iov = &aiov; 3100 auio.uio_iovcnt = 1; 3101 auio.uio_offset = 0; 3102 auio.uio_rw = UIO_READ; 3103 auio.uio_segflg = UIO_USERSPACE; 3104 auio.uio_td = td; 3105 auio.uio_resid = count; 3106 error = VOP_READLINK(vp, &auio, td->td_ucred); 3107 } 3108 vput(vp); 3109 *res = count - auio.uio_resid; 3110 return (error); 3111 } 3112 3113 /* 3114 * readlink_args(char *path, char *buf, int count) 3115 * 3116 * Return target name of a symbolic link. 3117 */ 3118 int 3119 sys_readlink(struct readlink_args *uap) 3120 { 3121 struct nlookupdata nd; 3122 int error; 3123 3124 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3125 if (error == 0) { 3126 error = kern_readlink(&nd, uap->buf, uap->count, 3127 &uap->sysmsg_result); 3128 } 3129 nlookup_done(&nd); 3130 return (error); 3131 } 3132 3133 /* 3134 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3135 * 3136 * Return target name of a symbolic link. The path is relative to the 3137 * directory associated with fd. 3138 */ 3139 int 3140 sys_readlinkat(struct readlinkat_args *uap) 3141 { 3142 struct nlookupdata nd; 3143 struct file *fp; 3144 int error; 3145 3146 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3147 if (error == 0) { 3148 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3149 &uap->sysmsg_result); 3150 } 3151 nlookup_done_at(&nd, fp); 3152 return (error); 3153 } 3154 3155 static int 3156 setfflags(struct vnode *vp, int flags) 3157 { 3158 struct thread *td = curthread; 3159 int error; 3160 struct vattr vattr; 3161 3162 /* 3163 * Prevent non-root users from setting flags on devices. When 3164 * a device is reused, users can retain ownership of the device 3165 * if they are allowed to set flags and programs assume that 3166 * chown can't fail when done as root. 3167 */ 3168 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3169 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3170 return (error); 3171 3172 /* 3173 * note: vget is required for any operation that might mod the vnode 3174 * so VINACTIVE is properly cleared. 3175 */ 3176 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3177 VATTR_NULL(&vattr); 3178 vattr.va_flags = flags; 3179 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3180 vput(vp); 3181 } 3182 return (error); 3183 } 3184 3185 /* 3186 * chflags(char *path, int flags) 3187 * 3188 * Change flags of a file given a path name. 3189 */ 3190 int 3191 sys_chflags(struct chflags_args *uap) 3192 { 3193 struct nlookupdata nd; 3194 struct vnode *vp; 3195 int error; 3196 3197 vp = NULL; 3198 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3199 if (error == 0) 3200 error = nlookup(&nd); 3201 if (error == 0) 3202 error = ncp_writechk(&nd.nl_nch); 3203 if (error == 0) 3204 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3205 nlookup_done(&nd); 3206 if (error == 0) { 3207 error = setfflags(vp, uap->flags); 3208 vrele(vp); 3209 } 3210 return (error); 3211 } 3212 3213 /* 3214 * lchflags(char *path, int flags) 3215 * 3216 * Change flags of a file given a path name, but don't follow symlinks. 3217 */ 3218 int 3219 sys_lchflags(struct lchflags_args *uap) 3220 { 3221 struct nlookupdata nd; 3222 struct vnode *vp; 3223 int error; 3224 3225 vp = NULL; 3226 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3227 if (error == 0) 3228 error = nlookup(&nd); 3229 if (error == 0) 3230 error = ncp_writechk(&nd.nl_nch); 3231 if (error == 0) 3232 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3233 nlookup_done(&nd); 3234 if (error == 0) { 3235 error = setfflags(vp, uap->flags); 3236 vrele(vp); 3237 } 3238 return (error); 3239 } 3240 3241 /* 3242 * fchflags_args(int fd, int flags) 3243 * 3244 * Change flags of a file given a file descriptor. 3245 */ 3246 int 3247 sys_fchflags(struct fchflags_args *uap) 3248 { 3249 struct thread *td = curthread; 3250 struct file *fp; 3251 int error; 3252 3253 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3254 return (error); 3255 if (fp->f_nchandle.ncp) 3256 error = ncp_writechk(&fp->f_nchandle); 3257 if (error == 0) 3258 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3259 fdrop(fp); 3260 return (error); 3261 } 3262 3263 /* 3264 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3265 * change flags given a pathname relative to a filedescriptor 3266 */ 3267 int sys_chflagsat(struct chflagsat_args *uap) 3268 { 3269 struct nlookupdata nd; 3270 struct vnode *vp; 3271 struct file *fp; 3272 int error; 3273 int lookupflags; 3274 3275 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3276 return (EINVAL); 3277 3278 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3279 3280 vp = NULL; 3281 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3282 if (error == 0) 3283 error = nlookup(&nd); 3284 if (error == 0) 3285 error = ncp_writechk(&nd.nl_nch); 3286 if (error == 0) 3287 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3288 nlookup_done_at(&nd, fp); 3289 if (error == 0) { 3290 error = setfflags(vp, uap->flags); 3291 vrele(vp); 3292 } 3293 return (error); 3294 } 3295 3296 3297 static int 3298 setfmode(struct vnode *vp, int mode) 3299 { 3300 struct thread *td = curthread; 3301 int error; 3302 struct vattr vattr; 3303 3304 /* 3305 * note: vget is required for any operation that might mod the vnode 3306 * so VINACTIVE is properly cleared. 3307 */ 3308 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3309 VATTR_NULL(&vattr); 3310 vattr.va_mode = mode & ALLPERMS; 3311 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3312 cache_inval_wxok(vp); 3313 vput(vp); 3314 } 3315 return error; 3316 } 3317 3318 int 3319 kern_chmod(struct nlookupdata *nd, int mode) 3320 { 3321 struct vnode *vp; 3322 int error; 3323 3324 if ((error = nlookup(nd)) != 0) 3325 return (error); 3326 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3327 return (error); 3328 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3329 error = setfmode(vp, mode); 3330 vrele(vp); 3331 return (error); 3332 } 3333 3334 /* 3335 * chmod_args(char *path, int mode) 3336 * 3337 * Change mode of a file given path name. 3338 */ 3339 int 3340 sys_chmod(struct chmod_args *uap) 3341 { 3342 struct nlookupdata nd; 3343 int error; 3344 3345 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3346 if (error == 0) 3347 error = kern_chmod(&nd, uap->mode); 3348 nlookup_done(&nd); 3349 return (error); 3350 } 3351 3352 /* 3353 * lchmod_args(char *path, int mode) 3354 * 3355 * Change mode of a file given path name (don't follow links.) 3356 */ 3357 int 3358 sys_lchmod(struct lchmod_args *uap) 3359 { 3360 struct nlookupdata nd; 3361 int error; 3362 3363 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3364 if (error == 0) 3365 error = kern_chmod(&nd, uap->mode); 3366 nlookup_done(&nd); 3367 return (error); 3368 } 3369 3370 /* 3371 * fchmod_args(int fd, int mode) 3372 * 3373 * Change mode of a file given a file descriptor. 3374 */ 3375 int 3376 sys_fchmod(struct fchmod_args *uap) 3377 { 3378 struct thread *td = curthread; 3379 struct file *fp; 3380 int error; 3381 3382 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3383 return (error); 3384 if (fp->f_nchandle.ncp) 3385 error = ncp_writechk(&fp->f_nchandle); 3386 if (error == 0) 3387 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3388 fdrop(fp); 3389 return (error); 3390 } 3391 3392 /* 3393 * fchmodat_args(char *path, int mode) 3394 * 3395 * Change mode of a file pointed to by fd/path. 3396 */ 3397 int 3398 sys_fchmodat(struct fchmodat_args *uap) 3399 { 3400 struct nlookupdata nd; 3401 struct file *fp; 3402 int error; 3403 int flags; 3404 3405 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3406 return (EINVAL); 3407 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3408 3409 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3410 UIO_USERSPACE, flags); 3411 if (error == 0) 3412 error = kern_chmod(&nd, uap->mode); 3413 nlookup_done_at(&nd, fp); 3414 return (error); 3415 } 3416 3417 static int 3418 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3419 { 3420 struct thread *td = curthread; 3421 int error; 3422 struct vattr vattr; 3423 uid_t o_uid; 3424 gid_t o_gid; 3425 uint64_t size; 3426 3427 /* 3428 * note: vget is required for any operation that might mod the vnode 3429 * so VINACTIVE is properly cleared. 3430 */ 3431 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3432 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3433 return error; 3434 o_uid = vattr.va_uid; 3435 o_gid = vattr.va_gid; 3436 size = vattr.va_size; 3437 3438 VATTR_NULL(&vattr); 3439 vattr.va_uid = uid; 3440 vattr.va_gid = gid; 3441 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3442 vput(vp); 3443 } 3444 3445 if (error == 0) { 3446 if (uid == -1) 3447 uid = o_uid; 3448 if (gid == -1) 3449 gid = o_gid; 3450 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3451 VFS_ACCOUNT(mp, uid, gid, size); 3452 } 3453 3454 return error; 3455 } 3456 3457 int 3458 kern_chown(struct nlookupdata *nd, int uid, int gid) 3459 { 3460 struct vnode *vp; 3461 int error; 3462 3463 if ((error = nlookup(nd)) != 0) 3464 return (error); 3465 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3466 return (error); 3467 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3468 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3469 vrele(vp); 3470 return (error); 3471 } 3472 3473 /* 3474 * chown(char *path, int uid, int gid) 3475 * 3476 * Set ownership given a path name. 3477 */ 3478 int 3479 sys_chown(struct chown_args *uap) 3480 { 3481 struct nlookupdata nd; 3482 int error; 3483 3484 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3485 if (error == 0) 3486 error = kern_chown(&nd, uap->uid, uap->gid); 3487 nlookup_done(&nd); 3488 return (error); 3489 } 3490 3491 /* 3492 * lchown_args(char *path, int uid, int gid) 3493 * 3494 * Set ownership given a path name, do not cross symlinks. 3495 */ 3496 int 3497 sys_lchown(struct lchown_args *uap) 3498 { 3499 struct nlookupdata nd; 3500 int error; 3501 3502 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3503 if (error == 0) 3504 error = kern_chown(&nd, uap->uid, uap->gid); 3505 nlookup_done(&nd); 3506 return (error); 3507 } 3508 3509 /* 3510 * fchown_args(int fd, int uid, int gid) 3511 * 3512 * Set ownership given a file descriptor. 3513 */ 3514 int 3515 sys_fchown(struct fchown_args *uap) 3516 { 3517 struct thread *td = curthread; 3518 struct proc *p = td->td_proc; 3519 struct file *fp; 3520 int error; 3521 3522 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3523 return (error); 3524 if (fp->f_nchandle.ncp) 3525 error = ncp_writechk(&fp->f_nchandle); 3526 if (error == 0) 3527 error = setfown(p->p_fd->fd_ncdir.mount, 3528 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3529 fdrop(fp); 3530 return (error); 3531 } 3532 3533 /* 3534 * fchownat(int fd, char *path, int uid, int gid, int flags) 3535 * 3536 * Set ownership of file pointed to by fd/path. 3537 */ 3538 int 3539 sys_fchownat(struct fchownat_args *uap) 3540 { 3541 struct nlookupdata nd; 3542 struct file *fp; 3543 int error; 3544 int flags; 3545 3546 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3547 return (EINVAL); 3548 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3549 3550 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3551 UIO_USERSPACE, flags); 3552 if (error == 0) 3553 error = kern_chown(&nd, uap->uid, uap->gid); 3554 nlookup_done_at(&nd, fp); 3555 return (error); 3556 } 3557 3558 3559 static int 3560 getutimes(struct timeval *tvp, struct timespec *tsp) 3561 { 3562 struct timeval tv[2]; 3563 int error; 3564 3565 if (tvp == NULL) { 3566 microtime(&tv[0]); 3567 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3568 tsp[1] = tsp[0]; 3569 } else { 3570 if ((error = itimerfix(tvp)) != 0) 3571 return (error); 3572 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3573 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3574 } 3575 return 0; 3576 } 3577 3578 static int 3579 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3580 { 3581 struct timespec tsnow; 3582 int error; 3583 3584 *nullflag = 0; 3585 nanotime(&tsnow); 3586 if (ts == NULL) { 3587 newts[0] = tsnow; 3588 newts[1] = tsnow; 3589 *nullflag = 1; 3590 return (0); 3591 } 3592 3593 newts[0] = ts[0]; 3594 newts[1] = ts[1]; 3595 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) 3596 return (0); 3597 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3598 *nullflag = 1; 3599 3600 if (newts[0].tv_nsec == UTIME_OMIT) 3601 newts[0].tv_sec = VNOVAL; 3602 else if (newts[0].tv_nsec == UTIME_NOW) 3603 newts[0] = tsnow; 3604 else if ((error = itimespecfix(&newts[0])) != 0) 3605 return (error); 3606 3607 if (newts[1].tv_nsec == UTIME_OMIT) 3608 newts[1].tv_sec = VNOVAL; 3609 else if (newts[1].tv_nsec == UTIME_NOW) 3610 newts[1] = tsnow; 3611 else if ((error = itimespecfix(&newts[1])) != 0) 3612 return (error); 3613 3614 return (0); 3615 } 3616 3617 static int 3618 setutimes(struct vnode *vp, struct vattr *vattr, 3619 const struct timespec *ts, int nullflag) 3620 { 3621 struct thread *td = curthread; 3622 int error; 3623 3624 VATTR_NULL(vattr); 3625 vattr->va_atime = ts[0]; 3626 vattr->va_mtime = ts[1]; 3627 if (nullflag) 3628 vattr->va_vaflags |= VA_UTIMES_NULL; 3629 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3630 3631 return error; 3632 } 3633 3634 int 3635 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3636 { 3637 struct timespec ts[2]; 3638 int error; 3639 3640 if (tptr) { 3641 if ((error = getutimes(tptr, ts)) != 0) 3642 return (error); 3643 } 3644 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3645 return (error); 3646 } 3647 3648 /* 3649 * utimes_args(char *path, struct timeval *tptr) 3650 * 3651 * Set the access and modification times of a file. 3652 */ 3653 int 3654 sys_utimes(struct utimes_args *uap) 3655 { 3656 struct timeval tv[2]; 3657 struct nlookupdata nd; 3658 int error; 3659 3660 if (uap->tptr) { 3661 error = copyin(uap->tptr, tv, sizeof(tv)); 3662 if (error) 3663 return (error); 3664 } 3665 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3666 if (error == 0) 3667 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3668 nlookup_done(&nd); 3669 return (error); 3670 } 3671 3672 /* 3673 * lutimes_args(char *path, struct timeval *tptr) 3674 * 3675 * Set the access and modification times of a file. 3676 */ 3677 int 3678 sys_lutimes(struct lutimes_args *uap) 3679 { 3680 struct timeval tv[2]; 3681 struct nlookupdata nd; 3682 int error; 3683 3684 if (uap->tptr) { 3685 error = copyin(uap->tptr, tv, sizeof(tv)); 3686 if (error) 3687 return (error); 3688 } 3689 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3690 if (error == 0) 3691 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3692 nlookup_done(&nd); 3693 return (error); 3694 } 3695 3696 /* 3697 * Set utimes on a file descriptor. The creds used to open the 3698 * file are used to determine whether the operation is allowed 3699 * or not. 3700 */ 3701 int 3702 kern_futimens(int fd, struct timespec *ts) 3703 { 3704 struct thread *td = curthread; 3705 struct timespec newts[2]; 3706 struct file *fp; 3707 struct vnode *vp; 3708 struct vattr vattr; 3709 int nullflag; 3710 int error; 3711 3712 error = getutimens(ts, newts, &nullflag); 3713 if (error) 3714 return (error); 3715 if ((error = holdvnode(td, fd, &fp)) != 0) 3716 return (error); 3717 if (fp->f_nchandle.ncp) 3718 error = ncp_writechk(&fp->f_nchandle); 3719 if (error == 0) { 3720 vp = fp->f_data; 3721 error = vget(vp, LK_EXCLUSIVE); 3722 if (error == 0) { 3723 error = VOP_GETATTR(vp, &vattr); 3724 if (error == 0) { 3725 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3726 fp->f_cred); 3727 } 3728 if (error == 0) { 3729 error = setutimes(vp, &vattr, newts, nullflag); 3730 } 3731 vput(vp); 3732 } 3733 } 3734 fdrop(fp); 3735 return (error); 3736 } 3737 3738 /* 3739 * futimens_args(int fd, struct timespec *ts) 3740 * 3741 * Set the access and modification times of a file. 3742 */ 3743 int 3744 sys_futimens(struct futimens_args *uap) 3745 { 3746 struct timespec ts[2]; 3747 int error; 3748 3749 if (uap->ts) { 3750 error = copyin(uap->ts, ts, sizeof(ts)); 3751 if (error) 3752 return (error); 3753 } 3754 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3755 return (error); 3756 } 3757 3758 int 3759 kern_futimes(int fd, struct timeval *tptr) 3760 { 3761 struct timespec ts[2]; 3762 int error; 3763 3764 if (tptr) { 3765 if ((error = getutimes(tptr, ts)) != 0) 3766 return (error); 3767 } 3768 error = kern_futimens(fd, tptr ? ts : NULL); 3769 return (error); 3770 } 3771 3772 /* 3773 * futimes_args(int fd, struct timeval *tptr) 3774 * 3775 * Set the access and modification times of a file. 3776 */ 3777 int 3778 sys_futimes(struct futimes_args *uap) 3779 { 3780 struct timeval tv[2]; 3781 int error; 3782 3783 if (uap->tptr) { 3784 error = copyin(uap->tptr, tv, sizeof(tv)); 3785 if (error) 3786 return (error); 3787 } 3788 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3789 return (error); 3790 } 3791 3792 int 3793 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3794 { 3795 struct timespec newts[2]; 3796 struct vnode *vp; 3797 struct vattr vattr; 3798 int nullflag; 3799 int error; 3800 3801 if (flags & ~AT_SYMLINK_NOFOLLOW) 3802 return (EINVAL); 3803 3804 error = getutimens(ts, newts, &nullflag); 3805 if (error) 3806 return (error); 3807 3808 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3809 if ((error = nlookup(nd)) != 0) 3810 return (error); 3811 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3812 return (error); 3813 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3814 return (error); 3815 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3816 error = vget(vp, LK_EXCLUSIVE); 3817 if (error == 0) { 3818 error = setutimes(vp, &vattr, newts, nullflag); 3819 vput(vp); 3820 } 3821 } 3822 vrele(vp); 3823 return (error); 3824 } 3825 3826 /* 3827 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3828 * 3829 * Set file access and modification times of a file. 3830 */ 3831 int 3832 sys_utimensat(struct utimensat_args *uap) 3833 { 3834 struct timespec ts[2]; 3835 struct nlookupdata nd; 3836 struct file *fp; 3837 int error; 3838 int flags; 3839 3840 if (uap->ts) { 3841 error = copyin(uap->ts, ts, sizeof(ts)); 3842 if (error) 3843 return (error); 3844 } 3845 3846 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3847 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3848 UIO_USERSPACE, flags); 3849 if (error == 0) 3850 error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags); 3851 nlookup_done_at(&nd, fp); 3852 return (error); 3853 } 3854 3855 int 3856 kern_truncate(struct nlookupdata *nd, off_t length) 3857 { 3858 struct vnode *vp; 3859 struct vattr vattr; 3860 int error; 3861 uid_t uid = 0; 3862 gid_t gid = 0; 3863 uint64_t old_size = 0; 3864 3865 if (length < 0) 3866 return(EINVAL); 3867 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3868 if ((error = nlookup(nd)) != 0) 3869 return (error); 3870 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3871 return (error); 3872 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3873 return (error); 3874 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3875 if (error) { 3876 vrele(vp); 3877 return (error); 3878 } 3879 if (vp->v_type == VDIR) { 3880 error = EISDIR; 3881 goto done; 3882 } 3883 if (vfs_quota_enabled) { 3884 error = VOP_GETATTR(vp, &vattr); 3885 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3886 uid = vattr.va_uid; 3887 gid = vattr.va_gid; 3888 old_size = vattr.va_size; 3889 } 3890 3891 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3892 VATTR_NULL(&vattr); 3893 vattr.va_size = length; 3894 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3895 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3896 } 3897 done: 3898 vput(vp); 3899 return (error); 3900 } 3901 3902 /* 3903 * truncate(char *path, int pad, off_t length) 3904 * 3905 * Truncate a file given its path name. 3906 */ 3907 int 3908 sys_truncate(struct truncate_args *uap) 3909 { 3910 struct nlookupdata nd; 3911 int error; 3912 3913 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3914 if (error == 0) 3915 error = kern_truncate(&nd, uap->length); 3916 nlookup_done(&nd); 3917 return error; 3918 } 3919 3920 int 3921 kern_ftruncate(int fd, off_t length) 3922 { 3923 struct thread *td = curthread; 3924 struct vattr vattr; 3925 struct vnode *vp; 3926 struct file *fp; 3927 int error; 3928 uid_t uid = 0; 3929 gid_t gid = 0; 3930 uint64_t old_size = 0; 3931 struct mount *mp; 3932 3933 if (length < 0) 3934 return(EINVAL); 3935 if ((error = holdvnode(td, fd, &fp)) != 0) 3936 return (error); 3937 if (fp->f_nchandle.ncp) { 3938 error = ncp_writechk(&fp->f_nchandle); 3939 if (error) 3940 goto done; 3941 } 3942 if ((fp->f_flag & FWRITE) == 0) { 3943 error = EINVAL; 3944 goto done; 3945 } 3946 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3947 error = EINVAL; 3948 goto done; 3949 } 3950 vp = (struct vnode *)fp->f_data; 3951 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3952 if (vp->v_type == VDIR) { 3953 error = EISDIR; 3954 vn_unlock(vp); 3955 goto done; 3956 } 3957 3958 if (vfs_quota_enabled) { 3959 error = VOP_GETATTR(vp, &vattr); 3960 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3961 uid = vattr.va_uid; 3962 gid = vattr.va_gid; 3963 old_size = vattr.va_size; 3964 } 3965 3966 if ((error = vn_writechk(vp, NULL)) == 0) { 3967 VATTR_NULL(&vattr); 3968 vattr.va_size = length; 3969 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3970 mp = vq_vptomp(vp); 3971 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3972 } 3973 vn_unlock(vp); 3974 done: 3975 fdrop(fp); 3976 return (error); 3977 } 3978 3979 /* 3980 * ftruncate_args(int fd, int pad, off_t length) 3981 * 3982 * Truncate a file given a file descriptor. 3983 */ 3984 int 3985 sys_ftruncate(struct ftruncate_args *uap) 3986 { 3987 int error; 3988 3989 error = kern_ftruncate(uap->fd, uap->length); 3990 3991 return (error); 3992 } 3993 3994 /* 3995 * fsync(int fd) 3996 * 3997 * Sync an open file. 3998 */ 3999 int 4000 sys_fsync(struct fsync_args *uap) 4001 { 4002 struct thread *td = curthread; 4003 struct vnode *vp; 4004 struct file *fp; 4005 vm_object_t obj; 4006 int error; 4007 4008 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 4009 return (error); 4010 vp = (struct vnode *)fp->f_data; 4011 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4012 if ((obj = vp->v_object) != NULL) { 4013 if (vp->v_mount == NULL || 4014 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 4015 vm_object_page_clean(obj, 0, 0, 0); 4016 } 4017 } 4018 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 4019 if (error == 0 && vp->v_mount) 4020 error = buf_fsync(vp); 4021 vn_unlock(vp); 4022 fdrop(fp); 4023 4024 return (error); 4025 } 4026 4027 int 4028 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 4029 { 4030 struct nchandle fnchd; 4031 struct nchandle tnchd; 4032 struct namecache *ncp; 4033 struct vnode *fdvp; 4034 struct vnode *tdvp; 4035 struct mount *mp; 4036 int error; 4037 u_int fncp_gen; 4038 u_int tncp_gen; 4039 4040 bwillinode(1); 4041 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 4042 if ((error = nlookup(fromnd)) != 0) 4043 return (error); 4044 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 4045 return (ENOENT); 4046 fnchd.mount = fromnd->nl_nch.mount; 4047 cache_hold(&fnchd); 4048 4049 /* 4050 * unlock the source nch so we can lookup the target nch without 4051 * deadlocking. The target may or may not exist so we do not check 4052 * for a target vp like kern_mkdir() and other creation functions do. 4053 * 4054 * The source and target directories are ref'd and rechecked after 4055 * everything is relocked to determine if the source or target file 4056 * has been renamed. 4057 */ 4058 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4059 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4060 4061 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4062 4063 cache_unlock(&fromnd->nl_nch); 4064 4065 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4066 if ((error = nlookup(tond)) != 0) { 4067 cache_drop(&fnchd); 4068 return (error); 4069 } 4070 tncp_gen = tond->nl_nch.ncp->nc_generation; 4071 4072 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4073 cache_drop(&fnchd); 4074 return (ENOENT); 4075 } 4076 tnchd.mount = tond->nl_nch.mount; 4077 cache_hold(&tnchd); 4078 4079 /* 4080 * If the source and target are the same there is nothing to do 4081 */ 4082 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4083 cache_drop(&fnchd); 4084 cache_drop(&tnchd); 4085 return (0); 4086 } 4087 4088 /* 4089 * Mount points cannot be renamed or overwritten 4090 */ 4091 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4092 NCF_ISMOUNTPT 4093 ) { 4094 cache_drop(&fnchd); 4095 cache_drop(&tnchd); 4096 return (EINVAL); 4097 } 4098 4099 /* 4100 * Relock the source ncp. cache_relock() will deal with any 4101 * deadlocks against the already-locked tond and will also 4102 * make sure both are resolved. 4103 * 4104 * NOTE AFTER RELOCKING: The source or target ncp may have become 4105 * invalid while they were unlocked, nc_vp and nc_mount could 4106 * be NULL. 4107 */ 4108 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 4109 &tond->nl_nch, tond->nl_cred); 4110 fromnd->nl_flags |= NLC_NCPISLOCKED; 4111 4112 /* 4113 * If the namecache generation changed for either fromnd or tond, 4114 * we must retry. 4115 */ 4116 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 4117 tond->nl_nch.ncp->nc_generation != tncp_gen) { 4118 kprintf("kern_rename: retry due to gen on: " 4119 "\"%s\" -> \"%s\"\n", 4120 fromnd->nl_nch.ncp->nc_name, 4121 tond->nl_nch.ncp->nc_name); 4122 cache_drop(&fnchd); 4123 cache_drop(&tnchd); 4124 return (EAGAIN); 4125 } 4126 4127 /* 4128 * If either fromnd or tond are marked destroyed a ripout occured 4129 * out from under us and we must retry. 4130 */ 4131 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 4132 fromnd->nl_nch.ncp->nc_vp == NULL || 4133 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 4134 kprintf("kern_rename: retry due to ripout on: " 4135 "\"%s\" -> \"%s\"\n", 4136 fromnd->nl_nch.ncp->nc_name, 4137 tond->nl_nch.ncp->nc_name); 4138 cache_drop(&fnchd); 4139 cache_drop(&tnchd); 4140 return (EAGAIN); 4141 } 4142 4143 /* 4144 * Make sure the parent directories linkages are the same. 4145 * XXX shouldn't be needed any more w/ generation check above. 4146 */ 4147 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4148 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4149 cache_drop(&fnchd); 4150 cache_drop(&tnchd); 4151 return (ENOENT); 4152 } 4153 4154 /* 4155 * Both the source and target must be within the same filesystem and 4156 * in the same filesystem as their parent directories within the 4157 * namecache topology. 4158 * 4159 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 4160 */ 4161 mp = fnchd.mount; 4162 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4163 mp != tond->nl_nch.mount) { 4164 cache_drop(&fnchd); 4165 cache_drop(&tnchd); 4166 return (EXDEV); 4167 } 4168 4169 /* 4170 * Make sure the mount point is writable 4171 */ 4172 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4173 cache_drop(&fnchd); 4174 cache_drop(&tnchd); 4175 return (error); 4176 } 4177 4178 /* 4179 * If the target exists and either the source or target is a directory, 4180 * then both must be directories. 4181 * 4182 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4183 * have become NULL. 4184 */ 4185 if (tond->nl_nch.ncp->nc_vp) { 4186 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4187 error = ENOENT; 4188 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4189 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4190 error = ENOTDIR; 4191 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4192 error = EISDIR; 4193 } 4194 } 4195 4196 /* 4197 * You cannot rename a source into itself or a subdirectory of itself. 4198 * We check this by travsersing the target directory upwards looking 4199 * for a match against the source. 4200 * 4201 * XXX MPSAFE 4202 */ 4203 if (error == 0) { 4204 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4205 if (fromnd->nl_nch.ncp == ncp) { 4206 error = EINVAL; 4207 break; 4208 } 4209 } 4210 } 4211 4212 cache_drop(&fnchd); 4213 cache_drop(&tnchd); 4214 4215 /* 4216 * Even though the namespaces are different, they may still represent 4217 * hardlinks to the same file. The filesystem might have a hard time 4218 * with this so we issue a NREMOVE of the source instead of a NRENAME 4219 * when we detect the situation. 4220 */ 4221 if (error == 0) { 4222 fdvp = fromnd->nl_dvp; 4223 tdvp = tond->nl_dvp; 4224 if (fdvp == NULL || tdvp == NULL) { 4225 error = EPERM; 4226 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4227 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4228 fromnd->nl_cred); 4229 } else { 4230 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4231 fdvp, tdvp, tond->nl_cred); 4232 } 4233 } 4234 return (error); 4235 } 4236 4237 /* 4238 * rename_args(char *from, char *to) 4239 * 4240 * Rename files. Source and destination must either both be directories, 4241 * or both not be directories. If target is a directory, it must be empty. 4242 */ 4243 int 4244 sys_rename(struct rename_args *uap) 4245 { 4246 struct nlookupdata fromnd, tond; 4247 int error; 4248 4249 do { 4250 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4251 if (error == 0) { 4252 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4253 if (error == 0) 4254 error = kern_rename(&fromnd, &tond); 4255 nlookup_done(&tond); 4256 } 4257 nlookup_done(&fromnd); 4258 } while (error == EAGAIN); 4259 return (error); 4260 } 4261 4262 /* 4263 * renameat_args(int oldfd, char *old, int newfd, char *new) 4264 * 4265 * Rename files using paths relative to the directories associated with 4266 * oldfd and newfd. Source and destination must either both be directories, 4267 * or both not be directories. If target is a directory, it must be empty. 4268 */ 4269 int 4270 sys_renameat(struct renameat_args *uap) 4271 { 4272 struct nlookupdata oldnd, newnd; 4273 struct file *oldfp, *newfp; 4274 int error; 4275 4276 do { 4277 error = nlookup_init_at(&oldnd, &oldfp, 4278 uap->oldfd, uap->old, 4279 UIO_USERSPACE, 0); 4280 if (error == 0) { 4281 error = nlookup_init_at(&newnd, &newfp, 4282 uap->newfd, uap->new, 4283 UIO_USERSPACE, 0); 4284 if (error == 0) 4285 error = kern_rename(&oldnd, &newnd); 4286 nlookup_done_at(&newnd, newfp); 4287 } 4288 nlookup_done_at(&oldnd, oldfp); 4289 } while (error == EAGAIN); 4290 return (error); 4291 } 4292 4293 int 4294 kern_mkdir(struct nlookupdata *nd, int mode) 4295 { 4296 struct thread *td = curthread; 4297 struct proc *p = td->td_proc; 4298 struct vnode *vp; 4299 struct vattr vattr; 4300 int error; 4301 4302 bwillinode(1); 4303 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4304 if ((error = nlookup(nd)) != 0) 4305 return (error); 4306 4307 if (nd->nl_nch.ncp->nc_vp) 4308 return (EEXIST); 4309 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4310 return (error); 4311 VATTR_NULL(&vattr); 4312 vattr.va_type = VDIR; 4313 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4314 4315 vp = NULL; 4316 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4317 if (error == 0) 4318 vput(vp); 4319 return (error); 4320 } 4321 4322 /* 4323 * mkdir_args(char *path, int mode) 4324 * 4325 * Make a directory file. 4326 */ 4327 int 4328 sys_mkdir(struct mkdir_args *uap) 4329 { 4330 struct nlookupdata nd; 4331 int error; 4332 4333 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4334 if (error == 0) 4335 error = kern_mkdir(&nd, uap->mode); 4336 nlookup_done(&nd); 4337 return (error); 4338 } 4339 4340 /* 4341 * mkdirat_args(int fd, char *path, mode_t mode) 4342 * 4343 * Make a directory file. The path is relative to the directory associated 4344 * with fd. 4345 */ 4346 int 4347 sys_mkdirat(struct mkdirat_args *uap) 4348 { 4349 struct nlookupdata nd; 4350 struct file *fp; 4351 int error; 4352 4353 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4354 if (error == 0) 4355 error = kern_mkdir(&nd, uap->mode); 4356 nlookup_done_at(&nd, fp); 4357 return (error); 4358 } 4359 4360 int 4361 kern_rmdir(struct nlookupdata *nd) 4362 { 4363 int error; 4364 4365 bwillinode(1); 4366 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4367 if ((error = nlookup(nd)) != 0) 4368 return (error); 4369 4370 /* 4371 * Do not allow directories representing mount points to be 4372 * deleted, even if empty. Check write perms on mount point 4373 * in case the vnode is aliased (aka nullfs). 4374 */ 4375 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4376 return (EBUSY); 4377 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4378 return (error); 4379 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4380 return (error); 4381 } 4382 4383 /* 4384 * rmdir_args(char *path) 4385 * 4386 * Remove a directory file. 4387 */ 4388 int 4389 sys_rmdir(struct rmdir_args *uap) 4390 { 4391 struct nlookupdata nd; 4392 int error; 4393 4394 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4395 if (error == 0) 4396 error = kern_rmdir(&nd); 4397 nlookup_done(&nd); 4398 return (error); 4399 } 4400 4401 int 4402 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4403 enum uio_seg direction) 4404 { 4405 struct thread *td = curthread; 4406 struct vnode *vp; 4407 struct file *fp; 4408 struct uio auio; 4409 struct iovec aiov; 4410 off_t loff; 4411 int error, eofflag; 4412 4413 if ((error = holdvnode(td, fd, &fp)) != 0) 4414 return (error); 4415 if ((fp->f_flag & FREAD) == 0) { 4416 error = EBADF; 4417 goto done; 4418 } 4419 vp = (struct vnode *)fp->f_data; 4420 if (vp->v_type != VDIR) { 4421 error = EINVAL; 4422 goto done; 4423 } 4424 aiov.iov_base = buf; 4425 aiov.iov_len = count; 4426 auio.uio_iov = &aiov; 4427 auio.uio_iovcnt = 1; 4428 auio.uio_rw = UIO_READ; 4429 auio.uio_segflg = direction; 4430 auio.uio_td = td; 4431 auio.uio_resid = count; 4432 loff = auio.uio_offset = fp->f_offset; 4433 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4434 fp->f_offset = auio.uio_offset; 4435 if (error) 4436 goto done; 4437 4438 /* 4439 * WARNING! *basep may not be wide enough to accomodate the 4440 * seek offset. XXX should we hack this to return the upper 32 bits 4441 * for offsets greater then 4G? 4442 */ 4443 if (basep) { 4444 *basep = (long)loff; 4445 } 4446 *res = count - auio.uio_resid; 4447 done: 4448 fdrop(fp); 4449 return (error); 4450 } 4451 4452 /* 4453 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4454 * 4455 * Read a block of directory entries in a file system independent format. 4456 */ 4457 int 4458 sys_getdirentries(struct getdirentries_args *uap) 4459 { 4460 long base; 4461 int error; 4462 4463 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4464 &uap->sysmsg_result, UIO_USERSPACE); 4465 4466 if (error == 0 && uap->basep) 4467 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4468 return (error); 4469 } 4470 4471 /* 4472 * getdents_args(int fd, char *buf, size_t count) 4473 */ 4474 int 4475 sys_getdents(struct getdents_args *uap) 4476 { 4477 int error; 4478 4479 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4480 &uap->sysmsg_result, UIO_USERSPACE); 4481 4482 return (error); 4483 } 4484 4485 /* 4486 * Set the mode mask for creation of filesystem nodes. 4487 * 4488 * umask(int newmask) 4489 */ 4490 int 4491 sys_umask(struct umask_args *uap) 4492 { 4493 struct thread *td = curthread; 4494 struct proc *p = td->td_proc; 4495 struct filedesc *fdp; 4496 4497 fdp = p->p_fd; 4498 uap->sysmsg_result = fdp->fd_cmask; 4499 fdp->fd_cmask = uap->newmask & ALLPERMS; 4500 return (0); 4501 } 4502 4503 /* 4504 * revoke(char *path) 4505 * 4506 * Void all references to file by ripping underlying filesystem 4507 * away from vnode. 4508 */ 4509 int 4510 sys_revoke(struct revoke_args *uap) 4511 { 4512 struct nlookupdata nd; 4513 struct vattr vattr; 4514 struct vnode *vp; 4515 struct ucred *cred; 4516 int error; 4517 4518 vp = NULL; 4519 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4520 if (error == 0) 4521 error = nlookup(&nd); 4522 if (error == 0) 4523 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4524 cred = crhold(nd.nl_cred); 4525 nlookup_done(&nd); 4526 if (error == 0) { 4527 if (error == 0) 4528 error = VOP_GETATTR(vp, &vattr); 4529 if (error == 0 && cred->cr_uid != vattr.va_uid) 4530 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4531 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4532 if (vcount(vp) > 0) 4533 error = vrevoke(vp, cred); 4534 } else if (error == 0) { 4535 error = vrevoke(vp, cred); 4536 } 4537 vrele(vp); 4538 } 4539 if (cred) 4540 crfree(cred); 4541 return (error); 4542 } 4543 4544 /* 4545 * getfh_args(char *fname, fhandle_t *fhp) 4546 * 4547 * Get (NFS) file handle 4548 * 4549 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4550 * mount. This allows nullfs mounts to be explicitly exported. 4551 * 4552 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4553 * 4554 * nullfs mounts of subdirectories are not safe. That is, it will 4555 * work, but you do not really have protection against access to 4556 * the related parent directories. 4557 */ 4558 int 4559 sys_getfh(struct getfh_args *uap) 4560 { 4561 struct thread *td = curthread; 4562 struct nlookupdata nd; 4563 fhandle_t fh; 4564 struct vnode *vp; 4565 struct mount *mp; 4566 int error; 4567 4568 /* 4569 * Must be super user 4570 */ 4571 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4572 return (error); 4573 4574 vp = NULL; 4575 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4576 if (error == 0) 4577 error = nlookup(&nd); 4578 if (error == 0) 4579 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4580 mp = nd.nl_nch.mount; 4581 nlookup_done(&nd); 4582 if (error == 0) { 4583 bzero(&fh, sizeof(fh)); 4584 fh.fh_fsid = mp->mnt_stat.f_fsid; 4585 error = VFS_VPTOFH(vp, &fh.fh_fid); 4586 vput(vp); 4587 if (error == 0) 4588 error = copyout(&fh, uap->fhp, sizeof(fh)); 4589 } 4590 return (error); 4591 } 4592 4593 /* 4594 * fhopen_args(const struct fhandle *u_fhp, int flags) 4595 * 4596 * syscall for the rpc.lockd to use to translate a NFS file handle into 4597 * an open descriptor. 4598 * 4599 * warning: do not remove the priv_check() call or this becomes one giant 4600 * security hole. 4601 */ 4602 int 4603 sys_fhopen(struct fhopen_args *uap) 4604 { 4605 struct thread *td = curthread; 4606 struct filedesc *fdp = td->td_proc->p_fd; 4607 struct mount *mp; 4608 struct vnode *vp; 4609 struct fhandle fhp; 4610 struct vattr vat; 4611 struct vattr *vap = &vat; 4612 struct flock lf; 4613 int fmode, mode, error = 0, type; 4614 struct file *nfp; 4615 struct file *fp; 4616 int indx; 4617 4618 /* 4619 * Must be super user 4620 */ 4621 error = priv_check(td, PRIV_ROOT); 4622 if (error) 4623 return (error); 4624 4625 fmode = FFLAGS(uap->flags); 4626 4627 /* 4628 * Why not allow a non-read/write open for our lockd? 4629 */ 4630 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4631 return (EINVAL); 4632 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4633 if (error) 4634 return(error); 4635 4636 /* 4637 * Find the mount point 4638 */ 4639 mp = vfs_getvfs(&fhp.fh_fsid); 4640 if (mp == NULL) { 4641 error = ESTALE; 4642 goto done2; 4643 } 4644 /* now give me my vnode, it gets returned to me locked */ 4645 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4646 if (error) 4647 goto done; 4648 /* 4649 * from now on we have to make sure not 4650 * to forget about the vnode 4651 * any error that causes an abort must vput(vp) 4652 * just set error = err and 'goto bad;'. 4653 */ 4654 4655 /* 4656 * from vn_open 4657 */ 4658 if (vp->v_type == VLNK) { 4659 error = EMLINK; 4660 goto bad; 4661 } 4662 if (vp->v_type == VSOCK) { 4663 error = EOPNOTSUPP; 4664 goto bad; 4665 } 4666 mode = 0; 4667 if (fmode & (FWRITE | O_TRUNC)) { 4668 if (vp->v_type == VDIR) { 4669 error = EISDIR; 4670 goto bad; 4671 } 4672 error = vn_writechk(vp, NULL); 4673 if (error) 4674 goto bad; 4675 mode |= VWRITE; 4676 } 4677 if (fmode & FREAD) 4678 mode |= VREAD; 4679 if (mode) { 4680 error = VOP_ACCESS(vp, mode, td->td_ucred); 4681 if (error) 4682 goto bad; 4683 } 4684 if (fmode & O_TRUNC) { 4685 vn_unlock(vp); /* XXX */ 4686 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4687 VATTR_NULL(vap); 4688 vap->va_size = 0; 4689 error = VOP_SETATTR(vp, vap, td->td_ucred); 4690 if (error) 4691 goto bad; 4692 } 4693 4694 /* 4695 * VOP_OPEN needs the file pointer so it can potentially override 4696 * it. 4697 * 4698 * WARNING! no f_nchandle will be associated when fhopen()ing a 4699 * directory. XXX 4700 */ 4701 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4702 goto bad; 4703 fp = nfp; 4704 4705 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4706 if (error) { 4707 /* 4708 * setting f_ops this way prevents VOP_CLOSE from being 4709 * called or fdrop() releasing the vp from v_data. Since 4710 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4711 */ 4712 fp->f_ops = &badfileops; 4713 fp->f_data = NULL; 4714 goto bad_drop; 4715 } 4716 4717 /* 4718 * The fp is given its own reference, we still have our ref and lock. 4719 * 4720 * Assert that all regular files must be created with a VM object. 4721 */ 4722 if (vp->v_type == VREG && vp->v_object == NULL) { 4723 kprintf("fhopen: regular file did not " 4724 "have VM object: %p\n", 4725 vp); 4726 goto bad_drop; 4727 } 4728 4729 /* 4730 * The open was successful. Handle any locking requirements. 4731 */ 4732 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4733 lf.l_whence = SEEK_SET; 4734 lf.l_start = 0; 4735 lf.l_len = 0; 4736 if (fmode & O_EXLOCK) 4737 lf.l_type = F_WRLCK; 4738 else 4739 lf.l_type = F_RDLCK; 4740 if (fmode & FNONBLOCK) 4741 type = 0; 4742 else 4743 type = F_WAIT; 4744 vn_unlock(vp); 4745 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4746 &lf, type)) != 0) { 4747 /* 4748 * release our private reference. 4749 */ 4750 fsetfd(fdp, NULL, indx); 4751 fdrop(fp); 4752 vrele(vp); 4753 goto done; 4754 } 4755 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4756 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4757 } 4758 4759 /* 4760 * Clean up. Associate the file pointer with the previously 4761 * reserved descriptor and return it. 4762 */ 4763 vput(vp); 4764 if (uap->flags & O_CLOEXEC) 4765 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4766 fsetfd(fdp, fp, indx); 4767 fdrop(fp); 4768 uap->sysmsg_result = indx; 4769 mount_drop(mp); 4770 4771 return (error); 4772 4773 bad_drop: 4774 fsetfd(fdp, NULL, indx); 4775 fdrop(fp); 4776 bad: 4777 vput(vp); 4778 done: 4779 mount_drop(mp); 4780 done2: 4781 return (error); 4782 } 4783 4784 /* 4785 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4786 */ 4787 int 4788 sys_fhstat(struct fhstat_args *uap) 4789 { 4790 struct thread *td = curthread; 4791 struct stat sb; 4792 fhandle_t fh; 4793 struct mount *mp; 4794 struct vnode *vp; 4795 int error; 4796 4797 /* 4798 * Must be super user 4799 */ 4800 error = priv_check(td, PRIV_ROOT); 4801 if (error) 4802 return (error); 4803 4804 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4805 if (error) 4806 return (error); 4807 4808 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4809 error = ESTALE; 4810 if (error == 0) { 4811 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4812 error = vn_stat(vp, &sb, td->td_ucred); 4813 vput(vp); 4814 } 4815 } 4816 if (error == 0) 4817 error = copyout(&sb, uap->sb, sizeof(sb)); 4818 if (mp) 4819 mount_drop(mp); 4820 4821 return (error); 4822 } 4823 4824 /* 4825 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4826 */ 4827 int 4828 sys_fhstatfs(struct fhstatfs_args *uap) 4829 { 4830 struct thread *td = curthread; 4831 struct proc *p = td->td_proc; 4832 struct statfs *sp; 4833 struct mount *mp; 4834 struct vnode *vp; 4835 struct statfs sb; 4836 char *fullpath, *freepath; 4837 fhandle_t fh; 4838 int error; 4839 4840 /* 4841 * Must be super user 4842 */ 4843 if ((error = priv_check(td, PRIV_ROOT))) 4844 return (error); 4845 4846 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4847 return (error); 4848 4849 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4850 error = ESTALE; 4851 goto done; 4852 } 4853 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4854 error = ESTALE; 4855 goto done; 4856 } 4857 4858 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4859 goto done; 4860 mp = vp->v_mount; 4861 sp = &mp->mnt_stat; 4862 vput(vp); 4863 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4864 goto done; 4865 4866 error = mount_path(p, mp, &fullpath, &freepath); 4867 if (error) 4868 goto done; 4869 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4870 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4871 kfree(freepath, M_TEMP); 4872 4873 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4874 if (priv_check(td, PRIV_ROOT)) { 4875 bcopy(sp, &sb, sizeof(sb)); 4876 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4877 sp = &sb; 4878 } 4879 error = copyout(sp, uap->buf, sizeof(*sp)); 4880 done: 4881 if (mp) 4882 mount_drop(mp); 4883 4884 return (error); 4885 } 4886 4887 /* 4888 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4889 */ 4890 int 4891 sys_fhstatvfs(struct fhstatvfs_args *uap) 4892 { 4893 struct thread *td = curthread; 4894 struct proc *p = td->td_proc; 4895 struct statvfs *sp; 4896 struct mount *mp; 4897 struct vnode *vp; 4898 fhandle_t fh; 4899 int error; 4900 4901 /* 4902 * Must be super user 4903 */ 4904 if ((error = priv_check(td, PRIV_ROOT))) 4905 return (error); 4906 4907 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4908 return (error); 4909 4910 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4911 error = ESTALE; 4912 goto done; 4913 } 4914 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4915 error = ESTALE; 4916 goto done; 4917 } 4918 4919 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4920 goto done; 4921 mp = vp->v_mount; 4922 sp = &mp->mnt_vstat; 4923 vput(vp); 4924 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4925 goto done; 4926 4927 sp->f_flag = 0; 4928 if (mp->mnt_flag & MNT_RDONLY) 4929 sp->f_flag |= ST_RDONLY; 4930 if (mp->mnt_flag & MNT_NOSUID) 4931 sp->f_flag |= ST_NOSUID; 4932 error = copyout(sp, uap->buf, sizeof(*sp)); 4933 done: 4934 if (mp) 4935 mount_drop(mp); 4936 return (error); 4937 } 4938 4939 4940 /* 4941 * Syscall to push extended attribute configuration information into the 4942 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4943 * a command (int cmd), and attribute name and misc data. For now, the 4944 * attribute name is left in userspace for consumption by the VFS_op. 4945 * It will probably be changed to be copied into sysspace by the 4946 * syscall in the future, once issues with various consumers of the 4947 * attribute code have raised their hands. 4948 * 4949 * Currently this is used only by UFS Extended Attributes. 4950 */ 4951 int 4952 sys_extattrctl(struct extattrctl_args *uap) 4953 { 4954 struct nlookupdata nd; 4955 struct vnode *vp; 4956 char attrname[EXTATTR_MAXNAMELEN]; 4957 int error; 4958 size_t size; 4959 4960 attrname[0] = 0; 4961 vp = NULL; 4962 error = 0; 4963 4964 if (error == 0 && uap->filename) { 4965 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4966 NLC_FOLLOW); 4967 if (error == 0) 4968 error = nlookup(&nd); 4969 if (error == 0) 4970 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4971 nlookup_done(&nd); 4972 } 4973 4974 if (error == 0 && uap->attrname) { 4975 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4976 &size); 4977 } 4978 4979 if (error == 0) { 4980 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4981 if (error == 0) 4982 error = nlookup(&nd); 4983 if (error == 0) 4984 error = ncp_writechk(&nd.nl_nch); 4985 if (error == 0) { 4986 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4987 uap->attrnamespace, 4988 uap->attrname, nd.nl_cred); 4989 } 4990 nlookup_done(&nd); 4991 } 4992 4993 return (error); 4994 } 4995 4996 /* 4997 * Syscall to get a named extended attribute on a file or directory. 4998 */ 4999 int 5000 sys_extattr_set_file(struct extattr_set_file_args *uap) 5001 { 5002 char attrname[EXTATTR_MAXNAMELEN]; 5003 struct nlookupdata nd; 5004 struct vnode *vp; 5005 struct uio auio; 5006 struct iovec aiov; 5007 int error; 5008 5009 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5010 if (error) 5011 return (error); 5012 5013 vp = NULL; 5014 5015 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5016 if (error == 0) 5017 error = nlookup(&nd); 5018 if (error == 0) 5019 error = ncp_writechk(&nd.nl_nch); 5020 if (error == 0) 5021 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5022 if (error) { 5023 nlookup_done(&nd); 5024 return (error); 5025 } 5026 5027 bzero(&auio, sizeof(auio)); 5028 aiov.iov_base = uap->data; 5029 aiov.iov_len = uap->nbytes; 5030 auio.uio_iov = &aiov; 5031 auio.uio_iovcnt = 1; 5032 auio.uio_offset = 0; 5033 auio.uio_resid = uap->nbytes; 5034 auio.uio_rw = UIO_WRITE; 5035 auio.uio_td = curthread; 5036 5037 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 5038 &auio, nd.nl_cred); 5039 5040 vput(vp); 5041 nlookup_done(&nd); 5042 return (error); 5043 } 5044 5045 /* 5046 * Syscall to get a named extended attribute on a file or directory. 5047 */ 5048 int 5049 sys_extattr_get_file(struct extattr_get_file_args *uap) 5050 { 5051 char attrname[EXTATTR_MAXNAMELEN]; 5052 struct nlookupdata nd; 5053 struct uio auio; 5054 struct iovec aiov; 5055 struct vnode *vp; 5056 int error; 5057 5058 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5059 if (error) 5060 return (error); 5061 5062 vp = NULL; 5063 5064 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5065 if (error == 0) 5066 error = nlookup(&nd); 5067 if (error == 0) 5068 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5069 if (error) { 5070 nlookup_done(&nd); 5071 return (error); 5072 } 5073 5074 bzero(&auio, sizeof(auio)); 5075 aiov.iov_base = uap->data; 5076 aiov.iov_len = uap->nbytes; 5077 auio.uio_iov = &aiov; 5078 auio.uio_iovcnt = 1; 5079 auio.uio_offset = 0; 5080 auio.uio_resid = uap->nbytes; 5081 auio.uio_rw = UIO_READ; 5082 auio.uio_td = curthread; 5083 5084 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5085 &auio, nd.nl_cred); 5086 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 5087 5088 vput(vp); 5089 nlookup_done(&nd); 5090 return(error); 5091 } 5092 5093 /* 5094 * Syscall to delete a named extended attribute from a file or directory. 5095 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 5096 */ 5097 int 5098 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 5099 { 5100 char attrname[EXTATTR_MAXNAMELEN]; 5101 struct nlookupdata nd; 5102 struct vnode *vp; 5103 int error; 5104 5105 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5106 if (error) 5107 return(error); 5108 5109 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5110 if (error == 0) 5111 error = nlookup(&nd); 5112 if (error == 0) 5113 error = ncp_writechk(&nd.nl_nch); 5114 if (error == 0) { 5115 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5116 if (error == 0) { 5117 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5118 attrname, NULL, nd.nl_cred); 5119 vput(vp); 5120 } 5121 } 5122 nlookup_done(&nd); 5123 return(error); 5124 } 5125 5126 /* 5127 * Determine if the mount is visible to the process. 5128 */ 5129 static int 5130 chroot_visible_mnt(struct mount *mp, struct proc *p) 5131 { 5132 struct nchandle nch; 5133 5134 /* 5135 * Traverse from the mount point upwards. If we hit the process 5136 * root then the mount point is visible to the process. 5137 */ 5138 nch = mp->mnt_ncmountpt; 5139 while (nch.ncp) { 5140 if (nch.mount == p->p_fd->fd_nrdir.mount && 5141 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5142 return(1); 5143 } 5144 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5145 nch = nch.mount->mnt_ncmounton; 5146 } else { 5147 nch.ncp = nch.ncp->nc_parent; 5148 } 5149 } 5150 5151 /* 5152 * If the mount point is not visible to the process, but the 5153 * process root is in a subdirectory of the mount, return 5154 * TRUE anyway. 5155 */ 5156 if (p->p_fd->fd_nrdir.mount == mp) 5157 return(1); 5158 5159 return(0); 5160 } 5161 5162