1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
    __printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, u_long);
static int setutimes (struct vnode *, struct vattr *,
    const struct timespec *, int);

/*
 * vfs.usermount: when zero, sys_mount() and sys_unmount() require
 * priv_check(PRIV_ROOT) to succeed before doing anything else.
 */
static int usermount = 0;	/* if 1, non-root can mount fs. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * vfs.debug_unmount: when non-zero, the reference-drain loop in dounmount()
 * keeps retrying instead of stopping after 10 attempts.
 */
static int debug_unmount = 0; /* if 1 loop until unmount success */
SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0,
    "Stall failed unmounts in loop");
/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Two paths share the tail of this function (label "update"):
 *
 *  - MNT_UPDATE: an existing mount is located (via the nch for "null"
 *    filesystems, otherwise via the vnode), busied, and VFS_MOUNT() is
 *    re-issued.  On failure the saved mnt_flag/mnt_kern_flag are restored.
 *
 *  - New mount: a struct mount is allocated, VFS_MOUNT() is issued and on
 *    success the mount is inserted into the mountlist; on failure the
 *    mount structure is torn down and freed.
 *
 * Returns 0 or an errno.  Jailed callers always get EPERM.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2: saved flags for update rollback */
	int hasmount;			/* something is already mounted on the path */
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 * (a non-zero priv_check() result means the caller lacks the
	 * privilege; here the error code itself is discarded).
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether a mount already sits on this namecache entry.
	 * The mount returned by cache_findmount() is only used as a
	 * yes/no answer and is dropped immediately.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* Updates are only accepted on the root vnode of a mount. */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/* Length 5 includes the NUL, so this is an exact match. */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Saved so a failed VFS_MOUNT() can be rolled back below. */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	/*
	 * NOTE(review): the vfs_busy() result is not checked here, unlike
	 * the update path above; presumably it cannot fail on a freshly
	 * initialized mount -- confirm.
	 */
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = VFS_MOUNT(mp, uap->path, uap->data, cred);
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* Update failed: restore the flags saved earlier. */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	mp->mnt_ncmounton = nch;
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): this result is overwritten by the
		 * VFS_START() assignment below before it is ever examined.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed: undo everything done since the
		 * allocation above and free the mount structure.
		 */
		bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
467 */ 468 struct checkdirs_info { 469 struct nchandle old_nch; 470 struct nchandle new_nch; 471 struct vnode *old_vp; 472 struct vnode *new_vp; 473 }; 474 475 static int checkdirs_callback(struct proc *p, void *data); 476 477 static void 478 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 479 { 480 struct checkdirs_info info; 481 struct vnode *olddp; 482 struct vnode *newdp; 483 struct mount *mp; 484 485 /* 486 * If the old mount point's vnode has a usecount of 1, it is not 487 * being held as a descriptor anywhere. 488 */ 489 olddp = old_nch->ncp->nc_vp; 490 if (olddp == NULL || VREFCNT(olddp) == 1) 491 return; 492 493 /* 494 * Force the root vnode of the new mount point to be resolved 495 * so we can update any matching processes. 496 */ 497 mp = new_nch->mount; 498 if (VFS_ROOT(mp, &newdp)) 499 panic("mount: lost mount"); 500 vn_unlock(newdp); 501 cache_lock(new_nch); 502 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 503 cache_setunresolved(new_nch); 504 cache_setvp(new_nch, newdp); 505 cache_unlock(new_nch); 506 507 /* 508 * Special handling of the root node 509 */ 510 if (rootvnode == olddp) { 511 vref(newdp); 512 vfs_cache_setroot(newdp, cache_hold(new_nch)); 513 } 514 515 /* 516 * Pass newdp separately so the callback does not have to access 517 * it via new_nch->ncp->nc_vp. 518 */ 519 info.old_nch = *old_nch; 520 info.new_nch = *new_nch; 521 info.new_vp = newdp; 522 allproc_scan(checkdirs_callback, &info, 0); 523 vput(newdp); 524 } 525 526 /* 527 * NOTE: callback is not MP safe because the scanned process's filedesc 528 * structure can be ripped out from under us, amoung other things. 
529 */ 530 static int 531 checkdirs_callback(struct proc *p, void *data) 532 { 533 struct checkdirs_info *info = data; 534 struct filedesc *fdp; 535 struct nchandle ncdrop1; 536 struct nchandle ncdrop2; 537 struct vnode *vprele1; 538 struct vnode *vprele2; 539 540 if ((fdp = p->p_fd) != NULL) { 541 cache_zero(&ncdrop1); 542 cache_zero(&ncdrop2); 543 vprele1 = NULL; 544 vprele2 = NULL; 545 546 /* 547 * MPUNSAFE - XXX fdp can be pulled out from under a 548 * foreign process. 549 * 550 * A shared filedesc is ok, we don't have to copy it 551 * because we are making this change globally. 552 */ 553 spin_lock(&fdp->fd_spin); 554 if (fdp->fd_ncdir.mount == info->old_nch.mount && 555 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 556 vprele1 = fdp->fd_cdir; 557 vref(info->new_vp); 558 fdp->fd_cdir = info->new_vp; 559 ncdrop1 = fdp->fd_ncdir; 560 cache_copy(&info->new_nch, &fdp->fd_ncdir); 561 } 562 if (fdp->fd_nrdir.mount == info->old_nch.mount && 563 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 564 vprele2 = fdp->fd_rdir; 565 vref(info->new_vp); 566 fdp->fd_rdir = info->new_vp; 567 ncdrop2 = fdp->fd_nrdir; 568 cache_copy(&info->new_nch, &fdp->fd_nrdir); 569 } 570 spin_unlock(&fdp->fd_spin); 571 if (ncdrop1.ncp) 572 cache_drop(&ncdrop1); 573 if (ncdrop2.ncp) 574 cache_drop(&ncdrop2); 575 if (vprele1) 576 vrele(vprele1); 577 if (vprele2) 578 vrele(vprele2); 579 } 580 return(0); 581 } 582 583 /* 584 * Unmount a file system. 585 * 586 * Note: unmount takes a path to the vnode mounted on as argument, 587 * not special file (as before). 
588 * 589 * umount_args(char *path, int flags) 590 * 591 * MPALMOSTSAFE 592 */ 593 int 594 sys_unmount(struct unmount_args *uap) 595 { 596 struct thread *td = curthread; 597 struct proc *p __debugvar = td->td_proc; 598 struct mount *mp = NULL; 599 struct nlookupdata nd; 600 int error; 601 602 KKASSERT(p); 603 if (td->td_ucred->cr_prison != NULL) { 604 error = EPERM; 605 goto done; 606 } 607 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 608 goto done; 609 610 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 611 NLC_FOLLOW | NLC_IGNBADDIR); 612 if (error == 0) 613 error = nlookup(&nd); 614 if (error) 615 goto out; 616 617 mp = nd.nl_nch.mount; 618 619 /* 620 * Only root, or the user that did the original mount is 621 * permitted to unmount this filesystem. 622 */ 623 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 624 (error = priv_check(td, PRIV_ROOT))) 625 goto out; 626 627 /* 628 * Don't allow unmounting the root file system. 629 */ 630 if (mp->mnt_flag & MNT_ROOTFS) { 631 error = EINVAL; 632 goto out; 633 } 634 635 /* 636 * Must be the root of the filesystem 637 */ 638 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 639 error = EINVAL; 640 goto out; 641 } 642 643 /* 644 * If no error try to issue the unmount. We lose our cache 645 * ref when we call nlookup_done so we must hold the mount point 646 * to prevent use-after-free races. 647 */ 648 out: 649 if (error == 0) { 650 mount_hold(mp); 651 nlookup_done(&nd); 652 error = dounmount(mp, uap->flags, 0); 653 mount_drop(mp); 654 } else { 655 nlookup_done(&nd); 656 } 657 done: 658 return (error); 659 } 660 661 /* 662 * Do the actual file system unmount (interlocked against the mountlist 663 * token and mp->mnt_token). 
664 */ 665 static int 666 dounmount_interlock(struct mount *mp) 667 { 668 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 669 return (EBUSY); 670 mp->mnt_kern_flag |= MNTK_UNMOUNT; 671 return(0); 672 } 673 674 static int 675 unmount_allproc_cb(struct proc *p, void *arg) 676 { 677 struct mount *mp; 678 679 if (p->p_textnch.ncp == NULL) 680 return 0; 681 682 mp = (struct mount *)arg; 683 if (p->p_textnch.mount == mp) 684 cache_drop(&p->p_textnch); 685 686 return 0; 687 } 688 689 /* 690 * The guts of the unmount code. The mount owns one ref and one hold 691 * count. If we successfully interlock the unmount, those refs are ours. 692 * (The ref is from mnt_ncmountpt). 693 * 694 * When halting we shortcut certain mount types such as devfs by not actually 695 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected 696 * from the mountlist so higher-level filesytems can unmount cleanly. 697 * 698 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs. 699 */ 700 int 701 dounmount(struct mount *mp, int flags, int halting) 702 { 703 struct namecache *ncp; 704 struct nchandle nch; 705 struct vnode *vp; 706 int error; 707 int async_flag; 708 int lflags; 709 int freeok = 1; 710 int hadsyncer = 0; 711 int retry; 712 int quickhalt; 713 714 lwkt_gettoken(&mp->mnt_token); 715 716 /* 717 * When halting, certain mount points can essentially just 718 * be unhooked and otherwise ignored. 719 */ 720 if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) { 721 quickhalt = 1; 722 freeok = 0; 723 } else { 724 quickhalt = 0; 725 } 726 727 728 /* 729 * Exclusive access for unmounting purposes. 730 */ 731 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 732 goto out; 733 734 /* 735 * We now 'own' the last mp->mnt_refs 736 * 737 * Allow filesystems to detect that a forced unmount is in progress. 738 */ 739 if (flags & MNT_FORCE) 740 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 741 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 
0 : LK_TIMELOCK); 742 error = lockmgr(&mp->mnt_lock, lflags); 743 if (error) { 744 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 745 if (mp->mnt_kern_flag & MNTK_MWAIT) { 746 mp->mnt_kern_flag &= ~MNTK_MWAIT; 747 wakeup(mp); 748 } 749 goto out; 750 } 751 752 if (mp->mnt_flag & MNT_EXPUBLIC) 753 vfs_setpublicfs(NULL, NULL, NULL); 754 755 vfs_msync(mp, MNT_WAIT); 756 async_flag = mp->mnt_flag & MNT_ASYNC; 757 mp->mnt_flag &=~ MNT_ASYNC; 758 759 /* 760 * Decomission our special mnt_syncer vnode. This also stops 761 * the vnlru code. If we are unable to unmount we recommission 762 * the vnode. 763 * 764 * Then sync the filesystem. 765 */ 766 if ((vp = mp->mnt_syncer) != NULL) { 767 mp->mnt_syncer = NULL; 768 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 769 vrele(vp); 770 hadsyncer = 1; 771 } 772 773 /* 774 * Sync normally-mounted filesystem. 775 */ 776 if (quickhalt == 0) { 777 if ((mp->mnt_flag & MNT_RDONLY) == 0) 778 VFS_SYNC(mp, MNT_WAIT); 779 } 780 781 /* 782 * nchandle records ref the mount structure. Expect a count of 1 783 * (our mount->mnt_ncmountpt). 784 * 785 * Scans can get temporary refs on a mountpoint (thought really 786 * heavy duty stuff like cache_findmount() do not). 787 */ 788 for (retry = 0; (retry < 10 || debug_unmount); ++retry) { 789 /* 790 * Invalidate the namecache topology under the mount. 791 * nullfs mounts alias a real mount's namecache topology 792 * and it should not be invalidated in that case. 793 */ 794 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 795 cache_lock(&mp->mnt_ncmountpt); 796 cache_inval(&mp->mnt_ncmountpt, 797 CINV_DESTROY|CINV_CHILDREN); 798 cache_unlock(&mp->mnt_ncmountpt); 799 } 800 801 /* 802 * Clear pcpu caches 803 */ 804 cache_unmounting(mp); 805 if (mp->mnt_refs != 1) 806 cache_clearmntcache(); 807 808 /* 809 * Break out if we are good. Don't count ncp refs if the 810 * mount is aliased. 811 */ 812 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 
813 NULL : mp->mnt_ncmountpt.ncp; 814 if (mp->mnt_refs == 1 && 815 (ncp == NULL || (ncp->nc_refs == 1 && 816 TAILQ_FIRST(&ncp->nc_list) == NULL))) { 817 break; 818 } 819 820 /* 821 * If forcing the unmount, clean out any p->p_textnch 822 * nchandles that match this mount. 823 */ 824 if (flags & MNT_FORCE) 825 allproc_scan(&unmount_allproc_cb, mp, 0); 826 827 /* 828 * Sleep and retry. 829 */ 830 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1); 831 if ((retry & 15) == 15) { 832 mount_warning(mp, 833 "(%p) debug - retry %d, " 834 "%d namecache refs, %d mount refs", 835 mp, retry, 836 (ncp ? ncp->nc_refs - 1 : 0), 837 mp->mnt_refs - 1); 838 } 839 } 840 841 error = 0; 842 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 843 NULL : mp->mnt_ncmountpt.ncp; 844 if (mp->mnt_refs != 1 || 845 (ncp != NULL && (ncp->nc_refs != 1 || 846 TAILQ_FIRST(&ncp->nc_list)))) { 847 mount_warning(mp, 848 "(%p): %d namecache refs, %d mount refs " 849 "still present", 850 mp, 851 (ncp ? ncp->nc_refs - 1 : 0), 852 mp->mnt_refs - 1); 853 if (flags & MNT_FORCE) { 854 freeok = 0; 855 mount_warning(mp, "forcing unmount\n"); 856 } else { 857 error = EBUSY; 858 } 859 } 860 861 /* 862 * So far so good, sync the filesystem once more and 863 * call the VFS unmount code if the sync succeeds. 864 */ 865 if (error == 0 && quickhalt == 0) { 866 if (mp->mnt_flag & MNT_RDONLY) { 867 error = VFS_UNMOUNT(mp, flags); 868 } else { 869 error = VFS_SYNC(mp, MNT_WAIT); 870 if (error == 0 || /* no error */ 871 error == EOPNOTSUPP || /* no sync avail */ 872 (flags & MNT_FORCE)) { /* force anyway */ 873 error = VFS_UNMOUNT(mp, flags); 874 } 875 } 876 if (error) { 877 mount_warning(mp, 878 "(%p) unmount: vfs refused to unmount, " 879 "error %d", 880 mp, error); 881 } 882 } 883 884 /* 885 * If an error occurred we can still recover, restoring the 886 * syncer vnode and misc flags. 
887 */ 888 if (error) { 889 if (mp->mnt_syncer == NULL && hadsyncer) 890 vfs_allocate_syncvnode(mp); 891 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 892 mp->mnt_flag |= async_flag; 893 lockmgr(&mp->mnt_lock, LK_RELEASE); 894 if (mp->mnt_kern_flag & MNTK_MWAIT) { 895 mp->mnt_kern_flag &= ~MNTK_MWAIT; 896 wakeup(mp); 897 } 898 goto out; 899 } 900 /* 901 * Clean up any journals still associated with the mount after 902 * filesystem activity has ceased. 903 */ 904 journal_remove_all_journals(mp, 905 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 906 907 mountlist_remove(mp); 908 909 /* 910 * Remove any installed vnode ops here so the individual VFSs don't 911 * have to. 912 * 913 * mnt_refs should go to zero when we scrap mnt_ncmountpt. 914 * 915 * When quickhalting we have to keep these intact because the 916 * underlying vnodes have not been destroyed, and some might be 917 * dirty. 918 */ 919 if (quickhalt == 0) { 920 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 921 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 922 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 923 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 924 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 925 } 926 927 if (mp->mnt_ncmountpt.ncp != NULL) { 928 nch = mp->mnt_ncmountpt; 929 cache_zero(&mp->mnt_ncmountpt); 930 cache_clrmountpt(&nch); 931 cache_drop(&nch); 932 } 933 if (mp->mnt_ncmounton.ncp != NULL) { 934 cache_unmounting(mp); 935 nch = mp->mnt_ncmounton; 936 cache_zero(&mp->mnt_ncmounton); 937 cache_clrmountpt(&nch); 938 cache_drop(&nch); 939 } 940 941 mp->mnt_vfc->vfc_refcount--; 942 943 /* 944 * If not quickhalting the mount, we expect there to be no 945 * vnodes left. 
946 */ 947 if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist)) 948 panic("unmount: dangling vnode"); 949 950 /* 951 * Release the lock 952 */ 953 lockmgr(&mp->mnt_lock, LK_RELEASE); 954 if (mp->mnt_kern_flag & MNTK_MWAIT) { 955 mp->mnt_kern_flag &= ~MNTK_MWAIT; 956 wakeup(mp); 957 } 958 959 /* 960 * If we reach here and freeok != 0 we must free the mount. 961 * mnt_refs should already have dropped to 0, so if it is not 962 * zero we must cycle the caches and wait. 963 * 964 * When we are satisfied that the mount has disconnected we can 965 * drop the hold on the mp that represented the mount (though the 966 * caller might actually have another, so the caller's drop may 967 * do the actual free). 968 */ 969 if (freeok) { 970 if (mp->mnt_refs > 0) 971 cache_clearmntcache(); 972 while (mp->mnt_refs > 0) { 973 cache_unmounting(mp); 974 wakeup(mp); 975 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 976 cache_clearmntcache(); 977 } 978 lwkt_reltoken(&mp->mnt_token); 979 mount_drop(mp); 980 mp = NULL; 981 } else { 982 cache_clearmntcache(); 983 } 984 error = 0; 985 KNOTE(&fs_klist, VQ_UNMOUNT); 986 out: 987 if (mp) 988 lwkt_reltoken(&mp->mnt_token); 989 return (error); 990 } 991 992 static 993 void 994 mount_warning(struct mount *mp, const char *ctl, ...) 995 { 996 char *ptr; 997 char *buf; 998 __va_list va; 999 1000 __va_start(va, ctl); 1001 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 1002 &ptr, &buf, 0) == 0) { 1003 kprintf("unmount(%s): ", ptr); 1004 kvprintf(ctl, va); 1005 kprintf("\n"); 1006 kfree(buf, M_TEMP); 1007 } else { 1008 kprintf("unmount(%p", mp); 1009 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 1010 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 1011 kprintf("): "); 1012 kvprintf(ctl, va); 1013 kprintf("\n"); 1014 } 1015 __va_end(va); 1016 } 1017 1018 /* 1019 * Shim cache_fullpath() to handle the case where a process is chrooted into 1020 * a subdirectory of a mount. 
In this case if the root mount matches the 1021 * process root directory's mount we have to specify the process's root 1022 * directory instead of the mount point, because the mount point might 1023 * be above the root directory. 1024 */ 1025 static 1026 int 1027 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 1028 { 1029 struct nchandle *nch; 1030 1031 if (p && p->p_fd->fd_nrdir.mount == mp) 1032 nch = &p->p_fd->fd_nrdir; 1033 else 1034 nch = &mp->mnt_ncmountpt; 1035 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1036 } 1037 1038 /* 1039 * Sync each mounted filesystem. 1040 */ 1041 1042 #ifdef DEBUG 1043 static int syncprt = 0; 1044 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1045 #endif /* DEBUG */ 1046 1047 static int sync_callback(struct mount *mp, void *data); 1048 1049 int 1050 sys_sync(struct sync_args *uap) 1051 { 1052 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1053 return (0); 1054 } 1055 1056 static 1057 int 1058 sync_callback(struct mount *mp, void *data __unused) 1059 { 1060 int asyncflag; 1061 1062 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1063 lwkt_gettoken(&mp->mnt_token); 1064 asyncflag = mp->mnt_flag & MNT_ASYNC; 1065 mp->mnt_flag &= ~MNT_ASYNC; 1066 lwkt_reltoken(&mp->mnt_token); 1067 vfs_msync(mp, MNT_NOWAIT); 1068 VFS_SYNC(mp, MNT_NOWAIT); 1069 lwkt_gettoken(&mp->mnt_token); 1070 mp->mnt_flag |= asyncflag; 1071 lwkt_reltoken(&mp->mnt_token); 1072 } 1073 return(0); 1074 } 1075 1076 /* XXX PRISON: could be per prison flag */ 1077 static int prison_quotas; 1078 #if 0 1079 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1080 #endif 1081 1082 /* 1083 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1084 * 1085 * Change filesystem quotas. 
1086 * 1087 * MPALMOSTSAFE 1088 */ 1089 int 1090 sys_quotactl(struct quotactl_args *uap) 1091 { 1092 struct nlookupdata nd; 1093 struct thread *td; 1094 struct mount *mp; 1095 int error; 1096 1097 td = curthread; 1098 if (td->td_ucred->cr_prison && !prison_quotas) { 1099 error = EPERM; 1100 goto done; 1101 } 1102 1103 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1104 if (error == 0) 1105 error = nlookup(&nd); 1106 if (error == 0) { 1107 mp = nd.nl_nch.mount; 1108 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 1109 uap->arg, nd.nl_cred); 1110 } 1111 nlookup_done(&nd); 1112 done: 1113 return (error); 1114 } 1115 1116 /* 1117 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1118 * void *buf, int buflen) 1119 * 1120 * This function operates on a mount point and executes the specified 1121 * operation using the specified control data, and possibly returns data. 1122 * 1123 * The actual number of bytes stored in the result buffer is returned, 0 1124 * if none, otherwise an error is returned. 1125 * 1126 * MPALMOSTSAFE 1127 */ 1128 int 1129 sys_mountctl(struct mountctl_args *uap) 1130 { 1131 struct thread *td = curthread; 1132 struct file *fp; 1133 void *ctl = NULL; 1134 void *buf = NULL; 1135 char *path = NULL; 1136 int error; 1137 1138 /* 1139 * Sanity and permissions checks. We must be root. 
1140 */ 1141 if (td->td_ucred->cr_prison != NULL) 1142 return (EPERM); 1143 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1144 (error = priv_check(td, PRIV_ROOT)) != 0) 1145 return (error); 1146 1147 /* 1148 * Argument length checks 1149 */ 1150 if (uap->ctllen < 0 || uap->ctllen > 1024) 1151 return (EINVAL); 1152 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1153 return (EINVAL); 1154 if (uap->path == NULL) 1155 return (EINVAL); 1156 1157 /* 1158 * Allocate the necessary buffers and copyin data 1159 */ 1160 path = objcache_get(namei_oc, M_WAITOK); 1161 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1162 if (error) 1163 goto done; 1164 1165 if (uap->ctllen) { 1166 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1167 error = copyin(uap->ctl, ctl, uap->ctllen); 1168 if (error) 1169 goto done; 1170 } 1171 if (uap->buflen) 1172 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1173 1174 /* 1175 * Validate the descriptor 1176 */ 1177 if (uap->fd >= 0) { 1178 fp = holdfp(td, uap->fd, -1); 1179 if (fp == NULL) { 1180 error = EBADF; 1181 goto done; 1182 } 1183 } else { 1184 fp = NULL; 1185 } 1186 1187 /* 1188 * Execute the internal kernel function and clean up. 1189 */ 1190 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 1191 buf, uap->buflen, &uap->sysmsg_result); 1192 if (fp) 1193 dropfp(td, uap->fd, fp); 1194 if (error == 0 && uap->sysmsg_result > 0) 1195 error = copyout(buf, uap->buf, uap->sysmsg_result); 1196 done: 1197 if (path) 1198 objcache_put(namei_oc, path); 1199 if (ctl) 1200 kfree(ctl, M_TEMP); 1201 if (buf) 1202 kfree(buf, M_TEMP); 1203 return (error); 1204 } 1205 1206 /* 1207 * Execute a mount control operation by resolving the path to a mount point 1208 * and calling vop_mountctl(). 1209 * 1210 * Use the mount point from the nch instead of the vnode so nullfs mounts 1211 * can properly spike the VOP. 
1212 */ 1213 int 1214 kern_mountctl(const char *path, int op, struct file *fp, 1215 const void *ctl, int ctllen, 1216 void *buf, int buflen, int *res) 1217 { 1218 struct vnode *vp; 1219 struct nlookupdata nd; 1220 struct nchandle nch; 1221 struct mount *mp; 1222 int error; 1223 1224 *res = 0; 1225 vp = NULL; 1226 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1227 if (error) 1228 return (error); 1229 error = nlookup(&nd); 1230 if (error) { 1231 nlookup_done(&nd); 1232 return (error); 1233 } 1234 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1235 if (error) { 1236 nlookup_done(&nd); 1237 return (error); 1238 } 1239 1240 /* 1241 * Yes, all this is needed to use the nch.mount below, because 1242 * we must maintain a ref on the mount to avoid ripouts (e.g. 1243 * due to heavy mount/unmount use by synth or poudriere). 1244 */ 1245 nch = nd.nl_nch; 1246 cache_zero(&nd.nl_nch); 1247 cache_unlock(&nch); 1248 nlookup_done(&nd); 1249 vn_unlock(vp); 1250 1251 mp = nch.mount; 1252 1253 /* 1254 * Must be the root of the filesystem 1255 */ 1256 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1257 cache_drop(&nch); 1258 vrele(vp); 1259 return (EINVAL); 1260 } 1261 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1262 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1263 path); 1264 cache_drop(&nch); 1265 vrele(vp); 1266 return (EINVAL); 1267 } 1268 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1269 buf, buflen, res); 1270 vrele(vp); 1271 cache_drop(&nch); 1272 1273 return (error); 1274 } 1275 1276 int 1277 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1278 { 1279 struct thread *td = curthread; 1280 struct proc *p = td->td_proc; 1281 struct mount *mp; 1282 struct statfs *sp; 1283 char *fullpath, *freepath; 1284 int error; 1285 1286 if ((error = nlookup(nd)) != 0) 1287 return (error); 1288 mp = nd->nl_nch.mount; 1289 sp = &mp->mnt_stat; 1290 1291 /* 1292 * Ignore refresh error, user should have visibility. 
1293 * This can happen if a NFS mount goes bad (e.g. server 1294 * revokes perms or goes down). 1295 */ 1296 error = VFS_STATFS(mp, sp, nd->nl_cred); 1297 /* ignore error */ 1298 1299 error = mount_path(p, mp, &fullpath, &freepath); 1300 if (error) 1301 return(error); 1302 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1303 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1304 kfree(freepath, M_TEMP); 1305 1306 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1307 bcopy(sp, buf, sizeof(*buf)); 1308 /* Only root should have access to the fsid's. */ 1309 if (priv_check(td, PRIV_ROOT)) 1310 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1311 return (0); 1312 } 1313 1314 /* 1315 * statfs_args(char *path, struct statfs *buf) 1316 * 1317 * Get filesystem statistics. 1318 */ 1319 int 1320 sys_statfs(struct statfs_args *uap) 1321 { 1322 struct nlookupdata nd; 1323 struct statfs buf; 1324 int error; 1325 1326 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1327 if (error == 0) 1328 error = kern_statfs(&nd, &buf); 1329 nlookup_done(&nd); 1330 if (error == 0) 1331 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1332 return (error); 1333 } 1334 1335 int 1336 kern_fstatfs(int fd, struct statfs *buf) 1337 { 1338 struct thread *td = curthread; 1339 struct proc *p = td->td_proc; 1340 struct file *fp; 1341 struct mount *mp; 1342 struct statfs *sp; 1343 char *fullpath, *freepath; 1344 int error; 1345 1346 KKASSERT(p); 1347 if ((error = holdvnode(td, fd, &fp)) != 0) 1348 return (error); 1349 1350 /* 1351 * Try to use mount info from any overlays rather than the 1352 * mount info for the underlying vnode, otherwise we will 1353 * fail when operating on null-mounted paths inside a chroot. 
1354 */ 1355 if ((mp = fp->f_nchandle.mount) == NULL) 1356 mp = ((struct vnode *)fp->f_data)->v_mount; 1357 if (mp == NULL) { 1358 error = EBADF; 1359 goto done; 1360 } 1361 if (fp->f_cred == NULL) { 1362 error = EINVAL; 1363 goto done; 1364 } 1365 1366 /* 1367 * Ignore refresh error, user should have visibility. 1368 * This can happen if a NFS mount goes bad (e.g. server 1369 * revokes perms or goes down). 1370 */ 1371 sp = &mp->mnt_stat; 1372 error = VFS_STATFS(mp, sp, fp->f_cred); 1373 1374 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1375 goto done; 1376 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1377 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1378 kfree(freepath, M_TEMP); 1379 1380 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1381 bcopy(sp, buf, sizeof(*buf)); 1382 1383 /* Only root should have access to the fsid's. */ 1384 if (priv_check(td, PRIV_ROOT)) 1385 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1386 error = 0; 1387 done: 1388 fdrop(fp); 1389 return (error); 1390 } 1391 1392 /* 1393 * fstatfs_args(int fd, struct statfs *buf) 1394 * 1395 * Get filesystem statistics. 
1396 */ 1397 int 1398 sys_fstatfs(struct fstatfs_args *uap) 1399 { 1400 struct statfs buf; 1401 int error; 1402 1403 error = kern_fstatfs(uap->fd, &buf); 1404 1405 if (error == 0) 1406 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1407 return (error); 1408 } 1409 1410 int 1411 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1412 { 1413 struct mount *mp; 1414 struct statvfs *sp; 1415 int error; 1416 1417 if ((error = nlookup(nd)) != 0) 1418 return (error); 1419 mp = nd->nl_nch.mount; 1420 sp = &mp->mnt_vstat; 1421 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1422 return (error); 1423 1424 sp->f_flag = 0; 1425 if (mp->mnt_flag & MNT_RDONLY) 1426 sp->f_flag |= ST_RDONLY; 1427 if (mp->mnt_flag & MNT_NOSUID) 1428 sp->f_flag |= ST_NOSUID; 1429 bcopy(sp, buf, sizeof(*buf)); 1430 return (0); 1431 } 1432 1433 /* 1434 * statfs_args(char *path, struct statfs *buf) 1435 * 1436 * Get filesystem statistics. 1437 */ 1438 int 1439 sys_statvfs(struct statvfs_args *uap) 1440 { 1441 struct nlookupdata nd; 1442 struct statvfs buf; 1443 int error; 1444 1445 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1446 if (error == 0) 1447 error = kern_statvfs(&nd, &buf); 1448 nlookup_done(&nd); 1449 if (error == 0) 1450 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1451 return (error); 1452 } 1453 1454 int 1455 kern_fstatvfs(int fd, struct statvfs *buf) 1456 { 1457 struct thread *td = curthread; 1458 struct file *fp; 1459 struct mount *mp; 1460 struct statvfs *sp; 1461 int error; 1462 1463 if ((error = holdvnode(td, fd, &fp)) != 0) 1464 return (error); 1465 if ((mp = fp->f_nchandle.mount) == NULL) 1466 mp = ((struct vnode *)fp->f_data)->v_mount; 1467 if (mp == NULL) { 1468 error = EBADF; 1469 goto done; 1470 } 1471 if (fp->f_cred == NULL) { 1472 error = EINVAL; 1473 goto done; 1474 } 1475 sp = &mp->mnt_vstat; 1476 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1477 goto done; 1478 1479 sp->f_flag = 0; 1480 if (mp->mnt_flag & MNT_RDONLY) 1481 
sp->f_flag |= ST_RDONLY; 1482 if (mp->mnt_flag & MNT_NOSUID) 1483 sp->f_flag |= ST_NOSUID; 1484 1485 bcopy(sp, buf, sizeof(*buf)); 1486 error = 0; 1487 done: 1488 fdrop(fp); 1489 return (error); 1490 } 1491 1492 /* 1493 * fstatfs_args(int fd, struct statfs *buf) 1494 * 1495 * Get filesystem statistics. 1496 */ 1497 int 1498 sys_fstatvfs(struct fstatvfs_args *uap) 1499 { 1500 struct statvfs buf; 1501 int error; 1502 1503 error = kern_fstatvfs(uap->fd, &buf); 1504 1505 if (error == 0) 1506 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1507 return (error); 1508 } 1509 1510 /* 1511 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1512 * 1513 * Get statistics on all filesystems. 1514 */ 1515 1516 struct getfsstat_info { 1517 struct statfs *sfsp; 1518 long count; 1519 long maxcount; 1520 int error; 1521 int flags; 1522 struct thread *td; 1523 }; 1524 1525 static int getfsstat_callback(struct mount *, void *); 1526 1527 int 1528 sys_getfsstat(struct getfsstat_args *uap) 1529 { 1530 struct thread *td = curthread; 1531 struct getfsstat_info info; 1532 1533 bzero(&info, sizeof(info)); 1534 1535 info.maxcount = uap->bufsize / sizeof(struct statfs); 1536 info.sfsp = uap->buf; 1537 info.count = 0; 1538 info.flags = uap->flags; 1539 info.td = td; 1540 1541 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1542 if (info.sfsp && info.count > info.maxcount) 1543 uap->sysmsg_result = info.maxcount; 1544 else 1545 uap->sysmsg_result = info.count; 1546 return (info.error); 1547 } 1548 1549 static int 1550 getfsstat_callback(struct mount *mp, void *data) 1551 { 1552 struct getfsstat_info *info = data; 1553 struct statfs *sp; 1554 char *freepath; 1555 char *fullpath; 1556 int error; 1557 1558 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1559 return(0); 1560 1561 if (info->sfsp && info->count < info->maxcount) { 1562 sp = &mp->mnt_stat; 1563 1564 /* 1565 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1566 * refresh the fsstat cache. 
MNT_NOWAIT or MNT_LAZY 1567 * overrides MNT_WAIT. 1568 * 1569 * Ignore refresh error, user should have visibility. 1570 * This can happen if a NFS mount goes bad (e.g. server 1571 * revokes perms or goes down). 1572 */ 1573 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1574 (info->flags & MNT_WAIT)) && 1575 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1576 /* ignore error */ 1577 } 1578 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1579 1580 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1581 if (error) { 1582 info->error = error; 1583 return(-1); 1584 } 1585 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1586 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1587 kfree(freepath, M_TEMP); 1588 1589 error = copyout(sp, info->sfsp, sizeof(*sp)); 1590 if (error) { 1591 info->error = error; 1592 return (-1); 1593 } 1594 ++info->sfsp; 1595 } 1596 info->count++; 1597 return(0); 1598 } 1599 1600 /* 1601 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1602 long bufsize, int flags) 1603 * 1604 * Get statistics on all filesystems. 
1605 */ 1606 1607 struct getvfsstat_info { 1608 struct statfs *sfsp; 1609 struct statvfs *vsfsp; 1610 long count; 1611 long maxcount; 1612 int error; 1613 int flags; 1614 struct thread *td; 1615 }; 1616 1617 static int getvfsstat_callback(struct mount *, void *); 1618 1619 int 1620 sys_getvfsstat(struct getvfsstat_args *uap) 1621 { 1622 struct thread *td = curthread; 1623 struct getvfsstat_info info; 1624 1625 bzero(&info, sizeof(info)); 1626 1627 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1628 info.sfsp = uap->buf; 1629 info.vsfsp = uap->vbuf; 1630 info.count = 0; 1631 info.flags = uap->flags; 1632 info.td = td; 1633 1634 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1635 if (info.vsfsp && info.count > info.maxcount) 1636 uap->sysmsg_result = info.maxcount; 1637 else 1638 uap->sysmsg_result = info.count; 1639 return (info.error); 1640 } 1641 1642 static int 1643 getvfsstat_callback(struct mount *mp, void *data) 1644 { 1645 struct getvfsstat_info *info = data; 1646 struct statfs *sp; 1647 struct statvfs *vsp; 1648 char *freepath; 1649 char *fullpath; 1650 int error; 1651 1652 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1653 return(0); 1654 1655 if (info->vsfsp && info->count < info->maxcount) { 1656 sp = &mp->mnt_stat; 1657 vsp = &mp->mnt_vstat; 1658 1659 /* 1660 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1661 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1662 * overrides MNT_WAIT. 1663 * 1664 * Ignore refresh error, user should have visibility. 1665 * This can happen if a NFS mount goes bad (e.g. server 1666 * revokes perms or goes down). 
1667 */ 1668 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1669 (info->flags & MNT_WAIT)) && 1670 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1671 /* ignore error */ 1672 } 1673 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1674 1675 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1676 (info->flags & MNT_WAIT)) && 1677 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1678 /* ignore error */ 1679 } 1680 vsp->f_flag = 0; 1681 if (mp->mnt_flag & MNT_RDONLY) 1682 vsp->f_flag |= ST_RDONLY; 1683 if (mp->mnt_flag & MNT_NOSUID) 1684 vsp->f_flag |= ST_NOSUID; 1685 1686 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1687 if (error) { 1688 info->error = error; 1689 return(-1); 1690 } 1691 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1692 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1693 kfree(freepath, M_TEMP); 1694 1695 error = copyout(sp, info->sfsp, sizeof(*sp)); 1696 if (error == 0) 1697 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1698 if (error) { 1699 info->error = error; 1700 return (-1); 1701 } 1702 ++info->sfsp; 1703 ++info->vsfsp; 1704 } 1705 info->count++; 1706 return(0); 1707 } 1708 1709 1710 /* 1711 * fchdir_args(int fd) 1712 * 1713 * Change current working directory to a given file descriptor. 
1714 */ 1715 int 1716 sys_fchdir(struct fchdir_args *uap) 1717 { 1718 struct thread *td = curthread; 1719 struct proc *p = td->td_proc; 1720 struct filedesc *fdp = p->p_fd; 1721 struct vnode *vp, *ovp; 1722 struct mount *mp; 1723 struct file *fp; 1724 struct nchandle nch, onch, tnch; 1725 int error; 1726 1727 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1728 return (error); 1729 lwkt_gettoken(&p->p_token); 1730 vp = (struct vnode *)fp->f_data; 1731 vref(vp); 1732 vn_lock(vp, LK_SHARED | LK_RETRY); 1733 if (fp->f_nchandle.ncp == NULL) 1734 error = ENOTDIR; 1735 else 1736 error = checkvp_chdir(vp, td); 1737 if (error) { 1738 vput(vp); 1739 goto done; 1740 } 1741 cache_copy(&fp->f_nchandle, &nch); 1742 1743 /* 1744 * If the ncp has become a mount point, traverse through 1745 * the mount point. 1746 */ 1747 1748 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1749 (mp = cache_findmount(&nch)) != NULL 1750 ) { 1751 error = nlookup_mp(mp, &tnch); 1752 if (error == 0) { 1753 cache_unlock(&tnch); /* leave ref intact */ 1754 vput(vp); 1755 vp = tnch.ncp->nc_vp; 1756 error = vget(vp, LK_SHARED); 1757 KKASSERT(error == 0); 1758 cache_drop(&nch); 1759 nch = tnch; 1760 } 1761 cache_dropmount(mp); 1762 } 1763 if (error == 0) { 1764 spin_lock(&fdp->fd_spin); 1765 ovp = fdp->fd_cdir; 1766 onch = fdp->fd_ncdir; 1767 fdp->fd_cdir = vp; 1768 fdp->fd_ncdir = nch; 1769 spin_unlock(&fdp->fd_spin); 1770 vn_unlock(vp); /* leave ref intact */ 1771 cache_drop(&onch); 1772 vrele(ovp); 1773 } else { 1774 cache_drop(&nch); 1775 vput(vp); 1776 } 1777 fdrop(fp); 1778 done: 1779 lwkt_reltoken(&p->p_token); 1780 return (error); 1781 } 1782 1783 int 1784 kern_chdir(struct nlookupdata *nd) 1785 { 1786 struct thread *td = curthread; 1787 struct proc *p = td->td_proc; 1788 struct filedesc *fdp = p->p_fd; 1789 struct vnode *vp, *ovp; 1790 struct nchandle onch; 1791 int error; 1792 1793 nd->nl_flags |= NLC_SHAREDLOCK; 1794 if ((error = nlookup(nd)) != 0) 1795 return (error); 1796 if ((vp = 
nd->nl_nch.ncp->nc_vp) == NULL) 1797 return (ENOENT); 1798 if ((error = vget(vp, LK_SHARED)) != 0) 1799 return (error); 1800 1801 lwkt_gettoken(&p->p_token); 1802 error = checkvp_chdir(vp, td); 1803 vn_unlock(vp); 1804 if (error == 0) { 1805 spin_lock(&fdp->fd_spin); 1806 ovp = fdp->fd_cdir; 1807 onch = fdp->fd_ncdir; 1808 fdp->fd_ncdir = nd->nl_nch; 1809 fdp->fd_cdir = vp; 1810 spin_unlock(&fdp->fd_spin); 1811 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1812 cache_drop(&onch); 1813 vrele(ovp); 1814 cache_zero(&nd->nl_nch); 1815 } else { 1816 vrele(vp); 1817 } 1818 lwkt_reltoken(&p->p_token); 1819 return (error); 1820 } 1821 1822 /* 1823 * chdir_args(char *path) 1824 * 1825 * Change current working directory (``.''). 1826 */ 1827 int 1828 sys_chdir(struct chdir_args *uap) 1829 { 1830 struct nlookupdata nd; 1831 int error; 1832 1833 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1834 if (error == 0) 1835 error = kern_chdir(&nd); 1836 nlookup_done(&nd); 1837 return (error); 1838 } 1839 1840 /* 1841 * Helper function for raised chroot(2) security function: Refuse if 1842 * any filedescriptors are open directories. 1843 */ 1844 static int 1845 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1846 { 1847 struct vnode *vp; 1848 struct file *fp; 1849 int error; 1850 int fd; 1851 1852 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1853 if ((error = holdvnode(td, fd, &fp)) != 0) 1854 continue; 1855 vp = (struct vnode *)fp->f_data; 1856 if (vp->v_type != VDIR) { 1857 fdrop(fp); 1858 continue; 1859 } 1860 fdrop(fp); 1861 return(EPERM); 1862 } 1863 return (0); 1864 } 1865 1866 /* 1867 * This sysctl determines if we will allow a process to chroot(2) if it 1868 * has a directory open: 1869 * 0: disallowed for all processes. 1870 * 1: allowed for processes that were not already chroot(2)'ed. 1871 * 2: allowed for all processes. 
1872 */ 1873 1874 static int chroot_allow_open_directories = 1; 1875 1876 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1877 &chroot_allow_open_directories, 0, ""); 1878 1879 /* 1880 * chroot to the specified namecache entry. We obtain the vp from the 1881 * namecache data. The passed ncp must be locked and referenced and will 1882 * remain locked and referenced on return. 1883 */ 1884 int 1885 kern_chroot(struct nchandle *nch) 1886 { 1887 struct thread *td = curthread; 1888 struct proc *p = td->td_proc; 1889 struct filedesc *fdp = p->p_fd; 1890 struct vnode *vp; 1891 int error; 1892 1893 /* 1894 * Only privileged user can chroot 1895 */ 1896 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1897 if (error) 1898 return (error); 1899 1900 /* 1901 * Disallow open directory descriptors (fchdir() breakouts). 1902 */ 1903 if (chroot_allow_open_directories == 0 || 1904 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1905 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 1906 return (error); 1907 } 1908 if ((vp = nch->ncp->nc_vp) == NULL) 1909 return (ENOENT); 1910 1911 if ((error = vget(vp, LK_SHARED)) != 0) 1912 return (error); 1913 1914 /* 1915 * Check the validity of vp as a directory to change to and 1916 * associate it with rdir/jdir. 
1917 */ 1918 error = checkvp_chdir(vp, td); 1919 vn_unlock(vp); /* leave reference intact */ 1920 if (error == 0) { 1921 lwkt_gettoken(&p->p_token); 1922 vrele(fdp->fd_rdir); 1923 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1924 cache_drop(&fdp->fd_nrdir); 1925 cache_copy(nch, &fdp->fd_nrdir); 1926 if (fdp->fd_jdir == NULL) { 1927 fdp->fd_jdir = vp; 1928 vref(fdp->fd_jdir); 1929 cache_copy(nch, &fdp->fd_njdir); 1930 } 1931 if ((p->p_flags & P_DIDCHROOT) == 0) { 1932 p->p_flags |= P_DIDCHROOT; 1933 if (p->p_depth <= 65535 - 32) 1934 p->p_depth += 32; 1935 } 1936 lwkt_reltoken(&p->p_token); 1937 } else { 1938 vrele(vp); 1939 } 1940 return (error); 1941 } 1942 1943 /* 1944 * chroot_args(char *path) 1945 * 1946 * Change notion of root (``/'') directory. 1947 */ 1948 int 1949 sys_chroot(struct chroot_args *uap) 1950 { 1951 struct thread *td __debugvar = curthread; 1952 struct nlookupdata nd; 1953 int error; 1954 1955 KKASSERT(td->td_proc); 1956 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1957 if (error == 0) { 1958 nd.nl_flags |= NLC_EXEC; 1959 error = nlookup(&nd); 1960 if (error == 0) 1961 error = kern_chroot(&nd.nl_nch); 1962 } 1963 nlookup_done(&nd); 1964 return(error); 1965 } 1966 1967 int 1968 sys_chroot_kernel(struct chroot_kernel_args *uap) 1969 { 1970 struct thread *td = curthread; 1971 struct nlookupdata nd; 1972 struct nchandle *nch; 1973 struct vnode *vp; 1974 int error; 1975 1976 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1977 if (error) 1978 goto error_nond; 1979 1980 error = nlookup(&nd); 1981 if (error) 1982 goto error_out; 1983 1984 nch = &nd.nl_nch; 1985 1986 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1987 if (error) 1988 goto error_out; 1989 1990 if ((vp = nch->ncp->nc_vp) == NULL) { 1991 error = ENOENT; 1992 goto error_out; 1993 } 1994 1995 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1996 goto error_out; 1997 1998 vfs_cache_setroot(vp, cache_hold(nch)); 1999 2000 
error_out: 2001 nlookup_done(&nd); 2002 error_nond: 2003 return(error); 2004 } 2005 2006 /* 2007 * Common routine for chroot and chdir. Given a locked, referenced vnode, 2008 * determine whether it is legal to chdir to the vnode. The vnode's state 2009 * is not changed by this call. 2010 */ 2011 static int 2012 checkvp_chdir(struct vnode *vp, struct thread *td) 2013 { 2014 int error; 2015 2016 if (vp->v_type != VDIR) 2017 error = ENOTDIR; 2018 else 2019 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 2020 return (error); 2021 } 2022 2023 int 2024 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 2025 { 2026 struct thread *td = curthread; 2027 struct proc *p = td->td_proc; 2028 struct lwp *lp = td->td_lwp; 2029 struct filedesc *fdp = p->p_fd; 2030 int cmode, flags; 2031 struct file *nfp; 2032 struct file *fp; 2033 struct vnode *vp; 2034 int type, indx, error = 0; 2035 struct flock lf; 2036 2037 if ((oflags & O_ACCMODE) == O_ACCMODE) 2038 return (EINVAL); 2039 flags = FFLAGS(oflags); 2040 error = falloc(lp, &nfp, NULL); 2041 if (error) 2042 return (error); 2043 fp = nfp; 2044 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 2045 2046 /* 2047 * XXX p_dupfd is a real mess. It allows a device to return a 2048 * file descriptor to be duplicated rather then doing the open 2049 * itself. 2050 */ 2051 lp->lwp_dupfd = -1; 2052 2053 /* 2054 * Call vn_open() to do the lookup and assign the vnode to the 2055 * file pointer. vn_open() does not change the ref count on fp 2056 * and the vnode, on success, will be inherited by the file pointer 2057 * and unlocked. 2058 * 2059 * Request a shared lock on the vnode if possible. 2060 * 2061 * Executable binaries can race VTEXT against O_RDWR opens, so 2062 * use an exclusive lock for O_RDWR opens as well. 2063 * 2064 * NOTE: We need a flag to separate terminal vnode locking from 2065 * parent locking. O_CREAT needs parent locking, but O_TRUNC 2066 * and O_RDWR only need to lock the terminal vnode exclusively. 
2067 */ 2068 nd->nl_flags |= NLC_LOCKVP; 2069 if ((flags & (O_CREAT|O_TRUNC|O_RDWR)) == 0) 2070 nd->nl_flags |= NLC_SHAREDLOCK; 2071 2072 error = vn_open(nd, fp, flags, cmode); 2073 nlookup_done(nd); 2074 2075 if (error) { 2076 /* 2077 * handle special fdopen() case. bleh. dupfdopen() is 2078 * responsible for dropping the old contents of ofiles[indx] 2079 * if it succeeds. 2080 * 2081 * Note that fsetfd() will add a ref to fp which represents 2082 * the fd_files[] assignment. We must still drop our 2083 * reference. 2084 */ 2085 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 2086 if (fdalloc(p, 0, &indx) == 0) { 2087 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error); 2088 if (error == 0) { 2089 *res = indx; 2090 fdrop(fp); /* our ref */ 2091 return (0); 2092 } 2093 fsetfd(fdp, NULL, indx); 2094 } 2095 } 2096 fdrop(fp); /* our ref */ 2097 if (error == ERESTART) 2098 error = EINTR; 2099 return (error); 2100 } 2101 2102 /* 2103 * ref the vnode for ourselves so it can't be ripped out from under 2104 * is. XXX need an ND flag to request that the vnode be returned 2105 * anyway. 2106 * 2107 * Reserve a file descriptor but do not assign it until the open 2108 * succeeds. 2109 */ 2110 vp = (struct vnode *)fp->f_data; 2111 vref(vp); 2112 if ((error = fdalloc(p, 0, &indx)) != 0) { 2113 fdrop(fp); 2114 vrele(vp); 2115 return (error); 2116 } 2117 2118 /* 2119 * If no error occurs the vp will have been assigned to the file 2120 * pointer. 2121 */ 2122 lp->lwp_dupfd = 0; 2123 2124 if (flags & (O_EXLOCK | O_SHLOCK)) { 2125 lf.l_whence = SEEK_SET; 2126 lf.l_start = 0; 2127 lf.l_len = 0; 2128 if (flags & O_EXLOCK) 2129 lf.l_type = F_WRLCK; 2130 else 2131 lf.l_type = F_RDLCK; 2132 if (flags & FNONBLOCK) 2133 type = 0; 2134 else 2135 type = F_WAIT; 2136 2137 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2138 /* 2139 * lock request failed. Clean up the reserved 2140 * descriptor. 
2141 */ 2142 vrele(vp); 2143 fsetfd(fdp, NULL, indx); 2144 fdrop(fp); 2145 return (error); 2146 } 2147 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2148 } 2149 #if 0 2150 /* 2151 * Assert that all regular file vnodes were created with a object. 2152 */ 2153 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2154 ("open: regular file has no backing object after vn_open")); 2155 #endif 2156 2157 vrele(vp); 2158 2159 /* 2160 * release our private reference, leaving the one associated with the 2161 * descriptor table intact. 2162 */ 2163 if (oflags & O_CLOEXEC) 2164 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2165 fsetfd(fdp, fp, indx); 2166 fdrop(fp); 2167 *res = indx; 2168 2169 return (error); 2170 } 2171 2172 /* 2173 * open_args(char *path, int flags, int mode) 2174 * 2175 * Check permissions, allocate an open file structure, 2176 * and call the device open routine if any. 2177 */ 2178 int 2179 sys_open(struct open_args *uap) 2180 { 2181 struct nlookupdata nd; 2182 int error; 2183 2184 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2185 if (error == 0) { 2186 error = kern_open(&nd, uap->flags, 2187 uap->mode, &uap->sysmsg_result); 2188 } 2189 nlookup_done(&nd); 2190 return (error); 2191 } 2192 2193 /* 2194 * openat_args(int fd, char *path, int flags, int mode) 2195 */ 2196 int 2197 sys_openat(struct openat_args *uap) 2198 { 2199 struct nlookupdata nd; 2200 int error; 2201 struct file *fp; 2202 2203 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2204 if (error == 0) { 2205 error = kern_open(&nd, uap->flags, uap->mode, 2206 &uap->sysmsg_result); 2207 } 2208 nlookup_done_at(&nd, fp); 2209 return (error); 2210 } 2211 2212 int 2213 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2214 { 2215 struct thread *td = curthread; 2216 struct proc *p = td->td_proc; 2217 struct vnode *vp; 2218 struct vattr vattr; 2219 int error; 2220 int whiteout = 0; 2221 2222 KKASSERT(p); 2223 2224 VATTR_NULL(&vattr); 2225 
vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2226 vattr.va_rmajor = rmajor; 2227 vattr.va_rminor = rminor; 2228 2229 switch (mode & S_IFMT) { 2230 case S_IFMT: /* used by badsect to flag bad sectors */ 2231 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2232 vattr.va_type = VBAD; 2233 break; 2234 case S_IFCHR: 2235 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2236 vattr.va_type = VCHR; 2237 break; 2238 case S_IFBLK: 2239 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2240 vattr.va_type = VBLK; 2241 break; 2242 case S_IFWHT: 2243 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2244 whiteout = 1; 2245 break; 2246 case S_IFDIR: /* special directories support for HAMMER */ 2247 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2248 vattr.va_type = VDIR; 2249 break; 2250 default: 2251 error = EINVAL; 2252 break; 2253 } 2254 2255 if (error) 2256 return (error); 2257 2258 bwillinode(1); 2259 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2260 if ((error = nlookup(nd)) != 0) 2261 return (error); 2262 if (nd->nl_nch.ncp->nc_vp) 2263 return (EEXIST); 2264 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2265 return (error); 2266 2267 if (whiteout) { 2268 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2269 nd->nl_cred, NAMEI_CREATE); 2270 } else { 2271 vp = NULL; 2272 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2273 &vp, nd->nl_cred, &vattr); 2274 if (error == 0) 2275 vput(vp); 2276 } 2277 return (error); 2278 } 2279 2280 /* 2281 * mknod_args(char *path, int mode, int dev) 2282 * 2283 * Create a special file. 
2284 */ 2285 int 2286 sys_mknod(struct mknod_args *uap) 2287 { 2288 struct nlookupdata nd; 2289 int error; 2290 2291 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2292 if (error == 0) { 2293 error = kern_mknod(&nd, uap->mode, 2294 umajor(uap->dev), uminor(uap->dev)); 2295 } 2296 nlookup_done(&nd); 2297 return (error); 2298 } 2299 2300 /* 2301 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2302 * 2303 * Create a special file. The path is relative to the directory associated 2304 * with fd. 2305 */ 2306 int 2307 sys_mknodat(struct mknodat_args *uap) 2308 { 2309 struct nlookupdata nd; 2310 struct file *fp; 2311 int error; 2312 2313 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2314 if (error == 0) { 2315 error = kern_mknod(&nd, uap->mode, 2316 umajor(uap->dev), uminor(uap->dev)); 2317 } 2318 nlookup_done_at(&nd, fp); 2319 return (error); 2320 } 2321 2322 int 2323 kern_mkfifo(struct nlookupdata *nd, int mode) 2324 { 2325 struct thread *td = curthread; 2326 struct proc *p = td->td_proc; 2327 struct vattr vattr; 2328 struct vnode *vp; 2329 int error; 2330 2331 bwillinode(1); 2332 2333 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2334 if ((error = nlookup(nd)) != 0) 2335 return (error); 2336 if (nd->nl_nch.ncp->nc_vp) 2337 return (EEXIST); 2338 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2339 return (error); 2340 2341 VATTR_NULL(&vattr); 2342 vattr.va_type = VFIFO; 2343 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2344 vp = NULL; 2345 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2346 if (error == 0) 2347 vput(vp); 2348 return (error); 2349 } 2350 2351 /* 2352 * mkfifo_args(char *path, int mode) 2353 * 2354 * Create a named pipe. 
2355 */ 2356 int 2357 sys_mkfifo(struct mkfifo_args *uap) 2358 { 2359 struct nlookupdata nd; 2360 int error; 2361 2362 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2363 if (error == 0) 2364 error = kern_mkfifo(&nd, uap->mode); 2365 nlookup_done(&nd); 2366 return (error); 2367 } 2368 2369 /* 2370 * mkfifoat_args(int fd, char *path, mode_t mode) 2371 * 2372 * Create a named pipe. The path is relative to the directory associated 2373 * with fd. 2374 */ 2375 int 2376 sys_mkfifoat(struct mkfifoat_args *uap) 2377 { 2378 struct nlookupdata nd; 2379 struct file *fp; 2380 int error; 2381 2382 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2383 if (error == 0) 2384 error = kern_mkfifo(&nd, uap->mode); 2385 nlookup_done_at(&nd, fp); 2386 return (error); 2387 } 2388 2389 static int hardlink_check_uid = 0; 2390 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2391 &hardlink_check_uid, 0, 2392 "Unprivileged processes cannot create hard links to files owned by other " 2393 "users"); 2394 static int hardlink_check_gid = 0; 2395 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2396 &hardlink_check_gid, 0, 2397 "Unprivileged processes cannot create hard links to files owned by other " 2398 "groups"); 2399 2400 static int 2401 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2402 { 2403 struct vattr va; 2404 int error; 2405 2406 /* 2407 * Shortcut if disabled 2408 */ 2409 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2410 return (0); 2411 2412 /* 2413 * Privileged user can always hardlink 2414 */ 2415 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2416 return (0); 2417 2418 /* 2419 * Otherwise only if the originating file is owned by the 2420 * same user or group. Note that any group is allowed if 2421 * the file is owned by the caller. 
2422 */ 2423 error = VOP_GETATTR(vp, &va); 2424 if (error != 0) 2425 return (error); 2426 2427 if (hardlink_check_uid) { 2428 if (cred->cr_uid != va.va_uid) 2429 return (EPERM); 2430 } 2431 2432 if (hardlink_check_gid) { 2433 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2434 return (EPERM); 2435 } 2436 2437 return (0); 2438 } 2439 2440 int 2441 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2442 { 2443 struct thread *td = curthread; 2444 struct vnode *vp; 2445 int error; 2446 2447 /* 2448 * Lookup the source and obtained a locked vnode. 2449 * 2450 * You may only hardlink a file which you have write permission 2451 * on or which you own. 2452 * 2453 * XXX relookup on vget failure / race ? 2454 */ 2455 bwillinode(1); 2456 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2457 if ((error = nlookup(nd)) != 0) 2458 return (error); 2459 vp = nd->nl_nch.ncp->nc_vp; 2460 KKASSERT(vp != NULL); 2461 if (vp->v_type == VDIR) 2462 return (EPERM); /* POSIX */ 2463 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2464 return (error); 2465 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2466 return (error); 2467 2468 /* 2469 * Unlock the source so we can lookup the target without deadlocking 2470 * (XXX vp is locked already, possible other deadlock?). The target 2471 * must not exist. 2472 */ 2473 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2474 nd->nl_flags &= ~NLC_NCPISLOCKED; 2475 cache_unlock(&nd->nl_nch); 2476 vn_unlock(vp); 2477 2478 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2479 if ((error = nlookup(linknd)) != 0) { 2480 vrele(vp); 2481 return (error); 2482 } 2483 if (linknd->nl_nch.ncp->nc_vp) { 2484 vrele(vp); 2485 return (EEXIST); 2486 } 2487 VFS_MODIFYING(vp->v_mount); 2488 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2489 if (error) { 2490 vrele(vp); 2491 return (error); 2492 } 2493 2494 /* 2495 * Finally run the new API VOP. 
2496 */ 2497 error = can_hardlink(vp, td, td->td_ucred); 2498 if (error == 0) { 2499 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2500 vp, linknd->nl_cred); 2501 } 2502 vput(vp); 2503 return (error); 2504 } 2505 2506 /* 2507 * link_args(char *path, char *link) 2508 * 2509 * Make a hard file link. 2510 */ 2511 int 2512 sys_link(struct link_args *uap) 2513 { 2514 struct nlookupdata nd, linknd; 2515 int error; 2516 2517 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2518 if (error == 0) { 2519 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2520 if (error == 0) 2521 error = kern_link(&nd, &linknd); 2522 nlookup_done(&linknd); 2523 } 2524 nlookup_done(&nd); 2525 return (error); 2526 } 2527 2528 /* 2529 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2530 * 2531 * Make a hard file link. The path1 argument is relative to the directory 2532 * associated with fd1, and similarly the path2 argument is relative to 2533 * the directory associated with fd2. 2534 */ 2535 int 2536 sys_linkat(struct linkat_args *uap) 2537 { 2538 struct nlookupdata nd, linknd; 2539 struct file *fp1, *fp2; 2540 int error; 2541 2542 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2543 (uap->flags & AT_SYMLINK_FOLLOW) ? 
NLC_FOLLOW : 0); 2544 if (error == 0) { 2545 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2546 uap->path2, UIO_USERSPACE, 0); 2547 if (error == 0) 2548 error = kern_link(&nd, &linknd); 2549 nlookup_done_at(&linknd, fp2); 2550 } 2551 nlookup_done_at(&nd, fp1); 2552 return (error); 2553 } 2554 2555 int 2556 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2557 { 2558 struct vattr vattr; 2559 struct vnode *vp; 2560 struct vnode *dvp; 2561 int error; 2562 2563 bwillinode(1); 2564 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2565 if ((error = nlookup(nd)) != 0) 2566 return (error); 2567 if (nd->nl_nch.ncp->nc_vp) 2568 return (EEXIST); 2569 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2570 return (error); 2571 dvp = nd->nl_dvp; 2572 VATTR_NULL(&vattr); 2573 vattr.va_mode = mode; 2574 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2575 if (error == 0) 2576 vput(vp); 2577 return (error); 2578 } 2579 2580 /* 2581 * symlink(char *path, char *link) 2582 * 2583 * Make a symbolic link. 2584 */ 2585 int 2586 sys_symlink(struct symlink_args *uap) 2587 { 2588 struct thread *td = curthread; 2589 struct nlookupdata nd; 2590 char *path; 2591 int error; 2592 int mode; 2593 2594 path = objcache_get(namei_oc, M_WAITOK); 2595 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2596 if (error == 0) { 2597 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2598 if (error == 0) { 2599 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2600 error = kern_symlink(&nd, path, mode); 2601 } 2602 nlookup_done(&nd); 2603 } 2604 objcache_put(namei_oc, path); 2605 return (error); 2606 } 2607 2608 /* 2609 * symlinkat_args(char *path1, int fd, char *path2) 2610 * 2611 * Make a symbolic link. The path2 argument is relative to the directory 2612 * associated with fd. 
2613 */ 2614 int 2615 sys_symlinkat(struct symlinkat_args *uap) 2616 { 2617 struct thread *td = curthread; 2618 struct nlookupdata nd; 2619 struct file *fp; 2620 char *path1; 2621 int error; 2622 int mode; 2623 2624 path1 = objcache_get(namei_oc, M_WAITOK); 2625 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2626 if (error == 0) { 2627 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2628 UIO_USERSPACE, 0); 2629 if (error == 0) { 2630 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2631 error = kern_symlink(&nd, path1, mode); 2632 } 2633 nlookup_done_at(&nd, fp); 2634 } 2635 objcache_put(namei_oc, path1); 2636 return (error); 2637 } 2638 2639 /* 2640 * undelete_args(char *path) 2641 * 2642 * Delete a whiteout from the filesystem. 2643 */ 2644 int 2645 sys_undelete(struct undelete_args *uap) 2646 { 2647 struct nlookupdata nd; 2648 int error; 2649 2650 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2651 bwillinode(1); 2652 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2653 if (error == 0) 2654 error = nlookup(&nd); 2655 if (error == 0) 2656 error = ncp_writechk(&nd.nl_nch); 2657 if (error == 0) { 2658 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2659 NAMEI_DELETE); 2660 } 2661 nlookup_done(&nd); 2662 return (error); 2663 } 2664 2665 int 2666 kern_unlink(struct nlookupdata *nd) 2667 { 2668 int error; 2669 2670 bwillinode(1); 2671 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2672 if ((error = nlookup(nd)) != 0) 2673 return (error); 2674 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2675 return (error); 2676 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2677 return (error); 2678 } 2679 2680 /* 2681 * unlink_args(char *path) 2682 * 2683 * Delete a name from the filesystem. 
2684 */ 2685 int 2686 sys_unlink(struct unlink_args *uap) 2687 { 2688 struct nlookupdata nd; 2689 int error; 2690 2691 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2692 if (error == 0) 2693 error = kern_unlink(&nd); 2694 nlookup_done(&nd); 2695 return (error); 2696 } 2697 2698 2699 /* 2700 * unlinkat_args(int fd, char *path, int flags) 2701 * 2702 * Delete the file or directory entry pointed to by fd/path. 2703 */ 2704 int 2705 sys_unlinkat(struct unlinkat_args *uap) 2706 { 2707 struct nlookupdata nd; 2708 struct file *fp; 2709 int error; 2710 2711 if (uap->flags & ~AT_REMOVEDIR) 2712 return (EINVAL); 2713 2714 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2715 if (error == 0) { 2716 if (uap->flags & AT_REMOVEDIR) 2717 error = kern_rmdir(&nd); 2718 else 2719 error = kern_unlink(&nd); 2720 } 2721 nlookup_done_at(&nd, fp); 2722 return (error); 2723 } 2724 2725 int 2726 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2727 { 2728 struct thread *td = curthread; 2729 struct file *fp; 2730 struct vnode *vp; 2731 struct vattr vattr; 2732 off_t new_offset; 2733 int error; 2734 2735 fp = holdfp(td, fd, -1); 2736 if (fp == NULL) 2737 return (EBADF); 2738 if (fp->f_type != DTYPE_VNODE) { 2739 error = ESPIPE; 2740 goto done; 2741 } 2742 vp = (struct vnode *)fp->f_data; 2743 2744 switch (whence) { 2745 case L_INCR: 2746 spin_lock(&fp->f_spin); 2747 new_offset = fp->f_offset + offset; 2748 error = 0; 2749 break; 2750 case L_XTND: 2751 error = VOP_GETATTR_FP(vp, &vattr, fp); 2752 spin_lock(&fp->f_spin); 2753 new_offset = offset + vattr.va_size; 2754 break; 2755 case L_SET: 2756 new_offset = offset; 2757 error = 0; 2758 spin_lock(&fp->f_spin); 2759 break; 2760 default: 2761 new_offset = 0; 2762 error = EINVAL; 2763 spin_lock(&fp->f_spin); 2764 break; 2765 } 2766 2767 /* 2768 * Validate the seek position. Negative offsets are not allowed 2769 * for regular files or directories. 
2770 * 2771 * Normally we would also not want to allow negative offsets for 2772 * character and block-special devices. However kvm addresses 2773 * on 64 bit architectures might appear to be negative and must 2774 * be allowed. 2775 */ 2776 if (error == 0) { 2777 if (new_offset < 0 && 2778 (vp->v_type == VREG || vp->v_type == VDIR)) { 2779 error = EINVAL; 2780 } else { 2781 fp->f_offset = new_offset; 2782 } 2783 } 2784 *res = fp->f_offset; 2785 spin_unlock(&fp->f_spin); 2786 done: 2787 dropfp(td, fd, fp); 2788 2789 return (error); 2790 } 2791 2792 /* 2793 * lseek_args(int fd, int pad, off_t offset, int whence) 2794 * 2795 * Reposition read/write file offset. 2796 */ 2797 int 2798 sys_lseek(struct lseek_args *uap) 2799 { 2800 int error; 2801 2802 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2803 &uap->sysmsg_offset); 2804 2805 return (error); 2806 } 2807 2808 /* 2809 * Check if current process can access given file. amode is a bitmask of *_OK 2810 * access bits. flags is a bitmask of AT_* flags. 2811 */ 2812 int 2813 kern_access(struct nlookupdata *nd, int amode, int flags) 2814 { 2815 struct vnode *vp; 2816 int error, mode; 2817 2818 if (flags & ~AT_EACCESS) 2819 return (EINVAL); 2820 nd->nl_flags |= NLC_SHAREDLOCK; 2821 if ((error = nlookup(nd)) != 0) 2822 return (error); 2823 retry: 2824 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2825 if (error) 2826 return (error); 2827 2828 /* Flags == 0 means only check for existence. */ 2829 if (amode) { 2830 mode = 0; 2831 if (amode & R_OK) 2832 mode |= VREAD; 2833 if (amode & W_OK) 2834 mode |= VWRITE; 2835 if (amode & X_OK) 2836 mode |= VEXEC; 2837 if ((mode & VWRITE) == 0 || 2838 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2839 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2840 2841 /* 2842 * If the file handle is stale we have to re-resolve the 2843 * entry with the ncp held exclusively. This is a hack 2844 * at the moment. 
2845 */ 2846 if (error == ESTALE) { 2847 vput(vp); 2848 cache_unlock(&nd->nl_nch); 2849 cache_lock(&nd->nl_nch); 2850 cache_setunresolved(&nd->nl_nch); 2851 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2852 if (error == 0) { 2853 vp = NULL; 2854 goto retry; 2855 } 2856 return(error); 2857 } 2858 } 2859 vput(vp); 2860 return (error); 2861 } 2862 2863 /* 2864 * access_args(char *path, int flags) 2865 * 2866 * Check access permissions. 2867 */ 2868 int 2869 sys_access(struct access_args *uap) 2870 { 2871 struct nlookupdata nd; 2872 int error; 2873 2874 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2875 if (error == 0) 2876 error = kern_access(&nd, uap->flags, 0); 2877 nlookup_done(&nd); 2878 return (error); 2879 } 2880 2881 2882 /* 2883 * eaccess_args(char *path, int flags) 2884 * 2885 * Check access permissions. 2886 */ 2887 int 2888 sys_eaccess(struct eaccess_args *uap) 2889 { 2890 struct nlookupdata nd; 2891 int error; 2892 2893 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2894 if (error == 0) 2895 error = kern_access(&nd, uap->flags, AT_EACCESS); 2896 nlookup_done(&nd); 2897 return (error); 2898 } 2899 2900 2901 /* 2902 * faccessat_args(int fd, char *path, int amode, int flags) 2903 * 2904 * Check access permissions. 
2905 */ 2906 int 2907 sys_faccessat(struct faccessat_args *uap) 2908 { 2909 struct nlookupdata nd; 2910 struct file *fp; 2911 int error; 2912 2913 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2914 NLC_FOLLOW); 2915 if (error == 0) 2916 error = kern_access(&nd, uap->amode, uap->flags); 2917 nlookup_done_at(&nd, fp); 2918 return (error); 2919 } 2920 2921 int 2922 kern_stat(struct nlookupdata *nd, struct stat *st) 2923 { 2924 int error; 2925 struct vnode *vp; 2926 2927 nd->nl_flags |= NLC_SHAREDLOCK; 2928 if ((error = nlookup(nd)) != 0) 2929 return (error); 2930 again: 2931 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2932 return (ENOENT); 2933 2934 if ((error = vget(vp, LK_SHARED)) != 0) 2935 return (error); 2936 error = vn_stat(vp, st, nd->nl_cred); 2937 2938 /* 2939 * If the file handle is stale we have to re-resolve the 2940 * entry with the ncp held exclusively. This is a hack 2941 * at the moment. 2942 */ 2943 if (error == ESTALE) { 2944 vput(vp); 2945 cache_unlock(&nd->nl_nch); 2946 cache_lock(&nd->nl_nch); 2947 cache_setunresolved(&nd->nl_nch); 2948 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2949 if (error == 0) 2950 goto again; 2951 } else { 2952 vput(vp); 2953 } 2954 return (error); 2955 } 2956 2957 /* 2958 * stat_args(char *path, struct stat *ub) 2959 * 2960 * Get file status; this version follows links. 2961 */ 2962 int 2963 sys_stat(struct stat_args *uap) 2964 { 2965 struct nlookupdata nd; 2966 struct stat st; 2967 int error; 2968 2969 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2970 if (error == 0) { 2971 error = kern_stat(&nd, &st); 2972 if (error == 0) 2973 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2974 } 2975 nlookup_done(&nd); 2976 return (error); 2977 } 2978 2979 /* 2980 * lstat_args(char *path, struct stat *ub) 2981 * 2982 * Get file status; this version does not follow links. 
2983 */ 2984 int 2985 sys_lstat(struct lstat_args *uap) 2986 { 2987 struct nlookupdata nd; 2988 struct stat st; 2989 int error; 2990 2991 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2992 if (error == 0) { 2993 error = kern_stat(&nd, &st); 2994 if (error == 0) 2995 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2996 } 2997 nlookup_done(&nd); 2998 return (error); 2999 } 3000 3001 /* 3002 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3003 * 3004 * Get status of file pointed to by fd/path. 3005 */ 3006 int 3007 sys_fstatat(struct fstatat_args *uap) 3008 { 3009 struct nlookupdata nd; 3010 struct stat st; 3011 int error; 3012 int flags; 3013 struct file *fp; 3014 3015 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3016 return (EINVAL); 3017 3018 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3019 3020 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3021 UIO_USERSPACE, flags); 3022 if (error == 0) { 3023 error = kern_stat(&nd, &st); 3024 if (error == 0) 3025 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3026 } 3027 nlookup_done_at(&nd, fp); 3028 return (error); 3029 } 3030 3031 static int 3032 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3033 { 3034 struct nlookupdata nd; 3035 struct vnode *vp; 3036 int error; 3037 3038 vp = NULL; 3039 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3040 if (error == 0) 3041 error = nlookup(&nd); 3042 if (error == 0) 3043 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3044 nlookup_done(&nd); 3045 if (error == 0) { 3046 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3047 vput(vp); 3048 } 3049 return (error); 3050 } 3051 3052 /* 3053 * pathconf_Args(char *path, int name) 3054 * 3055 * Get configurable pathname variables. 
3056 */ 3057 int 3058 sys_pathconf(struct pathconf_args *uap) 3059 { 3060 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3061 &uap->sysmsg_reg)); 3062 } 3063 3064 /* 3065 * lpathconf_Args(char *path, int name) 3066 * 3067 * Get configurable pathname variables, but don't follow symlinks. 3068 */ 3069 int 3070 sys_lpathconf(struct lpathconf_args *uap) 3071 { 3072 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3073 } 3074 3075 /* 3076 * XXX: daver 3077 * kern_readlink isn't properly split yet. There is a copyin burried 3078 * in VOP_READLINK(). 3079 */ 3080 int 3081 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3082 { 3083 struct thread *td = curthread; 3084 struct vnode *vp; 3085 struct iovec aiov; 3086 struct uio auio; 3087 int error; 3088 3089 nd->nl_flags |= NLC_SHAREDLOCK; 3090 if ((error = nlookup(nd)) != 0) 3091 return (error); 3092 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3093 if (error) 3094 return (error); 3095 if (vp->v_type != VLNK) { 3096 error = EINVAL; 3097 } else { 3098 aiov.iov_base = buf; 3099 aiov.iov_len = count; 3100 auio.uio_iov = &aiov; 3101 auio.uio_iovcnt = 1; 3102 auio.uio_offset = 0; 3103 auio.uio_rw = UIO_READ; 3104 auio.uio_segflg = UIO_USERSPACE; 3105 auio.uio_td = td; 3106 auio.uio_resid = count; 3107 error = VOP_READLINK(vp, &auio, td->td_ucred); 3108 } 3109 vput(vp); 3110 *res = count - auio.uio_resid; 3111 return (error); 3112 } 3113 3114 /* 3115 * readlink_args(char *path, char *buf, int count) 3116 * 3117 * Return target name of a symbolic link. 
3118 */ 3119 int 3120 sys_readlink(struct readlink_args *uap) 3121 { 3122 struct nlookupdata nd; 3123 int error; 3124 3125 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3126 if (error == 0) { 3127 error = kern_readlink(&nd, uap->buf, uap->count, 3128 &uap->sysmsg_result); 3129 } 3130 nlookup_done(&nd); 3131 return (error); 3132 } 3133 3134 /* 3135 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3136 * 3137 * Return target name of a symbolic link. The path is relative to the 3138 * directory associated with fd. 3139 */ 3140 int 3141 sys_readlinkat(struct readlinkat_args *uap) 3142 { 3143 struct nlookupdata nd; 3144 struct file *fp; 3145 int error; 3146 3147 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3148 if (error == 0) { 3149 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3150 &uap->sysmsg_result); 3151 } 3152 nlookup_done_at(&nd, fp); 3153 return (error); 3154 } 3155 3156 static int 3157 setfflags(struct vnode *vp, u_long flags) 3158 { 3159 struct thread *td = curthread; 3160 int error; 3161 struct vattr vattr; 3162 3163 /* 3164 * Prevent non-root users from setting flags on devices. When 3165 * a device is reused, users can retain ownership of the device 3166 * if they are allowed to set flags and programs assume that 3167 * chown can't fail when done as root. 3168 */ 3169 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3170 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3171 return (error); 3172 3173 /* 3174 * note: vget is required for any operation that might mod the vnode 3175 * so VINACTIVE is properly cleared. 3176 */ 3177 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3178 VATTR_NULL(&vattr); 3179 vattr.va_flags = flags; 3180 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3181 vput(vp); 3182 } 3183 return (error); 3184 } 3185 3186 /* 3187 * chflags(const char *path, u_long flags) 3188 * 3189 * Change flags of a file given a path name. 
3190 */ 3191 int 3192 sys_chflags(struct chflags_args *uap) 3193 { 3194 struct nlookupdata nd; 3195 struct vnode *vp; 3196 int error; 3197 3198 vp = NULL; 3199 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3200 if (error == 0) 3201 error = nlookup(&nd); 3202 if (error == 0) 3203 error = ncp_writechk(&nd.nl_nch); 3204 if (error == 0) 3205 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3206 nlookup_done(&nd); 3207 if (error == 0) { 3208 error = setfflags(vp, uap->flags); 3209 vrele(vp); 3210 } 3211 return (error); 3212 } 3213 3214 /* 3215 * lchflags(const char *path, u_long flags) 3216 * 3217 * Change flags of a file given a path name, but don't follow symlinks. 3218 */ 3219 int 3220 sys_lchflags(struct lchflags_args *uap) 3221 { 3222 struct nlookupdata nd; 3223 struct vnode *vp; 3224 int error; 3225 3226 vp = NULL; 3227 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3228 if (error == 0) 3229 error = nlookup(&nd); 3230 if (error == 0) 3231 error = ncp_writechk(&nd.nl_nch); 3232 if (error == 0) 3233 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3234 nlookup_done(&nd); 3235 if (error == 0) { 3236 error = setfflags(vp, uap->flags); 3237 vrele(vp); 3238 } 3239 return (error); 3240 } 3241 3242 /* 3243 * fchflags_args(int fd, u_flags flags) 3244 * 3245 * Change flags of a file given a file descriptor. 
3246 */ 3247 int 3248 sys_fchflags(struct fchflags_args *uap) 3249 { 3250 struct thread *td = curthread; 3251 struct file *fp; 3252 int error; 3253 3254 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3255 return (error); 3256 if (fp->f_nchandle.ncp) 3257 error = ncp_writechk(&fp->f_nchandle); 3258 if (error == 0) 3259 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3260 fdrop(fp); 3261 return (error); 3262 } 3263 3264 /* 3265 * chflagsat_args(int fd, const char *path, u_long flags, int atflags) 3266 * change flags given a pathname relative to a filedescriptor 3267 */ 3268 int sys_chflagsat(struct chflagsat_args *uap) 3269 { 3270 struct nlookupdata nd; 3271 struct vnode *vp; 3272 struct file *fp; 3273 int error; 3274 int lookupflags; 3275 3276 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3277 return (EINVAL); 3278 3279 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3280 3281 vp = NULL; 3282 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3283 if (error == 0) 3284 error = nlookup(&nd); 3285 if (error == 0) 3286 error = ncp_writechk(&nd.nl_nch); 3287 if (error == 0) 3288 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3289 nlookup_done_at(&nd, fp); 3290 if (error == 0) { 3291 error = setfflags(vp, uap->flags); 3292 vrele(vp); 3293 } 3294 return (error); 3295 } 3296 3297 3298 static int 3299 setfmode(struct vnode *vp, int mode) 3300 { 3301 struct thread *td = curthread; 3302 int error; 3303 struct vattr vattr; 3304 3305 /* 3306 * note: vget is required for any operation that might mod the vnode 3307 * so VINACTIVE is properly cleared. 
3308 */ 3309 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3310 VATTR_NULL(&vattr); 3311 vattr.va_mode = mode & ALLPERMS; 3312 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3313 cache_inval_wxok(vp); 3314 vput(vp); 3315 } 3316 return error; 3317 } 3318 3319 int 3320 kern_chmod(struct nlookupdata *nd, int mode) 3321 { 3322 struct vnode *vp; 3323 int error; 3324 3325 if ((error = nlookup(nd)) != 0) 3326 return (error); 3327 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3328 return (error); 3329 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3330 error = setfmode(vp, mode); 3331 vrele(vp); 3332 return (error); 3333 } 3334 3335 /* 3336 * chmod_args(char *path, int mode) 3337 * 3338 * Change mode of a file given path name. 3339 */ 3340 int 3341 sys_chmod(struct chmod_args *uap) 3342 { 3343 struct nlookupdata nd; 3344 int error; 3345 3346 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3347 if (error == 0) 3348 error = kern_chmod(&nd, uap->mode); 3349 nlookup_done(&nd); 3350 return (error); 3351 } 3352 3353 /* 3354 * lchmod_args(char *path, int mode) 3355 * 3356 * Change mode of a file given path name (don't follow links.) 3357 */ 3358 int 3359 sys_lchmod(struct lchmod_args *uap) 3360 { 3361 struct nlookupdata nd; 3362 int error; 3363 3364 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3365 if (error == 0) 3366 error = kern_chmod(&nd, uap->mode); 3367 nlookup_done(&nd); 3368 return (error); 3369 } 3370 3371 /* 3372 * fchmod_args(int fd, int mode) 3373 * 3374 * Change mode of a file given a file descriptor. 
3375 */ 3376 int 3377 sys_fchmod(struct fchmod_args *uap) 3378 { 3379 struct thread *td = curthread; 3380 struct file *fp; 3381 int error; 3382 3383 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3384 return (error); 3385 if (fp->f_nchandle.ncp) 3386 error = ncp_writechk(&fp->f_nchandle); 3387 if (error == 0) 3388 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3389 fdrop(fp); 3390 return (error); 3391 } 3392 3393 /* 3394 * fchmodat_args(char *path, int mode) 3395 * 3396 * Change mode of a file pointed to by fd/path. 3397 */ 3398 int 3399 sys_fchmodat(struct fchmodat_args *uap) 3400 { 3401 struct nlookupdata nd; 3402 struct file *fp; 3403 int error; 3404 int flags; 3405 3406 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3407 return (EINVAL); 3408 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3409 3410 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3411 UIO_USERSPACE, flags); 3412 if (error == 0) 3413 error = kern_chmod(&nd, uap->mode); 3414 nlookup_done_at(&nd, fp); 3415 return (error); 3416 } 3417 3418 static int 3419 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3420 { 3421 struct thread *td = curthread; 3422 int error; 3423 struct vattr vattr; 3424 uid_t o_uid; 3425 gid_t o_gid; 3426 uint64_t size; 3427 3428 /* 3429 * note: vget is required for any operation that might mod the vnode 3430 * so VINACTIVE is properly cleared. 
3431 */ 3432 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3433 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3434 return error; 3435 o_uid = vattr.va_uid; 3436 o_gid = vattr.va_gid; 3437 size = vattr.va_size; 3438 3439 VATTR_NULL(&vattr); 3440 vattr.va_uid = uid; 3441 vattr.va_gid = gid; 3442 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3443 vput(vp); 3444 } 3445 3446 if (error == 0) { 3447 if (uid == -1) 3448 uid = o_uid; 3449 if (gid == -1) 3450 gid = o_gid; 3451 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3452 VFS_ACCOUNT(mp, uid, gid, size); 3453 } 3454 3455 return error; 3456 } 3457 3458 int 3459 kern_chown(struct nlookupdata *nd, int uid, int gid) 3460 { 3461 struct vnode *vp; 3462 int error; 3463 3464 if ((error = nlookup(nd)) != 0) 3465 return (error); 3466 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3467 return (error); 3468 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3469 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3470 vrele(vp); 3471 return (error); 3472 } 3473 3474 /* 3475 * chown(char *path, int uid, int gid) 3476 * 3477 * Set ownership given a path name. 3478 */ 3479 int 3480 sys_chown(struct chown_args *uap) 3481 { 3482 struct nlookupdata nd; 3483 int error; 3484 3485 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3486 if (error == 0) 3487 error = kern_chown(&nd, uap->uid, uap->gid); 3488 nlookup_done(&nd); 3489 return (error); 3490 } 3491 3492 /* 3493 * lchown_args(char *path, int uid, int gid) 3494 * 3495 * Set ownership given a path name, do not cross symlinks. 3496 */ 3497 int 3498 sys_lchown(struct lchown_args *uap) 3499 { 3500 struct nlookupdata nd; 3501 int error; 3502 3503 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3504 if (error == 0) 3505 error = kern_chown(&nd, uap->uid, uap->gid); 3506 nlookup_done(&nd); 3507 return (error); 3508 } 3509 3510 /* 3511 * fchown_args(int fd, int uid, int gid) 3512 * 3513 * Set ownership given a file descriptor. 
3514 */ 3515 int 3516 sys_fchown(struct fchown_args *uap) 3517 { 3518 struct thread *td = curthread; 3519 struct proc *p = td->td_proc; 3520 struct file *fp; 3521 int error; 3522 3523 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3524 return (error); 3525 if (fp->f_nchandle.ncp) 3526 error = ncp_writechk(&fp->f_nchandle); 3527 if (error == 0) 3528 error = setfown(p->p_fd->fd_ncdir.mount, 3529 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3530 fdrop(fp); 3531 return (error); 3532 } 3533 3534 /* 3535 * fchownat(int fd, char *path, int uid, int gid, int flags) 3536 * 3537 * Set ownership of file pointed to by fd/path. 3538 */ 3539 int 3540 sys_fchownat(struct fchownat_args *uap) 3541 { 3542 struct nlookupdata nd; 3543 struct file *fp; 3544 int error; 3545 int flags; 3546 3547 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3548 return (EINVAL); 3549 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3550 3551 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3552 UIO_USERSPACE, flags); 3553 if (error == 0) 3554 error = kern_chown(&nd, uap->uid, uap->gid); 3555 nlookup_done_at(&nd, fp); 3556 return (error); 3557 } 3558 3559 3560 static int 3561 getutimes(struct timeval *tvp, struct timespec *tsp) 3562 { 3563 struct timeval tv[2]; 3564 int error; 3565 3566 if (tvp == NULL) { 3567 microtime(&tv[0]); 3568 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3569 tsp[1] = tsp[0]; 3570 } else { 3571 if ((error = itimerfix(tvp)) != 0) 3572 return (error); 3573 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3574 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3575 } 3576 return 0; 3577 } 3578 3579 static int 3580 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3581 { 3582 struct timespec tsnow; 3583 int error; 3584 3585 *nullflag = 0; 3586 nanotime(&tsnow); 3587 if (ts == NULL) { 3588 newts[0] = tsnow; 3589 newts[1] = tsnow; 3590 *nullflag = 1; 3591 return (0); 3592 } 3593 3594 newts[0] = ts[0]; 3595 newts[1] = ts[1]; 3596 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT) 3597 return (0); 3598 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3599 *nullflag = 1; 3600 3601 if (newts[0].tv_nsec == UTIME_OMIT) 3602 newts[0].tv_sec = VNOVAL; 3603 else if (newts[0].tv_nsec == UTIME_NOW) 3604 newts[0] = tsnow; 3605 else if ((error = itimespecfix(&newts[0])) != 0) 3606 return (error); 3607 3608 if (newts[1].tv_nsec == UTIME_OMIT) 3609 newts[1].tv_sec = VNOVAL; 3610 else if (newts[1].tv_nsec == UTIME_NOW) 3611 newts[1] = tsnow; 3612 else if ((error = itimespecfix(&newts[1])) != 0) 3613 return (error); 3614 3615 return (0); 3616 } 3617 3618 static int 3619 setutimes(struct vnode *vp, struct vattr *vattr, 3620 const struct timespec *ts, int nullflag) 3621 { 3622 struct thread *td = curthread; 3623 int error; 3624 3625 VATTR_NULL(vattr); 3626 vattr->va_atime = ts[0]; 3627 vattr->va_mtime = ts[1]; 3628 if (nullflag) 3629 vattr->va_vaflags |= VA_UTIMES_NULL; 3630 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3631 3632 return error; 3633 } 3634 3635 int 3636 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3637 { 3638 struct timespec ts[2]; 3639 int error; 3640 3641 if (tptr) { 3642 if ((error = getutimes(tptr, ts)) != 0) 3643 return (error); 3644 } 3645 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3646 return (error); 3647 } 3648 3649 /* 3650 * utimes_args(char *path, struct timeval *tptr) 3651 * 3652 * Set the access and modification times of a file. 3653 */ 3654 int 3655 sys_utimes(struct utimes_args *uap) 3656 { 3657 struct timeval tv[2]; 3658 struct nlookupdata nd; 3659 int error; 3660 3661 if (uap->tptr) { 3662 error = copyin(uap->tptr, tv, sizeof(tv)); 3663 if (error) 3664 return (error); 3665 } 3666 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3667 if (error == 0) 3668 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3669 nlookup_done(&nd); 3670 return (error); 3671 } 3672 3673 /* 3674 * lutimes_args(char *path, struct timeval *tptr) 3675 * 3676 * Set the access and modification times of a file. 3677 */ 3678 int 3679 sys_lutimes(struct lutimes_args *uap) 3680 { 3681 struct timeval tv[2]; 3682 struct nlookupdata nd; 3683 int error; 3684 3685 if (uap->tptr) { 3686 error = copyin(uap->tptr, tv, sizeof(tv)); 3687 if (error) 3688 return (error); 3689 } 3690 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3691 if (error == 0) 3692 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3693 nlookup_done(&nd); 3694 return (error); 3695 } 3696 3697 /* 3698 * Set utimes on a file descriptor. The creds used to open the 3699 * file are used to determine whether the operation is allowed 3700 * or not. 3701 */ 3702 int 3703 kern_futimens(int fd, struct timespec *ts) 3704 { 3705 struct thread *td = curthread; 3706 struct timespec newts[2]; 3707 struct file *fp; 3708 struct vnode *vp; 3709 struct vattr vattr; 3710 int nullflag; 3711 int error; 3712 3713 error = getutimens(ts, newts, &nullflag); 3714 if (error) 3715 return (error); 3716 if ((error = holdvnode(td, fd, &fp)) != 0) 3717 return (error); 3718 if (fp->f_nchandle.ncp) 3719 error = ncp_writechk(&fp->f_nchandle); 3720 if (error == 0) { 3721 vp = fp->f_data; 3722 error = vget(vp, LK_EXCLUSIVE); 3723 if (error == 0) { 3724 error = VOP_GETATTR_FP(vp, &vattr, fp); 3725 if (error == 0) { 3726 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3727 fp->f_cred); 3728 } 3729 if (error == 0) { 3730 error = setutimes(vp, &vattr, newts, nullflag); 3731 } 3732 vput(vp); 3733 } 3734 } 3735 fdrop(fp); 3736 return (error); 3737 } 3738 3739 /* 3740 * futimens_args(int fd, struct timespec *ts) 3741 * 3742 * Set the access and modification times of a file. 
3743 */ 3744 int 3745 sys_futimens(struct futimens_args *uap) 3746 { 3747 struct timespec ts[2]; 3748 int error; 3749 3750 if (uap->ts) { 3751 error = copyin(uap->ts, ts, sizeof(ts)); 3752 if (error) 3753 return (error); 3754 } 3755 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3756 return (error); 3757 } 3758 3759 int 3760 kern_futimes(int fd, struct timeval *tptr) 3761 { 3762 struct timespec ts[2]; 3763 int error; 3764 3765 if (tptr) { 3766 if ((error = getutimes(tptr, ts)) != 0) 3767 return (error); 3768 } 3769 error = kern_futimens(fd, tptr ? ts : NULL); 3770 return (error); 3771 } 3772 3773 /* 3774 * futimes_args(int fd, struct timeval *tptr) 3775 * 3776 * Set the access and modification times of a file. 3777 */ 3778 int 3779 sys_futimes(struct futimes_args *uap) 3780 { 3781 struct timeval tv[2]; 3782 int error; 3783 3784 if (uap->tptr) { 3785 error = copyin(uap->tptr, tv, sizeof(tv)); 3786 if (error) 3787 return (error); 3788 } 3789 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3790 return (error); 3791 } 3792 3793 int 3794 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3795 { 3796 struct timespec newts[2]; 3797 struct vnode *vp; 3798 struct vattr vattr; 3799 int nullflag; 3800 int error; 3801 3802 if (flags & ~AT_SYMLINK_NOFOLLOW) 3803 return (EINVAL); 3804 3805 error = getutimens(ts, newts, &nullflag); 3806 if (error) 3807 return (error); 3808 3809 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3810 if ((error = nlookup(nd)) != 0) 3811 return (error); 3812 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3813 return (error); 3814 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3815 return (error); 3816 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3817 error = vget(vp, LK_EXCLUSIVE); 3818 if (error == 0) { 3819 error = setutimes(vp, &vattr, newts, nullflag); 3820 vput(vp); 3821 } 3822 } 3823 vrele(vp); 3824 return (error); 3825 } 3826 3827 /* 3828 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3829 * 3830 * Set file access and modification times of a file. 3831 */ 3832 int 3833 sys_utimensat(struct utimensat_args *uap) 3834 { 3835 struct timespec ts[2]; 3836 struct nlookupdata nd; 3837 struct file *fp; 3838 int error; 3839 int flags; 3840 3841 if (uap->ts) { 3842 error = copyin(uap->ts, ts, sizeof(ts)); 3843 if (error) 3844 return (error); 3845 } 3846 3847 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3848 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3849 UIO_USERSPACE, flags); 3850 if (error == 0) 3851 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3852 nlookup_done_at(&nd, fp); 3853 return (error); 3854 } 3855 3856 int 3857 kern_truncate(struct nlookupdata *nd, off_t length) 3858 { 3859 struct vnode *vp; 3860 struct vattr vattr; 3861 int error; 3862 uid_t uid = 0; 3863 gid_t gid = 0; 3864 uint64_t old_size = 0; 3865 3866 if (length < 0) 3867 return(EINVAL); 3868 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3869 if ((error = nlookup(nd)) != 0) 3870 return (error); 3871 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3872 return (error); 3873 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3874 return (error); 3875 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3876 if (error) { 3877 vrele(vp); 3878 return (error); 3879 } 3880 if (vp->v_type == VDIR) { 3881 error = EISDIR; 3882 goto done; 3883 } 3884 if (vfs_quota_enabled) { 3885 error = VOP_GETATTR(vp, &vattr); 3886 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3887 uid = vattr.va_uid; 3888 gid = vattr.va_gid; 3889 old_size = vattr.va_size; 3890 } 3891 3892 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3893 VATTR_NULL(&vattr); 3894 vattr.va_size = length; 3895 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3896 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3897 } 3898 done: 3899 vput(vp); 3900 return (error); 3901 } 3902 3903 /* 3904 * truncate(char *path, int pad, off_t length) 3905 * 3906 * Truncate a file given its path name. 
3907 */ 3908 int 3909 sys_truncate(struct truncate_args *uap) 3910 { 3911 struct nlookupdata nd; 3912 int error; 3913 3914 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3915 if (error == 0) 3916 error = kern_truncate(&nd, uap->length); 3917 nlookup_done(&nd); 3918 return error; 3919 } 3920 3921 int 3922 kern_ftruncate(int fd, off_t length) 3923 { 3924 struct thread *td = curthread; 3925 struct vattr vattr; 3926 struct vnode *vp; 3927 struct file *fp; 3928 int error; 3929 uid_t uid = 0; 3930 gid_t gid = 0; 3931 uint64_t old_size = 0; 3932 struct mount *mp; 3933 3934 if (length < 0) 3935 return(EINVAL); 3936 if ((error = holdvnode(td, fd, &fp)) != 0) 3937 return (error); 3938 if (fp->f_nchandle.ncp) { 3939 error = ncp_writechk(&fp->f_nchandle); 3940 if (error) 3941 goto done; 3942 } 3943 if ((fp->f_flag & FWRITE) == 0) { 3944 error = EINVAL; 3945 goto done; 3946 } 3947 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3948 error = EINVAL; 3949 goto done; 3950 } 3951 vp = (struct vnode *)fp->f_data; 3952 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3953 if (vp->v_type == VDIR) { 3954 error = EISDIR; 3955 vn_unlock(vp); 3956 goto done; 3957 } 3958 3959 if (vfs_quota_enabled) { 3960 error = VOP_GETATTR_FP(vp, &vattr, fp); 3961 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3962 uid = vattr.va_uid; 3963 gid = vattr.va_gid; 3964 old_size = vattr.va_size; 3965 } 3966 3967 if ((error = vn_writechk(vp, NULL)) == 0) { 3968 VATTR_NULL(&vattr); 3969 vattr.va_size = length; 3970 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp); 3971 mp = vq_vptomp(vp); 3972 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3973 } 3974 vn_unlock(vp); 3975 done: 3976 fdrop(fp); 3977 return (error); 3978 } 3979 3980 /* 3981 * ftruncate_args(int fd, int pad, off_t length) 3982 * 3983 * Truncate a file given a file descriptor. 
3984 */ 3985 int 3986 sys_ftruncate(struct ftruncate_args *uap) 3987 { 3988 int error; 3989 3990 error = kern_ftruncate(uap->fd, uap->length); 3991 3992 return (error); 3993 } 3994 3995 /* 3996 * fsync(int fd) 3997 * 3998 * Sync an open file. 3999 */ 4000 int 4001 sys_fsync(struct fsync_args *uap) 4002 { 4003 struct thread *td = curthread; 4004 struct vnode *vp; 4005 struct file *fp; 4006 vm_object_t obj; 4007 int error; 4008 4009 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 4010 return (error); 4011 vp = (struct vnode *)fp->f_data; 4012 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4013 if ((obj = vp->v_object) != NULL) { 4014 if (vp->v_mount == NULL || 4015 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 4016 vm_object_page_clean(obj, 0, 0, 0); 4017 } 4018 } 4019 error = VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp); 4020 if (error == 0 && vp->v_mount) 4021 error = buf_fsync(vp); 4022 vn_unlock(vp); 4023 fdrop(fp); 4024 4025 return (error); 4026 } 4027 4028 int 4029 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 4030 { 4031 struct nchandle fnchd; 4032 struct nchandle tnchd; 4033 struct namecache *ncp; 4034 struct vnode *fdvp; 4035 struct vnode *tdvp; 4036 struct mount *mp; 4037 int error; 4038 u_int fncp_gen; 4039 u_int tncp_gen; 4040 4041 bwillinode(1); 4042 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 4043 if ((error = nlookup(fromnd)) != 0) 4044 return (error); 4045 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 4046 return (ENOENT); 4047 fnchd.mount = fromnd->nl_nch.mount; 4048 cache_hold(&fnchd); 4049 4050 /* 4051 * unlock the source nch so we can lookup the target nch without 4052 * deadlocking. The target may or may not exist so we do not check 4053 * for a target vp like kern_mkdir() and other creation functions do. 4054 * 4055 * The source and target directories are ref'd and rechecked after 4056 * everything is relocked to determine if the source or target file 4057 * has been renamed. 
4058 */ 4059 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4060 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4061 4062 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4063 4064 cache_unlock(&fromnd->nl_nch); 4065 4066 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4067 if ((error = nlookup(tond)) != 0) { 4068 cache_drop(&fnchd); 4069 return (error); 4070 } 4071 tncp_gen = tond->nl_nch.ncp->nc_generation; 4072 4073 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4074 cache_drop(&fnchd); 4075 return (ENOENT); 4076 } 4077 tnchd.mount = tond->nl_nch.mount; 4078 cache_hold(&tnchd); 4079 4080 /* 4081 * If the source and target are the same there is nothing to do 4082 */ 4083 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4084 cache_drop(&fnchd); 4085 cache_drop(&tnchd); 4086 return (0); 4087 } 4088 4089 /* 4090 * Mount points cannot be renamed or overwritten 4091 */ 4092 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4093 NCF_ISMOUNTPT 4094 ) { 4095 cache_drop(&fnchd); 4096 cache_drop(&tnchd); 4097 return (EINVAL); 4098 } 4099 4100 /* 4101 * Relock the source ncp. cache_relock() will deal with any 4102 * deadlocks against the already-locked tond and will also 4103 * make sure both are resolved. 4104 * 4105 * NOTE AFTER RELOCKING: The source or target ncp may have become 4106 * invalid while they were unlocked, nc_vp and nc_mount could 4107 * be NULL. 4108 */ 4109 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 4110 &tond->nl_nch, tond->nl_cred); 4111 fromnd->nl_flags |= NLC_NCPISLOCKED; 4112 4113 /* 4114 * If the namecache generation changed for either fromnd or tond, 4115 * we must retry. 
4116 */ 4117 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 4118 tond->nl_nch.ncp->nc_generation != tncp_gen) { 4119 kprintf("kern_rename: retry due to gen on: " 4120 "\"%s\" -> \"%s\"\n", 4121 fromnd->nl_nch.ncp->nc_name, 4122 tond->nl_nch.ncp->nc_name); 4123 cache_drop(&fnchd); 4124 cache_drop(&tnchd); 4125 return (EAGAIN); 4126 } 4127 4128 /* 4129 * If either fromnd or tond are marked destroyed a ripout occured 4130 * out from under us and we must retry. 4131 */ 4132 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 4133 fromnd->nl_nch.ncp->nc_vp == NULL || 4134 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 4135 kprintf("kern_rename: retry due to ripout on: " 4136 "\"%s\" -> \"%s\"\n", 4137 fromnd->nl_nch.ncp->nc_name, 4138 tond->nl_nch.ncp->nc_name); 4139 cache_drop(&fnchd); 4140 cache_drop(&tnchd); 4141 return (EAGAIN); 4142 } 4143 4144 /* 4145 * Make sure the parent directories linkages are the same. 4146 * XXX shouldn't be needed any more w/ generation check above. 4147 */ 4148 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4149 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4150 cache_drop(&fnchd); 4151 cache_drop(&tnchd); 4152 return (ENOENT); 4153 } 4154 4155 /* 4156 * Both the source and target must be within the same filesystem and 4157 * in the same filesystem as their parent directories within the 4158 * namecache topology. 4159 * 4160 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 4161 */ 4162 mp = fnchd.mount; 4163 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4164 mp != tond->nl_nch.mount) { 4165 cache_drop(&fnchd); 4166 cache_drop(&tnchd); 4167 return (EXDEV); 4168 } 4169 4170 /* 4171 * Make sure the mount point is writable 4172 */ 4173 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4174 cache_drop(&fnchd); 4175 cache_drop(&tnchd); 4176 return (error); 4177 } 4178 4179 /* 4180 * If the target exists and either the source or target is a directory, 4181 * then both must be directories. 
4182 * 4183 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4184 * have become NULL. 4185 */ 4186 if (tond->nl_nch.ncp->nc_vp) { 4187 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4188 error = ENOENT; 4189 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4190 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4191 error = ENOTDIR; 4192 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4193 error = EISDIR; 4194 } 4195 } 4196 4197 /* 4198 * You cannot rename a source into itself or a subdirectory of itself. 4199 * We check this by travsersing the target directory upwards looking 4200 * for a match against the source. 4201 * 4202 * XXX MPSAFE 4203 */ 4204 if (error == 0) { 4205 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4206 if (fromnd->nl_nch.ncp == ncp) { 4207 error = EINVAL; 4208 break; 4209 } 4210 } 4211 } 4212 4213 cache_drop(&fnchd); 4214 cache_drop(&tnchd); 4215 4216 /* 4217 * Even though the namespaces are different, they may still represent 4218 * hardlinks to the same file. The filesystem might have a hard time 4219 * with this so we issue a NREMOVE of the source instead of a NRENAME 4220 * when we detect the situation. 4221 */ 4222 if (error == 0) { 4223 fdvp = fromnd->nl_dvp; 4224 tdvp = tond->nl_dvp; 4225 if (fdvp == NULL || tdvp == NULL) { 4226 error = EPERM; 4227 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4228 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4229 fromnd->nl_cred); 4230 } else { 4231 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4232 fdvp, tdvp, tond->nl_cred); 4233 } 4234 } 4235 return (error); 4236 } 4237 4238 /* 4239 * rename_args(char *from, char *to) 4240 * 4241 * Rename files. Source and destination must either both be directories, 4242 * or both not be directories. If target is a directory, it must be empty. 
4243 */ 4244 int 4245 sys_rename(struct rename_args *uap) 4246 { 4247 struct nlookupdata fromnd, tond; 4248 int error; 4249 4250 do { 4251 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4252 if (error == 0) { 4253 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4254 if (error == 0) 4255 error = kern_rename(&fromnd, &tond); 4256 nlookup_done(&tond); 4257 } 4258 nlookup_done(&fromnd); 4259 } while (error == EAGAIN); 4260 return (error); 4261 } 4262 4263 /* 4264 * renameat_args(int oldfd, char *old, int newfd, char *new) 4265 * 4266 * Rename files using paths relative to the directories associated with 4267 * oldfd and newfd. Source and destination must either both be directories, 4268 * or both not be directories. If target is a directory, it must be empty. 4269 */ 4270 int 4271 sys_renameat(struct renameat_args *uap) 4272 { 4273 struct nlookupdata oldnd, newnd; 4274 struct file *oldfp, *newfp; 4275 int error; 4276 4277 do { 4278 error = nlookup_init_at(&oldnd, &oldfp, 4279 uap->oldfd, uap->old, 4280 UIO_USERSPACE, 0); 4281 if (error == 0) { 4282 error = nlookup_init_at(&newnd, &newfp, 4283 uap->newfd, uap->new, 4284 UIO_USERSPACE, 0); 4285 if (error == 0) 4286 error = kern_rename(&oldnd, &newnd); 4287 nlookup_done_at(&newnd, newfp); 4288 } 4289 nlookup_done_at(&oldnd, oldfp); 4290 } while (error == EAGAIN); 4291 return (error); 4292 } 4293 4294 int 4295 kern_mkdir(struct nlookupdata *nd, int mode) 4296 { 4297 struct thread *td = curthread; 4298 struct proc *p = td->td_proc; 4299 struct vnode *vp; 4300 struct vattr vattr; 4301 int error; 4302 4303 bwillinode(1); 4304 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4305 if ((error = nlookup(nd)) != 0) 4306 return (error); 4307 4308 if (nd->nl_nch.ncp->nc_vp) 4309 return (EEXIST); 4310 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4311 return (error); 4312 VATTR_NULL(&vattr); 4313 vattr.va_type = VDIR; 4314 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4315 4316 vp = NULL; 
4317 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4318 if (error == 0) 4319 vput(vp); 4320 return (error); 4321 } 4322 4323 /* 4324 * mkdir_args(char *path, int mode) 4325 * 4326 * Make a directory file. 4327 */ 4328 int 4329 sys_mkdir(struct mkdir_args *uap) 4330 { 4331 struct nlookupdata nd; 4332 int error; 4333 4334 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4335 if (error == 0) 4336 error = kern_mkdir(&nd, uap->mode); 4337 nlookup_done(&nd); 4338 return (error); 4339 } 4340 4341 /* 4342 * mkdirat_args(int fd, char *path, mode_t mode) 4343 * 4344 * Make a directory file. The path is relative to the directory associated 4345 * with fd. 4346 */ 4347 int 4348 sys_mkdirat(struct mkdirat_args *uap) 4349 { 4350 struct nlookupdata nd; 4351 struct file *fp; 4352 int error; 4353 4354 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4355 if (error == 0) 4356 error = kern_mkdir(&nd, uap->mode); 4357 nlookup_done_at(&nd, fp); 4358 return (error); 4359 } 4360 4361 int 4362 kern_rmdir(struct nlookupdata *nd) 4363 { 4364 int error; 4365 4366 bwillinode(1); 4367 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4368 if ((error = nlookup(nd)) != 0) 4369 return (error); 4370 4371 /* 4372 * Do not allow directories representing mount points to be 4373 * deleted, even if empty. Check write perms on mount point 4374 * in case the vnode is aliased (aka nullfs). 4375 */ 4376 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4377 return (EBUSY); 4378 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4379 return (error); 4380 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4381 return (error); 4382 } 4383 4384 /* 4385 * rmdir_args(char *path) 4386 * 4387 * Remove a directory file. 
4388 */ 4389 int 4390 sys_rmdir(struct rmdir_args *uap) 4391 { 4392 struct nlookupdata nd; 4393 int error; 4394 4395 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4396 if (error == 0) 4397 error = kern_rmdir(&nd); 4398 nlookup_done(&nd); 4399 return (error); 4400 } 4401 4402 int 4403 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4404 enum uio_seg direction) 4405 { 4406 struct thread *td = curthread; 4407 struct vnode *vp; 4408 struct file *fp; 4409 struct uio auio; 4410 struct iovec aiov; 4411 off_t loff; 4412 int error, eofflag; 4413 4414 if ((error = holdvnode(td, fd, &fp)) != 0) 4415 return (error); 4416 if ((fp->f_flag & FREAD) == 0) { 4417 error = EBADF; 4418 goto done; 4419 } 4420 vp = (struct vnode *)fp->f_data; 4421 if (vp->v_type != VDIR) { 4422 error = EINVAL; 4423 goto done; 4424 } 4425 aiov.iov_base = buf; 4426 aiov.iov_len = count; 4427 auio.uio_iov = &aiov; 4428 auio.uio_iovcnt = 1; 4429 auio.uio_rw = UIO_READ; 4430 auio.uio_segflg = direction; 4431 auio.uio_td = td; 4432 auio.uio_resid = count; 4433 loff = auio.uio_offset = fp->f_offset; 4434 error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp); 4435 fp->f_offset = auio.uio_offset; 4436 if (error) 4437 goto done; 4438 4439 /* 4440 * WARNING! *basep may not be wide enough to accomodate the 4441 * seek offset. XXX should we hack this to return the upper 32 bits 4442 * for offsets greater then 4G? 4443 */ 4444 if (basep) { 4445 *basep = (long)loff; 4446 } 4447 *res = count - auio.uio_resid; 4448 done: 4449 fdrop(fp); 4450 return (error); 4451 } 4452 4453 /* 4454 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4455 * 4456 * Read a block of directory entries in a file system independent format. 
4457 */ 4458 int 4459 sys_getdirentries(struct getdirentries_args *uap) 4460 { 4461 long base; 4462 int error; 4463 4464 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4465 &uap->sysmsg_result, UIO_USERSPACE); 4466 4467 if (error == 0 && uap->basep) 4468 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4469 return (error); 4470 } 4471 4472 /* 4473 * getdents_args(int fd, char *buf, size_t count) 4474 */ 4475 int 4476 sys_getdents(struct getdents_args *uap) 4477 { 4478 int error; 4479 4480 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4481 &uap->sysmsg_result, UIO_USERSPACE); 4482 4483 return (error); 4484 } 4485 4486 /* 4487 * Set the mode mask for creation of filesystem nodes. 4488 * 4489 * umask(int newmask) 4490 */ 4491 int 4492 sys_umask(struct umask_args *uap) 4493 { 4494 struct thread *td = curthread; 4495 struct proc *p = td->td_proc; 4496 struct filedesc *fdp; 4497 4498 fdp = p->p_fd; 4499 uap->sysmsg_result = fdp->fd_cmask; 4500 fdp->fd_cmask = uap->newmask & ALLPERMS; 4501 return (0); 4502 } 4503 4504 /* 4505 * revoke(char *path) 4506 * 4507 * Void all references to file by ripping underlying filesystem 4508 * away from vnode. 
4509 */ 4510 int 4511 sys_revoke(struct revoke_args *uap) 4512 { 4513 struct nlookupdata nd; 4514 struct vattr vattr; 4515 struct vnode *vp; 4516 struct ucred *cred; 4517 int error; 4518 4519 vp = NULL; 4520 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4521 if (error == 0) 4522 error = nlookup(&nd); 4523 if (error == 0) 4524 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4525 cred = crhold(nd.nl_cred); 4526 nlookup_done(&nd); 4527 if (error == 0) { 4528 if (error == 0) 4529 error = VOP_GETATTR(vp, &vattr); 4530 if (error == 0 && cred->cr_uid != vattr.va_uid) 4531 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4532 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4533 if (vcount(vp) > 0) 4534 error = vrevoke(vp, cred); 4535 } else if (error == 0) { 4536 error = vrevoke(vp, cred); 4537 } 4538 vrele(vp); 4539 } 4540 if (cred) 4541 crfree(cred); 4542 return (error); 4543 } 4544 4545 /* 4546 * getfh_args(char *fname, fhandle_t *fhp) 4547 * 4548 * Get (NFS) file handle 4549 * 4550 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4551 * mount. This allows nullfs mounts to be explicitly exported. 4552 * 4553 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4554 * 4555 * nullfs mounts of subdirectories are not safe. That is, it will 4556 * work, but you do not really have protection against access to 4557 * the related parent directories. 
4558 */ 4559 int 4560 sys_getfh(struct getfh_args *uap) 4561 { 4562 struct thread *td = curthread; 4563 struct nlookupdata nd; 4564 fhandle_t fh; 4565 struct vnode *vp; 4566 struct mount *mp; 4567 int error; 4568 4569 /* 4570 * Must be super user 4571 */ 4572 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4573 return (error); 4574 4575 vp = NULL; 4576 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4577 if (error == 0) 4578 error = nlookup(&nd); 4579 if (error == 0) 4580 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4581 mp = nd.nl_nch.mount; 4582 nlookup_done(&nd); 4583 if (error == 0) { 4584 bzero(&fh, sizeof(fh)); 4585 fh.fh_fsid = mp->mnt_stat.f_fsid; 4586 error = VFS_VPTOFH(vp, &fh.fh_fid); 4587 vput(vp); 4588 if (error == 0) 4589 error = copyout(&fh, uap->fhp, sizeof(fh)); 4590 } 4591 return (error); 4592 } 4593 4594 /* 4595 * fhopen_args(const struct fhandle *u_fhp, int flags) 4596 * 4597 * syscall for the rpc.lockd to use to translate a NFS file handle into 4598 * an open descriptor. 4599 * 4600 * warning: do not remove the priv_check() call or this becomes one giant 4601 * security hole. 4602 */ 4603 int 4604 sys_fhopen(struct fhopen_args *uap) 4605 { 4606 struct thread *td = curthread; 4607 struct filedesc *fdp = td->td_proc->p_fd; 4608 struct mount *mp; 4609 struct vnode *vp; 4610 struct fhandle fhp; 4611 struct vattr vat; 4612 struct vattr *vap = &vat; 4613 struct flock lf; 4614 int fmode, mode, error = 0, type; 4615 struct file *nfp; 4616 struct file *fp; 4617 int indx; 4618 4619 /* 4620 * Must be super user 4621 */ 4622 error = priv_check(td, PRIV_ROOT); 4623 if (error) 4624 return (error); 4625 4626 fmode = FFLAGS(uap->flags); 4627 4628 /* 4629 * Why not allow a non-read/write open for our lockd? 
4630 */ 4631 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4632 return (EINVAL); 4633 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4634 if (error) 4635 return(error); 4636 4637 /* 4638 * Find the mount point 4639 */ 4640 mp = vfs_getvfs(&fhp.fh_fsid); 4641 if (mp == NULL) { 4642 error = ESTALE; 4643 goto done2; 4644 } 4645 /* now give me my vnode, it gets returned to me locked */ 4646 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4647 if (error) 4648 goto done; 4649 /* 4650 * from now on we have to make sure not 4651 * to forget about the vnode 4652 * any error that causes an abort must vput(vp) 4653 * just set error = err and 'goto bad;'. 4654 */ 4655 4656 /* 4657 * from vn_open 4658 */ 4659 if (vp->v_type == VLNK) { 4660 error = EMLINK; 4661 goto bad; 4662 } 4663 if (vp->v_type == VSOCK) { 4664 error = EOPNOTSUPP; 4665 goto bad; 4666 } 4667 mode = 0; 4668 if (fmode & (FWRITE | O_TRUNC)) { 4669 if (vp->v_type == VDIR) { 4670 error = EISDIR; 4671 goto bad; 4672 } 4673 error = vn_writechk(vp, NULL); 4674 if (error) 4675 goto bad; 4676 mode |= VWRITE; 4677 } 4678 if (fmode & FREAD) 4679 mode |= VREAD; 4680 if (mode) { 4681 error = VOP_ACCESS(vp, mode, td->td_ucred); 4682 if (error) 4683 goto bad; 4684 } 4685 if (fmode & O_TRUNC) { 4686 vn_unlock(vp); /* XXX */ 4687 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4688 VATTR_NULL(vap); 4689 vap->va_size = 0; 4690 error = VOP_SETATTR(vp, vap, td->td_ucred); 4691 if (error) 4692 goto bad; 4693 } 4694 4695 /* 4696 * VOP_OPEN needs the file pointer so it can potentially override 4697 * it. 4698 * 4699 * WARNING! no f_nchandle will be associated when fhopen()ing a 4700 * directory. XXX 4701 */ 4702 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4703 goto bad; 4704 fp = nfp; 4705 4706 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4707 if (error) { 4708 /* 4709 * setting f_ops this way prevents VOP_CLOSE from being 4710 * called or fdrop() releasing the vp from v_data. 
Since 4711 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4712 */ 4713 fp->f_ops = &badfileops; 4714 fp->f_data = NULL; 4715 goto bad_drop; 4716 } 4717 4718 /* 4719 * The fp is given its own reference, we still have our ref and lock. 4720 * 4721 * Assert that all regular files must be created with a VM object. 4722 */ 4723 if (vp->v_type == VREG && vp->v_object == NULL) { 4724 kprintf("fhopen: regular file did not " 4725 "have VM object: %p\n", 4726 vp); 4727 goto bad_drop; 4728 } 4729 4730 /* 4731 * The open was successful. Handle any locking requirements. 4732 */ 4733 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4734 lf.l_whence = SEEK_SET; 4735 lf.l_start = 0; 4736 lf.l_len = 0; 4737 if (fmode & O_EXLOCK) 4738 lf.l_type = F_WRLCK; 4739 else 4740 lf.l_type = F_RDLCK; 4741 if (fmode & FNONBLOCK) 4742 type = 0; 4743 else 4744 type = F_WAIT; 4745 vn_unlock(vp); 4746 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4747 &lf, type)) != 0) { 4748 /* 4749 * release our private reference. 4750 */ 4751 fsetfd(fdp, NULL, indx); 4752 fdrop(fp); 4753 vrele(vp); 4754 goto done; 4755 } 4756 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4757 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4758 } 4759 4760 /* 4761 * Clean up. Associate the file pointer with the previously 4762 * reserved descriptor and return it. 
4763 */ 4764 vput(vp); 4765 if (uap->flags & O_CLOEXEC) 4766 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4767 fsetfd(fdp, fp, indx); 4768 fdrop(fp); 4769 uap->sysmsg_result = indx; 4770 mount_drop(mp); 4771 4772 return (error); 4773 4774 bad_drop: 4775 fsetfd(fdp, NULL, indx); 4776 fdrop(fp); 4777 bad: 4778 vput(vp); 4779 done: 4780 mount_drop(mp); 4781 done2: 4782 return (error); 4783 } 4784 4785 /* 4786 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4787 */ 4788 int 4789 sys_fhstat(struct fhstat_args *uap) 4790 { 4791 struct thread *td = curthread; 4792 struct stat sb; 4793 fhandle_t fh; 4794 struct mount *mp; 4795 struct vnode *vp; 4796 int error; 4797 4798 /* 4799 * Must be super user 4800 */ 4801 error = priv_check(td, PRIV_ROOT); 4802 if (error) 4803 return (error); 4804 4805 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4806 if (error) 4807 return (error); 4808 4809 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4810 error = ESTALE; 4811 if (error == 0) { 4812 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4813 error = vn_stat(vp, &sb, td->td_ucred); 4814 vput(vp); 4815 } 4816 } 4817 if (error == 0) 4818 error = copyout(&sb, uap->sb, sizeof(sb)); 4819 if (mp) 4820 mount_drop(mp); 4821 4822 return (error); 4823 } 4824 4825 /* 4826 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4827 */ 4828 int 4829 sys_fhstatfs(struct fhstatfs_args *uap) 4830 { 4831 struct thread *td = curthread; 4832 struct proc *p = td->td_proc; 4833 struct statfs *sp; 4834 struct mount *mp; 4835 struct vnode *vp; 4836 struct statfs sb; 4837 char *fullpath, *freepath; 4838 fhandle_t fh; 4839 int error; 4840 4841 /* 4842 * Must be super user 4843 */ 4844 if ((error = priv_check(td, PRIV_ROOT))) 4845 return (error); 4846 4847 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4848 return (error); 4849 4850 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4851 error = ESTALE; 4852 goto done; 4853 } 4854 if (p != NULL && !chroot_visible_mnt(mp, p)) { 
4855 error = ESTALE; 4856 goto done; 4857 } 4858 4859 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4860 goto done; 4861 mp = vp->v_mount; 4862 sp = &mp->mnt_stat; 4863 vput(vp); 4864 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4865 goto done; 4866 4867 error = mount_path(p, mp, &fullpath, &freepath); 4868 if (error) 4869 goto done; 4870 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4871 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4872 kfree(freepath, M_TEMP); 4873 4874 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4875 if (priv_check(td, PRIV_ROOT)) { 4876 bcopy(sp, &sb, sizeof(sb)); 4877 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4878 sp = &sb; 4879 } 4880 error = copyout(sp, uap->buf, sizeof(*sp)); 4881 done: 4882 if (mp) 4883 mount_drop(mp); 4884 4885 return (error); 4886 } 4887 4888 /* 4889 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4890 */ 4891 int 4892 sys_fhstatvfs(struct fhstatvfs_args *uap) 4893 { 4894 struct thread *td = curthread; 4895 struct proc *p = td->td_proc; 4896 struct statvfs *sp; 4897 struct mount *mp; 4898 struct vnode *vp; 4899 fhandle_t fh; 4900 int error; 4901 4902 /* 4903 * Must be super user 4904 */ 4905 if ((error = priv_check(td, PRIV_ROOT))) 4906 return (error); 4907 4908 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4909 return (error); 4910 4911 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4912 error = ESTALE; 4913 goto done; 4914 } 4915 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4916 error = ESTALE; 4917 goto done; 4918 } 4919 4920 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4921 goto done; 4922 mp = vp->v_mount; 4923 sp = &mp->mnt_vstat; 4924 vput(vp); 4925 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4926 goto done; 4927 4928 sp->f_flag = 0; 4929 if (mp->mnt_flag & MNT_RDONLY) 4930 sp->f_flag |= ST_RDONLY; 4931 if (mp->mnt_flag & MNT_NOSUID) 4932 sp->f_flag |= ST_NOSUID; 4933 error = copyout(sp, uap->buf, sizeof(*sp)); 4934 done: 4935 
if (mp) 4936 mount_drop(mp); 4937 return (error); 4938 } 4939 4940 4941 /* 4942 * Syscall to push extended attribute configuration information into the 4943 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4944 * a command (int cmd), and attribute name and misc data. For now, the 4945 * attribute name is left in userspace for consumption by the VFS_op. 4946 * It will probably be changed to be copied into sysspace by the 4947 * syscall in the future, once issues with various consumers of the 4948 * attribute code have raised their hands. 4949 * 4950 * Currently this is used only by UFS Extended Attributes. 4951 */ 4952 int 4953 sys_extattrctl(struct extattrctl_args *uap) 4954 { 4955 struct nlookupdata nd; 4956 struct vnode *vp; 4957 char attrname[EXTATTR_MAXNAMELEN]; 4958 int error; 4959 size_t size; 4960 4961 attrname[0] = 0; 4962 vp = NULL; 4963 error = 0; 4964 4965 if (error == 0 && uap->filename) { 4966 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4967 NLC_FOLLOW); 4968 if (error == 0) 4969 error = nlookup(&nd); 4970 if (error == 0) 4971 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4972 nlookup_done(&nd); 4973 } 4974 4975 if (error == 0 && uap->attrname) { 4976 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4977 &size); 4978 } 4979 4980 if (error == 0) { 4981 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4982 if (error == 0) 4983 error = nlookup(&nd); 4984 if (error == 0) 4985 error = ncp_writechk(&nd.nl_nch); 4986 if (error == 0) { 4987 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4988 uap->attrnamespace, 4989 uap->attrname, nd.nl_cred); 4990 } 4991 nlookup_done(&nd); 4992 } 4993 4994 return (error); 4995 } 4996 4997 /* 4998 * Syscall to get a named extended attribute on a file or directory. 
4999 */ 5000 int 5001 sys_extattr_set_file(struct extattr_set_file_args *uap) 5002 { 5003 char attrname[EXTATTR_MAXNAMELEN]; 5004 struct nlookupdata nd; 5005 struct vnode *vp; 5006 struct uio auio; 5007 struct iovec aiov; 5008 int error; 5009 5010 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5011 if (error) 5012 return (error); 5013 5014 vp = NULL; 5015 5016 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5017 if (error == 0) 5018 error = nlookup(&nd); 5019 if (error == 0) 5020 error = ncp_writechk(&nd.nl_nch); 5021 if (error == 0) 5022 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5023 if (error) { 5024 nlookup_done(&nd); 5025 return (error); 5026 } 5027 5028 bzero(&auio, sizeof(auio)); 5029 aiov.iov_base = uap->data; 5030 aiov.iov_len = uap->nbytes; 5031 auio.uio_iov = &aiov; 5032 auio.uio_iovcnt = 1; 5033 auio.uio_offset = 0; 5034 auio.uio_resid = uap->nbytes; 5035 auio.uio_rw = UIO_WRITE; 5036 auio.uio_td = curthread; 5037 5038 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 5039 &auio, nd.nl_cred); 5040 5041 vput(vp); 5042 nlookup_done(&nd); 5043 return (error); 5044 } 5045 5046 /* 5047 * Syscall to get a named extended attribute on a file or directory. 
5048 */ 5049 int 5050 sys_extattr_get_file(struct extattr_get_file_args *uap) 5051 { 5052 char attrname[EXTATTR_MAXNAMELEN]; 5053 struct nlookupdata nd; 5054 struct uio auio; 5055 struct iovec aiov; 5056 struct vnode *vp; 5057 int error; 5058 5059 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5060 if (error) 5061 return (error); 5062 5063 vp = NULL; 5064 5065 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5066 if (error == 0) 5067 error = nlookup(&nd); 5068 if (error == 0) 5069 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5070 if (error) { 5071 nlookup_done(&nd); 5072 return (error); 5073 } 5074 5075 bzero(&auio, sizeof(auio)); 5076 aiov.iov_base = uap->data; 5077 aiov.iov_len = uap->nbytes; 5078 auio.uio_iov = &aiov; 5079 auio.uio_iovcnt = 1; 5080 auio.uio_offset = 0; 5081 auio.uio_resid = uap->nbytes; 5082 auio.uio_rw = UIO_READ; 5083 auio.uio_td = curthread; 5084 5085 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5086 &auio, nd.nl_cred); 5087 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 5088 5089 vput(vp); 5090 nlookup_done(&nd); 5091 return(error); 5092 } 5093 5094 /* 5095 * Syscall to delete a named extended attribute from a file or directory. 5096 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
5097 */ 5098 int 5099 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 5100 { 5101 char attrname[EXTATTR_MAXNAMELEN]; 5102 struct nlookupdata nd; 5103 struct vnode *vp; 5104 int error; 5105 5106 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5107 if (error) 5108 return(error); 5109 5110 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5111 if (error == 0) 5112 error = nlookup(&nd); 5113 if (error == 0) 5114 error = ncp_writechk(&nd.nl_nch); 5115 if (error == 0) { 5116 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5117 if (error == 0) { 5118 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5119 attrname, NULL, nd.nl_cred); 5120 vput(vp); 5121 } 5122 } 5123 nlookup_done(&nd); 5124 return(error); 5125 } 5126 5127 /* 5128 * Determine if the mount is visible to the process. 5129 */ 5130 static int 5131 chroot_visible_mnt(struct mount *mp, struct proc *p) 5132 { 5133 struct nchandle nch; 5134 5135 /* 5136 * Traverse from the mount point upwards. If we hit the process 5137 * root then the mount point is visible to the process. 5138 */ 5139 nch = mp->mnt_ncmountpt; 5140 while (nch.ncp) { 5141 if (nch.mount == p->p_fd->fd_nrdir.mount && 5142 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5143 return(1); 5144 } 5145 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5146 nch = nch.mount->mnt_ncmounton; 5147 } else { 5148 nch.ncp = nch.ncp->nc_parent; 5149 } 5150 } 5151 5152 /* 5153 * If the mount point is not visible to the process, but the 5154 * process root is in a subdirectory of the mount, return 5155 * TRUE anyway. 5156 */ 5157 if (p->p_fd->fd_nrdir.mount == mp) 5158 return(1); 5159 5160 return(0); 5161 } 5162 5163