1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 79 __printflike(2, 3); 80 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 81 static int checkvp_chdir (struct vnode *vn, struct thread *td); 82 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 83 static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp); 84 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 85 static int getutimes (struct timeval *, struct timespec *); 86 static int getutimens (const struct timespec *, struct timespec *, int *); 87 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 88 static int setfmode (struct vnode *, int); 89 static int setfflags (struct vnode *, int); 90 static int setutimes (struct vnode *, struct vattr *, 91 const struct timespec *, int); 92 93 static int usermount = 0; /* if 1, non-root can mount fs. */ 94 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 95 "Allow non-root users to mount filesystems"); 96 97 static int debug_unmount = 0; /* if 1 loop until unmount success */ 98 SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0, 99 "Stall failed unmounts in loop"); 100 /* 101 * Virtual File System System Calls 102 */ 103 104 /* 105 * Mount a file system. 106 * 107 * mount_args(char *type, char *path, int flags, caddr_t data) 108 * 109 * MPALMOSTSAFE 110 */ 111 int 112 sys_mount(struct mount_args *uap) 113 { 114 struct thread *td = curthread; 115 struct vnode *vp; 116 struct nchandle nch; 117 struct mount *mp, *nullmp; 118 struct vfsconf *vfsp; 119 int error, flag = 0, flag2 = 0; 120 int hasmount; 121 struct vattr va; 122 struct nlookupdata nd; 123 char fstypename[MFSNAMELEN]; 124 struct ucred *cred; 125 126 cred = td->td_ucred; 127 if (jailed(cred)) { 128 error = EPERM; 129 goto done; 130 } 131 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 132 goto done; 133 134 /* 135 * Do not allow NFS export by non-root users. 136 */ 137 if (uap->flags & MNT_EXPORTED) { 138 error = priv_check(td, PRIV_ROOT); 139 if (error) 140 goto done; 141 } 142 /* 143 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 144 */ 145 if (priv_check(td, PRIV_ROOT)) 146 uap->flags |= MNT_NOSUID | MNT_NODEV; 147 148 /* 149 * Lookup the requested path and extract the nch and vnode. 150 */ 151 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 152 if (error == 0) { 153 if ((error = nlookup(&nd)) == 0) { 154 if (nd.nl_nch.ncp->nc_vp == NULL) 155 error = ENOENT; 156 } 157 } 158 if (error) { 159 nlookup_done(&nd); 160 goto done; 161 } 162 163 /* 164 * If the target filesystem is resolved via a nullfs mount, then 165 * nd.nl_nch.mount will be pointing to the nullfs mount structure 166 * instead of the target file system. We need it in case we are 167 * doing an update. 168 */ 169 nullmp = nd.nl_nch.mount; 170 171 /* 172 * Extract the locked+refd ncp and cleanup the nd structure 173 */ 174 nch = nd.nl_nch; 175 cache_zero(&nd.nl_nch); 176 nlookup_done(&nd); 177 178 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 179 (mp = cache_findmount(&nch)) != NULL) { 180 cache_dropmount(mp); 181 hasmount = 1; 182 } else { 183 hasmount = 0; 184 } 185 186 187 /* 188 * now we have the locked ref'd nch and unreferenced vnode. 189 */ 190 vp = nch.ncp->nc_vp; 191 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 192 cache_put(&nch); 193 goto done; 194 } 195 cache_unlock(&nch); 196 197 /* 198 * Extract the file system type. We need to know this early, to take 199 * appropriate actions if we are dealing with a nullfs. 200 */ 201 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) { 202 cache_drop(&nch); 203 vput(vp); 204 goto done; 205 } 206 207 /* 208 * Now we have an unlocked ref'd nch and a locked ref'd vp 209 */ 210 if (uap->flags & MNT_UPDATE) { 211 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 212 cache_drop(&nch); 213 vput(vp); 214 error = EINVAL; 215 goto done; 216 } 217 218 if (strncmp(fstypename, "null", 5) == 0) { 219 KKASSERT(nullmp); 220 mp = nullmp; 221 } else { 222 mp = vp->v_mount; 223 } 224 225 flag = mp->mnt_flag; 226 flag2 = mp->mnt_kern_flag; 227 /* 228 * We only allow the filesystem to be reloaded if it 229 * is currently mounted read-only. 230 */ 231 if ((uap->flags & MNT_RELOAD) && 232 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 233 cache_drop(&nch); 234 vput(vp); 235 error = EOPNOTSUPP; /* Needs translation */ 236 goto done; 237 } 238 /* 239 * Only root, or the user that did the original mount is 240 * permitted to update it. 241 */ 242 if (mp->mnt_stat.f_owner != cred->cr_uid && 243 (error = priv_check(td, PRIV_ROOT))) { 244 cache_drop(&nch); 245 vput(vp); 246 goto done; 247 } 248 if (vfs_busy(mp, LK_NOWAIT)) { 249 cache_drop(&nch); 250 vput(vp); 251 error = EBUSY; 252 goto done; 253 } 254 if (hasmount) { 255 cache_drop(&nch); 256 vfs_unbusy(mp); 257 vput(vp); 258 error = EBUSY; 259 goto done; 260 } 261 mp->mnt_flag |= 262 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 263 lwkt_gettoken(&mp->mnt_token); 264 vn_unlock(vp); 265 vfsp = mp->mnt_vfc; 266 goto update; 267 } 268 269 /* 270 * If the user is not root, ensure that they own the directory 271 * onto which we are attempting to mount. 272 */ 273 if ((error = VOP_GETATTR(vp, &va)) || 274 (va.va_uid != cred->cr_uid && 275 (error = priv_check(td, PRIV_ROOT)))) { 276 cache_drop(&nch); 277 vput(vp); 278 goto done; 279 } 280 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 281 cache_drop(&nch); 282 vput(vp); 283 goto done; 284 } 285 if (vp->v_type != VDIR) { 286 cache_drop(&nch); 287 vput(vp); 288 error = ENOTDIR; 289 goto done; 290 } 291 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 292 cache_drop(&nch); 293 vput(vp); 294 error = EPERM; 295 goto done; 296 } 297 vfsp = vfsconf_find_by_name(fstypename); 298 if (vfsp == NULL) { 299 linker_file_t lf; 300 301 /* Only load modules for root (very important!) */ 302 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 303 cache_drop(&nch); 304 vput(vp); 305 goto done; 306 } 307 error = linker_load_file(fstypename, &lf); 308 if (error || lf == NULL) { 309 cache_drop(&nch); 310 vput(vp); 311 if (lf == NULL) 312 error = ENODEV; 313 goto done; 314 } 315 lf->userrefs++; 316 /* lookup again, see if the VFS was loaded */ 317 vfsp = vfsconf_find_by_name(fstypename); 318 if (vfsp == NULL) { 319 lf->userrefs--; 320 linker_file_unload(lf); 321 cache_drop(&nch); 322 vput(vp); 323 error = ENODEV; 324 goto done; 325 } 326 } 327 if (hasmount) { 328 cache_drop(&nch); 329 vput(vp); 330 error = EBUSY; 331 goto done; 332 } 333 334 /* 335 * Allocate and initialize the filesystem. 336 */ 337 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 338 mount_init(mp); 339 vfs_busy(mp, LK_NOWAIT); 340 mp->mnt_op = vfsp->vfc_vfsops; 341 mp->mnt_vfc = vfsp; 342 mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT; 343 vfsp->vfc_refcount++; 344 mp->mnt_stat.f_type = vfsp->vfc_typenum; 345 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 346 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 347 mp->mnt_stat.f_owner = cred->cr_uid; 348 lwkt_gettoken(&mp->mnt_token); 349 vn_unlock(vp); 350 update: 351 /* 352 * (per-mount token acquired at this point) 353 * 354 * Set the mount level flags. 355 */ 356 if (uap->flags & MNT_RDONLY) 357 mp->mnt_flag |= MNT_RDONLY; 358 else if (mp->mnt_flag & MNT_RDONLY) 359 mp->mnt_kern_flag |= MNTK_WANTRDWR; 360 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 361 MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | 362 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 363 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 364 MNT_AUTOMOUNTED); 365 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 366 MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE | 367 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 368 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 369 MNT_AUTOMOUNTED); 370 371 /* 372 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf. 373 * This way the initial VFS_MOUNT() call will also be MPSAFE. 374 */ 375 if (vfsp->vfc_flags & VFCF_MPSAFE) 376 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 377 378 /* 379 * Mount the filesystem. 380 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 381 * get. 382 */ 383 if (mp->mnt_flag & MNT_UPDATE) { 384 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 385 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 386 mp->mnt_flag &= ~MNT_RDONLY; 387 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 388 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 389 if (error) { 390 mp->mnt_flag = flag; 391 mp->mnt_kern_flag = flag2; 392 } 393 lwkt_reltoken(&mp->mnt_token); 394 vfs_unbusy(mp); 395 vrele(vp); 396 cache_drop(&nch); 397 goto done; 398 } 399 mp->mnt_ncmounton = nch; 400 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 401 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 402 403 /* 404 * Put the new filesystem on the mount list after root. The mount 405 * point gets its own mnt_ncmountpt (unless the VFS already set one 406 * up) which represents the root of the mount. The lookup code 407 * detects the mount point going forward and checks the root of 408 * the mount going backwards. 409 * 410 * It is not necessary to invalidate or purge the vnode underneath 411 * because elements under the mount will be given their own glue 412 * namecache record. 413 */ 414 if (!error) { 415 if (mp->mnt_ncmountpt.ncp == NULL) { 416 /* 417 * Allocate, then unlock, but leave the ref intact. 418 * This is the mnt_refs (1) that we will retain 419 * through to the unmount. 420 */ 421 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 422 cache_unlock(&mp->mnt_ncmountpt); 423 } 424 vn_unlock(vp); 425 cache_lock(&nch); 426 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 427 cache_unlock(&nch); 428 cache_ismounting(mp); 429 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 430 431 mountlist_insert(mp, MNTINS_LAST); 432 vn_unlock(vp); 433 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 434 error = vfs_allocate_syncvnode(mp); 435 lwkt_reltoken(&mp->mnt_token); 436 vfs_unbusy(mp); 437 error = VFS_START(mp, 0); 438 vrele(vp); 439 KNOTE(&fs_klist, VQ_MOUNT); 440 } else { 441 bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton)); 442 vn_syncer_thr_stop(mp); 443 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 444 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 445 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 446 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 447 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 448 mp->mnt_vfc->vfc_refcount--; 449 lwkt_reltoken(&mp->mnt_token); 450 vfs_unbusy(mp); 451 kfree(mp, M_MOUNT); 452 cache_drop(&nch); 453 vput(vp); 454 } 455 done: 456 return (error); 457 } 458 459 /* 460 * Scan all active processes to see if any of them have a current 461 * or root directory onto which the new filesystem has just been 462 * mounted. If so, replace them with the new mount point. 463 * 464 * Both old_nch and new_nch are ref'd on call but not locked. 465 * new_nch must be temporarily locked so it can be associated with the 466 * vnode representing the root of the mount point. 467 */ 468 struct checkdirs_info { 469 struct nchandle old_nch; 470 struct nchandle new_nch; 471 struct vnode *old_vp; 472 struct vnode *new_vp; 473 }; 474 475 static int checkdirs_callback(struct proc *p, void *data); 476 477 static void 478 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 479 { 480 struct checkdirs_info info; 481 struct vnode *olddp; 482 struct vnode *newdp; 483 struct mount *mp; 484 485 /* 486 * If the old mount point's vnode has a usecount of 1, it is not 487 * being held as a descriptor anywhere. 488 */ 489 olddp = old_nch->ncp->nc_vp; 490 if (olddp == NULL || VREFCNT(olddp) == 1) 491 return; 492 493 /* 494 * Force the root vnode of the new mount point to be resolved 495 * so we can update any matching processes. 496 */ 497 mp = new_nch->mount; 498 if (VFS_ROOT(mp, &newdp)) 499 panic("mount: lost mount"); 500 vn_unlock(newdp); 501 cache_lock(new_nch); 502 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 503 cache_setunresolved(new_nch); 504 cache_setvp(new_nch, newdp); 505 cache_unlock(new_nch); 506 507 /* 508 * Special handling of the root node 509 */ 510 if (rootvnode == olddp) { 511 vref(newdp); 512 vfs_cache_setroot(newdp, cache_hold(new_nch)); 513 } 514 515 /* 516 * Pass newdp separately so the callback does not have to access 517 * it via new_nch->ncp->nc_vp. 518 */ 519 info.old_nch = *old_nch; 520 info.new_nch = *new_nch; 521 info.new_vp = newdp; 522 allproc_scan(checkdirs_callback, &info, 0); 523 vput(newdp); 524 } 525 526 /* 527 * NOTE: callback is not MP safe because the scanned process's filedesc 528 * structure can be ripped out from under us, amoung other things. 529 */ 530 static int 531 checkdirs_callback(struct proc *p, void *data) 532 { 533 struct checkdirs_info *info = data; 534 struct filedesc *fdp; 535 struct nchandle ncdrop1; 536 struct nchandle ncdrop2; 537 struct vnode *vprele1; 538 struct vnode *vprele2; 539 540 if ((fdp = p->p_fd) != NULL) { 541 cache_zero(&ncdrop1); 542 cache_zero(&ncdrop2); 543 vprele1 = NULL; 544 vprele2 = NULL; 545 546 /* 547 * MPUNSAFE - XXX fdp can be pulled out from under a 548 * foreign process. 549 * 550 * A shared filedesc is ok, we don't have to copy it 551 * because we are making this change globally. 552 */ 553 spin_lock(&fdp->fd_spin); 554 if (fdp->fd_ncdir.mount == info->old_nch.mount && 555 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 556 vprele1 = fdp->fd_cdir; 557 vref(info->new_vp); 558 fdp->fd_cdir = info->new_vp; 559 ncdrop1 = fdp->fd_ncdir; 560 cache_copy(&info->new_nch, &fdp->fd_ncdir); 561 } 562 if (fdp->fd_nrdir.mount == info->old_nch.mount && 563 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 564 vprele2 = fdp->fd_rdir; 565 vref(info->new_vp); 566 fdp->fd_rdir = info->new_vp; 567 ncdrop2 = fdp->fd_nrdir; 568 cache_copy(&info->new_nch, &fdp->fd_nrdir); 569 } 570 spin_unlock(&fdp->fd_spin); 571 if (ncdrop1.ncp) 572 cache_drop(&ncdrop1); 573 if (ncdrop2.ncp) 574 cache_drop(&ncdrop2); 575 if (vprele1) 576 vrele(vprele1); 577 if (vprele2) 578 vrele(vprele2); 579 } 580 return(0); 581 } 582 583 /* 584 * Unmount a file system. 585 * 586 * Note: unmount takes a path to the vnode mounted on as argument, 587 * not special file (as before). 588 * 589 * umount_args(char *path, int flags) 590 * 591 * MPALMOSTSAFE 592 */ 593 int 594 sys_unmount(struct unmount_args *uap) 595 { 596 struct thread *td = curthread; 597 struct proc *p __debugvar = td->td_proc; 598 struct mount *mp = NULL; 599 struct nlookupdata nd; 600 int error; 601 602 KKASSERT(p); 603 if (td->td_ucred->cr_prison != NULL) { 604 error = EPERM; 605 goto done; 606 } 607 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 608 goto done; 609 610 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 611 NLC_FOLLOW | NLC_IGNBADDIR); 612 if (error == 0) 613 error = nlookup(&nd); 614 if (error) 615 goto out; 616 617 mp = nd.nl_nch.mount; 618 619 /* 620 * Only root, or the user that did the original mount is 621 * permitted to unmount this filesystem. 622 */ 623 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 624 (error = priv_check(td, PRIV_ROOT))) 625 goto out; 626 627 /* 628 * Don't allow unmounting the root file system. 629 */ 630 if (mp->mnt_flag & MNT_ROOTFS) { 631 error = EINVAL; 632 goto out; 633 } 634 635 /* 636 * Must be the root of the filesystem 637 */ 638 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 639 error = EINVAL; 640 goto out; 641 } 642 643 /* 644 * If no error try to issue the unmount. We lose our cache 645 * ref when we call nlookup_done so we must hold the mount point 646 * to prevent use-after-free races. 647 */ 648 out: 649 if (error == 0) { 650 mount_hold(mp); 651 nlookup_done(&nd); 652 error = dounmount(mp, uap->flags, 0); 653 mount_drop(mp); 654 } else { 655 nlookup_done(&nd); 656 } 657 done: 658 return (error); 659 } 660 661 /* 662 * Do the actual file system unmount (interlocked against the mountlist 663 * token and mp->mnt_token). 664 */ 665 static int 666 dounmount_interlock(struct mount *mp) 667 { 668 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 669 return (EBUSY); 670 mp->mnt_kern_flag |= MNTK_UNMOUNT; 671 return(0); 672 } 673 674 static int 675 unmount_allproc_cb(struct proc *p, void *arg) 676 { 677 struct mount *mp; 678 679 if (p->p_textnch.ncp == NULL) 680 return 0; 681 682 mp = (struct mount *)arg; 683 if (p->p_textnch.mount == mp) 684 cache_drop(&p->p_textnch); 685 686 return 0; 687 } 688 689 /* 690 * The guts of the unmount code. The mount owns one ref and one hold 691 * count. If we successfully interlock the unmount, those refs are ours. 692 * (The ref is from mnt_ncmountpt). 693 * 694 * When halting we shortcut certain mount types such as devfs by not actually 695 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected 696 * from the mountlist so higher-level filesytems can unmount cleanly. 697 * 698 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs. 699 */ 700 int 701 dounmount(struct mount *mp, int flags, int halting) 702 { 703 struct namecache *ncp; 704 struct nchandle nch; 705 struct vnode *vp; 706 int error; 707 int async_flag; 708 int lflags; 709 int freeok = 1; 710 int hadsyncer = 0; 711 int retry; 712 int quickhalt; 713 714 lwkt_gettoken(&mp->mnt_token); 715 716 /* 717 * When halting, certain mount points can essentially just 718 * be unhooked and otherwise ignored. 719 */ 720 if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) { 721 quickhalt = 1; 722 freeok = 0; 723 } else { 724 quickhalt = 0; 725 } 726 727 728 /* 729 * Exclusive access for unmounting purposes. 730 */ 731 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 732 goto out; 733 734 /* 735 * We now 'own' the last mp->mnt_refs 736 * 737 * Allow filesystems to detect that a forced unmount is in progress. 738 */ 739 if (flags & MNT_FORCE) 740 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 741 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK); 742 error = lockmgr(&mp->mnt_lock, lflags); 743 if (error) { 744 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 745 if (mp->mnt_kern_flag & MNTK_MWAIT) { 746 mp->mnt_kern_flag &= ~MNTK_MWAIT; 747 wakeup(mp); 748 } 749 goto out; 750 } 751 752 if (mp->mnt_flag & MNT_EXPUBLIC) 753 vfs_setpublicfs(NULL, NULL, NULL); 754 755 vfs_msync(mp, MNT_WAIT); 756 async_flag = mp->mnt_flag & MNT_ASYNC; 757 mp->mnt_flag &=~ MNT_ASYNC; 758 759 /* 760 * Decomission our special mnt_syncer vnode. This also stops 761 * the vnlru code. If we are unable to unmount we recommission 762 * the vnode. 763 * 764 * Then sync the filesystem. 765 */ 766 if ((vp = mp->mnt_syncer) != NULL) { 767 mp->mnt_syncer = NULL; 768 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 769 vrele(vp); 770 hadsyncer = 1; 771 } 772 773 /* 774 * Sync normally-mounted filesystem. 775 */ 776 if (quickhalt == 0) { 777 if ((mp->mnt_flag & MNT_RDONLY) == 0) 778 VFS_SYNC(mp, MNT_WAIT); 779 } 780 781 /* 782 * nchandle records ref the mount structure. Expect a count of 1 783 * (our mount->mnt_ncmountpt). 784 * 785 * Scans can get temporary refs on a mountpoint (thought really 786 * heavy duty stuff like cache_findmount() do not). 787 */ 788 for (retry = 0; (retry < 10 || debug_unmount); ++retry) { 789 /* 790 * Invalidate the namecache topology under the mount. 791 * nullfs mounts alias a real mount's namecache topology 792 * and it should not be invalidated in that case. 793 */ 794 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 795 cache_lock(&mp->mnt_ncmountpt); 796 cache_inval(&mp->mnt_ncmountpt, 797 CINV_DESTROY|CINV_CHILDREN); 798 cache_unlock(&mp->mnt_ncmountpt); 799 } 800 801 /* 802 * Clear pcpu caches 803 */ 804 cache_unmounting(mp); 805 if (mp->mnt_refs != 1) 806 cache_clearmntcache(); 807 808 /* 809 * Break out if we are good. Don't count ncp refs if the 810 * mount is aliased. 811 */ 812 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 813 NULL : mp->mnt_ncmountpt.ncp; 814 if (mp->mnt_refs == 1 && 815 (ncp == NULL || (ncp->nc_refs == 1 && 816 TAILQ_FIRST(&ncp->nc_list) == NULL))) { 817 break; 818 } 819 820 /* 821 * If forcing the unmount, clean out any p->p_textnch 822 * nchandles that match this mount. 823 */ 824 if (flags & MNT_FORCE) 825 allproc_scan(&unmount_allproc_cb, mp, 0); 826 827 /* 828 * Sleep and retry. 829 */ 830 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1); 831 if ((retry & 15) == 15) { 832 mount_warning(mp, 833 "(%p) debug - retry %d, " 834 "%d namecache refs, %d mount refs", 835 mp, retry, 836 (ncp ? ncp->nc_refs - 1 : 0), 837 mp->mnt_refs - 1); 838 } 839 } 840 841 error = 0; 842 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 843 NULL : mp->mnt_ncmountpt.ncp; 844 if (mp->mnt_refs != 1 || 845 (ncp != NULL && (ncp->nc_refs != 1 || 846 TAILQ_FIRST(&ncp->nc_list)))) { 847 mount_warning(mp, 848 "(%p): %d namecache refs, %d mount refs " 849 "still present", 850 mp, 851 (ncp ? ncp->nc_refs - 1 : 0), 852 mp->mnt_refs - 1); 853 if (flags & MNT_FORCE) { 854 freeok = 0; 855 mount_warning(mp, "forcing unmount\n"); 856 } else { 857 error = EBUSY; 858 } 859 } 860 861 /* 862 * So far so good, sync the filesystem once more and 863 * call the VFS unmount code if the sync succeeds. 864 */ 865 if (error == 0 && quickhalt == 0) { 866 if (mp->mnt_flag & MNT_RDONLY) { 867 error = VFS_UNMOUNT(mp, flags); 868 } else { 869 error = VFS_SYNC(mp, MNT_WAIT); 870 if (error == 0 || /* no error */ 871 error == EOPNOTSUPP || /* no sync avail */ 872 (flags & MNT_FORCE)) { /* force anyway */ 873 error = VFS_UNMOUNT(mp, flags); 874 } 875 } 876 if (error) { 877 mount_warning(mp, 878 "(%p) unmount: vfs refused to unmount, " 879 "error %d", 880 mp, error); 881 } 882 } 883 884 /* 885 * If an error occurred we can still recover, restoring the 886 * syncer vnode and misc flags. 887 */ 888 if (error) { 889 if (mp->mnt_syncer == NULL && hadsyncer) 890 vfs_allocate_syncvnode(mp); 891 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 892 mp->mnt_flag |= async_flag; 893 lockmgr(&mp->mnt_lock, LK_RELEASE); 894 if (mp->mnt_kern_flag & MNTK_MWAIT) { 895 mp->mnt_kern_flag &= ~MNTK_MWAIT; 896 wakeup(mp); 897 } 898 goto out; 899 } 900 /* 901 * Clean up any journals still associated with the mount after 902 * filesystem activity has ceased. 903 */ 904 journal_remove_all_journals(mp, 905 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 906 907 mountlist_remove(mp); 908 909 /* 910 * Remove any installed vnode ops here so the individual VFSs don't 911 * have to. 912 * 913 * mnt_refs should go to zero when we scrap mnt_ncmountpt. 914 * 915 * When quickhalting we have to keep these intact because the 916 * underlying vnodes have not been destroyed, and some might be 917 * dirty. 918 */ 919 if (quickhalt == 0) { 920 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 921 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 922 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 923 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 924 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 925 } 926 927 if (mp->mnt_ncmountpt.ncp != NULL) { 928 nch = mp->mnt_ncmountpt; 929 cache_zero(&mp->mnt_ncmountpt); 930 cache_clrmountpt(&nch); 931 cache_drop(&nch); 932 } 933 if (mp->mnt_ncmounton.ncp != NULL) { 934 cache_unmounting(mp); 935 nch = mp->mnt_ncmounton; 936 cache_zero(&mp->mnt_ncmounton); 937 cache_clrmountpt(&nch); 938 cache_drop(&nch); 939 } 940 941 mp->mnt_vfc->vfc_refcount--; 942 943 /* 944 * If not quickhalting the mount, we expect there to be no 945 * vnodes left. 946 */ 947 if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist)) 948 panic("unmount: dangling vnode"); 949 950 /* 951 * Release the lock 952 */ 953 lockmgr(&mp->mnt_lock, LK_RELEASE); 954 if (mp->mnt_kern_flag & MNTK_MWAIT) { 955 mp->mnt_kern_flag &= ~MNTK_MWAIT; 956 wakeup(mp); 957 } 958 959 /* 960 * If we reach here and freeok != 0 we must free the mount. 961 * mnt_refs should already have dropped to 0, so if it is not 962 * zero we must cycle the caches and wait. 963 * 964 * When we are satisfied that the mount has disconnected we can 965 * drop the hold on the mp that represented the mount (though the 966 * caller might actually have another, so the caller's drop may 967 * do the actual free). 968 */ 969 if (freeok) { 970 if (mp->mnt_refs > 0) 971 cache_clearmntcache(); 972 while (mp->mnt_refs > 0) { 973 cache_unmounting(mp); 974 wakeup(mp); 975 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 976 cache_clearmntcache(); 977 } 978 lwkt_reltoken(&mp->mnt_token); 979 mount_drop(mp); 980 mp = NULL; 981 } else { 982 cache_clearmntcache(); 983 } 984 error = 0; 985 KNOTE(&fs_klist, VQ_UNMOUNT); 986 out: 987 if (mp) 988 lwkt_reltoken(&mp->mnt_token); 989 return (error); 990 } 991 992 static 993 void 994 mount_warning(struct mount *mp, const char *ctl, ...) 995 { 996 char *ptr; 997 char *buf; 998 __va_list va; 999 1000 __va_start(va, ctl); 1001 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 1002 &ptr, &buf, 0) == 0) { 1003 kprintf("unmount(%s): ", ptr); 1004 kvprintf(ctl, va); 1005 kprintf("\n"); 1006 kfree(buf, M_TEMP); 1007 } else { 1008 kprintf("unmount(%p", mp); 1009 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 1010 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 1011 kprintf("): "); 1012 kvprintf(ctl, va); 1013 kprintf("\n"); 1014 } 1015 __va_end(va); 1016 } 1017 1018 /* 1019 * Shim cache_fullpath() to handle the case where a process is chrooted into 1020 * a subdirectory of a mount. In this case if the root mount matches the 1021 * process root directory's mount we have to specify the process's root 1022 * directory instead of the mount point, because the mount point might 1023 * be above the root directory. 1024 */ 1025 static 1026 int 1027 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 1028 { 1029 struct nchandle *nch; 1030 1031 if (p && p->p_fd->fd_nrdir.mount == mp) 1032 nch = &p->p_fd->fd_nrdir; 1033 else 1034 nch = &mp->mnt_ncmountpt; 1035 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1036 } 1037 1038 /* 1039 * Sync each mounted filesystem. 1040 */ 1041 1042 #ifdef DEBUG 1043 static int syncprt = 0; 1044 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1045 #endif /* DEBUG */ 1046 1047 static int sync_callback(struct mount *mp, void *data); 1048 1049 int 1050 sys_sync(struct sync_args *uap) 1051 { 1052 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1053 return (0); 1054 } 1055 1056 static 1057 int 1058 sync_callback(struct mount *mp, void *data __unused) 1059 { 1060 int asyncflag; 1061 1062 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1063 lwkt_gettoken(&mp->mnt_token); 1064 asyncflag = mp->mnt_flag & MNT_ASYNC; 1065 mp->mnt_flag &= ~MNT_ASYNC; 1066 lwkt_reltoken(&mp->mnt_token); 1067 vfs_msync(mp, MNT_NOWAIT); 1068 VFS_SYNC(mp, MNT_NOWAIT); 1069 lwkt_gettoken(&mp->mnt_token); 1070 mp->mnt_flag |= asyncflag; 1071 lwkt_reltoken(&mp->mnt_token); 1072 } 1073 return(0); 1074 } 1075 1076 /* XXX PRISON: could be per prison flag */ 1077 static int prison_quotas; 1078 #if 0 1079 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1080 #endif 1081 1082 /* 1083 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1084 * 1085 * Change filesystem quotas. 1086 * 1087 * MPALMOSTSAFE 1088 */ 1089 int 1090 sys_quotactl(struct quotactl_args *uap) 1091 { 1092 struct nlookupdata nd; 1093 struct thread *td; 1094 struct mount *mp; 1095 int error; 1096 1097 td = curthread; 1098 if (td->td_ucred->cr_prison && !prison_quotas) { 1099 error = EPERM; 1100 goto done; 1101 } 1102 1103 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1104 if (error == 0) 1105 error = nlookup(&nd); 1106 if (error == 0) { 1107 mp = nd.nl_nch.mount; 1108 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 1109 uap->arg, nd.nl_cred); 1110 } 1111 nlookup_done(&nd); 1112 done: 1113 return (error); 1114 } 1115 1116 /* 1117 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1118 * void *buf, int buflen) 1119 * 1120 * This function operates on a mount point and executes the specified 1121 * operation using the specified control data, and possibly returns data. 1122 * 1123 * The actual number of bytes stored in the result buffer is returned, 0 1124 * if none, otherwise an error is returned. 1125 * 1126 * MPALMOSTSAFE 1127 */ 1128 int 1129 sys_mountctl(struct mountctl_args *uap) 1130 { 1131 struct thread *td = curthread; 1132 struct file *fp; 1133 void *ctl = NULL; 1134 void *buf = NULL; 1135 char *path = NULL; 1136 int error; 1137 1138 /* 1139 * Sanity and permissions checks. We must be root. 1140 */ 1141 if (td->td_ucred->cr_prison != NULL) 1142 return (EPERM); 1143 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1144 (error = priv_check(td, PRIV_ROOT)) != 0) 1145 return (error); 1146 1147 /* 1148 * Argument length checks 1149 */ 1150 if (uap->ctllen < 0 || uap->ctllen > 1024) 1151 return (EINVAL); 1152 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1153 return (EINVAL); 1154 if (uap->path == NULL) 1155 return (EINVAL); 1156 1157 /* 1158 * Allocate the necessary buffers and copyin data 1159 */ 1160 path = objcache_get(namei_oc, M_WAITOK); 1161 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1162 if (error) 1163 goto done; 1164 1165 if (uap->ctllen) { 1166 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1167 error = copyin(uap->ctl, ctl, uap->ctllen); 1168 if (error) 1169 goto done; 1170 } 1171 if (uap->buflen) 1172 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1173 1174 /* 1175 * Validate the descriptor 1176 */ 1177 if (uap->fd >= 0) { 1178 fp = holdfp(td, uap->fd, -1); 1179 if (fp == NULL) { 1180 error = EBADF; 1181 goto done; 1182 } 1183 } else { 1184 fp = NULL; 1185 } 1186 1187 /* 1188 * Execute the internal kernel function and clean up. 1189 */ 1190 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 1191 buf, uap->buflen, &uap->sysmsg_result); 1192 if (fp) 1193 dropfp(td, uap->fd, fp); 1194 if (error == 0 && uap->sysmsg_result > 0) 1195 error = copyout(buf, uap->buf, uap->sysmsg_result); 1196 done: 1197 if (path) 1198 objcache_put(namei_oc, path); 1199 if (ctl) 1200 kfree(ctl, M_TEMP); 1201 if (buf) 1202 kfree(buf, M_TEMP); 1203 return (error); 1204 } 1205 1206 /* 1207 * Execute a mount control operation by resolving the path to a mount point 1208 * and calling vop_mountctl(). 1209 * 1210 * Use the mount point from the nch instead of the vnode so nullfs mounts 1211 * can properly spike the VOP. 1212 */ 1213 int 1214 kern_mountctl(const char *path, int op, struct file *fp, 1215 const void *ctl, int ctllen, 1216 void *buf, int buflen, int *res) 1217 { 1218 struct vnode *vp; 1219 struct nlookupdata nd; 1220 struct nchandle nch; 1221 struct mount *mp; 1222 int error; 1223 1224 *res = 0; 1225 vp = NULL; 1226 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1227 if (error) 1228 return (error); 1229 error = nlookup(&nd); 1230 if (error) { 1231 nlookup_done(&nd); 1232 return (error); 1233 } 1234 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1235 if (error) { 1236 nlookup_done(&nd); 1237 return (error); 1238 } 1239 1240 /* 1241 * Yes, all this is needed to use the nch.mount below, because 1242 * we must maintain a ref on the mount to avoid ripouts (e.g. 1243 * due to heavy mount/unmount use by synth or poudriere). 1244 */ 1245 nch = nd.nl_nch; 1246 cache_zero(&nd.nl_nch); 1247 cache_unlock(&nch); 1248 nlookup_done(&nd); 1249 vn_unlock(vp); 1250 1251 mp = nch.mount; 1252 1253 /* 1254 * Must be the root of the filesystem 1255 */ 1256 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1257 cache_drop(&nch); 1258 vrele(vp); 1259 return (EINVAL); 1260 } 1261 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1262 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1263 path); 1264 cache_drop(&nch); 1265 vrele(vp); 1266 return (EINVAL); 1267 } 1268 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1269 buf, buflen, res); 1270 vrele(vp); 1271 cache_drop(&nch); 1272 1273 return (error); 1274 } 1275 1276 int 1277 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1278 { 1279 struct thread *td = curthread; 1280 struct proc *p = td->td_proc; 1281 struct mount *mp; 1282 struct statfs *sp; 1283 char *fullpath, *freepath; 1284 int error; 1285 1286 if ((error = nlookup(nd)) != 0) 1287 return (error); 1288 mp = nd->nl_nch.mount; 1289 sp = &mp->mnt_stat; 1290 1291 /* 1292 * Ignore refresh error, user should have visibility. 1293 * This can happen if a NFS mount goes bad (e.g. server 1294 * revokes perms or goes down). 1295 */ 1296 error = VFS_STATFS(mp, sp, nd->nl_cred); 1297 /* ignore error */ 1298 1299 error = mount_path(p, mp, &fullpath, &freepath); 1300 if (error) 1301 return(error); 1302 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1303 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1304 kfree(freepath, M_TEMP); 1305 1306 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1307 bcopy(sp, buf, sizeof(*buf)); 1308 /* Only root should have access to the fsid's. */ 1309 if (priv_check(td, PRIV_ROOT)) 1310 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1311 return (0); 1312 } 1313 1314 /* 1315 * statfs_args(char *path, struct statfs *buf) 1316 * 1317 * Get filesystem statistics. 1318 */ 1319 int 1320 sys_statfs(struct statfs_args *uap) 1321 { 1322 struct nlookupdata nd; 1323 struct statfs buf; 1324 int error; 1325 1326 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1327 if (error == 0) 1328 error = kern_statfs(&nd, &buf); 1329 nlookup_done(&nd); 1330 if (error == 0) 1331 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1332 return (error); 1333 } 1334 1335 int 1336 kern_fstatfs(int fd, struct statfs *buf) 1337 { 1338 struct thread *td = curthread; 1339 struct proc *p = td->td_proc; 1340 struct file *fp; 1341 struct mount *mp; 1342 struct statfs *sp; 1343 char *fullpath, *freepath; 1344 int error; 1345 1346 KKASSERT(p); 1347 if ((error = holdvnode(td, fd, &fp)) != 0) 1348 return (error); 1349 1350 /* 1351 * Try to use mount info from any overlays rather than the 1352 * mount info for the underlying vnode, otherwise we will 1353 * fail when operating on null-mounted paths inside a chroot. 1354 */ 1355 if ((mp = fp->f_nchandle.mount) == NULL) 1356 mp = ((struct vnode *)fp->f_data)->v_mount; 1357 if (mp == NULL) { 1358 error = EBADF; 1359 goto done; 1360 } 1361 if (fp->f_cred == NULL) { 1362 error = EINVAL; 1363 goto done; 1364 } 1365 1366 /* 1367 * Ignore refresh error, user should have visibility. 1368 * This can happen if a NFS mount goes bad (e.g. server 1369 * revokes perms or goes down). 1370 */ 1371 sp = &mp->mnt_stat; 1372 error = VFS_STATFS(mp, sp, fp->f_cred); 1373 1374 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1375 goto done; 1376 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1377 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1378 kfree(freepath, M_TEMP); 1379 1380 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1381 bcopy(sp, buf, sizeof(*buf)); 1382 1383 /* Only root should have access to the fsid's. */ 1384 if (priv_check(td, PRIV_ROOT)) 1385 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1386 error = 0; 1387 done: 1388 fdrop(fp); 1389 return (error); 1390 } 1391 1392 /* 1393 * fstatfs_args(int fd, struct statfs *buf) 1394 * 1395 * Get filesystem statistics. 1396 */ 1397 int 1398 sys_fstatfs(struct fstatfs_args *uap) 1399 { 1400 struct statfs buf; 1401 int error; 1402 1403 error = kern_fstatfs(uap->fd, &buf); 1404 1405 if (error == 0) 1406 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1407 return (error); 1408 } 1409 1410 int 1411 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1412 { 1413 struct mount *mp; 1414 struct statvfs *sp; 1415 int error; 1416 1417 if ((error = nlookup(nd)) != 0) 1418 return (error); 1419 mp = nd->nl_nch.mount; 1420 sp = &mp->mnt_vstat; 1421 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1422 return (error); 1423 1424 sp->f_flag = 0; 1425 if (mp->mnt_flag & MNT_RDONLY) 1426 sp->f_flag |= ST_RDONLY; 1427 if (mp->mnt_flag & MNT_NOSUID) 1428 sp->f_flag |= ST_NOSUID; 1429 bcopy(sp, buf, sizeof(*buf)); 1430 return (0); 1431 } 1432 1433 /* 1434 * statfs_args(char *path, struct statfs *buf) 1435 * 1436 * Get filesystem statistics. 1437 */ 1438 int 1439 sys_statvfs(struct statvfs_args *uap) 1440 { 1441 struct nlookupdata nd; 1442 struct statvfs buf; 1443 int error; 1444 1445 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1446 if (error == 0) 1447 error = kern_statvfs(&nd, &buf); 1448 nlookup_done(&nd); 1449 if (error == 0) 1450 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1451 return (error); 1452 } 1453 1454 int 1455 kern_fstatvfs(int fd, struct statvfs *buf) 1456 { 1457 struct thread *td = curthread; 1458 struct file *fp; 1459 struct mount *mp; 1460 struct statvfs *sp; 1461 int error; 1462 1463 if ((error = holdvnode(td, fd, &fp)) != 0) 1464 return (error); 1465 if ((mp = fp->f_nchandle.mount) == NULL) 1466 mp = ((struct vnode *)fp->f_data)->v_mount; 1467 if (mp == NULL) { 1468 error = EBADF; 1469 goto done; 1470 } 1471 if (fp->f_cred == NULL) { 1472 error = EINVAL; 1473 goto done; 1474 } 1475 sp = &mp->mnt_vstat; 1476 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1477 goto done; 1478 1479 sp->f_flag = 0; 1480 if (mp->mnt_flag & MNT_RDONLY) 1481 sp->f_flag |= ST_RDONLY; 1482 if (mp->mnt_flag & MNT_NOSUID) 1483 sp->f_flag |= ST_NOSUID; 1484 1485 bcopy(sp, buf, sizeof(*buf)); 1486 error = 0; 1487 done: 1488 fdrop(fp); 1489 return (error); 1490 } 1491 1492 /* 1493 * fstatfs_args(int fd, struct statfs *buf) 1494 * 1495 * Get filesystem statistics. 1496 */ 1497 int 1498 sys_fstatvfs(struct fstatvfs_args *uap) 1499 { 1500 struct statvfs buf; 1501 int error; 1502 1503 error = kern_fstatvfs(uap->fd, &buf); 1504 1505 if (error == 0) 1506 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1507 return (error); 1508 } 1509 1510 /* 1511 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1512 * 1513 * Get statistics on all filesystems. 1514 */ 1515 1516 struct getfsstat_info { 1517 struct statfs *sfsp; 1518 long count; 1519 long maxcount; 1520 int error; 1521 int flags; 1522 struct thread *td; 1523 }; 1524 1525 static int getfsstat_callback(struct mount *, void *); 1526 1527 int 1528 sys_getfsstat(struct getfsstat_args *uap) 1529 { 1530 struct thread *td = curthread; 1531 struct getfsstat_info info; 1532 1533 bzero(&info, sizeof(info)); 1534 1535 info.maxcount = uap->bufsize / sizeof(struct statfs); 1536 info.sfsp = uap->buf; 1537 info.count = 0; 1538 info.flags = uap->flags; 1539 info.td = td; 1540 1541 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1542 if (info.sfsp && info.count > info.maxcount) 1543 uap->sysmsg_result = info.maxcount; 1544 else 1545 uap->sysmsg_result = info.count; 1546 return (info.error); 1547 } 1548 1549 static int 1550 getfsstat_callback(struct mount *mp, void *data) 1551 { 1552 struct getfsstat_info *info = data; 1553 struct statfs *sp; 1554 char *freepath; 1555 char *fullpath; 1556 int error; 1557 1558 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1559 return(0); 1560 1561 if (info->sfsp && info->count < info->maxcount) { 1562 sp = &mp->mnt_stat; 1563 1564 /* 1565 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1566 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1567 * overrides MNT_WAIT. 1568 * 1569 * Ignore refresh error, user should have visibility. 1570 * This can happen if a NFS mount goes bad (e.g. server 1571 * revokes perms or goes down). 1572 */ 1573 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1574 (info->flags & MNT_WAIT)) && 1575 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1576 /* ignore error */ 1577 } 1578 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1579 1580 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1581 if (error) { 1582 info->error = error; 1583 return(-1); 1584 } 1585 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1586 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1587 kfree(freepath, M_TEMP); 1588 1589 error = copyout(sp, info->sfsp, sizeof(*sp)); 1590 if (error) { 1591 info->error = error; 1592 return (-1); 1593 } 1594 ++info->sfsp; 1595 } 1596 info->count++; 1597 return(0); 1598 } 1599 1600 /* 1601 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1602 long bufsize, int flags) 1603 * 1604 * Get statistics on all filesystems. 1605 */ 1606 1607 struct getvfsstat_info { 1608 struct statfs *sfsp; 1609 struct statvfs *vsfsp; 1610 long count; 1611 long maxcount; 1612 int error; 1613 int flags; 1614 struct thread *td; 1615 }; 1616 1617 static int getvfsstat_callback(struct mount *, void *); 1618 1619 int 1620 sys_getvfsstat(struct getvfsstat_args *uap) 1621 { 1622 struct thread *td = curthread; 1623 struct getvfsstat_info info; 1624 1625 bzero(&info, sizeof(info)); 1626 1627 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1628 info.sfsp = uap->buf; 1629 info.vsfsp = uap->vbuf; 1630 info.count = 0; 1631 info.flags = uap->flags; 1632 info.td = td; 1633 1634 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1635 if (info.vsfsp && info.count > info.maxcount) 1636 uap->sysmsg_result = info.maxcount; 1637 else 1638 uap->sysmsg_result = info.count; 1639 return (info.error); 1640 } 1641 1642 static int 1643 getvfsstat_callback(struct mount *mp, void *data) 1644 { 1645 struct getvfsstat_info *info = data; 1646 struct statfs *sp; 1647 struct statvfs *vsp; 1648 char *freepath; 1649 char *fullpath; 1650 int error; 1651 1652 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1653 return(0); 1654 1655 if (info->vsfsp && info->count < info->maxcount) { 1656 sp = &mp->mnt_stat; 1657 vsp = &mp->mnt_vstat; 1658 1659 /* 1660 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1661 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1662 * overrides MNT_WAIT. 1663 * 1664 * Ignore refresh error, user should have visibility. 1665 * This can happen if a NFS mount goes bad (e.g. server 1666 * revokes perms or goes down). 1667 */ 1668 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1669 (info->flags & MNT_WAIT)) && 1670 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1671 /* ignore error */ 1672 } 1673 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1674 1675 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1676 (info->flags & MNT_WAIT)) && 1677 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1678 /* ignore error */ 1679 } 1680 vsp->f_flag = 0; 1681 if (mp->mnt_flag & MNT_RDONLY) 1682 vsp->f_flag |= ST_RDONLY; 1683 if (mp->mnt_flag & MNT_NOSUID) 1684 vsp->f_flag |= ST_NOSUID; 1685 1686 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1687 if (error) { 1688 info->error = error; 1689 return(-1); 1690 } 1691 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1692 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1693 kfree(freepath, M_TEMP); 1694 1695 error = copyout(sp, info->sfsp, sizeof(*sp)); 1696 if (error == 0) 1697 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1698 if (error) { 1699 info->error = error; 1700 return (-1); 1701 } 1702 ++info->sfsp; 1703 ++info->vsfsp; 1704 } 1705 info->count++; 1706 return(0); 1707 } 1708 1709 1710 /* 1711 * fchdir_args(int fd) 1712 * 1713 * Change current working directory to a given file descriptor. 1714 */ 1715 int 1716 sys_fchdir(struct fchdir_args *uap) 1717 { 1718 struct thread *td = curthread; 1719 struct proc *p = td->td_proc; 1720 struct filedesc *fdp = p->p_fd; 1721 struct vnode *vp, *ovp; 1722 struct mount *mp; 1723 struct file *fp; 1724 struct nchandle nch, onch, tnch; 1725 int error; 1726 1727 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1728 return (error); 1729 lwkt_gettoken(&p->p_token); 1730 vp = (struct vnode *)fp->f_data; 1731 vref(vp); 1732 vn_lock(vp, LK_SHARED | LK_RETRY); 1733 if (fp->f_nchandle.ncp == NULL) 1734 error = ENOTDIR; 1735 else 1736 error = checkvp_chdir(vp, td); 1737 if (error) { 1738 vput(vp); 1739 goto done; 1740 } 1741 cache_copy(&fp->f_nchandle, &nch); 1742 1743 /* 1744 * If the ncp has become a mount point, traverse through 1745 * the mount point. 1746 */ 1747 1748 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1749 (mp = cache_findmount(&nch)) != NULL 1750 ) { 1751 error = nlookup_mp(mp, &tnch); 1752 if (error == 0) { 1753 cache_unlock(&tnch); /* leave ref intact */ 1754 vput(vp); 1755 vp = tnch.ncp->nc_vp; 1756 error = vget(vp, LK_SHARED); 1757 KKASSERT(error == 0); 1758 cache_drop(&nch); 1759 nch = tnch; 1760 } 1761 cache_dropmount(mp); 1762 } 1763 if (error == 0) { 1764 spin_lock(&fdp->fd_spin); 1765 ovp = fdp->fd_cdir; 1766 onch = fdp->fd_ncdir; 1767 fdp->fd_cdir = vp; 1768 fdp->fd_ncdir = nch; 1769 spin_unlock(&fdp->fd_spin); 1770 vn_unlock(vp); /* leave ref intact */ 1771 cache_drop(&onch); 1772 vrele(ovp); 1773 } else { 1774 cache_drop(&nch); 1775 vput(vp); 1776 } 1777 fdrop(fp); 1778 done: 1779 lwkt_reltoken(&p->p_token); 1780 return (error); 1781 } 1782 1783 int 1784 kern_chdir(struct nlookupdata *nd) 1785 { 1786 struct thread *td = curthread; 1787 struct proc *p = td->td_proc; 1788 struct filedesc *fdp = p->p_fd; 1789 struct vnode *vp, *ovp; 1790 struct nchandle onch; 1791 int error; 1792 1793 nd->nl_flags |= NLC_SHAREDLOCK; 1794 if ((error = nlookup(nd)) != 0) 1795 return (error); 1796 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1797 return (ENOENT); 1798 if ((error = vget(vp, LK_SHARED)) != 0) 1799 return (error); 1800 1801 lwkt_gettoken(&p->p_token); 1802 error = checkvp_chdir(vp, td); 1803 vn_unlock(vp); 1804 if (error == 0) { 1805 spin_lock(&fdp->fd_spin); 1806 ovp = fdp->fd_cdir; 1807 onch = fdp->fd_ncdir; 1808 fdp->fd_ncdir = nd->nl_nch; 1809 fdp->fd_cdir = vp; 1810 spin_unlock(&fdp->fd_spin); 1811 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1812 cache_drop(&onch); 1813 vrele(ovp); 1814 cache_zero(&nd->nl_nch); 1815 } else { 1816 vrele(vp); 1817 } 1818 lwkt_reltoken(&p->p_token); 1819 return (error); 1820 } 1821 1822 /* 1823 * chdir_args(char *path) 1824 * 1825 * Change current working directory (``.''). 1826 */ 1827 int 1828 sys_chdir(struct chdir_args *uap) 1829 { 1830 struct nlookupdata nd; 1831 int error; 1832 1833 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1834 if (error == 0) 1835 error = kern_chdir(&nd); 1836 nlookup_done(&nd); 1837 return (error); 1838 } 1839 1840 /* 1841 * Helper function for raised chroot(2) security function: Refuse if 1842 * any filedescriptors are open directories. 1843 */ 1844 static int 1845 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1846 { 1847 struct vnode *vp; 1848 struct file *fp; 1849 int error; 1850 int fd; 1851 1852 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1853 if ((error = holdvnode(td, fd, &fp)) != 0) 1854 continue; 1855 vp = (struct vnode *)fp->f_data; 1856 if (vp->v_type != VDIR) { 1857 fdrop(fp); 1858 continue; 1859 } 1860 fdrop(fp); 1861 return(EPERM); 1862 } 1863 return (0); 1864 } 1865 1866 /* 1867 * This sysctl determines if we will allow a process to chroot(2) if it 1868 * has a directory open: 1869 * 0: disallowed for all processes. 1870 * 1: allowed for processes that were not already chroot(2)'ed. 1871 * 2: allowed for all processes. 1872 */ 1873 1874 static int chroot_allow_open_directories = 1; 1875 1876 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1877 &chroot_allow_open_directories, 0, ""); 1878 1879 /* 1880 * chroot to the specified namecache entry. We obtain the vp from the 1881 * namecache data. The passed ncp must be locked and referenced and will 1882 * remain locked and referenced on return. 1883 */ 1884 int 1885 kern_chroot(struct nchandle *nch) 1886 { 1887 struct thread *td = curthread; 1888 struct proc *p = td->td_proc; 1889 struct filedesc *fdp = p->p_fd; 1890 struct vnode *vp; 1891 int error; 1892 1893 /* 1894 * Only privileged user can chroot 1895 */ 1896 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1897 if (error) 1898 return (error); 1899 1900 /* 1901 * Disallow open directory descriptors (fchdir() breakouts). 1902 */ 1903 if (chroot_allow_open_directories == 0 || 1904 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1905 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 1906 return (error); 1907 } 1908 if ((vp = nch->ncp->nc_vp) == NULL) 1909 return (ENOENT); 1910 1911 if ((error = vget(vp, LK_SHARED)) != 0) 1912 return (error); 1913 1914 /* 1915 * Check the validity of vp as a directory to change to and 1916 * associate it with rdir/jdir. 1917 */ 1918 error = checkvp_chdir(vp, td); 1919 vn_unlock(vp); /* leave reference intact */ 1920 if (error == 0) { 1921 lwkt_gettoken(&p->p_token); 1922 vrele(fdp->fd_rdir); 1923 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1924 cache_drop(&fdp->fd_nrdir); 1925 cache_copy(nch, &fdp->fd_nrdir); 1926 if (fdp->fd_jdir == NULL) { 1927 fdp->fd_jdir = vp; 1928 vref(fdp->fd_jdir); 1929 cache_copy(nch, &fdp->fd_njdir); 1930 } 1931 if ((p->p_flags & P_DIDCHROOT) == 0) { 1932 p->p_flags |= P_DIDCHROOT; 1933 if (p->p_depth <= 65535 - 32) 1934 p->p_depth += 32; 1935 } 1936 lwkt_reltoken(&p->p_token); 1937 } else { 1938 vrele(vp); 1939 } 1940 return (error); 1941 } 1942 1943 /* 1944 * chroot_args(char *path) 1945 * 1946 * Change notion of root (``/'') directory. 1947 */ 1948 int 1949 sys_chroot(struct chroot_args *uap) 1950 { 1951 struct thread *td __debugvar = curthread; 1952 struct nlookupdata nd; 1953 int error; 1954 1955 KKASSERT(td->td_proc); 1956 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1957 if (error == 0) { 1958 nd.nl_flags |= NLC_EXEC; 1959 error = nlookup(&nd); 1960 if (error == 0) 1961 error = kern_chroot(&nd.nl_nch); 1962 } 1963 nlookup_done(&nd); 1964 return(error); 1965 } 1966 1967 int 1968 sys_chroot_kernel(struct chroot_kernel_args *uap) 1969 { 1970 struct thread *td = curthread; 1971 struct nlookupdata nd; 1972 struct nchandle *nch; 1973 struct vnode *vp; 1974 int error; 1975 1976 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1977 if (error) 1978 goto error_nond; 1979 1980 error = nlookup(&nd); 1981 if (error) 1982 goto error_out; 1983 1984 nch = &nd.nl_nch; 1985 1986 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1987 if (error) 1988 goto error_out; 1989 1990 if ((vp = nch->ncp->nc_vp) == NULL) { 1991 error = ENOENT; 1992 goto error_out; 1993 } 1994 1995 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1996 goto error_out; 1997 1998 kprintf("chroot_kernel: set new rootnch/rootvnode to %s\n", uap->path); 1999 vfs_cache_setroot(vp, cache_hold(nch)); 2000 2001 error_out: 2002 nlookup_done(&nd); 2003 error_nond: 2004 return(error); 2005 } 2006 2007 /* 2008 * Common routine for chroot and chdir. Given a locked, referenced vnode, 2009 * determine whether it is legal to chdir to the vnode. The vnode's state 2010 * is not changed by this call. 2011 */ 2012 static int 2013 checkvp_chdir(struct vnode *vp, struct thread *td) 2014 { 2015 int error; 2016 2017 if (vp->v_type != VDIR) 2018 error = ENOTDIR; 2019 else 2020 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 2021 return (error); 2022 } 2023 2024 int 2025 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 2026 { 2027 struct thread *td = curthread; 2028 struct proc *p = td->td_proc; 2029 struct lwp *lp = td->td_lwp; 2030 struct filedesc *fdp = p->p_fd; 2031 int cmode, flags; 2032 struct file *nfp; 2033 struct file *fp; 2034 struct vnode *vp; 2035 int type, indx, error = 0; 2036 struct flock lf; 2037 2038 if ((oflags & O_ACCMODE) == O_ACCMODE) 2039 return (EINVAL); 2040 flags = FFLAGS(oflags); 2041 error = falloc(lp, &nfp, NULL); 2042 if (error) 2043 return (error); 2044 fp = nfp; 2045 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 2046 2047 /* 2048 * XXX p_dupfd is a real mess. It allows a device to return a 2049 * file descriptor to be duplicated rather then doing the open 2050 * itself. 2051 */ 2052 lp->lwp_dupfd = -1; 2053 2054 /* 2055 * Call vn_open() to do the lookup and assign the vnode to the 2056 * file pointer. vn_open() does not change the ref count on fp 2057 * and the vnode, on success, will be inherited by the file pointer 2058 * and unlocked. 2059 * 2060 * Request a shared lock on the vnode if possible. 2061 * 2062 * Executable binaries can race VTEXT against O_RDWR opens, so 2063 * use an exclusive lock for O_RDWR opens as well. 2064 * 2065 * NOTE: We need a flag to separate terminal vnode locking from 2066 * parent locking. O_CREAT needs parent locking, but O_TRUNC 2067 * and O_RDWR only need to lock the terminal vnode exclusively. 2068 */ 2069 nd->nl_flags |= NLC_LOCKVP; 2070 if ((flags & (O_CREAT|O_TRUNC|O_RDWR)) == 0) 2071 nd->nl_flags |= NLC_SHAREDLOCK; 2072 2073 error = vn_open(nd, fp, flags, cmode); 2074 nlookup_done(nd); 2075 2076 if (error) { 2077 /* 2078 * handle special fdopen() case. bleh. dupfdopen() is 2079 * responsible for dropping the old contents of ofiles[indx] 2080 * if it succeeds. 2081 * 2082 * Note that fsetfd() will add a ref to fp which represents 2083 * the fd_files[] assignment. We must still drop our 2084 * reference. 2085 */ 2086 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 2087 if (fdalloc(p, 0, &indx) == 0) { 2088 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error); 2089 if (error == 0) { 2090 *res = indx; 2091 fdrop(fp); /* our ref */ 2092 return (0); 2093 } 2094 fsetfd(fdp, NULL, indx); 2095 } 2096 } 2097 fdrop(fp); /* our ref */ 2098 if (error == ERESTART) 2099 error = EINTR; 2100 return (error); 2101 } 2102 2103 /* 2104 * ref the vnode for ourselves so it can't be ripped out from under 2105 * is. XXX need an ND flag to request that the vnode be returned 2106 * anyway. 2107 * 2108 * Reserve a file descriptor but do not assign it until the open 2109 * succeeds. 2110 */ 2111 vp = (struct vnode *)fp->f_data; 2112 vref(vp); 2113 if ((error = fdalloc(p, 0, &indx)) != 0) { 2114 fdrop(fp); 2115 vrele(vp); 2116 return (error); 2117 } 2118 2119 /* 2120 * If no error occurs the vp will have been assigned to the file 2121 * pointer. 2122 */ 2123 lp->lwp_dupfd = 0; 2124 2125 if (flags & (O_EXLOCK | O_SHLOCK)) { 2126 lf.l_whence = SEEK_SET; 2127 lf.l_start = 0; 2128 lf.l_len = 0; 2129 if (flags & O_EXLOCK) 2130 lf.l_type = F_WRLCK; 2131 else 2132 lf.l_type = F_RDLCK; 2133 if (flags & FNONBLOCK) 2134 type = 0; 2135 else 2136 type = F_WAIT; 2137 2138 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2139 /* 2140 * lock request failed. Clean up the reserved 2141 * descriptor. 2142 */ 2143 vrele(vp); 2144 fsetfd(fdp, NULL, indx); 2145 fdrop(fp); 2146 return (error); 2147 } 2148 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2149 } 2150 #if 0 2151 /* 2152 * Assert that all regular file vnodes were created with a object. 2153 */ 2154 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2155 ("open: regular file has no backing object after vn_open")); 2156 #endif 2157 2158 vrele(vp); 2159 2160 /* 2161 * release our private reference, leaving the one associated with the 2162 * descriptor table intact. 2163 */ 2164 if (oflags & O_CLOEXEC) 2165 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2166 fsetfd(fdp, fp, indx); 2167 fdrop(fp); 2168 *res = indx; 2169 2170 return (error); 2171 } 2172 2173 /* 2174 * open_args(char *path, int flags, int mode) 2175 * 2176 * Check permissions, allocate an open file structure, 2177 * and call the device open routine if any. 2178 */ 2179 int 2180 sys_open(struct open_args *uap) 2181 { 2182 struct nlookupdata nd; 2183 int error; 2184 2185 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2186 if (error == 0) { 2187 error = kern_open(&nd, uap->flags, 2188 uap->mode, &uap->sysmsg_result); 2189 } 2190 nlookup_done(&nd); 2191 return (error); 2192 } 2193 2194 /* 2195 * openat_args(int fd, char *path, int flags, int mode) 2196 */ 2197 int 2198 sys_openat(struct openat_args *uap) 2199 { 2200 struct nlookupdata nd; 2201 int error; 2202 struct file *fp; 2203 2204 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2205 if (error == 0) { 2206 error = kern_open(&nd, uap->flags, uap->mode, 2207 &uap->sysmsg_result); 2208 } 2209 nlookup_done_at(&nd, fp); 2210 return (error); 2211 } 2212 2213 int 2214 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2215 { 2216 struct thread *td = curthread; 2217 struct proc *p = td->td_proc; 2218 struct vnode *vp; 2219 struct vattr vattr; 2220 int error; 2221 int whiteout = 0; 2222 2223 KKASSERT(p); 2224 2225 VATTR_NULL(&vattr); 2226 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2227 vattr.va_rmajor = rmajor; 2228 vattr.va_rminor = rminor; 2229 2230 switch (mode & S_IFMT) { 2231 case S_IFMT: /* used by badsect to flag bad sectors */ 2232 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2233 vattr.va_type = VBAD; 2234 break; 2235 case S_IFCHR: 2236 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2237 vattr.va_type = VCHR; 2238 break; 2239 case S_IFBLK: 2240 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2241 vattr.va_type = VBLK; 2242 break; 2243 case S_IFWHT: 2244 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2245 whiteout = 1; 2246 break; 2247 case S_IFDIR: /* special directories support for HAMMER */ 2248 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2249 vattr.va_type = VDIR; 2250 break; 2251 default: 2252 error = EINVAL; 2253 break; 2254 } 2255 2256 if (error) 2257 return (error); 2258 2259 bwillinode(1); 2260 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2261 if ((error = nlookup(nd)) != 0) 2262 return (error); 2263 if (nd->nl_nch.ncp->nc_vp) 2264 return (EEXIST); 2265 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2266 return (error); 2267 2268 if (whiteout) { 2269 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2270 nd->nl_cred, NAMEI_CREATE); 2271 } else { 2272 vp = NULL; 2273 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2274 &vp, nd->nl_cred, &vattr); 2275 if (error == 0) 2276 vput(vp); 2277 } 2278 return (error); 2279 } 2280 2281 /* 2282 * mknod_args(char *path, int mode, int dev) 2283 * 2284 * Create a special file. 2285 */ 2286 int 2287 sys_mknod(struct mknod_args *uap) 2288 { 2289 struct nlookupdata nd; 2290 int error; 2291 2292 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2293 if (error == 0) { 2294 error = kern_mknod(&nd, uap->mode, 2295 umajor(uap->dev), uminor(uap->dev)); 2296 } 2297 nlookup_done(&nd); 2298 return (error); 2299 } 2300 2301 /* 2302 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2303 * 2304 * Create a special file. The path is relative to the directory associated 2305 * with fd. 2306 */ 2307 int 2308 sys_mknodat(struct mknodat_args *uap) 2309 { 2310 struct nlookupdata nd; 2311 struct file *fp; 2312 int error; 2313 2314 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2315 if (error == 0) { 2316 error = kern_mknod(&nd, uap->mode, 2317 umajor(uap->dev), uminor(uap->dev)); 2318 } 2319 nlookup_done_at(&nd, fp); 2320 return (error); 2321 } 2322 2323 int 2324 kern_mkfifo(struct nlookupdata *nd, int mode) 2325 { 2326 struct thread *td = curthread; 2327 struct proc *p = td->td_proc; 2328 struct vattr vattr; 2329 struct vnode *vp; 2330 int error; 2331 2332 bwillinode(1); 2333 2334 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2335 if ((error = nlookup(nd)) != 0) 2336 return (error); 2337 if (nd->nl_nch.ncp->nc_vp) 2338 return (EEXIST); 2339 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2340 return (error); 2341 2342 VATTR_NULL(&vattr); 2343 vattr.va_type = VFIFO; 2344 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2345 vp = NULL; 2346 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2347 if (error == 0) 2348 vput(vp); 2349 return (error); 2350 } 2351 2352 /* 2353 * mkfifo_args(char *path, int mode) 2354 * 2355 * Create a named pipe. 2356 */ 2357 int 2358 sys_mkfifo(struct mkfifo_args *uap) 2359 { 2360 struct nlookupdata nd; 2361 int error; 2362 2363 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2364 if (error == 0) 2365 error = kern_mkfifo(&nd, uap->mode); 2366 nlookup_done(&nd); 2367 return (error); 2368 } 2369 2370 /* 2371 * mkfifoat_args(int fd, char *path, mode_t mode) 2372 * 2373 * Create a named pipe. The path is relative to the directory associated 2374 * with fd. 2375 */ 2376 int 2377 sys_mkfifoat(struct mkfifoat_args *uap) 2378 { 2379 struct nlookupdata nd; 2380 struct file *fp; 2381 int error; 2382 2383 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2384 if (error == 0) 2385 error = kern_mkfifo(&nd, uap->mode); 2386 nlookup_done_at(&nd, fp); 2387 return (error); 2388 } 2389 2390 static int hardlink_check_uid = 0; 2391 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2392 &hardlink_check_uid, 0, 2393 "Unprivileged processes cannot create hard links to files owned by other " 2394 "users"); 2395 static int hardlink_check_gid = 0; 2396 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2397 &hardlink_check_gid, 0, 2398 "Unprivileged processes cannot create hard links to files owned by other " 2399 "groups"); 2400 2401 static int 2402 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2403 { 2404 struct vattr va; 2405 int error; 2406 2407 /* 2408 * Shortcut if disabled 2409 */ 2410 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2411 return (0); 2412 2413 /* 2414 * Privileged user can always hardlink 2415 */ 2416 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2417 return (0); 2418 2419 /* 2420 * Otherwise only if the originating file is owned by the 2421 * same user or group. Note that any group is allowed if 2422 * the file is owned by the caller. 2423 */ 2424 error = VOP_GETATTR(vp, &va); 2425 if (error != 0) 2426 return (error); 2427 2428 if (hardlink_check_uid) { 2429 if (cred->cr_uid != va.va_uid) 2430 return (EPERM); 2431 } 2432 2433 if (hardlink_check_gid) { 2434 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2435 return (EPERM); 2436 } 2437 2438 return (0); 2439 } 2440 2441 int 2442 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2443 { 2444 struct thread *td = curthread; 2445 struct vnode *vp; 2446 int error; 2447 2448 /* 2449 * Lookup the source and obtained a locked vnode. 2450 * 2451 * You may only hardlink a file which you have write permission 2452 * on or which you own. 2453 * 2454 * XXX relookup on vget failure / race ? 2455 */ 2456 bwillinode(1); 2457 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2458 if ((error = nlookup(nd)) != 0) 2459 return (error); 2460 vp = nd->nl_nch.ncp->nc_vp; 2461 KKASSERT(vp != NULL); 2462 if (vp->v_type == VDIR) 2463 return (EPERM); /* POSIX */ 2464 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2465 return (error); 2466 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2467 return (error); 2468 2469 /* 2470 * Unlock the source so we can lookup the target without deadlocking 2471 * (XXX vp is locked already, possible other deadlock?). The target 2472 * must not exist. 2473 */ 2474 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2475 nd->nl_flags &= ~NLC_NCPISLOCKED; 2476 cache_unlock(&nd->nl_nch); 2477 vn_unlock(vp); 2478 2479 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2480 if ((error = nlookup(linknd)) != 0) { 2481 vrele(vp); 2482 return (error); 2483 } 2484 if (linknd->nl_nch.ncp->nc_vp) { 2485 vrele(vp); 2486 return (EEXIST); 2487 } 2488 VFS_MODIFYING(vp->v_mount); 2489 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2490 if (error) { 2491 vrele(vp); 2492 return (error); 2493 } 2494 2495 /* 2496 * Finally run the new API VOP. 2497 */ 2498 error = can_hardlink(vp, td, td->td_ucred); 2499 if (error == 0) { 2500 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2501 vp, linknd->nl_cred); 2502 } 2503 vput(vp); 2504 return (error); 2505 } 2506 2507 /* 2508 * link_args(char *path, char *link) 2509 * 2510 * Make a hard file link. 2511 */ 2512 int 2513 sys_link(struct link_args *uap) 2514 { 2515 struct nlookupdata nd, linknd; 2516 int error; 2517 2518 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2519 if (error == 0) { 2520 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2521 if (error == 0) 2522 error = kern_link(&nd, &linknd); 2523 nlookup_done(&linknd); 2524 } 2525 nlookup_done(&nd); 2526 return (error); 2527 } 2528 2529 /* 2530 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2531 * 2532 * Make a hard file link. The path1 argument is relative to the directory 2533 * associated with fd1, and similarly the path2 argument is relative to 2534 * the directory associated with fd2. 2535 */ 2536 int 2537 sys_linkat(struct linkat_args *uap) 2538 { 2539 struct nlookupdata nd, linknd; 2540 struct file *fp1, *fp2; 2541 int error; 2542 2543 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2544 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2545 if (error == 0) { 2546 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2547 uap->path2, UIO_USERSPACE, 0); 2548 if (error == 0) 2549 error = kern_link(&nd, &linknd); 2550 nlookup_done_at(&linknd, fp2); 2551 } 2552 nlookup_done_at(&nd, fp1); 2553 return (error); 2554 } 2555 2556 int 2557 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2558 { 2559 struct vattr vattr; 2560 struct vnode *vp; 2561 struct vnode *dvp; 2562 int error; 2563 2564 bwillinode(1); 2565 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2566 if ((error = nlookup(nd)) != 0) 2567 return (error); 2568 if (nd->nl_nch.ncp->nc_vp) 2569 return (EEXIST); 2570 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2571 return (error); 2572 dvp = nd->nl_dvp; 2573 VATTR_NULL(&vattr); 2574 vattr.va_mode = mode; 2575 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2576 if (error == 0) 2577 vput(vp); 2578 return (error); 2579 } 2580 2581 /* 2582 * symlink(char *path, char *link) 2583 * 2584 * Make a symbolic link. 2585 */ 2586 int 2587 sys_symlink(struct symlink_args *uap) 2588 { 2589 struct thread *td = curthread; 2590 struct nlookupdata nd; 2591 char *path; 2592 int error; 2593 int mode; 2594 2595 path = objcache_get(namei_oc, M_WAITOK); 2596 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2597 if (error == 0) { 2598 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2599 if (error == 0) { 2600 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2601 error = kern_symlink(&nd, path, mode); 2602 } 2603 nlookup_done(&nd); 2604 } 2605 objcache_put(namei_oc, path); 2606 return (error); 2607 } 2608 2609 /* 2610 * symlinkat_args(char *path1, int fd, char *path2) 2611 * 2612 * Make a symbolic link. The path2 argument is relative to the directory 2613 * associated with fd. 2614 */ 2615 int 2616 sys_symlinkat(struct symlinkat_args *uap) 2617 { 2618 struct thread *td = curthread; 2619 struct nlookupdata nd; 2620 struct file *fp; 2621 char *path1; 2622 int error; 2623 int mode; 2624 2625 path1 = objcache_get(namei_oc, M_WAITOK); 2626 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2627 if (error == 0) { 2628 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2629 UIO_USERSPACE, 0); 2630 if (error == 0) { 2631 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2632 error = kern_symlink(&nd, path1, mode); 2633 } 2634 nlookup_done_at(&nd, fp); 2635 } 2636 objcache_put(namei_oc, path1); 2637 return (error); 2638 } 2639 2640 /* 2641 * undelete_args(char *path) 2642 * 2643 * Delete a whiteout from the filesystem. 2644 */ 2645 int 2646 sys_undelete(struct undelete_args *uap) 2647 { 2648 struct nlookupdata nd; 2649 int error; 2650 2651 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2652 bwillinode(1); 2653 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2654 if (error == 0) 2655 error = nlookup(&nd); 2656 if (error == 0) 2657 error = ncp_writechk(&nd.nl_nch); 2658 if (error == 0) { 2659 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2660 NAMEI_DELETE); 2661 } 2662 nlookup_done(&nd); 2663 return (error); 2664 } 2665 2666 int 2667 kern_unlink(struct nlookupdata *nd) 2668 { 2669 int error; 2670 2671 bwillinode(1); 2672 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2673 if ((error = nlookup(nd)) != 0) 2674 return (error); 2675 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2676 return (error); 2677 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2678 return (error); 2679 } 2680 2681 /* 2682 * unlink_args(char *path) 2683 * 2684 * Delete a name from the filesystem. 2685 */ 2686 int 2687 sys_unlink(struct unlink_args *uap) 2688 { 2689 struct nlookupdata nd; 2690 int error; 2691 2692 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2693 if (error == 0) 2694 error = kern_unlink(&nd); 2695 nlookup_done(&nd); 2696 return (error); 2697 } 2698 2699 2700 /* 2701 * unlinkat_args(int fd, char *path, int flags) 2702 * 2703 * Delete the file or directory entry pointed to by fd/path. 2704 */ 2705 int 2706 sys_unlinkat(struct unlinkat_args *uap) 2707 { 2708 struct nlookupdata nd; 2709 struct file *fp; 2710 int error; 2711 2712 if (uap->flags & ~AT_REMOVEDIR) 2713 return (EINVAL); 2714 2715 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2716 if (error == 0) { 2717 if (uap->flags & AT_REMOVEDIR) 2718 error = kern_rmdir(&nd); 2719 else 2720 error = kern_unlink(&nd); 2721 } 2722 nlookup_done_at(&nd, fp); 2723 return (error); 2724 } 2725 2726 int 2727 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2728 { 2729 struct thread *td = curthread; 2730 struct file *fp; 2731 struct vnode *vp; 2732 struct vattr vattr; 2733 off_t new_offset; 2734 int error; 2735 2736 fp = holdfp(td, fd, -1); 2737 if (fp == NULL) 2738 return (EBADF); 2739 if (fp->f_type != DTYPE_VNODE) { 2740 error = ESPIPE; 2741 goto done; 2742 } 2743 vp = (struct vnode *)fp->f_data; 2744 2745 switch (whence) { 2746 case L_INCR: 2747 spin_lock(&fp->f_spin); 2748 new_offset = fp->f_offset + offset; 2749 error = 0; 2750 break; 2751 case L_XTND: 2752 error = VOP_GETATTR_FP(vp, &vattr, fp); 2753 spin_lock(&fp->f_spin); 2754 new_offset = offset + vattr.va_size; 2755 break; 2756 case L_SET: 2757 new_offset = offset; 2758 error = 0; 2759 spin_lock(&fp->f_spin); 2760 break; 2761 default: 2762 new_offset = 0; 2763 error = EINVAL; 2764 spin_lock(&fp->f_spin); 2765 break; 2766 } 2767 2768 /* 2769 * Validate the seek position. Negative offsets are not allowed 2770 * for regular files or directories. 2771 * 2772 * Normally we would also not want to allow negative offsets for 2773 * character and block-special devices. However kvm addresses 2774 * on 64 bit architectures might appear to be negative and must 2775 * be allowed. 2776 */ 2777 if (error == 0) { 2778 if (new_offset < 0 && 2779 (vp->v_type == VREG || vp->v_type == VDIR)) { 2780 error = EINVAL; 2781 } else { 2782 fp->f_offset = new_offset; 2783 } 2784 } 2785 *res = fp->f_offset; 2786 spin_unlock(&fp->f_spin); 2787 done: 2788 dropfp(td, fd, fp); 2789 2790 return (error); 2791 } 2792 2793 /* 2794 * lseek_args(int fd, int pad, off_t offset, int whence) 2795 * 2796 * Reposition read/write file offset. 2797 */ 2798 int 2799 sys_lseek(struct lseek_args *uap) 2800 { 2801 int error; 2802 2803 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2804 &uap->sysmsg_offset); 2805 2806 return (error); 2807 } 2808 2809 /* 2810 * Check if current process can access given file. amode is a bitmask of *_OK 2811 * access bits. flags is a bitmask of AT_* flags. 2812 */ 2813 int 2814 kern_access(struct nlookupdata *nd, int amode, int flags) 2815 { 2816 struct vnode *vp; 2817 int error, mode; 2818 2819 if (flags & ~AT_EACCESS) 2820 return (EINVAL); 2821 nd->nl_flags |= NLC_SHAREDLOCK; 2822 if ((error = nlookup(nd)) != 0) 2823 return (error); 2824 retry: 2825 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2826 if (error) 2827 return (error); 2828 2829 /* Flags == 0 means only check for existence. */ 2830 if (amode) { 2831 mode = 0; 2832 if (amode & R_OK) 2833 mode |= VREAD; 2834 if (amode & W_OK) 2835 mode |= VWRITE; 2836 if (amode & X_OK) 2837 mode |= VEXEC; 2838 if ((mode & VWRITE) == 0 || 2839 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2840 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2841 2842 /* 2843 * If the file handle is stale we have to re-resolve the 2844 * entry with the ncp held exclusively. This is a hack 2845 * at the moment. 2846 */ 2847 if (error == ESTALE) { 2848 vput(vp); 2849 cache_unlock(&nd->nl_nch); 2850 cache_lock(&nd->nl_nch); 2851 cache_setunresolved(&nd->nl_nch); 2852 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2853 if (error == 0) { 2854 vp = NULL; 2855 goto retry; 2856 } 2857 return(error); 2858 } 2859 } 2860 vput(vp); 2861 return (error); 2862 } 2863 2864 /* 2865 * access_args(char *path, int flags) 2866 * 2867 * Check access permissions. 2868 */ 2869 int 2870 sys_access(struct access_args *uap) 2871 { 2872 struct nlookupdata nd; 2873 int error; 2874 2875 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2876 if (error == 0) 2877 error = kern_access(&nd, uap->flags, 0); 2878 nlookup_done(&nd); 2879 return (error); 2880 } 2881 2882 2883 /* 2884 * eaccess_args(char *path, int flags) 2885 * 2886 * Check access permissions. 2887 */ 2888 int 2889 sys_eaccess(struct eaccess_args *uap) 2890 { 2891 struct nlookupdata nd; 2892 int error; 2893 2894 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2895 if (error == 0) 2896 error = kern_access(&nd, uap->flags, AT_EACCESS); 2897 nlookup_done(&nd); 2898 return (error); 2899 } 2900 2901 2902 /* 2903 * faccessat_args(int fd, char *path, int amode, int flags) 2904 * 2905 * Check access permissions. 2906 */ 2907 int 2908 sys_faccessat(struct faccessat_args *uap) 2909 { 2910 struct nlookupdata nd; 2911 struct file *fp; 2912 int error; 2913 2914 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2915 NLC_FOLLOW); 2916 if (error == 0) 2917 error = kern_access(&nd, uap->amode, uap->flags); 2918 nlookup_done_at(&nd, fp); 2919 return (error); 2920 } 2921 2922 int 2923 kern_stat(struct nlookupdata *nd, struct stat *st) 2924 { 2925 int error; 2926 struct vnode *vp; 2927 2928 nd->nl_flags |= NLC_SHAREDLOCK; 2929 if ((error = nlookup(nd)) != 0) 2930 return (error); 2931 again: 2932 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2933 return (ENOENT); 2934 2935 if ((error = vget(vp, LK_SHARED)) != 0) 2936 return (error); 2937 error = vn_stat(vp, st, nd->nl_cred); 2938 2939 /* 2940 * If the file handle is stale we have to re-resolve the 2941 * entry with the ncp held exclusively. This is a hack 2942 * at the moment. 2943 */ 2944 if (error == ESTALE) { 2945 vput(vp); 2946 cache_unlock(&nd->nl_nch); 2947 cache_lock(&nd->nl_nch); 2948 cache_setunresolved(&nd->nl_nch); 2949 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2950 if (error == 0) 2951 goto again; 2952 } else { 2953 vput(vp); 2954 } 2955 return (error); 2956 } 2957 2958 /* 2959 * stat_args(char *path, struct stat *ub) 2960 * 2961 * Get file status; this version follows links. 2962 */ 2963 int 2964 sys_stat(struct stat_args *uap) 2965 { 2966 struct nlookupdata nd; 2967 struct stat st; 2968 int error; 2969 2970 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2971 if (error == 0) { 2972 error = kern_stat(&nd, &st); 2973 if (error == 0) 2974 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2975 } 2976 nlookup_done(&nd); 2977 return (error); 2978 } 2979 2980 /* 2981 * lstat_args(char *path, struct stat *ub) 2982 * 2983 * Get file status; this version does not follow links. 2984 */ 2985 int 2986 sys_lstat(struct lstat_args *uap) 2987 { 2988 struct nlookupdata nd; 2989 struct stat st; 2990 int error; 2991 2992 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2993 if (error == 0) { 2994 error = kern_stat(&nd, &st); 2995 if (error == 0) 2996 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2997 } 2998 nlookup_done(&nd); 2999 return (error); 3000 } 3001 3002 /* 3003 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3004 * 3005 * Get status of file pointed to by fd/path. 3006 */ 3007 int 3008 sys_fstatat(struct fstatat_args *uap) 3009 { 3010 struct nlookupdata nd; 3011 struct stat st; 3012 int error; 3013 int flags; 3014 struct file *fp; 3015 3016 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3017 return (EINVAL); 3018 3019 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3020 3021 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3022 UIO_USERSPACE, flags); 3023 if (error == 0) { 3024 error = kern_stat(&nd, &st); 3025 if (error == 0) 3026 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3027 } 3028 nlookup_done_at(&nd, fp); 3029 return (error); 3030 } 3031 3032 static int 3033 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3034 { 3035 struct nlookupdata nd; 3036 struct vnode *vp; 3037 int error; 3038 3039 vp = NULL; 3040 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3041 if (error == 0) 3042 error = nlookup(&nd); 3043 if (error == 0) 3044 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3045 nlookup_done(&nd); 3046 if (error == 0) { 3047 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3048 vput(vp); 3049 } 3050 return (error); 3051 } 3052 3053 /* 3054 * pathconf_Args(char *path, int name) 3055 * 3056 * Get configurable pathname variables. 3057 */ 3058 int 3059 sys_pathconf(struct pathconf_args *uap) 3060 { 3061 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3062 &uap->sysmsg_reg)); 3063 } 3064 3065 /* 3066 * lpathconf_Args(char *path, int name) 3067 * 3068 * Get configurable pathname variables, but don't follow symlinks. 3069 */ 3070 int 3071 sys_lpathconf(struct lpathconf_args *uap) 3072 { 3073 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3074 } 3075 3076 /* 3077 * XXX: daver 3078 * kern_readlink isn't properly split yet. There is a copyin burried 3079 * in VOP_READLINK(). 3080 */ 3081 int 3082 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3083 { 3084 struct thread *td = curthread; 3085 struct vnode *vp; 3086 struct iovec aiov; 3087 struct uio auio; 3088 int error; 3089 3090 nd->nl_flags |= NLC_SHAREDLOCK; 3091 if ((error = nlookup(nd)) != 0) 3092 return (error); 3093 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3094 if (error) 3095 return (error); 3096 if (vp->v_type != VLNK) { 3097 error = EINVAL; 3098 } else { 3099 aiov.iov_base = buf; 3100 aiov.iov_len = count; 3101 auio.uio_iov = &aiov; 3102 auio.uio_iovcnt = 1; 3103 auio.uio_offset = 0; 3104 auio.uio_rw = UIO_READ; 3105 auio.uio_segflg = UIO_USERSPACE; 3106 auio.uio_td = td; 3107 auio.uio_resid = count; 3108 error = VOP_READLINK(vp, &auio, td->td_ucred); 3109 } 3110 vput(vp); 3111 *res = count - auio.uio_resid; 3112 return (error); 3113 } 3114 3115 /* 3116 * readlink_args(char *path, char *buf, int count) 3117 * 3118 * Return target name of a symbolic link. 3119 */ 3120 int 3121 sys_readlink(struct readlink_args *uap) 3122 { 3123 struct nlookupdata nd; 3124 int error; 3125 3126 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3127 if (error == 0) { 3128 error = kern_readlink(&nd, uap->buf, uap->count, 3129 &uap->sysmsg_result); 3130 } 3131 nlookup_done(&nd); 3132 return (error); 3133 } 3134 3135 /* 3136 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3137 * 3138 * Return target name of a symbolic link. The path is relative to the 3139 * directory associated with fd. 3140 */ 3141 int 3142 sys_readlinkat(struct readlinkat_args *uap) 3143 { 3144 struct nlookupdata nd; 3145 struct file *fp; 3146 int error; 3147 3148 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3149 if (error == 0) { 3150 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3151 &uap->sysmsg_result); 3152 } 3153 nlookup_done_at(&nd, fp); 3154 return (error); 3155 } 3156 3157 static int 3158 setfflags(struct vnode *vp, int flags) 3159 { 3160 struct thread *td = curthread; 3161 int error; 3162 struct vattr vattr; 3163 3164 /* 3165 * Prevent non-root users from setting flags on devices. When 3166 * a device is reused, users can retain ownership of the device 3167 * if they are allowed to set flags and programs assume that 3168 * chown can't fail when done as root. 3169 */ 3170 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3171 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3172 return (error); 3173 3174 /* 3175 * note: vget is required for any operation that might mod the vnode 3176 * so VINACTIVE is properly cleared. 3177 */ 3178 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3179 VATTR_NULL(&vattr); 3180 vattr.va_flags = flags; 3181 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3182 vput(vp); 3183 } 3184 return (error); 3185 } 3186 3187 /* 3188 * chflags(char *path, int flags) 3189 * 3190 * Change flags of a file given a path name. 3191 */ 3192 int 3193 sys_chflags(struct chflags_args *uap) 3194 { 3195 struct nlookupdata nd; 3196 struct vnode *vp; 3197 int error; 3198 3199 vp = NULL; 3200 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3201 if (error == 0) 3202 error = nlookup(&nd); 3203 if (error == 0) 3204 error = ncp_writechk(&nd.nl_nch); 3205 if (error == 0) 3206 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3207 nlookup_done(&nd); 3208 if (error == 0) { 3209 error = setfflags(vp, uap->flags); 3210 vrele(vp); 3211 } 3212 return (error); 3213 } 3214 3215 /* 3216 * lchflags(char *path, int flags) 3217 * 3218 * Change flags of a file given a path name, but don't follow symlinks. 3219 */ 3220 int 3221 sys_lchflags(struct lchflags_args *uap) 3222 { 3223 struct nlookupdata nd; 3224 struct vnode *vp; 3225 int error; 3226 3227 vp = NULL; 3228 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3229 if (error == 0) 3230 error = nlookup(&nd); 3231 if (error == 0) 3232 error = ncp_writechk(&nd.nl_nch); 3233 if (error == 0) 3234 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3235 nlookup_done(&nd); 3236 if (error == 0) { 3237 error = setfflags(vp, uap->flags); 3238 vrele(vp); 3239 } 3240 return (error); 3241 } 3242 3243 /* 3244 * fchflags_args(int fd, int flags) 3245 * 3246 * Change flags of a file given a file descriptor. 3247 */ 3248 int 3249 sys_fchflags(struct fchflags_args *uap) 3250 { 3251 struct thread *td = curthread; 3252 struct file *fp; 3253 int error; 3254 3255 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3256 return (error); 3257 if (fp->f_nchandle.ncp) 3258 error = ncp_writechk(&fp->f_nchandle); 3259 if (error == 0) 3260 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3261 fdrop(fp); 3262 return (error); 3263 } 3264 3265 /* 3266 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3267 * change flags given a pathname relative to a filedescriptor 3268 */ 3269 int sys_chflagsat(struct chflagsat_args *uap) 3270 { 3271 struct nlookupdata nd; 3272 struct vnode *vp; 3273 struct file *fp; 3274 int error; 3275 int lookupflags; 3276 3277 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3278 return (EINVAL); 3279 3280 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3281 3282 vp = NULL; 3283 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3284 if (error == 0) 3285 error = nlookup(&nd); 3286 if (error == 0) 3287 error = ncp_writechk(&nd.nl_nch); 3288 if (error == 0) 3289 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3290 nlookup_done_at(&nd, fp); 3291 if (error == 0) { 3292 error = setfflags(vp, uap->flags); 3293 vrele(vp); 3294 } 3295 return (error); 3296 } 3297 3298 3299 static int 3300 setfmode(struct vnode *vp, int mode) 3301 { 3302 struct thread *td = curthread; 3303 int error; 3304 struct vattr vattr; 3305 3306 /* 3307 * note: vget is required for any operation that might mod the vnode 3308 * so VINACTIVE is properly cleared. 3309 */ 3310 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3311 VATTR_NULL(&vattr); 3312 vattr.va_mode = mode & ALLPERMS; 3313 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3314 cache_inval_wxok(vp); 3315 vput(vp); 3316 } 3317 return error; 3318 } 3319 3320 int 3321 kern_chmod(struct nlookupdata *nd, int mode) 3322 { 3323 struct vnode *vp; 3324 int error; 3325 3326 if ((error = nlookup(nd)) != 0) 3327 return (error); 3328 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3329 return (error); 3330 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3331 error = setfmode(vp, mode); 3332 vrele(vp); 3333 return (error); 3334 } 3335 3336 /* 3337 * chmod_args(char *path, int mode) 3338 * 3339 * Change mode of a file given path name. 3340 */ 3341 int 3342 sys_chmod(struct chmod_args *uap) 3343 { 3344 struct nlookupdata nd; 3345 int error; 3346 3347 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3348 if (error == 0) 3349 error = kern_chmod(&nd, uap->mode); 3350 nlookup_done(&nd); 3351 return (error); 3352 } 3353 3354 /* 3355 * lchmod_args(char *path, int mode) 3356 * 3357 * Change mode of a file given path name (don't follow links.) 3358 */ 3359 int 3360 sys_lchmod(struct lchmod_args *uap) 3361 { 3362 struct nlookupdata nd; 3363 int error; 3364 3365 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3366 if (error == 0) 3367 error = kern_chmod(&nd, uap->mode); 3368 nlookup_done(&nd); 3369 return (error); 3370 } 3371 3372 /* 3373 * fchmod_args(int fd, int mode) 3374 * 3375 * Change mode of a file given a file descriptor. 3376 */ 3377 int 3378 sys_fchmod(struct fchmod_args *uap) 3379 { 3380 struct thread *td = curthread; 3381 struct file *fp; 3382 int error; 3383 3384 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3385 return (error); 3386 if (fp->f_nchandle.ncp) 3387 error = ncp_writechk(&fp->f_nchandle); 3388 if (error == 0) 3389 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3390 fdrop(fp); 3391 return (error); 3392 } 3393 3394 /* 3395 * fchmodat_args(char *path, int mode) 3396 * 3397 * Change mode of a file pointed to by fd/path. 3398 */ 3399 int 3400 sys_fchmodat(struct fchmodat_args *uap) 3401 { 3402 struct nlookupdata nd; 3403 struct file *fp; 3404 int error; 3405 int flags; 3406 3407 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3408 return (EINVAL); 3409 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3410 3411 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3412 UIO_USERSPACE, flags); 3413 if (error == 0) 3414 error = kern_chmod(&nd, uap->mode); 3415 nlookup_done_at(&nd, fp); 3416 return (error); 3417 } 3418 3419 static int 3420 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3421 { 3422 struct thread *td = curthread; 3423 int error; 3424 struct vattr vattr; 3425 uid_t o_uid; 3426 gid_t o_gid; 3427 uint64_t size; 3428 3429 /* 3430 * note: vget is required for any operation that might mod the vnode 3431 * so VINACTIVE is properly cleared. 3432 */ 3433 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3434 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3435 return error; 3436 o_uid = vattr.va_uid; 3437 o_gid = vattr.va_gid; 3438 size = vattr.va_size; 3439 3440 VATTR_NULL(&vattr); 3441 vattr.va_uid = uid; 3442 vattr.va_gid = gid; 3443 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3444 vput(vp); 3445 } 3446 3447 if (error == 0) { 3448 if (uid == -1) 3449 uid = o_uid; 3450 if (gid == -1) 3451 gid = o_gid; 3452 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3453 VFS_ACCOUNT(mp, uid, gid, size); 3454 } 3455 3456 return error; 3457 } 3458 3459 int 3460 kern_chown(struct nlookupdata *nd, int uid, int gid) 3461 { 3462 struct vnode *vp; 3463 int error; 3464 3465 if ((error = nlookup(nd)) != 0) 3466 return (error); 3467 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3468 return (error); 3469 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3470 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3471 vrele(vp); 3472 return (error); 3473 } 3474 3475 /* 3476 * chown(char *path, int uid, int gid) 3477 * 3478 * Set ownership given a path name. 3479 */ 3480 int 3481 sys_chown(struct chown_args *uap) 3482 { 3483 struct nlookupdata nd; 3484 int error; 3485 3486 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3487 if (error == 0) 3488 error = kern_chown(&nd, uap->uid, uap->gid); 3489 nlookup_done(&nd); 3490 return (error); 3491 } 3492 3493 /* 3494 * lchown_args(char *path, int uid, int gid) 3495 * 3496 * Set ownership given a path name, do not cross symlinks. 3497 */ 3498 int 3499 sys_lchown(struct lchown_args *uap) 3500 { 3501 struct nlookupdata nd; 3502 int error; 3503 3504 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3505 if (error == 0) 3506 error = kern_chown(&nd, uap->uid, uap->gid); 3507 nlookup_done(&nd); 3508 return (error); 3509 } 3510 3511 /* 3512 * fchown_args(int fd, int uid, int gid) 3513 * 3514 * Set ownership given a file descriptor. 3515 */ 3516 int 3517 sys_fchown(struct fchown_args *uap) 3518 { 3519 struct thread *td = curthread; 3520 struct proc *p = td->td_proc; 3521 struct file *fp; 3522 int error; 3523 3524 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3525 return (error); 3526 if (fp->f_nchandle.ncp) 3527 error = ncp_writechk(&fp->f_nchandle); 3528 if (error == 0) 3529 error = setfown(p->p_fd->fd_ncdir.mount, 3530 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3531 fdrop(fp); 3532 return (error); 3533 } 3534 3535 /* 3536 * fchownat(int fd, char *path, int uid, int gid, int flags) 3537 * 3538 * Set ownership of file pointed to by fd/path. 3539 */ 3540 int 3541 sys_fchownat(struct fchownat_args *uap) 3542 { 3543 struct nlookupdata nd; 3544 struct file *fp; 3545 int error; 3546 int flags; 3547 3548 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3549 return (EINVAL); 3550 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3551 3552 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3553 UIO_USERSPACE, flags); 3554 if (error == 0) 3555 error = kern_chown(&nd, uap->uid, uap->gid); 3556 nlookup_done_at(&nd, fp); 3557 return (error); 3558 } 3559 3560 3561 static int 3562 getutimes(struct timeval *tvp, struct timespec *tsp) 3563 { 3564 struct timeval tv[2]; 3565 int error; 3566 3567 if (tvp == NULL) { 3568 microtime(&tv[0]); 3569 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3570 tsp[1] = tsp[0]; 3571 } else { 3572 if ((error = itimerfix(tvp)) != 0) 3573 return (error); 3574 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3575 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3576 } 3577 return 0; 3578 } 3579 3580 static int 3581 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3582 { 3583 struct timespec tsnow; 3584 int error; 3585 3586 *nullflag = 0; 3587 nanotime(&tsnow); 3588 if (ts == NULL) { 3589 newts[0] = tsnow; 3590 newts[1] = tsnow; 3591 *nullflag = 1; 3592 return (0); 3593 } 3594 3595 newts[0] = ts[0]; 3596 newts[1] = ts[1]; 3597 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) 3598 return (0); 3599 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3600 *nullflag = 1; 3601 3602 if (newts[0].tv_nsec == UTIME_OMIT) 3603 newts[0].tv_sec = VNOVAL; 3604 else if (newts[0].tv_nsec == UTIME_NOW) 3605 newts[0] = tsnow; 3606 else if ((error = itimespecfix(&newts[0])) != 0) 3607 return (error); 3608 3609 if (newts[1].tv_nsec == UTIME_OMIT) 3610 newts[1].tv_sec = VNOVAL; 3611 else if (newts[1].tv_nsec == UTIME_NOW) 3612 newts[1] = tsnow; 3613 else if ((error = itimespecfix(&newts[1])) != 0) 3614 return (error); 3615 3616 return (0); 3617 } 3618 3619 static int 3620 setutimes(struct vnode *vp, struct vattr *vattr, 3621 const struct timespec *ts, int nullflag) 3622 { 3623 struct thread *td = curthread; 3624 int error; 3625 3626 VATTR_NULL(vattr); 3627 vattr->va_atime = ts[0]; 3628 vattr->va_mtime = ts[1]; 3629 if (nullflag) 3630 vattr->va_vaflags |= VA_UTIMES_NULL; 3631 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3632 3633 return error; 3634 } 3635 3636 int 3637 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3638 { 3639 struct timespec ts[2]; 3640 int error; 3641 3642 if (tptr) { 3643 if ((error = getutimes(tptr, ts)) != 0) 3644 return (error); 3645 } 3646 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3647 return (error); 3648 } 3649 3650 /* 3651 * utimes_args(char *path, struct timeval *tptr) 3652 * 3653 * Set the access and modification times of a file. 3654 */ 3655 int 3656 sys_utimes(struct utimes_args *uap) 3657 { 3658 struct timeval tv[2]; 3659 struct nlookupdata nd; 3660 int error; 3661 3662 if (uap->tptr) { 3663 error = copyin(uap->tptr, tv, sizeof(tv)); 3664 if (error) 3665 return (error); 3666 } 3667 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3668 if (error == 0) 3669 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3670 nlookup_done(&nd); 3671 return (error); 3672 } 3673 3674 /* 3675 * lutimes_args(char *path, struct timeval *tptr) 3676 * 3677 * Set the access and modification times of a file. 3678 */ 3679 int 3680 sys_lutimes(struct lutimes_args *uap) 3681 { 3682 struct timeval tv[2]; 3683 struct nlookupdata nd; 3684 int error; 3685 3686 if (uap->tptr) { 3687 error = copyin(uap->tptr, tv, sizeof(tv)); 3688 if (error) 3689 return (error); 3690 } 3691 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3692 if (error == 0) 3693 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3694 nlookup_done(&nd); 3695 return (error); 3696 } 3697 3698 /* 3699 * Set utimes on a file descriptor. The creds used to open the 3700 * file are used to determine whether the operation is allowed 3701 * or not. 3702 */ 3703 int 3704 kern_futimens(int fd, struct timespec *ts) 3705 { 3706 struct thread *td = curthread; 3707 struct timespec newts[2]; 3708 struct file *fp; 3709 struct vnode *vp; 3710 struct vattr vattr; 3711 int nullflag; 3712 int error; 3713 3714 error = getutimens(ts, newts, &nullflag); 3715 if (error) 3716 return (error); 3717 if ((error = holdvnode(td, fd, &fp)) != 0) 3718 return (error); 3719 if (fp->f_nchandle.ncp) 3720 error = ncp_writechk(&fp->f_nchandle); 3721 if (error == 0) { 3722 vp = fp->f_data; 3723 error = vget(vp, LK_EXCLUSIVE); 3724 if (error == 0) { 3725 error = VOP_GETATTR_FP(vp, &vattr, fp); 3726 if (error == 0) { 3727 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3728 fp->f_cred); 3729 } 3730 if (error == 0) { 3731 error = setutimes(vp, &vattr, newts, nullflag); 3732 } 3733 vput(vp); 3734 } 3735 } 3736 fdrop(fp); 3737 return (error); 3738 } 3739 3740 /* 3741 * futimens_args(int fd, struct timespec *ts) 3742 * 3743 * Set the access and modification times of a file. 3744 */ 3745 int 3746 sys_futimens(struct futimens_args *uap) 3747 { 3748 struct timespec ts[2]; 3749 int error; 3750 3751 if (uap->ts) { 3752 error = copyin(uap->ts, ts, sizeof(ts)); 3753 if (error) 3754 return (error); 3755 } 3756 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3757 return (error); 3758 } 3759 3760 int 3761 kern_futimes(int fd, struct timeval *tptr) 3762 { 3763 struct timespec ts[2]; 3764 int error; 3765 3766 if (tptr) { 3767 if ((error = getutimes(tptr, ts)) != 0) 3768 return (error); 3769 } 3770 error = kern_futimens(fd, tptr ? ts : NULL); 3771 return (error); 3772 } 3773 3774 /* 3775 * futimes_args(int fd, struct timeval *tptr) 3776 * 3777 * Set the access and modification times of a file. 3778 */ 3779 int 3780 sys_futimes(struct futimes_args *uap) 3781 { 3782 struct timeval tv[2]; 3783 int error; 3784 3785 if (uap->tptr) { 3786 error = copyin(uap->tptr, tv, sizeof(tv)); 3787 if (error) 3788 return (error); 3789 } 3790 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3791 return (error); 3792 } 3793 3794 int 3795 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3796 { 3797 struct timespec newts[2]; 3798 struct vnode *vp; 3799 struct vattr vattr; 3800 int nullflag; 3801 int error; 3802 3803 if (flags & ~AT_SYMLINK_NOFOLLOW) 3804 return (EINVAL); 3805 3806 error = getutimens(ts, newts, &nullflag); 3807 if (error) 3808 return (error); 3809 3810 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3811 if ((error = nlookup(nd)) != 0) 3812 return (error); 3813 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3814 return (error); 3815 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3816 return (error); 3817 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3818 error = vget(vp, LK_EXCLUSIVE); 3819 if (error == 0) { 3820 error = setutimes(vp, &vattr, newts, nullflag); 3821 vput(vp); 3822 } 3823 } 3824 vrele(vp); 3825 return (error); 3826 } 3827 3828 /* 3829 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3830 * 3831 * Set file access and modification times of a file. 3832 */ 3833 int 3834 sys_utimensat(struct utimensat_args *uap) 3835 { 3836 struct timespec ts[2]; 3837 struct nlookupdata nd; 3838 struct file *fp; 3839 int error; 3840 int flags; 3841 3842 if (uap->ts) { 3843 error = copyin(uap->ts, ts, sizeof(ts)); 3844 if (error) 3845 return (error); 3846 } 3847 3848 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3849 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3850 UIO_USERSPACE, flags); 3851 if (error == 0) 3852 error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags); 3853 nlookup_done_at(&nd, fp); 3854 return (error); 3855 } 3856 3857 int 3858 kern_truncate(struct nlookupdata *nd, off_t length) 3859 { 3860 struct vnode *vp; 3861 struct vattr vattr; 3862 int error; 3863 uid_t uid = 0; 3864 gid_t gid = 0; 3865 uint64_t old_size = 0; 3866 3867 if (length < 0) 3868 return(EINVAL); 3869 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3870 if ((error = nlookup(nd)) != 0) 3871 return (error); 3872 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3873 return (error); 3874 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3875 return (error); 3876 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3877 if (error) { 3878 vrele(vp); 3879 return (error); 3880 } 3881 if (vp->v_type == VDIR) { 3882 error = EISDIR; 3883 goto done; 3884 } 3885 if (vfs_quota_enabled) { 3886 error = VOP_GETATTR(vp, &vattr); 3887 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3888 uid = vattr.va_uid; 3889 gid = vattr.va_gid; 3890 old_size = vattr.va_size; 3891 } 3892 3893 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3894 VATTR_NULL(&vattr); 3895 vattr.va_size = length; 3896 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3897 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3898 } 3899 done: 3900 vput(vp); 3901 return (error); 3902 } 3903 3904 /* 3905 * truncate(char *path, int pad, off_t length) 3906 * 3907 * Truncate a file given its path name. 3908 */ 3909 int 3910 sys_truncate(struct truncate_args *uap) 3911 { 3912 struct nlookupdata nd; 3913 int error; 3914 3915 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3916 if (error == 0) 3917 error = kern_truncate(&nd, uap->length); 3918 nlookup_done(&nd); 3919 return error; 3920 } 3921 3922 int 3923 kern_ftruncate(int fd, off_t length) 3924 { 3925 struct thread *td = curthread; 3926 struct vattr vattr; 3927 struct vnode *vp; 3928 struct file *fp; 3929 int error; 3930 uid_t uid = 0; 3931 gid_t gid = 0; 3932 uint64_t old_size = 0; 3933 struct mount *mp; 3934 3935 if (length < 0) 3936 return(EINVAL); 3937 if ((error = holdvnode(td, fd, &fp)) != 0) 3938 return (error); 3939 if (fp->f_nchandle.ncp) { 3940 error = ncp_writechk(&fp->f_nchandle); 3941 if (error) 3942 goto done; 3943 } 3944 if ((fp->f_flag & FWRITE) == 0) { 3945 error = EINVAL; 3946 goto done; 3947 } 3948 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3949 error = EINVAL; 3950 goto done; 3951 } 3952 vp = (struct vnode *)fp->f_data; 3953 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3954 if (vp->v_type == VDIR) { 3955 error = EISDIR; 3956 vn_unlock(vp); 3957 goto done; 3958 } 3959 3960 if (vfs_quota_enabled) { 3961 error = VOP_GETATTR_FP(vp, &vattr, fp); 3962 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3963 uid = vattr.va_uid; 3964 gid = vattr.va_gid; 3965 old_size = vattr.va_size; 3966 } 3967 3968 if ((error = vn_writechk(vp, NULL)) == 0) { 3969 VATTR_NULL(&vattr); 3970 vattr.va_size = length; 3971 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp); 3972 mp = vq_vptomp(vp); 3973 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3974 } 3975 vn_unlock(vp); 3976 done: 3977 fdrop(fp); 3978 return (error); 3979 } 3980 3981 /* 3982 * ftruncate_args(int fd, int pad, off_t length) 3983 * 3984 * Truncate a file given a file descriptor. 3985 */ 3986 int 3987 sys_ftruncate(struct ftruncate_args *uap) 3988 { 3989 int error; 3990 3991 error = kern_ftruncate(uap->fd, uap->length); 3992 3993 return (error); 3994 } 3995 3996 /* 3997 * fsync(int fd) 3998 * 3999 * Sync an open file. 4000 */ 4001 int 4002 sys_fsync(struct fsync_args *uap) 4003 { 4004 struct thread *td = curthread; 4005 struct vnode *vp; 4006 struct file *fp; 4007 vm_object_t obj; 4008 int error; 4009 4010 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 4011 return (error); 4012 vp = (struct vnode *)fp->f_data; 4013 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4014 if ((obj = vp->v_object) != NULL) { 4015 if (vp->v_mount == NULL || 4016 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 4017 vm_object_page_clean(obj, 0, 0, 0); 4018 } 4019 } 4020 error = VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp); 4021 if (error == 0 && vp->v_mount) 4022 error = buf_fsync(vp); 4023 vn_unlock(vp); 4024 fdrop(fp); 4025 4026 return (error); 4027 } 4028 4029 int 4030 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 4031 { 4032 struct nchandle fnchd; 4033 struct nchandle tnchd; 4034 struct namecache *ncp; 4035 struct vnode *fdvp; 4036 struct vnode *tdvp; 4037 struct mount *mp; 4038 int error; 4039 u_int fncp_gen; 4040 u_int tncp_gen; 4041 4042 bwillinode(1); 4043 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 4044 if ((error = nlookup(fromnd)) != 0) 4045 return (error); 4046 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 4047 return (ENOENT); 4048 fnchd.mount = fromnd->nl_nch.mount; 4049 cache_hold(&fnchd); 4050 4051 /* 4052 * unlock the source nch so we can lookup the target nch without 4053 * deadlocking. The target may or may not exist so we do not check 4054 * for a target vp like kern_mkdir() and other creation functions do. 4055 * 4056 * The source and target directories are ref'd and rechecked after 4057 * everything is relocked to determine if the source or target file 4058 * has been renamed. 4059 */ 4060 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4061 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4062 4063 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4064 4065 cache_unlock(&fromnd->nl_nch); 4066 4067 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4068 if ((error = nlookup(tond)) != 0) { 4069 cache_drop(&fnchd); 4070 return (error); 4071 } 4072 tncp_gen = tond->nl_nch.ncp->nc_generation; 4073 4074 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4075 cache_drop(&fnchd); 4076 return (ENOENT); 4077 } 4078 tnchd.mount = tond->nl_nch.mount; 4079 cache_hold(&tnchd); 4080 4081 /* 4082 * If the source and target are the same there is nothing to do 4083 */ 4084 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4085 cache_drop(&fnchd); 4086 cache_drop(&tnchd); 4087 return (0); 4088 } 4089 4090 /* 4091 * Mount points cannot be renamed or overwritten 4092 */ 4093 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4094 NCF_ISMOUNTPT 4095 ) { 4096 cache_drop(&fnchd); 4097 cache_drop(&tnchd); 4098 return (EINVAL); 4099 } 4100 4101 /* 4102 * Relock the source ncp. cache_relock() will deal with any 4103 * deadlocks against the already-locked tond and will also 4104 * make sure both are resolved. 4105 * 4106 * NOTE AFTER RELOCKING: The source or target ncp may have become 4107 * invalid while they were unlocked, nc_vp and nc_mount could 4108 * be NULL. 4109 */ 4110 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 4111 &tond->nl_nch, tond->nl_cred); 4112 fromnd->nl_flags |= NLC_NCPISLOCKED; 4113 4114 /* 4115 * If the namecache generation changed for either fromnd or tond, 4116 * we must retry. 4117 */ 4118 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 4119 tond->nl_nch.ncp->nc_generation != tncp_gen) { 4120 kprintf("kern_rename: retry due to gen on: " 4121 "\"%s\" -> \"%s\"\n", 4122 fromnd->nl_nch.ncp->nc_name, 4123 tond->nl_nch.ncp->nc_name); 4124 cache_drop(&fnchd); 4125 cache_drop(&tnchd); 4126 return (EAGAIN); 4127 } 4128 4129 /* 4130 * If either fromnd or tond are marked destroyed a ripout occured 4131 * out from under us and we must retry. 4132 */ 4133 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 4134 fromnd->nl_nch.ncp->nc_vp == NULL || 4135 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 4136 kprintf("kern_rename: retry due to ripout on: " 4137 "\"%s\" -> \"%s\"\n", 4138 fromnd->nl_nch.ncp->nc_name, 4139 tond->nl_nch.ncp->nc_name); 4140 cache_drop(&fnchd); 4141 cache_drop(&tnchd); 4142 return (EAGAIN); 4143 } 4144 4145 /* 4146 * Make sure the parent directories linkages are the same. 4147 * XXX shouldn't be needed any more w/ generation check above. 4148 */ 4149 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4150 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4151 cache_drop(&fnchd); 4152 cache_drop(&tnchd); 4153 return (ENOENT); 4154 } 4155 4156 /* 4157 * Both the source and target must be within the same filesystem and 4158 * in the same filesystem as their parent directories within the 4159 * namecache topology. 4160 * 4161 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 4162 */ 4163 mp = fnchd.mount; 4164 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4165 mp != tond->nl_nch.mount) { 4166 cache_drop(&fnchd); 4167 cache_drop(&tnchd); 4168 return (EXDEV); 4169 } 4170 4171 /* 4172 * Make sure the mount point is writable 4173 */ 4174 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4175 cache_drop(&fnchd); 4176 cache_drop(&tnchd); 4177 return (error); 4178 } 4179 4180 /* 4181 * If the target exists and either the source or target is a directory, 4182 * then both must be directories. 4183 * 4184 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4185 * have become NULL. 4186 */ 4187 if (tond->nl_nch.ncp->nc_vp) { 4188 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4189 error = ENOENT; 4190 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4191 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4192 error = ENOTDIR; 4193 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4194 error = EISDIR; 4195 } 4196 } 4197 4198 /* 4199 * You cannot rename a source into itself or a subdirectory of itself. 4200 * We check this by travsersing the target directory upwards looking 4201 * for a match against the source. 4202 * 4203 * XXX MPSAFE 4204 */ 4205 if (error == 0) { 4206 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4207 if (fromnd->nl_nch.ncp == ncp) { 4208 error = EINVAL; 4209 break; 4210 } 4211 } 4212 } 4213 4214 cache_drop(&fnchd); 4215 cache_drop(&tnchd); 4216 4217 /* 4218 * Even though the namespaces are different, they may still represent 4219 * hardlinks to the same file. The filesystem might have a hard time 4220 * with this so we issue a NREMOVE of the source instead of a NRENAME 4221 * when we detect the situation. 4222 */ 4223 if (error == 0) { 4224 fdvp = fromnd->nl_dvp; 4225 tdvp = tond->nl_dvp; 4226 if (fdvp == NULL || tdvp == NULL) { 4227 error = EPERM; 4228 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4229 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4230 fromnd->nl_cred); 4231 } else { 4232 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4233 fdvp, tdvp, tond->nl_cred); 4234 } 4235 } 4236 return (error); 4237 } 4238 4239 /* 4240 * rename_args(char *from, char *to) 4241 * 4242 * Rename files. Source and destination must either both be directories, 4243 * or both not be directories. If target is a directory, it must be empty. 4244 */ 4245 int 4246 sys_rename(struct rename_args *uap) 4247 { 4248 struct nlookupdata fromnd, tond; 4249 int error; 4250 4251 do { 4252 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4253 if (error == 0) { 4254 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4255 if (error == 0) 4256 error = kern_rename(&fromnd, &tond); 4257 nlookup_done(&tond); 4258 } 4259 nlookup_done(&fromnd); 4260 } while (error == EAGAIN); 4261 return (error); 4262 } 4263 4264 /* 4265 * renameat_args(int oldfd, char *old, int newfd, char *new) 4266 * 4267 * Rename files using paths relative to the directories associated with 4268 * oldfd and newfd. Source and destination must either both be directories, 4269 * or both not be directories. If target is a directory, it must be empty. 4270 */ 4271 int 4272 sys_renameat(struct renameat_args *uap) 4273 { 4274 struct nlookupdata oldnd, newnd; 4275 struct file *oldfp, *newfp; 4276 int error; 4277 4278 do { 4279 error = nlookup_init_at(&oldnd, &oldfp, 4280 uap->oldfd, uap->old, 4281 UIO_USERSPACE, 0); 4282 if (error == 0) { 4283 error = nlookup_init_at(&newnd, &newfp, 4284 uap->newfd, uap->new, 4285 UIO_USERSPACE, 0); 4286 if (error == 0) 4287 error = kern_rename(&oldnd, &newnd); 4288 nlookup_done_at(&newnd, newfp); 4289 } 4290 nlookup_done_at(&oldnd, oldfp); 4291 } while (error == EAGAIN); 4292 return (error); 4293 } 4294 4295 int 4296 kern_mkdir(struct nlookupdata *nd, int mode) 4297 { 4298 struct thread *td = curthread; 4299 struct proc *p = td->td_proc; 4300 struct vnode *vp; 4301 struct vattr vattr; 4302 int error; 4303 4304 bwillinode(1); 4305 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4306 if ((error = nlookup(nd)) != 0) 4307 return (error); 4308 4309 if (nd->nl_nch.ncp->nc_vp) 4310 return (EEXIST); 4311 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4312 return (error); 4313 VATTR_NULL(&vattr); 4314 vattr.va_type = VDIR; 4315 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4316 4317 vp = NULL; 4318 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4319 if (error == 0) 4320 vput(vp); 4321 return (error); 4322 } 4323 4324 /* 4325 * mkdir_args(char *path, int mode) 4326 * 4327 * Make a directory file. 4328 */ 4329 int 4330 sys_mkdir(struct mkdir_args *uap) 4331 { 4332 struct nlookupdata nd; 4333 int error; 4334 4335 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4336 if (error == 0) 4337 error = kern_mkdir(&nd, uap->mode); 4338 nlookup_done(&nd); 4339 return (error); 4340 } 4341 4342 /* 4343 * mkdirat_args(int fd, char *path, mode_t mode) 4344 * 4345 * Make a directory file. The path is relative to the directory associated 4346 * with fd. 4347 */ 4348 int 4349 sys_mkdirat(struct mkdirat_args *uap) 4350 { 4351 struct nlookupdata nd; 4352 struct file *fp; 4353 int error; 4354 4355 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4356 if (error == 0) 4357 error = kern_mkdir(&nd, uap->mode); 4358 nlookup_done_at(&nd, fp); 4359 return (error); 4360 } 4361 4362 int 4363 kern_rmdir(struct nlookupdata *nd) 4364 { 4365 int error; 4366 4367 bwillinode(1); 4368 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4369 if ((error = nlookup(nd)) != 0) 4370 return (error); 4371 4372 /* 4373 * Do not allow directories representing mount points to be 4374 * deleted, even if empty. Check write perms on mount point 4375 * in case the vnode is aliased (aka nullfs). 4376 */ 4377 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4378 return (EBUSY); 4379 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4380 return (error); 4381 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4382 return (error); 4383 } 4384 4385 /* 4386 * rmdir_args(char *path) 4387 * 4388 * Remove a directory file. 4389 */ 4390 int 4391 sys_rmdir(struct rmdir_args *uap) 4392 { 4393 struct nlookupdata nd; 4394 int error; 4395 4396 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4397 if (error == 0) 4398 error = kern_rmdir(&nd); 4399 nlookup_done(&nd); 4400 return (error); 4401 } 4402 4403 int 4404 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4405 enum uio_seg direction) 4406 { 4407 struct thread *td = curthread; 4408 struct vnode *vp; 4409 struct file *fp; 4410 struct uio auio; 4411 struct iovec aiov; 4412 off_t loff; 4413 int error, eofflag; 4414 4415 if ((error = holdvnode(td, fd, &fp)) != 0) 4416 return (error); 4417 if ((fp->f_flag & FREAD) == 0) { 4418 error = EBADF; 4419 goto done; 4420 } 4421 vp = (struct vnode *)fp->f_data; 4422 if (vp->v_type != VDIR) { 4423 error = EINVAL; 4424 goto done; 4425 } 4426 aiov.iov_base = buf; 4427 aiov.iov_len = count; 4428 auio.uio_iov = &aiov; 4429 auio.uio_iovcnt = 1; 4430 auio.uio_rw = UIO_READ; 4431 auio.uio_segflg = direction; 4432 auio.uio_td = td; 4433 auio.uio_resid = count; 4434 loff = auio.uio_offset = fp->f_offset; 4435 error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp); 4436 fp->f_offset = auio.uio_offset; 4437 if (error) 4438 goto done; 4439 4440 /* 4441 * WARNING! *basep may not be wide enough to accomodate the 4442 * seek offset. XXX should we hack this to return the upper 32 bits 4443 * for offsets greater then 4G? 4444 */ 4445 if (basep) { 4446 *basep = (long)loff; 4447 } 4448 *res = count - auio.uio_resid; 4449 done: 4450 fdrop(fp); 4451 return (error); 4452 } 4453 4454 /* 4455 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4456 * 4457 * Read a block of directory entries in a file system independent format. 4458 */ 4459 int 4460 sys_getdirentries(struct getdirentries_args *uap) 4461 { 4462 long base; 4463 int error; 4464 4465 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4466 &uap->sysmsg_result, UIO_USERSPACE); 4467 4468 if (error == 0 && uap->basep) 4469 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4470 return (error); 4471 } 4472 4473 /* 4474 * getdents_args(int fd, char *buf, size_t count) 4475 */ 4476 int 4477 sys_getdents(struct getdents_args *uap) 4478 { 4479 int error; 4480 4481 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4482 &uap->sysmsg_result, UIO_USERSPACE); 4483 4484 return (error); 4485 } 4486 4487 /* 4488 * Set the mode mask for creation of filesystem nodes. 4489 * 4490 * umask(int newmask) 4491 */ 4492 int 4493 sys_umask(struct umask_args *uap) 4494 { 4495 struct thread *td = curthread; 4496 struct proc *p = td->td_proc; 4497 struct filedesc *fdp; 4498 4499 fdp = p->p_fd; 4500 uap->sysmsg_result = fdp->fd_cmask; 4501 fdp->fd_cmask = uap->newmask & ALLPERMS; 4502 return (0); 4503 } 4504 4505 /* 4506 * revoke(char *path) 4507 * 4508 * Void all references to file by ripping underlying filesystem 4509 * away from vnode. 4510 */ 4511 int 4512 sys_revoke(struct revoke_args *uap) 4513 { 4514 struct nlookupdata nd; 4515 struct vattr vattr; 4516 struct vnode *vp; 4517 struct ucred *cred; 4518 int error; 4519 4520 vp = NULL; 4521 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4522 if (error == 0) 4523 error = nlookup(&nd); 4524 if (error == 0) 4525 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4526 cred = crhold(nd.nl_cred); 4527 nlookup_done(&nd); 4528 if (error == 0) { 4529 if (error == 0) 4530 error = VOP_GETATTR(vp, &vattr); 4531 if (error == 0 && cred->cr_uid != vattr.va_uid) 4532 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4533 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4534 if (vcount(vp) > 0) 4535 error = vrevoke(vp, cred); 4536 } else if (error == 0) { 4537 error = vrevoke(vp, cred); 4538 } 4539 vrele(vp); 4540 } 4541 if (cred) 4542 crfree(cred); 4543 return (error); 4544 } 4545 4546 /* 4547 * getfh_args(char *fname, fhandle_t *fhp) 4548 * 4549 * Get (NFS) file handle 4550 * 4551 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4552 * mount. This allows nullfs mounts to be explicitly exported. 4553 * 4554 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4555 * 4556 * nullfs mounts of subdirectories are not safe. That is, it will 4557 * work, but you do not really have protection against access to 4558 * the related parent directories. 4559 */ 4560 int 4561 sys_getfh(struct getfh_args *uap) 4562 { 4563 struct thread *td = curthread; 4564 struct nlookupdata nd; 4565 fhandle_t fh; 4566 struct vnode *vp; 4567 struct mount *mp; 4568 int error; 4569 4570 /* 4571 * Must be super user 4572 */ 4573 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4574 return (error); 4575 4576 vp = NULL; 4577 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4578 if (error == 0) 4579 error = nlookup(&nd); 4580 if (error == 0) 4581 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4582 mp = nd.nl_nch.mount; 4583 nlookup_done(&nd); 4584 if (error == 0) { 4585 bzero(&fh, sizeof(fh)); 4586 fh.fh_fsid = mp->mnt_stat.f_fsid; 4587 error = VFS_VPTOFH(vp, &fh.fh_fid); 4588 vput(vp); 4589 if (error == 0) 4590 error = copyout(&fh, uap->fhp, sizeof(fh)); 4591 } 4592 return (error); 4593 } 4594 4595 /* 4596 * fhopen_args(const struct fhandle *u_fhp, int flags) 4597 * 4598 * syscall for the rpc.lockd to use to translate a NFS file handle into 4599 * an open descriptor. 4600 * 4601 * warning: do not remove the priv_check() call or this becomes one giant 4602 * security hole. 4603 */ 4604 int 4605 sys_fhopen(struct fhopen_args *uap) 4606 { 4607 struct thread *td = curthread; 4608 struct filedesc *fdp = td->td_proc->p_fd; 4609 struct mount *mp; 4610 struct vnode *vp; 4611 struct fhandle fhp; 4612 struct vattr vat; 4613 struct vattr *vap = &vat; 4614 struct flock lf; 4615 int fmode, mode, error = 0, type; 4616 struct file *nfp; 4617 struct file *fp; 4618 int indx; 4619 4620 /* 4621 * Must be super user 4622 */ 4623 error = priv_check(td, PRIV_ROOT); 4624 if (error) 4625 return (error); 4626 4627 fmode = FFLAGS(uap->flags); 4628 4629 /* 4630 * Why not allow a non-read/write open for our lockd? 4631 */ 4632 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4633 return (EINVAL); 4634 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4635 if (error) 4636 return(error); 4637 4638 /* 4639 * Find the mount point 4640 */ 4641 mp = vfs_getvfs(&fhp.fh_fsid); 4642 if (mp == NULL) { 4643 error = ESTALE; 4644 goto done2; 4645 } 4646 /* now give me my vnode, it gets returned to me locked */ 4647 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4648 if (error) 4649 goto done; 4650 /* 4651 * from now on we have to make sure not 4652 * to forget about the vnode 4653 * any error that causes an abort must vput(vp) 4654 * just set error = err and 'goto bad;'. 4655 */ 4656 4657 /* 4658 * from vn_open 4659 */ 4660 if (vp->v_type == VLNK) { 4661 error = EMLINK; 4662 goto bad; 4663 } 4664 if (vp->v_type == VSOCK) { 4665 error = EOPNOTSUPP; 4666 goto bad; 4667 } 4668 mode = 0; 4669 if (fmode & (FWRITE | O_TRUNC)) { 4670 if (vp->v_type == VDIR) { 4671 error = EISDIR; 4672 goto bad; 4673 } 4674 error = vn_writechk(vp, NULL); 4675 if (error) 4676 goto bad; 4677 mode |= VWRITE; 4678 } 4679 if (fmode & FREAD) 4680 mode |= VREAD; 4681 if (mode) { 4682 error = VOP_ACCESS(vp, mode, td->td_ucred); 4683 if (error) 4684 goto bad; 4685 } 4686 if (fmode & O_TRUNC) { 4687 vn_unlock(vp); /* XXX */ 4688 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4689 VATTR_NULL(vap); 4690 vap->va_size = 0; 4691 error = VOP_SETATTR(vp, vap, td->td_ucred); 4692 if (error) 4693 goto bad; 4694 } 4695 4696 /* 4697 * VOP_OPEN needs the file pointer so it can potentially override 4698 * it. 4699 * 4700 * WARNING! no f_nchandle will be associated when fhopen()ing a 4701 * directory. XXX 4702 */ 4703 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4704 goto bad; 4705 fp = nfp; 4706 4707 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4708 if (error) { 4709 /* 4710 * setting f_ops this way prevents VOP_CLOSE from being 4711 * called or fdrop() releasing the vp from v_data. Since 4712 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4713 */ 4714 fp->f_ops = &badfileops; 4715 fp->f_data = NULL; 4716 goto bad_drop; 4717 } 4718 4719 /* 4720 * The fp is given its own reference, we still have our ref and lock. 4721 * 4722 * Assert that all regular files must be created with a VM object. 4723 */ 4724 if (vp->v_type == VREG && vp->v_object == NULL) { 4725 kprintf("fhopen: regular file did not " 4726 "have VM object: %p\n", 4727 vp); 4728 goto bad_drop; 4729 } 4730 4731 /* 4732 * The open was successful. Handle any locking requirements. 4733 */ 4734 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4735 lf.l_whence = SEEK_SET; 4736 lf.l_start = 0; 4737 lf.l_len = 0; 4738 if (fmode & O_EXLOCK) 4739 lf.l_type = F_WRLCK; 4740 else 4741 lf.l_type = F_RDLCK; 4742 if (fmode & FNONBLOCK) 4743 type = 0; 4744 else 4745 type = F_WAIT; 4746 vn_unlock(vp); 4747 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4748 &lf, type)) != 0) { 4749 /* 4750 * release our private reference. 4751 */ 4752 fsetfd(fdp, NULL, indx); 4753 fdrop(fp); 4754 vrele(vp); 4755 goto done; 4756 } 4757 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4758 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4759 } 4760 4761 /* 4762 * Clean up. Associate the file pointer with the previously 4763 * reserved descriptor and return it. 4764 */ 4765 vput(vp); 4766 if (uap->flags & O_CLOEXEC) 4767 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4768 fsetfd(fdp, fp, indx); 4769 fdrop(fp); 4770 uap->sysmsg_result = indx; 4771 mount_drop(mp); 4772 4773 return (error); 4774 4775 bad_drop: 4776 fsetfd(fdp, NULL, indx); 4777 fdrop(fp); 4778 bad: 4779 vput(vp); 4780 done: 4781 mount_drop(mp); 4782 done2: 4783 return (error); 4784 } 4785 4786 /* 4787 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4788 */ 4789 int 4790 sys_fhstat(struct fhstat_args *uap) 4791 { 4792 struct thread *td = curthread; 4793 struct stat sb; 4794 fhandle_t fh; 4795 struct mount *mp; 4796 struct vnode *vp; 4797 int error; 4798 4799 /* 4800 * Must be super user 4801 */ 4802 error = priv_check(td, PRIV_ROOT); 4803 if (error) 4804 return (error); 4805 4806 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4807 if (error) 4808 return (error); 4809 4810 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4811 error = ESTALE; 4812 if (error == 0) { 4813 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4814 error = vn_stat(vp, &sb, td->td_ucred); 4815 vput(vp); 4816 } 4817 } 4818 if (error == 0) 4819 error = copyout(&sb, uap->sb, sizeof(sb)); 4820 if (mp) 4821 mount_drop(mp); 4822 4823 return (error); 4824 } 4825 4826 /* 4827 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4828 */ 4829 int 4830 sys_fhstatfs(struct fhstatfs_args *uap) 4831 { 4832 struct thread *td = curthread; 4833 struct proc *p = td->td_proc; 4834 struct statfs *sp; 4835 struct mount *mp; 4836 struct vnode *vp; 4837 struct statfs sb; 4838 char *fullpath, *freepath; 4839 fhandle_t fh; 4840 int error; 4841 4842 /* 4843 * Must be super user 4844 */ 4845 if ((error = priv_check(td, PRIV_ROOT))) 4846 return (error); 4847 4848 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4849 return (error); 4850 4851 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4852 error = ESTALE; 4853 goto done; 4854 } 4855 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4856 error = ESTALE; 4857 goto done; 4858 } 4859 4860 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4861 goto done; 4862 mp = vp->v_mount; 4863 sp = &mp->mnt_stat; 4864 vput(vp); 4865 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4866 goto done; 4867 4868 error = mount_path(p, mp, &fullpath, &freepath); 4869 if (error) 4870 goto done; 4871 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4872 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4873 kfree(freepath, M_TEMP); 4874 4875 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4876 if (priv_check(td, PRIV_ROOT)) { 4877 bcopy(sp, &sb, sizeof(sb)); 4878 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4879 sp = &sb; 4880 } 4881 error = copyout(sp, uap->buf, sizeof(*sp)); 4882 done: 4883 if (mp) 4884 mount_drop(mp); 4885 4886 return (error); 4887 } 4888 4889 /* 4890 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4891 */ 4892 int 4893 sys_fhstatvfs(struct fhstatvfs_args *uap) 4894 { 4895 struct thread *td = curthread; 4896 struct proc *p = td->td_proc; 4897 struct statvfs *sp; 4898 struct mount *mp; 4899 struct vnode *vp; 4900 fhandle_t fh; 4901 int error; 4902 4903 /* 4904 * Must be super user 4905 */ 4906 if ((error = priv_check(td, PRIV_ROOT))) 4907 return (error); 4908 4909 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4910 return (error); 4911 4912 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4913 error = ESTALE; 4914 goto done; 4915 } 4916 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4917 error = ESTALE; 4918 goto done; 4919 } 4920 4921 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4922 goto done; 4923 mp = vp->v_mount; 4924 sp = &mp->mnt_vstat; 4925 vput(vp); 4926 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4927 goto done; 4928 4929 sp->f_flag = 0; 4930 if (mp->mnt_flag & MNT_RDONLY) 4931 sp->f_flag |= ST_RDONLY; 4932 if (mp->mnt_flag & MNT_NOSUID) 4933 sp->f_flag |= ST_NOSUID; 4934 error = copyout(sp, uap->buf, sizeof(*sp)); 4935 done: 4936 if (mp) 4937 mount_drop(mp); 4938 return (error); 4939 } 4940 4941 4942 /* 4943 * Syscall to push extended attribute configuration information into the 4944 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4945 * a command (int cmd), and attribute name and misc data. For now, the 4946 * attribute name is left in userspace for consumption by the VFS_op. 4947 * It will probably be changed to be copied into sysspace by the 4948 * syscall in the future, once issues with various consumers of the 4949 * attribute code have raised their hands. 4950 * 4951 * Currently this is used only by UFS Extended Attributes. 4952 */ 4953 int 4954 sys_extattrctl(struct extattrctl_args *uap) 4955 { 4956 struct nlookupdata nd; 4957 struct vnode *vp; 4958 char attrname[EXTATTR_MAXNAMELEN]; 4959 int error; 4960 size_t size; 4961 4962 attrname[0] = 0; 4963 vp = NULL; 4964 error = 0; 4965 4966 if (error == 0 && uap->filename) { 4967 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4968 NLC_FOLLOW); 4969 if (error == 0) 4970 error = nlookup(&nd); 4971 if (error == 0) 4972 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4973 nlookup_done(&nd); 4974 } 4975 4976 if (error == 0 && uap->attrname) { 4977 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4978 &size); 4979 } 4980 4981 if (error == 0) { 4982 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4983 if (error == 0) 4984 error = nlookup(&nd); 4985 if (error == 0) 4986 error = ncp_writechk(&nd.nl_nch); 4987 if (error == 0) { 4988 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4989 uap->attrnamespace, 4990 uap->attrname, nd.nl_cred); 4991 } 4992 nlookup_done(&nd); 4993 } 4994 4995 return (error); 4996 } 4997 4998 /* 4999 * Syscall to get a named extended attribute on a file or directory. 5000 */ 5001 int 5002 sys_extattr_set_file(struct extattr_set_file_args *uap) 5003 { 5004 char attrname[EXTATTR_MAXNAMELEN]; 5005 struct nlookupdata nd; 5006 struct vnode *vp; 5007 struct uio auio; 5008 struct iovec aiov; 5009 int error; 5010 5011 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5012 if (error) 5013 return (error); 5014 5015 vp = NULL; 5016 5017 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5018 if (error == 0) 5019 error = nlookup(&nd); 5020 if (error == 0) 5021 error = ncp_writechk(&nd.nl_nch); 5022 if (error == 0) 5023 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5024 if (error) { 5025 nlookup_done(&nd); 5026 return (error); 5027 } 5028 5029 bzero(&auio, sizeof(auio)); 5030 aiov.iov_base = uap->data; 5031 aiov.iov_len = uap->nbytes; 5032 auio.uio_iov = &aiov; 5033 auio.uio_iovcnt = 1; 5034 auio.uio_offset = 0; 5035 auio.uio_resid = uap->nbytes; 5036 auio.uio_rw = UIO_WRITE; 5037 auio.uio_td = curthread; 5038 5039 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 5040 &auio, nd.nl_cred); 5041 5042 vput(vp); 5043 nlookup_done(&nd); 5044 return (error); 5045 } 5046 5047 /* 5048 * Syscall to get a named extended attribute on a file or directory. 5049 */ 5050 int 5051 sys_extattr_get_file(struct extattr_get_file_args *uap) 5052 { 5053 char attrname[EXTATTR_MAXNAMELEN]; 5054 struct nlookupdata nd; 5055 struct uio auio; 5056 struct iovec aiov; 5057 struct vnode *vp; 5058 int error; 5059 5060 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5061 if (error) 5062 return (error); 5063 5064 vp = NULL; 5065 5066 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5067 if (error == 0) 5068 error = nlookup(&nd); 5069 if (error == 0) 5070 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5071 if (error) { 5072 nlookup_done(&nd); 5073 return (error); 5074 } 5075 5076 bzero(&auio, sizeof(auio)); 5077 aiov.iov_base = uap->data; 5078 aiov.iov_len = uap->nbytes; 5079 auio.uio_iov = &aiov; 5080 auio.uio_iovcnt = 1; 5081 auio.uio_offset = 0; 5082 auio.uio_resid = uap->nbytes; 5083 auio.uio_rw = UIO_READ; 5084 auio.uio_td = curthread; 5085 5086 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5087 &auio, nd.nl_cred); 5088 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 5089 5090 vput(vp); 5091 nlookup_done(&nd); 5092 return(error); 5093 } 5094 5095 /* 5096 * Syscall to delete a named extended attribute from a file or directory. 5097 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 5098 */ 5099 int 5100 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 5101 { 5102 char attrname[EXTATTR_MAXNAMELEN]; 5103 struct nlookupdata nd; 5104 struct vnode *vp; 5105 int error; 5106 5107 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5108 if (error) 5109 return(error); 5110 5111 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5112 if (error == 0) 5113 error = nlookup(&nd); 5114 if (error == 0) 5115 error = ncp_writechk(&nd.nl_nch); 5116 if (error == 0) { 5117 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5118 if (error == 0) { 5119 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5120 attrname, NULL, nd.nl_cred); 5121 vput(vp); 5122 } 5123 } 5124 nlookup_done(&nd); 5125 return(error); 5126 } 5127 5128 /* 5129 * Determine if the mount is visible to the process. 5130 */ 5131 static int 5132 chroot_visible_mnt(struct mount *mp, struct proc *p) 5133 { 5134 struct nchandle nch; 5135 5136 /* 5137 * Traverse from the mount point upwards. If we hit the process 5138 * root then the mount point is visible to the process. 5139 */ 5140 nch = mp->mnt_ncmountpt; 5141 while (nch.ncp) { 5142 if (nch.mount == p->p_fd->fd_nrdir.mount && 5143 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5144 return(1); 5145 } 5146 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5147 nch = nch.mount->mnt_ncmounton; 5148 } else { 5149 nch.ncp = nch.ncp->nc_parent; 5150 } 5151 } 5152 5153 /* 5154 * If the mount point is not visible to the process, but the 5155 * process root is in a subdirectory of the mount, return 5156 * TRUE anyway. 5157 */ 5158 if (p->p_fd->fd_nrdir.mount == mp) 5159 return(1); 5160 5161 return(0); 5162 } 5163 5164