1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
		__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int get_fspriv(const char *);
static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, u_long);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);

static int usermount = 0;	/* if 1, non-root can mount fs. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

static int debug_unmount = 0;	/* if 1 loop until unmount success */
SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0,
    "Stall failed unmounts in loop");
/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	int priv = 0;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* We do not allow user mounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions for jails and nullfs mounts.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0)
		goto done;

	/*
	 * Select the correct priv according to the file system type.
	 */
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, priv);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, priv))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * strncmp with 5 compares the NUL terminator too, so this
		 * is an exact match against "null" (nullfs update).
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* Save flags so they can be restored if the update fails */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, priv))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, priv)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = VFS_MOUNT(mp, uap->path, uap->data, cred);
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* Failed update: restore the saved flags */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	mp->mnt_ncmounton = nch;
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root. The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount. The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the error from vfs_allocate_syncvnode() is
		 * overwritten by VFS_START() below — confirm intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed: tear down the partially constructed
		 * mount structure and release all references.
		 */
		bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		if (mp->mnt_cred) {
			crfree(mp->mnt_cred);
			mp->mnt_cred = NULL;
		}
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* mount point being covered */
	struct nchandle new_nch;	/* root of the covering mount */
	struct vnode *old_vp;		/* NOTE(review): never set or read
					 * in the visible code */
	struct vnode *new_vp;		/* resolved root vnode of new mount */
};

static int checkdirs_callback(struct proc *p, void *data);

static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || VREFCNT(olddp) == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info, 0);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		/*
		 * Stage the old references here; they are dropped only
		 * after fd_spin has been released below.
		 */
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock(&fdp->fd_spin);
		/* Current directory refers to the covered mount point? */
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		/* Root directory refers to the covered mount point? */
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);
		/* Now safe to drop the displaced references */
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	int priv = 0;
	int error;
	struct ucred *cred;

	cred = td->td_ucred;

	KKASSERT(p);

	/* We do not allow user umounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE,
	    NLC_FOLLOW | NLC_IGNBADDIR);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/* Figure out the fsname in order to select proper privs */
	ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name);
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv))) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, priv)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

	/* Check if this mount belongs to this prison */
	if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison ||
	    mp->mnt_cred->cr_prison != cred->cr_prison)) {
		kprintf("mountpoint %s does not belong to this jail\n",
		    uap->path);
		error = EPERM;
		goto out;
	}

	/*
	 * If no error try to issue the unmount. We lose our cache
	 * ref when we call nlookup_done so we must hold the mount point
	 * to prevent use-after-free races.
	 */
out:
	if (error == 0) {
		mount_hold(mp);
		nlookup_done(&nd);
		error = dounmount(mp, uap->flags, 0);
		mount_drop(mp);
	} else {
		nlookup_done(&nd);
	}
done:
	return (error);
}

/*
 * Do the actual file system unmount (interlocked against the mountlist
 * token and mp->mnt_token).
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback used by dounmount() during a forced unmount:
 * drop any process text nchandle (p->p_textnch) that references the
 * mount being unmounted.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * The guts of the unmount code. The mount owns one ref and one hold
 * count. If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;
	int hadsyncer = 0;
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}


	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* Lock failed: back out of the unmount interlock */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * Decommission our special mnt_syncer vnode. This also stops
	 * the vnlru code. If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
		hadsyncer = 1;
	}

	/*
	 * Sync normally-mounted filesystem.
	 */
	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure. Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	for (retry = 0; (retry < 10 || debug_unmount); ++retry) {
		/*
		 * Invalidate the namecache topology under the mount.
		 * nullfs mounts alias a real mount's namecache topology
		 * and it should not be invalidated in that case.
		 */
		if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
			cache_lock(&mp->mnt_ncmountpt);
			cache_inval(&mp->mnt_ncmountpt,
			    CINV_DESTROY|CINV_CHILDREN);
			cache_unlock(&mp->mnt_ncmountpt);
		}

		/*
		 * Clear pcpu caches
		 */
		cache_unmounting(mp);
		if (mp->mnt_refs != 1)
			cache_clearmntcache();

		/*
		 * Break out if we are good. Don't count ncp refs if the
		 * mount is aliased.
		 */
		ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
		    NULL : mp->mnt_ncmountpt.ncp;
		if (mp->mnt_refs == 1 &&
		    (ncp == NULL || (ncp->nc_refs == 1 &&
		    TAILQ_FIRST(&ncp->nc_list) == NULL))) {
			break;
		}

		/*
		 * If forcing the unmount, clean out any p->p_textnch
		 * nchandles that match this mount.
		 */
		if (flags & MNT_FORCE)
			allproc_scan(&unmount_allproc_cb, mp, 0);

		/*
		 * Sleep and retry.
		 */
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
		if ((retry & 15) == 15) {
			mount_warning(mp,
			    "(%p) debug - retry %d, "
			    "%d namecache refs, %d mount refs",
			    mp, retry,
			    (ncp ? ncp->nc_refs - 1 : 0),
			    mp->mnt_refs - 1);
		}
	}

	error = 0;
	ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
	    NULL : mp->mnt_ncmountpt.ncp;
	if (mp->mnt_refs != 1 ||
	    (ncp != NULL && (ncp->nc_refs != 1 ||
	    TAILQ_FIRST(&ncp->nc_list)))) {
		mount_warning(mp,
		    "(%p): %d namecache refs, %d mount refs "
		    "still present",
		    mp,
		    (ncp ? ncp->nc_refs - 1 : 0),
		    mp->mnt_refs - 1);
		if (flags & MNT_FORCE) {
			freeok = 0;
			mount_warning(mp, "forcing unmount\n");
		} else {
			error = EBUSY;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error == 0 ||		/* no error */
			    error == EOPNOTSUPP ||	/* no sync avail */
			    (flags & MNT_FORCE)) {	/* force anyway */
				error = VFS_UNMOUNT(mp, flags);
			}
		}
		if (error) {
			mount_warning(mp,
			    "(%p) unmount: vfs refused to unmount, "
			    "error %d",
			    mp, error);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL && hadsyncer)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	if (mp->mnt_cred) {
		crfree(mp->mnt_cred);
		mp->mnt_cred = NULL;
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache();
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache();
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		mp = NULL;
	} else {
		cache_clearmntcache();
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Emit a warning to the console about a mount point, prefixing the
 * printf-style formatted message with the mount's resolved path when
 * cache_fullpath() can produce one.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
	    &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount. In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/*
 * sync system call: walk the mountlist and sync every mount via
 * sync_callback(). Always returns 0.
 */
int
sys_sync(struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	return (0);
}

/*
 * Per-mount callback for sys_sync(). Skips read-only mounts.
 * Temporarily clears MNT_ASYNC (under the mount token), issues a
 * no-wait msync and VFS_SYNC, then restores the async flag.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		lwkt_gettoken(&mp->mnt_token);
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		lwkt_reltoken(&mp->mnt_token);
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		lwkt_gettoken(&mp->mnt_token);
		mp->mnt_flag |= asyncflag;
		lwkt_reltoken(&mp->mnt_token);
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	td = curthread;
	/* Jailed processes may only manage quotas if prison_quotas is set */
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
		    uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *	    void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks. We must be root.
	 */
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(td, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
	    buf, uap->buflen, &uap->sysmsg_result);
	if (fp)
		dropfp(td, uap->fd, fp);
	if (error == 0 && uap->sysmsg_result > 0)
		error = copyout(buf, uap->buf, uap->sysmsg_result);
done:
	/* Common cleanup for all exit paths (goto-based, single owner) */
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct nlookupdata nd;
	struct nchandle nch;
	struct mount *mp;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error)
		return (error);
	error = nlookup(&nd);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}
	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Yes, all this is needed to use the nch.mount below, because
	 * we must maintain a ref on the mount to avoid ripouts (e.g.
	 * due to heavy mount/unmount use by synth or poudriere).
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);		/* nch now owns the ref/lock */
	cache_unlock(&nch);
	nlookup_done(&nd);
	vn_unlock(vp);			/* hold ref, drop vnode lock */

	mp = nch.mount;

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
		kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
			path);
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	cache_drop(&nch);

	return (error);
}

/*
 * Resolve the path in *nd and fill *buf with the mount point's statfs
 * data.  The mount-on path is rewritten relative to the caller's root
 * and fsids are zeroed out for non-root callers.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	error = VFS_STATFS(mp, sp, nd->nl_cred);
	/* ignore error */

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Descriptor-based version of kern_statfs().  Fills *buf from the
 * mount associated with the open file fd.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	sp = &mp->mnt_stat;
	error = VFS_STATFS(mp, sp, fp->f_cred);

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Resolve the path in *nd and fill *buf with the mount point's statvfs
 * data.  f_flag is synthesized from the mount flags.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Descriptor-based version of kern_statvfs().  As with kern_fstatfs(),
 * overlay mount info from the file's nchandle is preferred over the
 * underlying vnode's mount so null mounts inside a chroot work.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getfsstat_info {
	struct statfs *sfsp;	/* user buffer cursor, NULL = count only */
	long count;		/* mounts visited (visible to caller) */
	long maxcount;		/* capacity of the user buffer */
	int error;
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	/* never report more entries than the user buffer can hold */
	if (info.sfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount worker for sys_getfsstat().  Mounts not visible from the
 * process's chroot are skipped entirely.  Returns -1 to abort the scan
 * on a hard error, 0 to continue.
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->sfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long vbufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getvfsstat_info {
	struct statfs *sfsp;	/* user statfs buffer cursor */
	struct statvfs *vsfsp;	/* user statvfs buffer cursor, NULL = count */
	long count;		/* mounts visited (visible to caller) */
	long maxcount;		/* capacity in statvfs entries */
	int error;
	int flags;		/* MNT_WAIT/MNT_NOWAIT/MNT_LAZY from caller */
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	/* never report more entries than the user buffers can hold */
	if (info.vsfsp && info.count > info.maxcount)
		uap->sysmsg_result = info.maxcount;
	else
		uap->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount worker for sys_getvfsstat().  Copies out both the statfs
 * and statvfs records for each mount visible from the process's
 * chroot.  Returns -1 to abort the scan on a hard error, 0 to continue.
 */
static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->vsfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		    (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			/* ignore error */
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 */
int
sys_fchdir(struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	lwkt_gettoken(&p->p_token);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	/* a descriptor without namecache info cannot become the cwd */
	if (fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = checkvp_chdir(vp, td);
	if (error) {
		vput(vp);
		goto done;
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
		cache_dropmount(mp);
	}
	if (error == 0) {
		/* swap in the new cwd under the fd spinlock */
		spin_lock(&fdp->fd_spin);
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		spin_unlock(&fdp->fd_spin);
		vn_unlock(vp);		/* leave ref intact */
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
done:
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * Resolve the path in *nd and make it the calling process's current
 * working directory.  On success the nchandle in *nd is transferred
 * to fd_ncdir (nd is zeroed so nlookup_done() won't drop it).
 */
int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	lwkt_gettoken(&p->p_token);
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		/* swap in the new cwd under the fd spinlock */
		spin_lock(&fdp->fd_spin);
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		spin_unlock(&fdp->fd_spin);
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		cache_drop(&onch);
		vrele(ovp);
		cache_zero(&nd->nl_nch);	/* ref now owned by fd_ncdir */
	} else {
		vrele(vp);
	}
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
sys_chdir(struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		/* skip descriptors that are closed or not vnode-backed */
		if ((error = holdvnode(td, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	   (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		lwkt_gettoken(&p->p_token);
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		/* first chroot also establishes the jail directory */
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
		if ((p->p_flags & P_DIDCHROOT) == 0) {
			p->p_flags |= P_DIDCHROOT;
			/* bump p_depth, saturating below 65535 */
			if (p->p_depth <= 65535 - 32)
				p->p_depth += 32;
		}
		lwkt_reltoken(&p->p_token);
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
int
sys_chroot(struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;	/* need search perm on target */
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * chroot_kernel_args(char *path)
 *
 * Set the system-wide root via vfs_cache_setroot() rather than the
 * per-process root directory.  Requires PRIV_VFS_CHROOT.
 */
int
sys_chroot_kernel(struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	/* vp ref and nch hold are consumed by the new root */
	vfs_cache_setroot(vp, cache_hold(nch));

error_out:
	nlookup_done(&nd);
error_nond:
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
static int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Open the path resolved by *nd with the given open flags and creation
 * mode, allocate a descriptor for it and return the descriptor index
 * in *res.  Handles the fdopen() dup case, O_EXLOCK/O_SHLOCK advisory
 * locking and O_CLOEXEC.
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 *
	 * Executable binaries can race VTEXT against O_RDWR opens, so
	 * use an exclusive lock for O_RDWR opens as well.
	 *
	 * NOTE: We need a flag to separate terminal vnode locking from
	 *	 parent locking.  O_CREAT needs parent locking, but O_TRUNC
	 *	 and O_RDWR only need to lock the terminal vnode exclusively.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	if ((flags & (O_CREAT|O_TRUNC|O_RDWR)) == 0)
		nd->nl_flags |= NLC_SHAREDLOCK;

	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);

	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * is.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	if (oflags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;

	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
 */
int
sys_open(struct open_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags,
				  uap->mode, &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * openat_args(int fd, char *path, int flags, int mode)
 */
int
sys_openat(struct openat_args *uap)
{
	struct nlookupdata nd;
	int error;
	struct file *fp;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_open(&nd, uap->flags, uap->mode,
				  &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create a special file (device node, whiteout, or special directory)
 * at the path resolved by *nd.  The required privilege depends on the
 * requested node type.
 */
int
kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;
	int whiteout = 0;

	KKASSERT(p);

	VATTR_NULL(&vattr);
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vattr.va_rmajor = rmajor;
	vattr.va_rminor = rminor;

	switch (mode & S_IFMT) {
	case S_IFMT:	/* used by badsect to flag bad sectors */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0);
		vattr.va_type = VBAD;
		break;
	case S_IFCHR:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VCHR;
		break;
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		vattr.va_type = VBLK;
		break;
	case S_IFWHT:
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0);
		whiteout = 1;
		break;
	case S_IFDIR:	/* special directories support for HAMMER */
		error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0);
		vattr.va_type = VDIR;
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error)
		return (error);

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	/* the target path must not already exist */
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	if (whiteout) {
		error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp,
				      nd->nl_cred, NAMEI_CREATE);
	} else {
		vp = NULL;
		error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp,
				   &vp, nd->nl_cred, &vattr);
		if (error == 0)
			vput(vp);
	}
	return (error);
}

/*
 * mknod_args(char *path, int mode, int dev)
 *
 * Create a special file.
 */
int
sys_mknod(struct mknod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev)
 *
 * Create a special file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mknodat(struct mknodat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_mknod(&nd, uap->mode,
				   umajor(uap->dev), uminor(uap->dev));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Create a FIFO (named pipe) at the path resolved by *nd with the
 * given creation mode, masked by the process's umask.
 */
int
kern_mkfifo(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vattr vattr;
	struct vnode *vp;
	int error;

	bwillinode(1);

	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	/* the target path must not already exist */
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);

	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask;
	vp = NULL;
	error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * mkfifo_args(char *path, int mode)
 *
 * Create a named pipe.
2394 */ 2395 int 2396 sys_mkfifo(struct mkfifo_args *uap) 2397 { 2398 struct nlookupdata nd; 2399 int error; 2400 2401 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2402 if (error == 0) 2403 error = kern_mkfifo(&nd, uap->mode); 2404 nlookup_done(&nd); 2405 return (error); 2406 } 2407 2408 /* 2409 * mkfifoat_args(int fd, char *path, mode_t mode) 2410 * 2411 * Create a named pipe. The path is relative to the directory associated 2412 * with fd. 2413 */ 2414 int 2415 sys_mkfifoat(struct mkfifoat_args *uap) 2416 { 2417 struct nlookupdata nd; 2418 struct file *fp; 2419 int error; 2420 2421 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2422 if (error == 0) 2423 error = kern_mkfifo(&nd, uap->mode); 2424 nlookup_done_at(&nd, fp); 2425 return (error); 2426 } 2427 2428 static int hardlink_check_uid = 0; 2429 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2430 &hardlink_check_uid, 0, 2431 "Unprivileged processes cannot create hard links to files owned by other " 2432 "users"); 2433 static int hardlink_check_gid = 0; 2434 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2435 &hardlink_check_gid, 0, 2436 "Unprivileged processes cannot create hard links to files owned by other " 2437 "groups"); 2438 2439 static int 2440 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2441 { 2442 struct vattr va; 2443 int error; 2444 2445 /* 2446 * Shortcut if disabled 2447 */ 2448 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2449 return (0); 2450 2451 /* 2452 * Privileged user can always hardlink 2453 */ 2454 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2455 return (0); 2456 2457 /* 2458 * Otherwise only if the originating file is owned by the 2459 * same user or group. Note that any group is allowed if 2460 * the file is owned by the caller. 
 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		/* any group is acceptable if the caller owns the file */
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Create a hard link.  nd describes the existing source file, linknd the
 * link to be created.  Both nlookupdatas are initialized but unresolved
 * on entry; the caller disposes of them with nlookup_done*().
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtain a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX: no hard links to directories */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	VFS_MODIFYING(vp->v_mount);
	/* re-lock the source; LK_FAILRECLAIM fails if vp was reclaimed */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 */
int
sys_link(struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
 *
 * Make a hard file link.  The path1 argument is relative to the directory
 * associated with fd1, and similarly the path2 argument is relative to
 * the directory associated with fd2.
 */
int
sys_linkat(struct linkat_args *uap)
{
	struct nlookupdata nd, linknd;
	struct file *fp1, *fp2;
	int error;

	/* only follow the source symlink if AT_SYMLINK_FOLLOW was given */
	error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
	    (uap->flags & AT_SYMLINK_FOLLOW) ?
NLC_FOLLOW : 0);
	if (error == 0) {
		error = nlookup_init_at(&linknd, &fp2, uap->fd2,
		    uap->path2, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done_at(&linknd, fp2);
	}
	nlookup_done_at(&nd, fp1);
	return (error);
}

/*
 * Create a symbolic link at the location described by nd whose target
 * text is 'path'.  The caller disposes of nd with nlookup_done().
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	/* give the buffer cache headroom before creating an inode */
	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);	/* new vnode returned locked+referenced */
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 */
int
sys_symlink(struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	/* copy the link target in via a MAXPATHLEN objcache buffer */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
	}
	objcache_put(namei_oc, path);
	return (error);
}

/*
 * symlinkat_args(char *path1, int fd, char *path2)
 *
 * Make a symbolic link.  The path2 argument is relative to the directory
 * associated with fd.
 */
int
sys_symlinkat(struct symlinkat_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct file *fp;
	char *path1;
	int error;
	int mode;

	/* path1 is the link target text, path2 the link location */
	path1 = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
		    UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path1, mode);
		}
		nlookup_done_at(&nd, fp);
	}
	objcache_put(namei_oc, path1);
	return (error);
}

/*
 * undelete_args(char *path)
 *
 * Delete a whiteout from the filesystem.
 */
int
sys_undelete(struct undelete_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	bwillinode(1);
	/*
	 * NOTE(review): nl_flags is set before the init error is checked;
	 * this looks safe only because a failed nlookup_init() leaves nd
	 * zeroed and nlookup_done() tolerates it -- confirm.
	 */
	nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
				      NAMEI_DELETE);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Remove the file or symlink described by nd.  The caller disposes of
 * nd with nlookup_done() regardless of the return value.
 */
int
kern_unlink(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * unlink_args(char *path)
 *
 * Delete a name from the filesystem.
 */
int
sys_unlink(struct unlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_unlink(&nd);
	nlookup_done(&nd);
	return (error);
}


/*
 * unlinkat_args(int fd, char *path, int flags)
 *
 * Delete the file or directory entry pointed to by fd/path.
 */
int
sys_unlinkat(struct unlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	/* AT_REMOVEDIR is the only flag supported */
	if (uap->flags & ~AT_REMOVEDIR)
		return (EINVAL);

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		if (uap->flags & AT_REMOVEDIR)
			error = kern_rmdir(&nd);
		else
			error = kern_unlink(&nd);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Reposition the seek offset of fd.  On success *res is set to the new
 * offset; on failure it is set to the (unchanged) current offset.
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(td, fd, -1);
	if (fp == NULL)
		return (EBADF);
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	/*
	 * Each case takes f_spin before touching f_offset so that the
	 * read-modify-write below is atomic against other lseek/io.
	 */
	switch (whence) {
	case L_INCR:
		spin_lock(&fp->f_spin);
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		/*
		 * NOTE(review): if VOP_GETATTR_FP() fails, vattr.va_size is
		 * indeterminate here; the value is discarded below because
		 * error != 0, but the computation itself is on an
		 * uninitialized field -- confirm intent.
		 */
		error = VOP_GETATTR_FP(vp, &vattr, fp);
		spin_lock(&fp->f_spin);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		spin_lock(&fp->f_spin);
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		spin_lock(&fp->f_spin);
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files or directories.
 *
	 * Normally we would also not want to allow negative offsets for
	 * character and block-special devices.  However kvm addresses
	 * on 64 bit architectures might appear to be negative and must
	 * be allowed.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	/* report the resulting offset (unchanged on error) */
	*res = fp->f_offset;
	spin_unlock(&fp->f_spin);
done:
	dropfp(td, fd, fp);

	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 */
int
sys_lseek(struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
	    &uap->sysmsg_offset);

	return (error);
}

/*
 * Check if current process can access given file.  amode is a bitmask
 * of *_OK access bits.  flags is a bitmask of AT_* flags.
 */
int
kern_access(struct nlookupdata *nd, int amode, int flags)
{
	struct vnode *vp;
	int error, mode;

	if (flags & ~AT_EACCESS)
		return (EINVAL);
	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
	if (error)
		return (error);

	/* amode == 0 means only check for existence. */
	if (amode) {
		mode = 0;
		if (amode & R_OK)
			mode |= VREAD;
		if (amode & W_OK)
			mode |= VWRITE;
		if (amode & X_OK)
			mode |= VEXEC;
		if ((mode & VWRITE) == 0 ||
		    (error = vn_writechk(vp)) == 0) {
			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
		}

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry with the ncp held exclusively.
This is a hack
		 * at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_unlock(&nd->nl_nch);
			cache_lock(&nd->nl_nch);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				/* vp was vput() above; re-acquire it */
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_access(struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, 0);
	nlookup_done(&nd);
	return (error);
}


/*
 * eaccess_args(char *path, int flags)
 *
 * Check access permissions using the effective uid/gid.
 */
int
sys_eaccess(struct eaccess_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, AT_EACCESS);
	nlookup_done(&nd);
	return (error);
}


/*
 * faccessat_args(int fd, char *path, int amode, int flags)
 *
 * Check access permissions.
 */
int
sys_faccessat(struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
	    NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Stat the file described by nd into *st.  The caller disposes of nd
 * with nlookup_done().
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the
	 * entry with the ncp held exclusively.  This is a hack
	 * at the moment.
	 */
	if (error == ESTALE) {
		vput(vp);
		cache_unlock(&nd->nl_nch);
		cache_lock(&nd->nl_nch);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
		vput(vp);
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
 */
int
sys_stat(struct stat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * lstat_args(char *path, struct stat *ub)
 *
 * Get file status; this version does not follow links.
 */
int
sys_lstat(struct lstat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->ub, sizeof(*uap->ub));
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * fstatat_args(int fd, char *path, struct stat *sb, int flags)
 *
 * Get status of file pointed to by fd/path.
 */
int
sys_fstatat(struct fstatat_args *uap)
{
	struct nlookupdata nd;
	struct stat st;
	int error;
	int flags;
	struct file *fp;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	/* follow symlinks unless AT_SYMLINK_NOFOLLOW was given */
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
	    UIO_USERSPACE, flags);
	if (error == 0) {
		error = kern_stat(&nd, &st);
		if (error == 0)
			error = copyout(&st, uap->sb, sizeof(*uap->sb));
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Common code for pathconf/lpathconf.  flags selects NLC_FOLLOW or not;
 * the result is written to *sysmsg_regp via VOP_PATHCONF().
 */
static int
kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, path, UIO_USERSPACE, flags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = VOP_PATHCONF(vp, name, sysmsg_regp);
		vput(vp);
	}
	return (error);
}

/*
 * pathconf_args(char *path, int name)
 *
 * Get configurable pathname variables.
3098 */ 3099 int 3100 sys_pathconf(struct pathconf_args *uap) 3101 { 3102 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3103 &uap->sysmsg_reg)); 3104 } 3105 3106 /* 3107 * lpathconf_Args(char *path, int name) 3108 * 3109 * Get configurable pathname variables, but don't follow symlinks. 3110 */ 3111 int 3112 sys_lpathconf(struct lpathconf_args *uap) 3113 { 3114 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3115 } 3116 3117 /* 3118 * XXX: daver 3119 * kern_readlink isn't properly split yet. There is a copyin burried 3120 * in VOP_READLINK(). 3121 */ 3122 int 3123 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3124 { 3125 struct thread *td = curthread; 3126 struct vnode *vp; 3127 struct iovec aiov; 3128 struct uio auio; 3129 int error; 3130 3131 nd->nl_flags |= NLC_SHAREDLOCK; 3132 if ((error = nlookup(nd)) != 0) 3133 return (error); 3134 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3135 if (error) 3136 return (error); 3137 if (vp->v_type != VLNK) { 3138 error = EINVAL; 3139 } else { 3140 aiov.iov_base = buf; 3141 aiov.iov_len = count; 3142 auio.uio_iov = &aiov; 3143 auio.uio_iovcnt = 1; 3144 auio.uio_offset = 0; 3145 auio.uio_rw = UIO_READ; 3146 auio.uio_segflg = UIO_USERSPACE; 3147 auio.uio_td = td; 3148 auio.uio_resid = count; 3149 error = VOP_READLINK(vp, &auio, td->td_ucred); 3150 } 3151 vput(vp); 3152 *res = count - auio.uio_resid; 3153 return (error); 3154 } 3155 3156 /* 3157 * readlink_args(char *path, char *buf, int count) 3158 * 3159 * Return target name of a symbolic link. 
 */
int
sys_readlink(struct readlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->count,
		    &uap->sysmsg_result);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize)
 *
 * Return target name of a symbolic link.  The path is relative to the
 * directory associated with fd.
 */
int
sys_readlinkat(struct readlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->bufsize,
		    &uap->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Common helper to set the va_flags of a vnode on behalf of the current
 * thread's credentials.
 */
static int
setfflags(struct vnode *vp, u_long flags)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
	    ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_flags = flags;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return (error);
}

/*
 * chflags(const char *path, u_long flags)
 *
 * Change flags of a file given a path name.
 */
int
sys_chflags(struct chflags_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	return (error);
}

/*
 * lchflags(const char *path, u_long flags)
 *
 * Change flags of a file given a path name, but don't follow symlinks.
 */
int
sys_lchflags(struct lchflags_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	vp = NULL;
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done(&nd);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	return (error);
}

/*
 * fchflags_args(int fd, u_long flags)
 *
 * Change flags of a file given a file descriptor.
 */
int
sys_fchflags(struct fchflags_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfflags((struct vnode *) fp->f_data, uap->flags);
	fdrop(fp);
	return (error);
}

/*
 * chflagsat_args(int fd, const char *path, u_long flags, int atflags)
 * change flags given a pathname relative to a filedescriptor
 */
int sys_chflagsat(struct chflagsat_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	struct file *fp;
	int error;
	int lookupflags;

	if (uap->atflags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	/* follow symlinks unless AT_SYMLINK_NOFOLLOW was given */
	lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	vp = NULL;
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done_at(&nd, fp);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	return (error);
}


/*
 * Common helper to set the permission bits of a vnode on behalf of the
 * current thread's credentials.
 */
static int
setfmode(struct vnode *vp, int mode)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_mode = mode & ALLPERMS;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		/* mode change may affect cached write/execute-ok state */
		cache_inval_wxok(vp);
		vput(vp);
	}
	return error;
}

/*
 * Change the mode of the file described by nd.  The caller disposes of
 * nd with nlookup_done().
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfmode(vp, mode);
	vrele(vp);
	return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 */
int
sys_chmod(struct chmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
 */
int
sys_lchmod(struct lchmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * fchmod_args(int fd, int mode)
 *
 * Change mode of a file given a file descriptor.
3417 */ 3418 int 3419 sys_fchmod(struct fchmod_args *uap) 3420 { 3421 struct thread *td = curthread; 3422 struct file *fp; 3423 int error; 3424 3425 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3426 return (error); 3427 if (fp->f_nchandle.ncp) 3428 error = ncp_writechk(&fp->f_nchandle); 3429 if (error == 0) 3430 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3431 fdrop(fp); 3432 return (error); 3433 } 3434 3435 /* 3436 * fchmodat_args(char *path, int mode) 3437 * 3438 * Change mode of a file pointed to by fd/path. 3439 */ 3440 int 3441 sys_fchmodat(struct fchmodat_args *uap) 3442 { 3443 struct nlookupdata nd; 3444 struct file *fp; 3445 int error; 3446 int flags; 3447 3448 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3449 return (EINVAL); 3450 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3451 3452 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3453 UIO_USERSPACE, flags); 3454 if (error == 0) 3455 error = kern_chmod(&nd, uap->mode); 3456 nlookup_done_at(&nd, fp); 3457 return (error); 3458 } 3459 3460 static int 3461 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3462 { 3463 struct thread *td = curthread; 3464 int error; 3465 struct vattr vattr; 3466 uid_t o_uid; 3467 gid_t o_gid; 3468 uint64_t size; 3469 3470 /* 3471 * note: vget is required for any operation that might mod the vnode 3472 * so VINACTIVE is properly cleared. 
3473 */ 3474 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3475 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3476 return error; 3477 o_uid = vattr.va_uid; 3478 o_gid = vattr.va_gid; 3479 size = vattr.va_size; 3480 3481 VATTR_NULL(&vattr); 3482 vattr.va_uid = uid; 3483 vattr.va_gid = gid; 3484 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3485 vput(vp); 3486 } 3487 3488 if (error == 0) { 3489 if (uid == -1) 3490 uid = o_uid; 3491 if (gid == -1) 3492 gid = o_gid; 3493 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3494 VFS_ACCOUNT(mp, uid, gid, size); 3495 } 3496 3497 return error; 3498 } 3499 3500 int 3501 kern_chown(struct nlookupdata *nd, int uid, int gid) 3502 { 3503 struct vnode *vp; 3504 int error; 3505 3506 if ((error = nlookup(nd)) != 0) 3507 return (error); 3508 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3509 return (error); 3510 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3511 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3512 vrele(vp); 3513 return (error); 3514 } 3515 3516 /* 3517 * chown(char *path, int uid, int gid) 3518 * 3519 * Set ownership given a path name. 3520 */ 3521 int 3522 sys_chown(struct chown_args *uap) 3523 { 3524 struct nlookupdata nd; 3525 int error; 3526 3527 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3528 if (error == 0) 3529 error = kern_chown(&nd, uap->uid, uap->gid); 3530 nlookup_done(&nd); 3531 return (error); 3532 } 3533 3534 /* 3535 * lchown_args(char *path, int uid, int gid) 3536 * 3537 * Set ownership given a path name, do not cross symlinks. 3538 */ 3539 int 3540 sys_lchown(struct lchown_args *uap) 3541 { 3542 struct nlookupdata nd; 3543 int error; 3544 3545 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3546 if (error == 0) 3547 error = kern_chown(&nd, uap->uid, uap->gid); 3548 nlookup_done(&nd); 3549 return (error); 3550 } 3551 3552 /* 3553 * fchown_args(int fd, int uid, int gid) 3554 * 3555 * Set ownership given a file descriptor. 
3556 */ 3557 int 3558 sys_fchown(struct fchown_args *uap) 3559 { 3560 struct thread *td = curthread; 3561 struct proc *p = td->td_proc; 3562 struct file *fp; 3563 int error; 3564 3565 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3566 return (error); 3567 if (fp->f_nchandle.ncp) 3568 error = ncp_writechk(&fp->f_nchandle); 3569 if (error == 0) 3570 error = setfown(p->p_fd->fd_ncdir.mount, 3571 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3572 fdrop(fp); 3573 return (error); 3574 } 3575 3576 /* 3577 * fchownat(int fd, char *path, int uid, int gid, int flags) 3578 * 3579 * Set ownership of file pointed to by fd/path. 3580 */ 3581 int 3582 sys_fchownat(struct fchownat_args *uap) 3583 { 3584 struct nlookupdata nd; 3585 struct file *fp; 3586 int error; 3587 int flags; 3588 3589 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3590 return (EINVAL); 3591 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3592 3593 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3594 UIO_USERSPACE, flags); 3595 if (error == 0) 3596 error = kern_chown(&nd, uap->uid, uap->gid); 3597 nlookup_done_at(&nd, fp); 3598 return (error); 3599 } 3600 3601 3602 static int 3603 getutimes(struct timeval *tvp, struct timespec *tsp) 3604 { 3605 struct timeval tv[2]; 3606 int error; 3607 3608 if (tvp == NULL) { 3609 microtime(&tv[0]); 3610 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3611 tsp[1] = tsp[0]; 3612 } else { 3613 if ((error = itimerfix(tvp)) != 0) 3614 return (error); 3615 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3616 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3617 } 3618 return 0; 3619 } 3620 3621 static int 3622 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3623 { 3624 struct timespec tsnow; 3625 int error; 3626 3627 *nullflag = 0; 3628 nanotime(&tsnow); 3629 if (ts == NULL) { 3630 newts[0] = tsnow; 3631 newts[1] = tsnow; 3632 *nullflag = 1; 3633 return (0); 3634 } 3635 3636 newts[0] = ts[0]; 3637 newts[1] = ts[1]; 3638 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT)
		return (0);
	if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW)
		*nullflag = 1;

	/* UTIME_OMIT maps to VNOVAL so VOP_SETATTR leaves the field alone */
	if (newts[0].tv_nsec == UTIME_OMIT)
		newts[0].tv_sec = VNOVAL;
	else if (newts[0].tv_nsec == UTIME_NOW)
		newts[0] = tsnow;
	else if ((error = itimespecfix(&newts[0])) != 0)
		return (error);

	if (newts[1].tv_nsec == UTIME_OMIT)
		newts[1].tv_sec = VNOVAL;
	else if (newts[1].tv_nsec == UTIME_NOW)
		newts[1] = tsnow;
	else if ((error = itimespecfix(&newts[1])) != 0)
		return (error);

	return (0);
}

/*
 * Apply access/modification times to a locked vnode.  *vattr is used as
 * scratch space; nullflag marks a utimes(NULL)-style request.
 */
static int
setutimes(struct vnode *vp, struct vattr *vattr,
	  const struct timespec *ts, int nullflag)
{
	struct thread *td = curthread;
	int error;

	VATTR_NULL(vattr);
	vattr->va_atime = ts[0];
	vattr->va_mtime = ts[1];
	if (nullflag)
		vattr->va_vaflags |= VA_UTIMES_NULL;
	error = VOP_SETATTR(vp, vattr, td->td_ucred);

	return error;
}

/*
 * Set the times of the file described by nd from an optional timeval
 * pair (NULL means "now").  Thin wrapper around kern_utimensat().
 */
int
kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
{
	struct timespec ts[2];
	int error;

	if (tptr) {
		if ((error = getutimes(tptr, ts)) != 0)
			return (error);
	}
	error = kern_utimensat(nd, tptr ? ts : NULL, 0);
	return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_utimes(struct utimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ?
tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file, without following
 * a trailing symlink.
 */
int
sys_lutimes(struct lutimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * Set utimes on a file descriptor.  The creds used to open the
 * file are used to determine whether the operation is allowed
 * or not.
 */
int
kern_futimens(int fd, struct timespec *ts)
{
	struct thread *td = curthread;
	struct timespec newts[2];
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	int nullflag;
	int error;

	error = getutimens(ts, newts, &nullflag);
	if (error)
		return (error);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0) {
		vp = fp->f_data;
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			/* permission check against the opening credentials */
			error = VOP_GETATTR_FP(vp, &vattr, fp);
			if (error == 0) {
				error = naccess_va(&vattr, NLC_OWN | NLC_WRITE,
				    fp->f_cred);
			}
			if (error == 0) {
				error = setutimes(vp, &vattr, newts, nullflag);
			}
			vput(vp);
		}
	}
	fdrop(fp);
	return (error);
}

/*
 * futimens_args(int fd, struct timespec *ts)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimens(struct futimens_args *uap)
{
	struct timespec ts[2];
	int error;

	if (uap->ts) {
		error = copyin(uap->ts, ts, sizeof(ts));
		if (error)
			return (error);
	}
	error = kern_futimens(uap->fd, uap->ts ? ts : NULL);
	return (error);
}

/*
 * timeval flavor of kern_futimens(): convert the optional timeval pair
 * to timespecs (NULL tptr means "now") and delegate.
 */
int
kern_futimes(int fd, struct timeval *tptr)
{
	struct timespec ts[2];
	int error;

	if (tptr) {
		if ((error = getutimes(tptr, ts)) != 0)
			return (error);
	}
	error = kern_futimens(fd, tptr ? ts : NULL);
	return (error);
}

/*
 * futimes_args(int fd, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_futimes(struct futimes_args *uap)
{
	struct timeval tv[2];
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = kern_futimes(uap->fd, uap->tptr ? tv : NULL);
	return (error);
}

/*
 * Set utimes on the path described by nd.  ts may be NULL ("now").
 * Only AT_SYMLINK_NOFOLLOW is accepted in flags; the caller must have
 * already chosen whether the lookup follows symlinks.
 */
int
kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags)
{
	struct timespec newts[2];
	struct vnode *vp;
	struct vattr vattr;
	int nullflag;
	int error;

	if (flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	error = getutimens(ts, newts, &nullflag);
	if (error)
		return (error);

	/* require ownership or write permission during the lookup */
	nd->nl_flags |= NLC_OWN | NLC_WRITE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = vn_writechk(vp)) == 0) {
		/* vget adds its own ref+lock on top of the cache_vref ref */
		error = vget(vp, LK_EXCLUSIVE);
		if (error == 0) {
			error = setutimes(vp, &vattr, newts, nullflag);
			vput(vp);
		}
	}
	vrele(vp);	/* drop the cache_vref reference */
	return (error);
}

/*
 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags);
 *
 * Set file access and modification times of a file.
 */
int
sys_utimensat(struct utimensat_args *uap)
{
	struct timespec ts[2];
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->ts) {
		error = copyin(uap->ts, ts, sizeof(ts));
		if (error)
			return (error);
	}

	/* AT_SYMLINK_NOFOLLOW suppresses NLC_FOLLOW on the lookup */
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Truncate (or extend) the file at nd to the given length.  Directories
 * are rejected.  When quotas are enabled the size delta is charged to
 * the file's owner via VFS_ACCOUNT().
 */
int
kern_truncate(struct nlookupdata *nd, off_t length)
{
	struct vnode *vp;
	struct vattr vattr;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;

	if (length < 0)
		return(EINVAL);
	nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}
	if (vp->v_type == VDIR) {
		error = EISDIR;
		goto done;
	}
	if (vfs_quota_enabled) {
		/* snapshot owner and size for the quota accounting below */
		error = VOP_GETATTR(vp, &vattr);
		KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR(vp, &vattr, nd->nl_cred);
		VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size);
	}
done:
	vput(vp);	/* releases both the lock and the cache_vref ref */
	return (error);
}

/*
 * truncate(char *path, int pad, off_t length)
 *
 * Truncate a file given its path name.
 */
int
sys_truncate(struct truncate_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_truncate(&nd, uap->length);
	nlookup_done(&nd);
	return error;
}

/*
 * Truncate (or extend) an open file to the given length.  The descriptor
 * must have been opened for writing and must not be append-only.  When
 * quotas are enabled the size delta is charged via VFS_ACCOUNT().
 */
int
kern_ftruncate(int fd, off_t length)
{
	struct thread *td = curthread;
	struct vattr vattr;
	struct vnode *vp;
	struct file *fp;
	int error;
	uid_t uid = 0;
	gid_t gid = 0;
	uint64_t old_size = 0;
	struct mount *mp;

	if (length < 0)
		return(EINVAL);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp) {
		error = ncp_writechk(&fp->f_nchandle);
		if (error)
			goto done;
	}
	if ((fp->f_flag & FWRITE) == 0) {
		error = EINVAL;
		goto done;
	}
	if (fp->f_flag & FAPPENDONLY) {	/* inode was set append-only */
		error = EINVAL;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if (vp->v_type == VDIR) {
		error = EISDIR;
		vn_unlock(vp);
		goto done;
	}

	if (vfs_quota_enabled) {
		/* snapshot owner and size for the quota accounting below */
		error = VOP_GETATTR_FP(vp, &vattr, fp);
		KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0"));
		uid = vattr.va_uid;
		gid = vattr.va_gid;
		old_size = vattr.va_size;
	}

	if ((error = vn_writechk(vp)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_size = length;
		error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp);
		mp = vq_vptomp(vp);
		VFS_ACCOUNT(mp, uid, gid, length - old_size);
	}
	vn_unlock(vp);
done:
	fdrop(fp);
	return (error);
}

/*
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	/* flush dirty VM pages first unless the mount opts out (NOMSYNC) */
	if ((obj = vp->v_object) != NULL) {
		if (vp->v_mount == NULL ||
		    (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
			vm_object_page_clean(obj, 0, 0, 0);
		}
	}
	error = VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * Rename fromnd to tond.  Returns EAGAIN (caller must retry the whole
 * lookup) when the namecache generation changes or a ripout occurs while
 * the source was temporarily unlocked.
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;
	u_int fncp_gen;
	u_int tncp_gen;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;

	/* remember the generation so a concurrent change can be detected */
	fncp_gen = fromnd->nl_nch.ncp->nc_generation;

	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	tncp_gen = tond->nl_nch.ncp->nc_generation;

	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * If the namecache generation changed for either fromnd or tond,
	 * we must retry.
	 */
	if (fromnd->nl_nch.ncp->nc_generation != fncp_gen ||
	    tond->nl_nch.ncp->nc_generation != tncp_gen) {
		kprintf("kern_rename: retry due to gen on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * If either fromnd or tond are marked destroyed a ripout occurred
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
		kprintf("kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * Make sure the parent directories linkages are the same.
	 * XXX shouldn't be needed any more w/ generation check above.
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}

/*
 * rename_args(char *from, char *to)
 *
 * Rename files.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_rename(struct rename_args *uap)
{
	struct nlookupdata fromnd, tond;
	int error;

	/* kern_rename() returns EAGAIN on a namecache race; retry */
	do {
		error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&fromnd, &tond);
			nlookup_done(&tond);
		}
		nlookup_done(&fromnd);
	} while (error == EAGAIN);
	return (error);
}

/*
 * renameat_args(int oldfd, char *old, int newfd, char *new)
 *
 * Rename files using paths relative to the directories associated with
 * oldfd and newfd.  Source and destination must either both be directories,
 * or both not be directories.  If target is a directory, it must be empty.
 */
int
sys_renameat(struct renameat_args *uap)
{
	struct nlookupdata oldnd, newnd;
	struct file *oldfp, *newfp;
	int error;

	/* kern_rename() returns EAGAIN on a namecache race; retry */
	do {
		error = nlookup_init_at(&oldnd, &oldfp,
					uap->oldfd, uap->old,
					UIO_USERSPACE, 0);
		if (error == 0) {
			error = nlookup_init_at(&newnd, &newfp,
						uap->newfd, uap->new,
						UIO_USERSPACE, 0);
			if (error == 0)
				error = kern_rename(&oldnd, &newnd);
			nlookup_done_at(&newnd, newfp);
		}
		nlookup_done_at(&oldnd, oldfp);
	} while (error == EAGAIN);
	return (error);
}

/*
 * Create a directory at nd with the given mode (masked by the process
 * umask).  The caller is responsible for nlookup_done(nd) on all paths.
 */
int
kern_mkdir(struct nlookupdata *nd, int mode)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct vnode *vp;
	struct vattr vattr;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask;

	vp = NULL;
	error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr);
	if (error == 0)
		vput(vp);	/* VOP_NMKDIR returned vp locked+ref'd */
	return (error);
}

/*
 * mkdir_args(char *path, int mode)
 *
 * Make a directory file.
 */
int
sys_mkdir(struct mkdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * mkdirat_args(int fd, char *path, mode_t mode)
 *
 * Make a directory file.  The path is relative to the directory associated
 * with fd.
 */
int
sys_mkdirat(struct mkdirat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkdir(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Remove the directory at nd.  The caller is responsible for
 * nlookup_done(nd) on all paths.
 */
int
kern_rmdir(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);

	/*
	 * Do not allow directories representing mount points to be
	 * deleted, even if empty.  Check write perms on mount point
	 * in case the vnode is aliased (aka nullfs).
	 */
	if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT))
		return (EBUSY);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * rmdir_args(char *path)
 *
 * Remove a directory file.
 */
int
sys_rmdir(struct rmdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_rmdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Read directory entries from fd into buf (at most count bytes).  On
 * success *res is set to the number of bytes transferred and, if basep
 * is not NULL, *basep to the seek offset at which the read started.
 * direction selects a user or kernel destination buffer.
 */
int
kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res,
		   enum uio_seg direction)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	struct uio auio;
	struct iovec aiov;
	off_t loff;
	int error, eofflag;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	if ((fp->f_flag & FREAD) == 0) {
		error = EBADF;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;
	if (vp->v_type != VDIR) {
		error = EINVAL;
		goto done;
	}
	/* build a single-segment uio describing the caller's buffer */
	aiov.iov_base = buf;
	aiov.iov_len = count;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_rw = UIO_READ;
	auio.uio_segflg = direction;
	auio.uio_td = td;
	auio.uio_resid = count;
	loff = auio.uio_offset = fp->f_offset;
	error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp);
	fp->f_offset = auio.uio_offset;
	if (error)
		goto done;

	/*
	 * WARNING!  *basep may not be wide enough to accommodate the
	 * seek offset.   XXX should we hack this to return the upper 32 bits
	 * for offsets greater than 4G?
	 */
	if (basep) {
		*basep = (long)loff;
	}
	*res = count - auio.uio_resid;
done:
	fdrop(fp);
	return (error);
}

/*
 * getdirentries_args(int fd, char *buf, u_int count, long *basep)
 *
 * Read a block of directory entries in a file system independent format.
 */
int
sys_getdirentries(struct getdirentries_args *uap)
{
	long base;
	int error;

	error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base,
				   &uap->sysmsg_result, UIO_USERSPACE);

	if (error == 0 && uap->basep)
		error = copyout(&base, uap->basep, sizeof(*uap->basep));
	return (error);
}

/*
 * getdents_args(int fd, char *buf, size_t count)
 */
int
sys_getdents(struct getdents_args *uap)
{
	int error;

	/* like getdirentries() but without the base-offset out-parameter */
	error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL,
				   &uap->sysmsg_result, UIO_USERSPACE);

	return (error);
}

/*
 * Set the mode mask for creation of filesystem nodes.
 *
 * umask(int newmask)
 */
int
sys_umask(struct umask_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp;

	fdp = p->p_fd;
	/* return the previous mask, as required by umask(2) */
	uap->sysmsg_result = fdp->fd_cmask;
	fdp->fd_cmask = uap->newmask & ALLPERMS;
	return (0);
}

/*
 * revoke(char *path)
 *
 * Void all references to file by ripping underlying filesystem
 * away from vnode.
4551 */ 4552 int 4553 sys_revoke(struct revoke_args *uap) 4554 { 4555 struct nlookupdata nd; 4556 struct vattr vattr; 4557 struct vnode *vp; 4558 struct ucred *cred; 4559 int error; 4560 4561 vp = NULL; 4562 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4563 if (error == 0) 4564 error = nlookup(&nd); 4565 if (error == 0) 4566 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4567 cred = crhold(nd.nl_cred); 4568 nlookup_done(&nd); 4569 if (error == 0) { 4570 if (error == 0) 4571 error = VOP_GETATTR(vp, &vattr); 4572 if (error == 0 && cred->cr_uid != vattr.va_uid) 4573 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4574 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4575 if (vcount(vp) > 0) 4576 error = vrevoke(vp, cred); 4577 } else if (error == 0) { 4578 error = vrevoke(vp, cred); 4579 } 4580 vrele(vp); 4581 } 4582 if (cred) 4583 crfree(cred); 4584 return (error); 4585 } 4586 4587 /* 4588 * getfh_args(char *fname, fhandle_t *fhp) 4589 * 4590 * Get (NFS) file handle 4591 * 4592 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4593 * mount. This allows nullfs mounts to be explicitly exported. 4594 * 4595 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4596 * 4597 * nullfs mounts of subdirectories are not safe. That is, it will 4598 * work, but you do not really have protection against access to 4599 * the related parent directories. 
 */
int
sys_getfh(struct getfh_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	fhandle_t fh;
	struct vnode *vp;
	struct mount *mp;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	vp = NULL;
	error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	/* use the covering mount's fsid (see function comment above) */
	mp = nd.nl_nch.mount;
	nlookup_done(&nd);
	if (error == 0) {
		bzero(&fh, sizeof(fh));
		fh.fh_fsid = mp->mnt_stat.f_fsid;
		error = VFS_VPTOFH(vp, &fh.fh_fid);
		vput(vp);
		if (error == 0)
			error = copyout(&fh, uap->fhp, sizeof(fh));
	}
	return (error);
}

/*
 * fhopen_args(const struct fhandle *u_fhp, int flags)
 *
 * syscall for the rpc.lockd to use to translate a NFS file handle into
 * an open descriptor.
 *
 * warning: do not remove the priv_check() call or this becomes one giant
 * security hole.
 */
int
sys_fhopen(struct fhopen_args *uap)
{
	struct thread *td = curthread;
	struct filedesc *fdp = td->td_proc->p_fd;
	struct mount *mp;
	struct vnode *vp;
	struct fhandle fhp;
	struct vattr vat;
	struct vattr *vap = &vat;
	struct flock lf;
	int fmode, mode, error = 0, type;
	struct file *nfp;
	struct file *fp;
	int indx;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	fmode = FFLAGS(uap->flags);

	/*
	 * Why not allow a non-read/write open for our lockd?
	 */
	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
		return (EINVAL);
	error = copyin(uap->u_fhp, &fhp, sizeof(fhp));
	if (error)
		return(error);

	/*
	 * Find the mount point
	 */
	mp = vfs_getvfs(&fhp.fh_fsid);
	if (mp == NULL) {
		error = ESTALE;
		goto done2;
	}
	/* now give me my vnode, it gets returned to me locked */
	error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp);
	if (error)
		goto done;
	/*
	 * from now on we have to make sure not
	 * to forget about the vnode
	 * any error that causes an abort must vput(vp)
	 * just set error = err and 'goto bad;'.
	 */

	/*
	 * from vn_open
	 */
	if (vp->v_type == VLNK) {
		error = EMLINK;
		goto bad;
	}
	if (vp->v_type == VSOCK) {
		error = EOPNOTSUPP;
		goto bad;
	}
	mode = 0;
	if (fmode & (FWRITE | O_TRUNC)) {
		if (vp->v_type == VDIR) {
			error = EISDIR;
			goto bad;
		}
		error = vn_writechk(vp);
		if (error)
			goto bad;
		mode |= VWRITE;
	}
	if (fmode & FREAD)
		mode |= VREAD;
	if (mode) {
		error = VOP_ACCESS(vp, mode, td->td_ucred);
		if (error)
			goto bad;
	}
	if (fmode & O_TRUNC) {
		vn_unlock(vp);				/* XXX */
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);	/* XXX */
		VATTR_NULL(vap);
		vap->va_size = 0;
		error = VOP_SETATTR(vp, vap, td->td_ucred);
		if (error)
			goto bad;
	}

	/*
	 * VOP_OPEN needs the file pointer so it can potentially override
	 * it.
	 *
	 * WARNING! no f_nchandle will be associated when fhopen()ing a
	 * directory.  XXX
	 */
	if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0)
		goto bad;
	fp = nfp;

	error = VOP_OPEN(vp, fmode, td->td_ucred, fp);
	if (error) {
		/*
		 * setting f_ops this way prevents VOP_CLOSE from being
		 * called or fdrop() releasing the vp from v_data.   Since
		 * the VOP_OPEN failed we don't want to VOP_CLOSE.
		 */
		fp->f_ops = &badfileops;
		fp->f_data = NULL;
		goto bad_drop;
	}

	/*
	 * The fp is given its own reference, we still have our ref and lock.
	 *
	 * Assert that all regular files must be created with a VM object.
	 *
	 * NOTE(review): error is still 0 at this point, so this failure
	 * path returns success to the caller after tearing down the
	 * descriptor — looks like it should set an error code; confirm.
	 */
	if (vp->v_type == VREG && vp->v_object == NULL) {
		kprintf("fhopen: regular file did not "
			"have VM object: %p\n",
			vp);
		goto bad_drop;
	}

	/*
	 * The open was successful.  Handle any locking requirements.
	 */
	if (fmode & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (fmode & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (fmode & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;
		vn_unlock(vp);
		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK,
					 &lf, type)) != 0) {
			/*
			 * release our private reference.
			 */
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			vrele(vp);
			goto done;
		}
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
		atomic_set_int(&fp->f_flag, FHASLOCK);	/* race ok */
	}

	/*
	 * Clean up.  Associate the file pointer with the previously
	 * reserved descriptor and return it.
	 */
	vput(vp);
	if (uap->flags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	uap->sysmsg_result = indx;
	mount_drop(mp);

	return (error);

bad_drop:
	fsetfd(fdp, NULL, indx);
	fdrop(fp);
bad:
	vput(vp);
done:
	mount_drop(mp);
done2:
	return (error);
}

/*
 * fhstat_args(struct fhandle *u_fhp, struct stat *sb)
 */
int
sys_fhstat(struct fhstat_args *uap)
{
	struct thread *td = curthread;
	struct stat sb;
	fhandle_t fh;
	struct mount *mp;
	struct vnode *vp;
	int error;

	/*
	 * Must be super user
	 */
	error = priv_check(td, PRIV_ROOT);
	if (error)
		return (error);

	error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t));
	if (error)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
		error = ESTALE;
	if (error == 0) {
		if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) {
			error = vn_stat(vp, &sb, td->td_ucred);
			vput(vp);
		}
	}
	if (error == 0)
		error = copyout(&sb, uap->sb, sizeof(sb));
	if (mp)
		mount_drop(mp);

	return (error);
}

/*
 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf)
 */
int
sys_fhstatfs(struct fhstatfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statfs *sp;
	struct mount *mp;
	struct vnode *vp;
	struct statfs sb;
	char *fullpath, *freepath;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	/* chroot'd processes may not see mounts outside their root */
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
4897 error = ESTALE; 4898 goto done; 4899 } 4900 4901 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4902 goto done; 4903 mp = vp->v_mount; 4904 sp = &mp->mnt_stat; 4905 vput(vp); 4906 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4907 goto done; 4908 4909 error = mount_path(p, mp, &fullpath, &freepath); 4910 if (error) 4911 goto done; 4912 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4913 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4914 kfree(freepath, M_TEMP); 4915 4916 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4917 if (priv_check(td, PRIV_ROOT)) { 4918 bcopy(sp, &sb, sizeof(sb)); 4919 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4920 sp = &sb; 4921 } 4922 error = copyout(sp, uap->buf, sizeof(*sp)); 4923 done: 4924 if (mp) 4925 mount_drop(mp); 4926 4927 return (error); 4928 } 4929 4930 /* 4931 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4932 */ 4933 int 4934 sys_fhstatvfs(struct fhstatvfs_args *uap) 4935 { 4936 struct thread *td = curthread; 4937 struct proc *p = td->td_proc; 4938 struct statvfs *sp; 4939 struct mount *mp; 4940 struct vnode *vp; 4941 fhandle_t fh; 4942 int error; 4943 4944 /* 4945 * Must be super user 4946 */ 4947 if ((error = priv_check(td, PRIV_ROOT))) 4948 return (error); 4949 4950 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4951 return (error); 4952 4953 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4954 error = ESTALE; 4955 goto done; 4956 } 4957 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4958 error = ESTALE; 4959 goto done; 4960 } 4961 4962 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4963 goto done; 4964 mp = vp->v_mount; 4965 sp = &mp->mnt_vstat; 4966 vput(vp); 4967 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4968 goto done; 4969 4970 sp->f_flag = 0; 4971 if (mp->mnt_flag & MNT_RDONLY) 4972 sp->f_flag |= ST_RDONLY; 4973 if (mp->mnt_flag & MNT_NOSUID) 4974 sp->f_flag |= ST_NOSUID; 4975 error = copyout(sp, uap->buf, sizeof(*sp)); 4976 done: 4977 
if (mp) 4978 mount_drop(mp); 4979 return (error); 4980 } 4981 4982 4983 /* 4984 * Syscall to push extended attribute configuration information into the 4985 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4986 * a command (int cmd), and attribute name and misc data. For now, the 4987 * attribute name is left in userspace for consumption by the VFS_op. 4988 * It will probably be changed to be copied into sysspace by the 4989 * syscall in the future, once issues with various consumers of the 4990 * attribute code have raised their hands. 4991 * 4992 * Currently this is used only by UFS Extended Attributes. 4993 */ 4994 int 4995 sys_extattrctl(struct extattrctl_args *uap) 4996 { 4997 struct nlookupdata nd; 4998 struct vnode *vp; 4999 char attrname[EXTATTR_MAXNAMELEN]; 5000 int error; 5001 size_t size; 5002 5003 attrname[0] = 0; 5004 vp = NULL; 5005 error = 0; 5006 5007 if (error == 0 && uap->filename) { 5008 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 5009 NLC_FOLLOW); 5010 if (error == 0) 5011 error = nlookup(&nd); 5012 if (error == 0) 5013 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 5014 nlookup_done(&nd); 5015 } 5016 5017 if (error == 0 && uap->attrname) { 5018 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 5019 &size); 5020 } 5021 5022 if (error == 0) { 5023 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5024 if (error == 0) 5025 error = nlookup(&nd); 5026 if (error == 0) 5027 error = ncp_writechk(&nd.nl_nch); 5028 if (error == 0) { 5029 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 5030 uap->attrnamespace, 5031 uap->attrname, nd.nl_cred); 5032 } 5033 nlookup_done(&nd); 5034 } 5035 5036 return (error); 5037 } 5038 5039 /* 5040 * Syscall to get a named extended attribute on a file or directory. 
 */
int
sys_extattr_set_file(struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	int error;

	/*
	 * NOTE(review): copies the full EXTATTR_MAXNAMELEN bytes rather
	 * than using copyinstr(); a shorter user string adjacent to an
	 * unmapped page could fault -- confirm this is intended.
	 */
	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	/* Resolve the path, check writability, get a locked vnode */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* Wrap the user data buffer in a uio describing the write */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
 */
int
sys_extattr_get_file(struct extattr_get_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct uio auio;
	struct iovec aiov;
	struct vnode *vp;
	int error;

	/* Attribute name is copied in before any vnode locks are held */
	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	/* Resolve the path; a shared lock suffices for reading */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* Wrap the user buffer in a uio describing the read */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_READ;
	auio.uio_td = curthread;

	error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);
	/* Syscall return value is the number of bytes actually read */
	uap->sysmsg_result = uap->nbytes - auio.uio_resid;

	vput(vp);
	nlookup_done(&nd);
	return(error);
}

/*
 * Syscall to delete a named extended attribute from a file or directory.
 * Accepts attribute name.  The real work happens in VOP_SETEXTATTR().
 */
int
sys_extattr_delete_file(struct extattr_delete_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	int error;

	/* Attribute name is copied in before any vnode locks are held */
	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return(error);

	/* Resolve the path and verify the target is writable */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
		if (error == 0) {
			/* A NULL uio tells VOP_SETEXTATTR to delete */
			error = VOP_SETEXTATTR(vp, uap->attrnamespace,
					       attrname, NULL, nd.nl_cred);
			vput(vp);
		}
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * Determine if the mount is visible to the process.
 */
static int
chroot_visible_mnt(struct mount *mp, struct proc *p)
{
	struct nchandle nch;

	/*
	 * Traverse from the mount point upwards.  If we hit the process
	 * root then the mount point is visible to the process.
	 */
	nch = mp->mnt_ncmountpt;
	while (nch.ncp) {
		if (nch.mount == p->p_fd->fd_nrdir.mount &&
		    nch.ncp == p->p_fd->fd_nrdir.ncp) {
			return(1);
		}
		/* At a mount root, hop across to the underlying mount */
		if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) {
			nch = nch.mount->mnt_ncmounton;
		} else {
			nch.ncp = nch.ncp->nc_parent;
		}
	}

	/*
	 * If the mount point is not visible to the process, but the
	 * process root is in a subdirectory of the mount, return
	 * TRUE anyway.
	 */
	if (p->p_fd->fd_nrdir.mount == mp)
		return(1);

	return(0);
}

/*
 * Map a filesystem type name to the privilege required to mount it.
 * Returns PRIV_ROOT when no filesystem-specific privilege applies.
 */
static int
get_fspriv(const char *fsname)
{

	/*
	 * The compared length includes the literal's terminating NUL,
	 * so only an exact name match qualifies.
	 */
	if (strncmp("null", fsname, 5) == 0) {
		return PRIV_VFS_MOUNT_NULLFS;
	} else if (strncmp(fsname, "tmpfs", 6) == 0) {
		return PRIV_VFS_MOUNT_TMPFS;
	}

	return PRIV_ROOT;
}