1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysmsg.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
__printflike(2, 3);

/*
 * Forward declarations for file-local helpers.  mount_path() and
 * checkdirs() are defined further down in this part of the file.
 */
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int get_fspriv(const char *);
static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, u_long);
static int setutimes (struct vnode *, struct vattr *,
		const struct timespec *, int);

/*
 * vfs.usermount: when 0, sys_mount() and sys_unmount() require the
 * filesystem-specific privilege up front; when non-zero, jailed callers
 * are rejected outright by both system calls.
 */
static int usermount = 0;	/* if 1, non-root can mount fs. */
SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * vfs.debug_unmount: when non-zero, the reference-drain loop in
 * dounmount() keeps retrying instead of giving up after 10 passes.
 */
static int debug_unmount = 0; /* if 1 loop until unmount success */
SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0,
    "Stall failed unmounts in loop");

/* NOTE(review): presumably rate-limits rename diagnostics; not used in this part of the file */
static struct krate krate_rename = { 1 };

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
*
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Handles both new mounts and MNT_UPDATE requests on an existing mount.
 * uap->type, uap->path and uap->data are user-space pointers; the type
 * is copied in with copyinstr() and the path is resolved via nlookup.
 *
 * Returns 0 on success or an errno value (EPERM, ENOENT, EINVAL, EBUSY,
 * ENOTDIR, ENODEV, EOPNOTSUPP, or whatever the lookup / VFS layer
 * returned).
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct sysmsg *sysmsg, const struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;	/* flag/flag2: saved mnt flags for update rollback */
	int hasmount;			/* target nch already has a mount on it */
	int priv = 0;
	int flags = uap->flags;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* We do not allow user mounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions for jails and nullfs mounts.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0)
		goto done;

	/*
	 * Select the correct priv according to the file system type.
	 */
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (flags & MNT_EXPORTED) {
		error = priv_check(td, priv);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, priv))
		flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 * A namecache entry without a vnode is reported as ENOENT.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure.
	 * Zeroing nd.nl_nch first keeps nlookup_done() from releasing
	 * the handle we just took over.
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this nch;
	 * both the update and the new-mount paths refuse with EBUSY
	 * later if so.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (flags & MNT_UPDATE) {
		/* updates must name the root vnode of a mount */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/* for type "null" operate on the mount the lookup resolved */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* snapshot the flags so a failed update can restore them */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, priv))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, priv)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* the covering filesystem may forbid mounts stacked on top of it */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		/*
		 * NOTE(review): lf is not initialized before this call, so
		 * the lf == NULL tests below rely on linker_load_file()
		 * storing to it even when it fails -- confirm.
		 */
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 *
	 * NOTE(review): the vfs_busy() result is ignored here; presumably
	 * it cannot fail on a freshly initialized mount -- confirm.
	 * NOTE(review): strncpy() does not terminate when the source is
	 * MFSNAMELEN or longer; assumes vfc_name is shorter -- confirm.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp, vfsp->vfc_vfsops);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.  A read-only mount that is not being
	 * asked to stay read-only is marked MNTK_WANTRDWR so the update
	 * path below can clear MNT_RDONLY after VFS_MOUNT().
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = VFS_MOUNT(mp, uap->path, uap->data, cred);
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* failed update: restore the flags saved earlier */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	/* new mount: the mount takes over our nch reference */
	mp->mnt_ncmounton = nch;
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by VFS_START() below, so a failure here is
		 * never reported to the caller.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed: tear down the half-constructed mount.
		 * mnt_ncmounton is zeroed (not dropped) because nch still
		 * holds the only reference and is dropped below.
		 */
		bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		if (mp->mnt_cred) {
			crfree(mp->mnt_cred);
			mp->mnt_cred = NULL;
		}
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
480 */ 481 struct checkdirs_info { 482 struct nchandle old_nch; 483 struct nchandle new_nch; 484 struct vnode *old_vp; 485 struct vnode *new_vp; 486 }; 487 488 static int checkdirs_callback(struct proc *p, void *data); 489 490 static void 491 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 492 { 493 struct checkdirs_info info; 494 struct vnode *olddp; 495 struct vnode *newdp; 496 struct mount *mp; 497 498 /* 499 * If the old mount point's vnode has a usecount of 1, it is not 500 * being held as a descriptor anywhere. 501 */ 502 olddp = old_nch->ncp->nc_vp; 503 if (olddp == NULL || VREFCNT(olddp) == 1) 504 return; 505 506 /* 507 * Force the root vnode of the new mount point to be resolved 508 * so we can update any matching processes. 509 */ 510 mp = new_nch->mount; 511 if (VFS_ROOT(mp, &newdp)) 512 panic("mount: lost mount"); 513 vn_unlock(newdp); 514 cache_lock(new_nch); 515 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 516 cache_setunresolved(new_nch); 517 cache_setvp(new_nch, newdp); 518 cache_unlock(new_nch); 519 520 /* 521 * Special handling of the root node 522 */ 523 if (rootvnode == olddp) { 524 vref(newdp); 525 vfs_cache_setroot(newdp, cache_hold(new_nch)); 526 } 527 528 /* 529 * Pass newdp separately so the callback does not have to access 530 * it via new_nch->ncp->nc_vp. 531 */ 532 info.old_nch = *old_nch; 533 info.new_nch = *new_nch; 534 info.new_vp = newdp; 535 allproc_scan(checkdirs_callback, &info, 0); 536 vput(newdp); 537 } 538 539 /* 540 * NOTE: callback is not MP safe because the scanned process's filedesc 541 * structure can be ripped out from under us, amoung other things. 
542 */ 543 static int 544 checkdirs_callback(struct proc *p, void *data) 545 { 546 struct checkdirs_info *info = data; 547 struct filedesc *fdp; 548 struct nchandle ncdrop1; 549 struct nchandle ncdrop2; 550 struct vnode *vprele1; 551 struct vnode *vprele2; 552 553 if ((fdp = p->p_fd) != NULL) { 554 cache_zero(&ncdrop1); 555 cache_zero(&ncdrop2); 556 vprele1 = NULL; 557 vprele2 = NULL; 558 559 /* 560 * MPUNSAFE - XXX fdp can be pulled out from under a 561 * foreign process. 562 * 563 * A shared filedesc is ok, we don't have to copy it 564 * because we are making this change globally. 565 */ 566 spin_lock(&fdp->fd_spin); 567 if (fdp->fd_ncdir.mount == info->old_nch.mount && 568 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 569 vprele1 = fdp->fd_cdir; 570 vref(info->new_vp); 571 fdp->fd_cdir = info->new_vp; 572 ncdrop1 = fdp->fd_ncdir; 573 cache_copy(&info->new_nch, &fdp->fd_ncdir); 574 } 575 if (fdp->fd_nrdir.mount == info->old_nch.mount && 576 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 577 vprele2 = fdp->fd_rdir; 578 vref(info->new_vp); 579 fdp->fd_rdir = info->new_vp; 580 ncdrop2 = fdp->fd_nrdir; 581 cache_copy(&info->new_nch, &fdp->fd_nrdir); 582 } 583 spin_unlock(&fdp->fd_spin); 584 if (ncdrop1.ncp) 585 cache_drop(&ncdrop1); 586 if (ncdrop2.ncp) 587 cache_drop(&ncdrop2); 588 if (vprele1) 589 vrele(vprele1); 590 if (vprele2) 591 vrele(vprele2); 592 } 593 return(0); 594 } 595 596 /* 597 * Unmount a file system. 598 * 599 * Note: unmount takes a path to the vnode mounted on as argument, 600 * not special file (as before). 
601 * 602 * umount_args(char *path, int flags) 603 * 604 * MPALMOSTSAFE 605 */ 606 int 607 sys_unmount(struct sysmsg *sysmsg, const struct unmount_args *uap) 608 { 609 struct thread *td = curthread; 610 struct proc *p __debugvar = td->td_proc; 611 struct mount *mp = NULL; 612 struct nlookupdata nd; 613 char fstypename[MFSNAMELEN]; 614 int priv = 0; 615 int error; 616 struct ucred *cred; 617 618 cred = td->td_ucred; 619 620 KKASSERT(p); 621 622 /* We do not allow user umounts inside a jail for now */ 623 if (usermount && jailed(cred)) { 624 error = EPERM; 625 goto done; 626 } 627 628 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 629 NLC_FOLLOW | NLC_IGNBADDIR); 630 if (error == 0) 631 error = nlookup(&nd); 632 if (error) 633 goto out; 634 635 mp = nd.nl_nch.mount; 636 637 /* Figure out the fsname in order to select proper privs */ 638 ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name); 639 priv = get_fspriv(fstypename); 640 641 if (usermount == 0 && (error = priv_check(td, priv))) { 642 nlookup_done(&nd); 643 goto done; 644 } 645 646 /* 647 * Only root, or the user that did the original mount is 648 * permitted to unmount this filesystem. 649 */ 650 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 651 (error = priv_check(td, priv))) 652 goto out; 653 654 /* 655 * Don't allow unmounting the root file system. 656 */ 657 if (mp->mnt_flag & MNT_ROOTFS) { 658 error = EINVAL; 659 goto out; 660 } 661 662 /* 663 * Must be the root of the filesystem 664 */ 665 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 666 error = EINVAL; 667 goto out; 668 } 669 670 /* Check if this mount belongs to this prison */ 671 if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison || 672 mp->mnt_cred->cr_prison != cred->cr_prison)) { 673 kprintf("mountpoint %s does not belong to this jail\n", 674 uap->path); 675 error = EPERM; 676 goto out; 677 } 678 679 /* 680 * If no error try to issue the unmount. 
We lose our cache 681 * ref when we call nlookup_done so we must hold the mount point 682 * to prevent use-after-free races. 683 */ 684 out: 685 if (error == 0) { 686 mount_hold(mp); 687 nlookup_done(&nd); 688 error = dounmount(mp, uap->flags, 0); 689 mount_drop(mp); 690 } else { 691 nlookup_done(&nd); 692 } 693 done: 694 return (error); 695 } 696 697 /* 698 * Do the actual file system unmount (interlocked against the mountlist 699 * token and mp->mnt_token). 700 */ 701 static int 702 dounmount_interlock(struct mount *mp) 703 { 704 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 705 return (EBUSY); 706 mp->mnt_kern_flag |= MNTK_UNMOUNT; 707 return(0); 708 } 709 710 static int 711 unmount_allproc_cb(struct proc *p, void *arg) 712 { 713 struct mount *mp; 714 715 if (p->p_textnch.ncp == NULL) 716 return 0; 717 718 mp = (struct mount *)arg; 719 if (p->p_textnch.mount == mp) 720 cache_drop(&p->p_textnch); 721 722 return 0; 723 } 724 725 /* 726 * The guts of the unmount code. The mount owns one ref and one hold 727 * count. If we successfully interlock the unmount, those refs are ours. 728 * (The ref is from mnt_ncmountpt). 729 * 730 * When halting we shortcut certain mount types such as devfs by not actually 731 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected 732 * from the mountlist so higher-level filesytems can unmount cleanly. 733 * 734 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs. 735 */ 736 int 737 dounmount(struct mount *mp, int flags, int halting) 738 { 739 struct namecache *ncp; 740 struct nchandle nch; 741 struct vnode *vp; 742 int error; 743 int async_flag; 744 int lflags; 745 int freeok = 1; 746 int hadsyncer = 0; 747 int retry; 748 int quickhalt; 749 750 lwkt_gettoken(&mp->mnt_token); 751 752 /* 753 * When halting, certain mount points can essentially just 754 * be unhooked and otherwise ignored. 
755 */ 756 if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) { 757 quickhalt = 1; 758 freeok = 0; 759 } else { 760 quickhalt = 0; 761 } 762 763 764 /* 765 * Exclusive access for unmounting purposes. 766 */ 767 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 768 goto out; 769 770 /* 771 * We now 'own' the last mp->mnt_refs 772 * 773 * Allow filesystems to detect that a forced unmount is in progress. 774 */ 775 if (flags & MNT_FORCE) 776 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 777 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK); 778 error = lockmgr(&mp->mnt_lock, lflags); 779 if (error) { 780 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 781 if (mp->mnt_kern_flag & MNTK_MWAIT) { 782 mp->mnt_kern_flag &= ~MNTK_MWAIT; 783 wakeup(mp); 784 } 785 goto out; 786 } 787 788 if (mp->mnt_flag & MNT_EXPUBLIC) 789 vfs_setpublicfs(NULL, NULL, NULL); 790 791 vfs_msync(mp, MNT_WAIT); 792 async_flag = mp->mnt_flag & MNT_ASYNC; 793 mp->mnt_flag &=~ MNT_ASYNC; 794 795 /* 796 * Decomission our special mnt_syncer vnode. This also stops 797 * the vnlru code. If we are unable to unmount we recommission 798 * the vnode. 799 * 800 * Then sync the filesystem. 801 */ 802 if ((vp = mp->mnt_syncer) != NULL) { 803 mp->mnt_syncer = NULL; 804 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 805 vrele(vp); 806 hadsyncer = 1; 807 } 808 809 /* 810 * Sync normally-mounted filesystem. 811 */ 812 if (quickhalt == 0) { 813 if ((mp->mnt_flag & MNT_RDONLY) == 0) 814 VFS_SYNC(mp, MNT_WAIT); 815 } 816 817 /* 818 * nchandle records ref the mount structure. Expect a count of 1 819 * (our mount->mnt_ncmountpt). 820 * 821 * Scans can get temporary refs on a mountpoint (thought really 822 * heavy duty stuff like cache_findmount() do not). 823 */ 824 for (retry = 0; (retry < 10 || debug_unmount); ++retry) { 825 /* 826 * Invalidate the namecache topology under the mount. 
827 * nullfs mounts alias a real mount's namecache topology 828 * and it should not be invalidated in that case. 829 */ 830 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 831 cache_lock(&mp->mnt_ncmountpt); 832 cache_inval(&mp->mnt_ncmountpt, 833 CINV_DESTROY|CINV_CHILDREN); 834 cache_unlock(&mp->mnt_ncmountpt); 835 } 836 837 /* 838 * Clear pcpu caches 839 */ 840 cache_unmounting(mp); 841 if (mp->mnt_refs != 1) 842 cache_clearmntcache(mp); 843 844 /* 845 * Break out if we are good. Don't count ncp refs if the 846 * mount is aliased. 847 */ 848 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 849 NULL : mp->mnt_ncmountpt.ncp; 850 if (mp->mnt_refs == 1 && 851 (ncp == NULL || (ncp->nc_refs == 1 && 852 TAILQ_FIRST(&ncp->nc_list) == NULL))) { 853 break; 854 } 855 856 /* 857 * If forcing the unmount, clean out any p->p_textnch 858 * nchandles that match this mount. 859 */ 860 if (flags & MNT_FORCE) 861 allproc_scan(&unmount_allproc_cb, mp, 0); 862 863 /* 864 * Sleep and retry. 865 */ 866 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1); 867 if ((retry & 15) == 15) { 868 mount_warning(mp, 869 "(%p) debug - retry %d, " 870 "%d namecache refs, %d mount refs", 871 mp, retry, 872 (ncp ? ncp->nc_refs - 1 : 0), 873 mp->mnt_refs - 1); 874 } 875 } 876 877 error = 0; 878 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 879 NULL : mp->mnt_ncmountpt.ncp; 880 if (mp->mnt_refs != 1 || 881 (ncp != NULL && (ncp->nc_refs != 1 || 882 TAILQ_FIRST(&ncp->nc_list)))) { 883 mount_warning(mp, 884 "(%p): %d namecache refs, %d mount refs " 885 "still present", 886 mp, 887 (ncp ? ncp->nc_refs - 1 : 0), 888 mp->mnt_refs - 1); 889 if (flags & MNT_FORCE) { 890 freeok = 0; 891 mount_warning(mp, "forcing unmount\n"); 892 } else { 893 error = EBUSY; 894 } 895 } 896 897 /* 898 * So far so good, sync the filesystem once more and 899 * call the VFS unmount code if the sync succeeds. 
900 */ 901 if (error == 0 && quickhalt == 0) { 902 if (mp->mnt_flag & MNT_RDONLY) { 903 error = VFS_UNMOUNT(mp, flags); 904 } else { 905 error = VFS_SYNC(mp, MNT_WAIT); 906 if (error == 0 || /* no error */ 907 error == EOPNOTSUPP || /* no sync avail */ 908 (flags & MNT_FORCE)) { /* force anyway */ 909 error = VFS_UNMOUNT(mp, flags); 910 } 911 } 912 if (error) { 913 mount_warning(mp, 914 "(%p) unmount: vfs refused to unmount, " 915 "error %d", 916 mp, error); 917 } 918 } 919 920 /* 921 * If an error occurred we can still recover, restoring the 922 * syncer vnode and misc flags. 923 */ 924 if (error) { 925 if (mp->mnt_syncer == NULL && hadsyncer) 926 vfs_allocate_syncvnode(mp); 927 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 928 mp->mnt_flag |= async_flag; 929 lockmgr(&mp->mnt_lock, LK_RELEASE); 930 if (mp->mnt_kern_flag & MNTK_MWAIT) { 931 mp->mnt_kern_flag &= ~MNTK_MWAIT; 932 wakeup(mp); 933 } 934 goto out; 935 } 936 /* 937 * Clean up any journals still associated with the mount after 938 * filesystem activity has ceased. 939 */ 940 journal_remove_all_journals(mp, 941 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 942 943 mountlist_remove(mp); 944 945 /* 946 * Remove any installed vnode ops here so the individual VFSs don't 947 * have to. 948 * 949 * mnt_refs should go to zero when we scrap mnt_ncmountpt. 950 * 951 * When quickhalting we have to keep these intact because the 952 * underlying vnodes have not been destroyed, and some might be 953 * dirty. 
954 */ 955 if (quickhalt == 0) { 956 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 957 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 958 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 959 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 960 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 961 } 962 963 if (mp->mnt_ncmountpt.ncp != NULL) { 964 nch = mp->mnt_ncmountpt; 965 cache_zero(&mp->mnt_ncmountpt); 966 cache_clrmountpt(&nch); 967 cache_drop(&nch); 968 } 969 if (mp->mnt_ncmounton.ncp != NULL) { 970 cache_unmounting(mp); 971 nch = mp->mnt_ncmounton; 972 cache_zero(&mp->mnt_ncmounton); 973 cache_clrmountpt(&nch); 974 cache_drop(&nch); 975 } 976 977 if (mp->mnt_cred) { 978 crfree(mp->mnt_cred); 979 mp->mnt_cred = NULL; 980 } 981 982 mp->mnt_vfc->vfc_refcount--; 983 984 /* 985 * If not quickhalting the mount, we expect there to be no 986 * vnodes left. 987 */ 988 if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist)) 989 panic("unmount: dangling vnode"); 990 991 /* 992 * Release the lock 993 */ 994 lockmgr(&mp->mnt_lock, LK_RELEASE); 995 if (mp->mnt_kern_flag & MNTK_MWAIT) { 996 mp->mnt_kern_flag &= ~MNTK_MWAIT; 997 wakeup(mp); 998 } 999 1000 /* 1001 * If we reach here and freeok != 0 we must free the mount. 1002 * mnt_refs should already have dropped to 0, so if it is not 1003 * zero we must cycle the caches and wait. 1004 * 1005 * When we are satisfied that the mount has disconnected we can 1006 * drop the hold on the mp that represented the mount (though the 1007 * caller might actually have another, so the caller's drop may 1008 * do the actual free). 
1009 */ 1010 if (freeok) { 1011 if (mp->mnt_refs > 0) 1012 cache_clearmntcache(mp); 1013 while (mp->mnt_refs > 0) { 1014 cache_unmounting(mp); 1015 wakeup(mp); 1016 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 1017 cache_clearmntcache(mp); 1018 } 1019 lwkt_reltoken(&mp->mnt_token); 1020 mount_drop(mp); 1021 mp = NULL; 1022 } else { 1023 cache_clearmntcache(mp); 1024 } 1025 error = 0; 1026 KNOTE(&fs_klist, VQ_UNMOUNT); 1027 out: 1028 if (mp) 1029 lwkt_reltoken(&mp->mnt_token); 1030 return (error); 1031 } 1032 1033 static 1034 void 1035 mount_warning(struct mount *mp, const char *ctl, ...) 1036 { 1037 char *ptr; 1038 char *buf; 1039 __va_list va; 1040 1041 __va_start(va, ctl); 1042 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 1043 &ptr, &buf, 0) == 0) { 1044 kprintf("unmount(%s): ", ptr); 1045 kvprintf(ctl, va); 1046 kprintf("\n"); 1047 kfree(buf, M_TEMP); 1048 } else { 1049 kprintf("unmount(%p", mp); 1050 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 1051 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 1052 kprintf("): "); 1053 kvprintf(ctl, va); 1054 kprintf("\n"); 1055 } 1056 __va_end(va); 1057 } 1058 1059 /* 1060 * Shim cache_fullpath() to handle the case where a process is chrooted into 1061 * a subdirectory of a mount. In this case if the root mount matches the 1062 * process root directory's mount we have to specify the process's root 1063 * directory instead of the mount point, because the mount point might 1064 * be above the root directory. 1065 */ 1066 static 1067 int 1068 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 1069 { 1070 struct nchandle *nch; 1071 1072 if (p && p->p_fd->fd_nrdir.mount == mp) 1073 nch = &p->p_fd->fd_nrdir; 1074 else 1075 nch = &mp->mnt_ncmountpt; 1076 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1077 } 1078 1079 /* 1080 * Sync each mounted filesystem. 
1081 */ 1082 1083 #ifdef DEBUG 1084 static int syncprt = 0; 1085 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1086 #endif /* DEBUG */ 1087 1088 static int sync_callback(struct mount *mp, void *data); 1089 1090 int 1091 sys_sync(struct sysmsg *sysmsg, const struct sync_args *uap) 1092 { 1093 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1094 return (0); 1095 } 1096 1097 static 1098 int 1099 sync_callback(struct mount *mp, void *data __unused) 1100 { 1101 int asyncflag; 1102 1103 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1104 lwkt_gettoken(&mp->mnt_token); 1105 asyncflag = mp->mnt_flag & MNT_ASYNC; 1106 mp->mnt_flag &= ~MNT_ASYNC; 1107 lwkt_reltoken(&mp->mnt_token); 1108 vfs_msync(mp, MNT_NOWAIT); 1109 VFS_SYNC(mp, MNT_NOWAIT); 1110 lwkt_gettoken(&mp->mnt_token); 1111 mp->mnt_flag |= asyncflag; 1112 lwkt_reltoken(&mp->mnt_token); 1113 } 1114 return(0); 1115 } 1116 1117 /* XXX PRISON: could be per prison flag */ 1118 static int prison_quotas; 1119 #if 0 1120 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1121 #endif 1122 1123 /* 1124 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1125 * 1126 * Change filesystem quotas. 
1127 * 1128 * MPALMOSTSAFE 1129 */ 1130 int 1131 sys_quotactl(struct sysmsg *sysmsg, const struct quotactl_args *uap) 1132 { 1133 struct nlookupdata nd; 1134 struct thread *td; 1135 struct mount *mp; 1136 int error; 1137 1138 td = curthread; 1139 if (td->td_ucred->cr_prison && !prison_quotas) { 1140 error = EPERM; 1141 goto done; 1142 } 1143 1144 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1145 if (error == 0) 1146 error = nlookup(&nd); 1147 if (error == 0) { 1148 mp = nd.nl_nch.mount; 1149 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 1150 uap->arg, nd.nl_cred); 1151 } 1152 nlookup_done(&nd); 1153 done: 1154 return (error); 1155 } 1156 1157 /* 1158 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1159 * void *buf, int buflen) 1160 * 1161 * This function operates on a mount point and executes the specified 1162 * operation using the specified control data, and possibly returns data. 1163 * 1164 * The actual number of bytes stored in the result buffer is returned, 0 1165 * if none, otherwise an error is returned. 1166 * 1167 * MPALMOSTSAFE 1168 */ 1169 int 1170 sys_mountctl(struct sysmsg *sysmsg, const struct mountctl_args *uap) 1171 { 1172 struct thread *td = curthread; 1173 struct file *fp; 1174 void *ctl = NULL; 1175 void *buf = NULL; 1176 char *path = NULL; 1177 int error; 1178 1179 /* 1180 * Sanity and permissions checks. We must be root. 
1181 */ 1182 if (td->td_ucred->cr_prison != NULL) 1183 return (EPERM); 1184 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1185 (error = priv_check(td, PRIV_ROOT)) != 0) 1186 return (error); 1187 1188 /* 1189 * Argument length checks 1190 */ 1191 if (uap->ctllen < 0 || uap->ctllen > 1024) 1192 return (EINVAL); 1193 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1194 return (EINVAL); 1195 if (uap->path == NULL) 1196 return (EINVAL); 1197 1198 /* 1199 * Allocate the necessary buffers and copyin data 1200 */ 1201 path = objcache_get(namei_oc, M_WAITOK); 1202 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1203 if (error) 1204 goto done; 1205 1206 if (uap->ctllen) { 1207 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1208 error = copyin(uap->ctl, ctl, uap->ctllen); 1209 if (error) 1210 goto done; 1211 } 1212 if (uap->buflen) 1213 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1214 1215 /* 1216 * Validate the descriptor 1217 */ 1218 if (uap->fd >= 0) { 1219 fp = holdfp(td, uap->fd, -1); 1220 if (fp == NULL) { 1221 error = EBADF; 1222 goto done; 1223 } 1224 } else { 1225 fp = NULL; 1226 } 1227 1228 /* 1229 * Execute the internal kernel function and clean up. 1230 */ 1231 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 1232 buf, uap->buflen, &sysmsg->sysmsg_result); 1233 if (fp) 1234 dropfp(td, uap->fd, fp); 1235 if (error == 0 && sysmsg->sysmsg_result > 0) 1236 error = copyout(buf, uap->buf, sysmsg->sysmsg_result); 1237 done: 1238 if (path) 1239 objcache_put(namei_oc, path); 1240 if (ctl) 1241 kfree(ctl, M_TEMP); 1242 if (buf) 1243 kfree(buf, M_TEMP); 1244 return (error); 1245 } 1246 1247 /* 1248 * Execute a mount control operation by resolving the path to a mount point 1249 * and calling vop_mountctl(). 1250 * 1251 * Use the mount point from the nch instead of the vnode so nullfs mounts 1252 * can properly spike the VOP. 
1253 */ 1254 int 1255 kern_mountctl(const char *path, int op, struct file *fp, 1256 const void *ctl, int ctllen, 1257 void *buf, int buflen, int *res) 1258 { 1259 struct vnode *vp; 1260 struct nlookupdata nd; 1261 struct nchandle nch; 1262 struct mount *mp; 1263 int error; 1264 1265 *res = 0; 1266 vp = NULL; 1267 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1268 if (error) 1269 return (error); 1270 error = nlookup(&nd); 1271 if (error) { 1272 nlookup_done(&nd); 1273 return (error); 1274 } 1275 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1276 if (error) { 1277 nlookup_done(&nd); 1278 return (error); 1279 } 1280 1281 /* 1282 * Yes, all this is needed to use the nch.mount below, because 1283 * we must maintain a ref on the mount to avoid ripouts (e.g. 1284 * due to heavy mount/unmount use by synth or poudriere). 1285 */ 1286 nch = nd.nl_nch; 1287 cache_zero(&nd.nl_nch); 1288 cache_unlock(&nch); 1289 nlookup_done(&nd); 1290 vn_unlock(vp); 1291 1292 mp = nch.mount; 1293 1294 /* 1295 * Must be the root of the filesystem 1296 */ 1297 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1298 cache_drop(&nch); 1299 vrele(vp); 1300 return (EINVAL); 1301 } 1302 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1303 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1304 path); 1305 cache_drop(&nch); 1306 vrele(vp); 1307 return (EINVAL); 1308 } 1309 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1310 buf, buflen, res); 1311 vrele(vp); 1312 cache_drop(&nch); 1313 1314 return (error); 1315 } 1316 1317 int 1318 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1319 { 1320 struct thread *td = curthread; 1321 struct proc *p = td->td_proc; 1322 struct mount *mp; 1323 struct statfs *sp; 1324 char *fullpath, *freepath; 1325 int error; 1326 1327 if ((error = nlookup(nd)) != 0) 1328 return (error); 1329 mp = nd->nl_nch.mount; 1330 sp = &mp->mnt_stat; 1331 1332 /* 1333 * Ignore refresh error, user should have visibility. 
1334 * This can happen if a NFS mount goes bad (e.g. server 1335 * revokes perms or goes down). 1336 */ 1337 error = VFS_STATFS(mp, sp, nd->nl_cred); 1338 /* ignore error */ 1339 1340 error = mount_path(p, mp, &fullpath, &freepath); 1341 if (error) 1342 return(error); 1343 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1344 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1345 kfree(freepath, M_TEMP); 1346 1347 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1348 bcopy(sp, buf, sizeof(*buf)); 1349 /* Only root should have access to the fsid's. */ 1350 if (priv_check(td, PRIV_ROOT)) 1351 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1352 return (0); 1353 } 1354 1355 /* 1356 * statfs_args(char *path, struct statfs *buf) 1357 * 1358 * Get filesystem statistics. 1359 */ 1360 int 1361 sys_statfs(struct sysmsg *sysmsg, const struct statfs_args *uap) 1362 { 1363 struct nlookupdata nd; 1364 struct statfs buf; 1365 int error; 1366 1367 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1368 if (error == 0) 1369 error = kern_statfs(&nd, &buf); 1370 nlookup_done(&nd); 1371 if (error == 0) 1372 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1373 return (error); 1374 } 1375 1376 int 1377 kern_fstatfs(int fd, struct statfs *buf) 1378 { 1379 struct thread *td = curthread; 1380 struct proc *p = td->td_proc; 1381 struct file *fp; 1382 struct mount *mp; 1383 struct statfs *sp; 1384 char *fullpath, *freepath; 1385 int error; 1386 1387 KKASSERT(p); 1388 if ((error = holdvnode(td, fd, &fp)) != 0) 1389 return (error); 1390 1391 /* 1392 * Try to use mount info from any overlays rather than the 1393 * mount info for the underlying vnode, otherwise we will 1394 * fail when operating on null-mounted paths inside a chroot. 
1395 */ 1396 if ((mp = fp->f_nchandle.mount) == NULL) 1397 mp = ((struct vnode *)fp->f_data)->v_mount; 1398 if (mp == NULL) { 1399 error = EBADF; 1400 goto done; 1401 } 1402 if (fp->f_cred == NULL) { 1403 error = EINVAL; 1404 goto done; 1405 } 1406 1407 /* 1408 * Ignore refresh error, user should have visibility. 1409 * This can happen if a NFS mount goes bad (e.g. server 1410 * revokes perms or goes down). 1411 */ 1412 sp = &mp->mnt_stat; 1413 error = VFS_STATFS(mp, sp, fp->f_cred); 1414 1415 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1416 goto done; 1417 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1418 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1419 kfree(freepath, M_TEMP); 1420 1421 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1422 bcopy(sp, buf, sizeof(*buf)); 1423 1424 /* Only root should have access to the fsid's. */ 1425 if (priv_check(td, PRIV_ROOT)) 1426 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1427 error = 0; 1428 done: 1429 fdrop(fp); 1430 return (error); 1431 } 1432 1433 /* 1434 * fstatfs_args(int fd, struct statfs *buf) 1435 * 1436 * Get filesystem statistics. 
1437 */ 1438 int 1439 sys_fstatfs(struct sysmsg *sysmsg, const struct fstatfs_args *uap) 1440 { 1441 struct statfs buf; 1442 int error; 1443 1444 error = kern_fstatfs(uap->fd, &buf); 1445 1446 if (error == 0) 1447 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1448 return (error); 1449 } 1450 1451 int 1452 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1453 { 1454 struct mount *mp; 1455 struct statvfs *sp; 1456 int error; 1457 1458 if ((error = nlookup(nd)) != 0) 1459 return (error); 1460 mp = nd->nl_nch.mount; 1461 sp = &mp->mnt_vstat; 1462 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1463 return (error); 1464 1465 sp->f_flag = 0; 1466 if (mp->mnt_flag & MNT_RDONLY) 1467 sp->f_flag |= ST_RDONLY; 1468 if (mp->mnt_flag & MNT_NOSUID) 1469 sp->f_flag |= ST_NOSUID; 1470 bcopy(sp, buf, sizeof(*buf)); 1471 return (0); 1472 } 1473 1474 /* 1475 * statfs_args(char *path, struct statfs *buf) 1476 * 1477 * Get filesystem statistics. 1478 */ 1479 int 1480 sys_statvfs(struct sysmsg *sysmsg, const struct statvfs_args *uap) 1481 { 1482 struct nlookupdata nd; 1483 struct statvfs buf; 1484 int error; 1485 1486 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1487 if (error == 0) 1488 error = kern_statvfs(&nd, &buf); 1489 nlookup_done(&nd); 1490 if (error == 0) 1491 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1492 return (error); 1493 } 1494 1495 int 1496 kern_fstatvfs(int fd, struct statvfs *buf) 1497 { 1498 struct thread *td = curthread; 1499 struct file *fp; 1500 struct mount *mp; 1501 struct statvfs *sp; 1502 int error; 1503 1504 if ((error = holdvnode(td, fd, &fp)) != 0) 1505 return (error); 1506 if ((mp = fp->f_nchandle.mount) == NULL) 1507 mp = ((struct vnode *)fp->f_data)->v_mount; 1508 if (mp == NULL) { 1509 error = EBADF; 1510 goto done; 1511 } 1512 if (fp->f_cred == NULL) { 1513 error = EINVAL; 1514 goto done; 1515 } 1516 sp = &mp->mnt_vstat; 1517 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1518 goto done; 1519 1520 
sp->f_flag = 0; 1521 if (mp->mnt_flag & MNT_RDONLY) 1522 sp->f_flag |= ST_RDONLY; 1523 if (mp->mnt_flag & MNT_NOSUID) 1524 sp->f_flag |= ST_NOSUID; 1525 1526 bcopy(sp, buf, sizeof(*buf)); 1527 error = 0; 1528 done: 1529 fdrop(fp); 1530 return (error); 1531 } 1532 1533 /* 1534 * fstatfs_args(int fd, struct statfs *buf) 1535 * 1536 * Get filesystem statistics. 1537 */ 1538 int 1539 sys_fstatvfs(struct sysmsg *sysmsg, const struct fstatvfs_args *uap) 1540 { 1541 struct statvfs buf; 1542 int error; 1543 1544 error = kern_fstatvfs(uap->fd, &buf); 1545 1546 if (error == 0) 1547 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1548 return (error); 1549 } 1550 1551 /* 1552 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1553 * 1554 * Get statistics on all filesystems. 1555 */ 1556 1557 struct getfsstat_info { 1558 struct statfs *sfsp; 1559 long count; 1560 long maxcount; 1561 int error; 1562 int flags; 1563 struct thread *td; 1564 }; 1565 1566 static int getfsstat_callback(struct mount *, void *); 1567 1568 int 1569 sys_getfsstat(struct sysmsg *sysmsg, const struct getfsstat_args *uap) 1570 { 1571 struct thread *td = curthread; 1572 struct getfsstat_info info; 1573 1574 bzero(&info, sizeof(info)); 1575 1576 info.maxcount = uap->bufsize / sizeof(struct statfs); 1577 info.sfsp = uap->buf; 1578 info.count = 0; 1579 info.flags = uap->flags; 1580 info.td = td; 1581 1582 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1583 if (info.sfsp && info.count > info.maxcount) 1584 sysmsg->sysmsg_result = info.maxcount; 1585 else 1586 sysmsg->sysmsg_result = info.count; 1587 return (info.error); 1588 } 1589 1590 static int 1591 getfsstat_callback(struct mount *mp, void *data) 1592 { 1593 struct getfsstat_info *info = data; 1594 struct statfs *sp; 1595 char *freepath; 1596 char *fullpath; 1597 int error; 1598 1599 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1600 return(0); 1601 1602 if (info->sfsp && info->count < info->maxcount) { 1603 
sp = &mp->mnt_stat; 1604 1605 /* 1606 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1607 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1608 * overrides MNT_WAIT. 1609 * 1610 * Ignore refresh error, user should have visibility. 1611 * This can happen if a NFS mount goes bad (e.g. server 1612 * revokes perms or goes down). 1613 */ 1614 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1615 (info->flags & MNT_WAIT)) && 1616 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1617 /* ignore error */ 1618 } 1619 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1620 1621 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1622 if (error) { 1623 info->error = error; 1624 return(-1); 1625 } 1626 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1627 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1628 kfree(freepath, M_TEMP); 1629 1630 error = copyout(sp, info->sfsp, sizeof(*sp)); 1631 if (error) { 1632 info->error = error; 1633 return (-1); 1634 } 1635 ++info->sfsp; 1636 } 1637 info->count++; 1638 return(0); 1639 } 1640 1641 /* 1642 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1643 long bufsize, int flags) 1644 * 1645 * Get statistics on all filesystems. 
1646 */ 1647 1648 struct getvfsstat_info { 1649 struct statfs *sfsp; 1650 struct statvfs *vsfsp; 1651 long count; 1652 long maxcount; 1653 int error; 1654 int flags; 1655 struct thread *td; 1656 }; 1657 1658 static int getvfsstat_callback(struct mount *, void *); 1659 1660 int 1661 sys_getvfsstat(struct sysmsg *sysmsg, const struct getvfsstat_args *uap) 1662 { 1663 struct thread *td = curthread; 1664 struct getvfsstat_info info; 1665 1666 bzero(&info, sizeof(info)); 1667 1668 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1669 info.sfsp = uap->buf; 1670 info.vsfsp = uap->vbuf; 1671 info.count = 0; 1672 info.flags = uap->flags; 1673 info.td = td; 1674 1675 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1676 if (info.vsfsp && info.count > info.maxcount) 1677 sysmsg->sysmsg_result = info.maxcount; 1678 else 1679 sysmsg->sysmsg_result = info.count; 1680 return (info.error); 1681 } 1682 1683 static int 1684 getvfsstat_callback(struct mount *mp, void *data) 1685 { 1686 struct getvfsstat_info *info = data; 1687 struct statfs *sp; 1688 struct statvfs *vsp; 1689 char *freepath; 1690 char *fullpath; 1691 int error; 1692 1693 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1694 return(0); 1695 1696 if (info->vsfsp && info->count < info->maxcount) { 1697 sp = &mp->mnt_stat; 1698 vsp = &mp->mnt_vstat; 1699 1700 /* 1701 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1702 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1703 * overrides MNT_WAIT. 1704 * 1705 * Ignore refresh error, user should have visibility. 1706 * This can happen if a NFS mount goes bad (e.g. server 1707 * revokes perms or goes down). 
1708 */ 1709 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1710 (info->flags & MNT_WAIT)) && 1711 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1712 /* ignore error */ 1713 } 1714 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1715 1716 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1717 (info->flags & MNT_WAIT)) && 1718 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1719 /* ignore error */ 1720 } 1721 vsp->f_flag = 0; 1722 if (mp->mnt_flag & MNT_RDONLY) 1723 vsp->f_flag |= ST_RDONLY; 1724 if (mp->mnt_flag & MNT_NOSUID) 1725 vsp->f_flag |= ST_NOSUID; 1726 1727 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1728 if (error) { 1729 info->error = error; 1730 return(-1); 1731 } 1732 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1733 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1734 kfree(freepath, M_TEMP); 1735 1736 error = copyout(sp, info->sfsp, sizeof(*sp)); 1737 if (error == 0) 1738 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1739 if (error) { 1740 info->error = error; 1741 return (-1); 1742 } 1743 ++info->sfsp; 1744 ++info->vsfsp; 1745 } 1746 info->count++; 1747 return(0); 1748 } 1749 1750 1751 /* 1752 * fchdir_args(int fd) 1753 * 1754 * Change current working directory to a given file descriptor. 
1755 */ 1756 int 1757 sys_fchdir(struct sysmsg *sysmsg, const struct fchdir_args *uap) 1758 { 1759 struct thread *td = curthread; 1760 struct proc *p = td->td_proc; 1761 struct filedesc *fdp = p->p_fd; 1762 struct vnode *vp, *ovp; 1763 struct mount *mp; 1764 struct file *fp; 1765 struct nchandle nch, onch, tnch; 1766 int error; 1767 1768 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1769 return (error); 1770 lwkt_gettoken(&p->p_token); 1771 vp = (struct vnode *)fp->f_data; 1772 vref(vp); 1773 vn_lock(vp, LK_SHARED | LK_RETRY); 1774 if (fp->f_nchandle.ncp == NULL) 1775 error = ENOTDIR; 1776 else 1777 error = checkvp_chdir(vp, td); 1778 if (error) { 1779 vput(vp); 1780 goto done; 1781 } 1782 cache_copy(&fp->f_nchandle, &nch); 1783 1784 /* 1785 * If the ncp has become a mount point, traverse through 1786 * the mount point. 1787 */ 1788 1789 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1790 (mp = cache_findmount(&nch)) != NULL 1791 ) { 1792 error = nlookup_mp(mp, &tnch); 1793 if (error == 0) { 1794 cache_unlock(&tnch); /* leave ref intact */ 1795 vput(vp); 1796 vp = tnch.ncp->nc_vp; 1797 error = vget(vp, LK_SHARED); 1798 KKASSERT(error == 0); 1799 cache_drop(&nch); 1800 nch = tnch; 1801 } 1802 cache_dropmount(mp); 1803 } 1804 if (error == 0) { 1805 spin_lock(&fdp->fd_spin); 1806 ovp = fdp->fd_cdir; 1807 onch = fdp->fd_ncdir; 1808 fdp->fd_cdir = vp; 1809 fdp->fd_ncdir = nch; 1810 spin_unlock(&fdp->fd_spin); 1811 vn_unlock(vp); /* leave ref intact */ 1812 cache_drop(&onch); 1813 vrele(ovp); 1814 } else { 1815 cache_drop(&nch); 1816 vput(vp); 1817 } 1818 fdrop(fp); 1819 done: 1820 lwkt_reltoken(&p->p_token); 1821 return (error); 1822 } 1823 1824 int 1825 kern_chdir(struct nlookupdata *nd) 1826 { 1827 struct thread *td = curthread; 1828 struct proc *p = td->td_proc; 1829 struct filedesc *fdp = p->p_fd; 1830 struct vnode *vp, *ovp; 1831 struct nchandle onch; 1832 int error; 1833 1834 nd->nl_flags |= NLC_SHAREDLOCK; 1835 if ((error = nlookup(nd)) != 0) 1836 
return (error); 1837 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 1838 return (ENOENT); 1839 if ((error = vget(vp, LK_SHARED)) != 0) 1840 return (error); 1841 1842 lwkt_gettoken(&p->p_token); 1843 error = checkvp_chdir(vp, td); 1844 vn_unlock(vp); 1845 if (error == 0) { 1846 spin_lock(&fdp->fd_spin); 1847 ovp = fdp->fd_cdir; 1848 onch = fdp->fd_ncdir; 1849 fdp->fd_ncdir = nd->nl_nch; 1850 fdp->fd_cdir = vp; 1851 spin_unlock(&fdp->fd_spin); 1852 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1853 cache_drop(&onch); 1854 vrele(ovp); 1855 cache_zero(&nd->nl_nch); 1856 } else { 1857 vrele(vp); 1858 } 1859 lwkt_reltoken(&p->p_token); 1860 return (error); 1861 } 1862 1863 /* 1864 * chdir_args(char *path) 1865 * 1866 * Change current working directory (``.''). 1867 */ 1868 int 1869 sys_chdir(struct sysmsg *sysmsg, const struct chdir_args *uap) 1870 { 1871 struct nlookupdata nd; 1872 int error; 1873 1874 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1875 if (error == 0) 1876 error = kern_chdir(&nd); 1877 nlookup_done(&nd); 1878 return (error); 1879 } 1880 1881 /* 1882 * Helper function for raised chroot(2) security function: Refuse if 1883 * any filedescriptors are open directories. 1884 */ 1885 static int 1886 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1887 { 1888 struct vnode *vp; 1889 struct file *fp; 1890 int error; 1891 int fd; 1892 1893 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1894 if ((error = holdvnode(td, fd, &fp)) != 0) 1895 continue; 1896 vp = (struct vnode *)fp->f_data; 1897 if (vp->v_type != VDIR) { 1898 fdrop(fp); 1899 continue; 1900 } 1901 fdrop(fp); 1902 return(EPERM); 1903 } 1904 return (0); 1905 } 1906 1907 /* 1908 * This sysctl determines if we will allow a process to chroot(2) if it 1909 * has a directory open: 1910 * 0: disallowed for all processes. 1911 * 1: allowed for processes that were not already chroot(2)'ed. 1912 * 2: allowed for all processes. 
1913 */ 1914 1915 static int chroot_allow_open_directories = 1; 1916 1917 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1918 &chroot_allow_open_directories, 0, ""); 1919 1920 /* 1921 * chroot to the specified namecache entry. We obtain the vp from the 1922 * namecache data. The passed ncp must be locked and referenced and will 1923 * remain locked and referenced on return. 1924 */ 1925 int 1926 kern_chroot(struct nchandle *nch) 1927 { 1928 struct thread *td = curthread; 1929 struct proc *p = td->td_proc; 1930 struct filedesc *fdp = p->p_fd; 1931 struct vnode *vp; 1932 int error; 1933 1934 /* 1935 * Only privileged user can chroot 1936 */ 1937 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1938 if (error) 1939 return (error); 1940 1941 /* 1942 * Disallow open directory descriptors (fchdir() breakouts). 1943 */ 1944 if (chroot_allow_open_directories == 0 || 1945 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1946 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 1947 return (error); 1948 } 1949 if ((vp = nch->ncp->nc_vp) == NULL) 1950 return (ENOENT); 1951 1952 if ((error = vget(vp, LK_SHARED)) != 0) 1953 return (error); 1954 1955 /* 1956 * Check the validity of vp as a directory to change to and 1957 * associate it with rdir/jdir. 
1958 */ 1959 error = checkvp_chdir(vp, td); 1960 vn_unlock(vp); /* leave reference intact */ 1961 if (error == 0) { 1962 lwkt_gettoken(&p->p_token); 1963 vrele(fdp->fd_rdir); 1964 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1965 cache_drop(&fdp->fd_nrdir); 1966 cache_copy(nch, &fdp->fd_nrdir); 1967 if (fdp->fd_jdir == NULL) { 1968 fdp->fd_jdir = vp; 1969 vref(fdp->fd_jdir); 1970 cache_copy(nch, &fdp->fd_njdir); 1971 } 1972 if ((p->p_flags & P_DIDCHROOT) == 0) { 1973 p->p_flags |= P_DIDCHROOT; 1974 if (p->p_depth <= 65535 - 32) 1975 p->p_depth += 32; 1976 } 1977 lwkt_reltoken(&p->p_token); 1978 } else { 1979 vrele(vp); 1980 } 1981 return (error); 1982 } 1983 1984 /* 1985 * chroot_args(char *path) 1986 * 1987 * Change notion of root (``/'') directory. 1988 */ 1989 int 1990 sys_chroot(struct sysmsg *sysmsg, const struct chroot_args *uap) 1991 { 1992 struct thread *td __debugvar = curthread; 1993 struct nlookupdata nd; 1994 int error; 1995 1996 KKASSERT(td->td_proc); 1997 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1998 if (error == 0) { 1999 nd.nl_flags |= NLC_EXEC; 2000 error = nlookup(&nd); 2001 if (error == 0) 2002 error = kern_chroot(&nd.nl_nch); 2003 } 2004 nlookup_done(&nd); 2005 return(error); 2006 } 2007 2008 int 2009 sys_chroot_kernel(struct sysmsg *sysmsg, const struct chroot_kernel_args *uap) 2010 { 2011 struct thread *td = curthread; 2012 struct nlookupdata nd; 2013 struct nchandle *nch; 2014 struct vnode *vp; 2015 int error; 2016 2017 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2018 if (error) 2019 goto error_nond; 2020 2021 error = nlookup(&nd); 2022 if (error) 2023 goto error_out; 2024 2025 nch = &nd.nl_nch; 2026 2027 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 2028 if (error) 2029 goto error_out; 2030 2031 if ((vp = nch->ncp->nc_vp) == NULL) { 2032 error = ENOENT; 2033 goto error_out; 2034 } 2035 2036 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 2037 goto error_out; 2038 2039 
vfs_cache_setroot(vp, cache_hold(nch)); 2040 2041 error_out: 2042 nlookup_done(&nd); 2043 error_nond: 2044 return(error); 2045 } 2046 2047 /* 2048 * Common routine for chroot and chdir. Given a locked, referenced vnode, 2049 * determine whether it is legal to chdir to the vnode. The vnode's state 2050 * is not changed by this call. 2051 */ 2052 static int 2053 checkvp_chdir(struct vnode *vp, struct thread *td) 2054 { 2055 int error; 2056 2057 if (vp->v_type != VDIR) 2058 error = ENOTDIR; 2059 else 2060 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 2061 return (error); 2062 } 2063 2064 int 2065 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 2066 { 2067 struct thread *td = curthread; 2068 struct proc *p = td->td_proc; 2069 struct lwp *lp = td->td_lwp; 2070 struct filedesc *fdp = p->p_fd; 2071 int cmode, flags; 2072 struct file *nfp; 2073 struct file *fp; 2074 struct vnode *vp; 2075 int type, indx, error = 0; 2076 struct flock lf; 2077 2078 if ((oflags & O_ACCMODE) == O_ACCMODE) 2079 return (EINVAL); 2080 flags = FFLAGS(oflags); 2081 error = falloc(lp, &nfp, NULL); 2082 if (error) 2083 return (error); 2084 fp = nfp; 2085 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 2086 2087 /* 2088 * XXX p_dupfd is a real mess. It allows a device to return a 2089 * file descriptor to be duplicated rather then doing the open 2090 * itself. 2091 */ 2092 lp->lwp_dupfd = -1; 2093 2094 /* 2095 * Call vn_open() to do the lookup and assign the vnode to the 2096 * file pointer. vn_open() does not change the ref count on fp 2097 * and the vnode, on success, will be inherited by the file pointer 2098 * and unlocked. 2099 * 2100 * Request a shared lock on the vnode if possible. 2101 * 2102 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode 2103 * lock for O_RDWR opens on executables in order to avoid a VTEXT 2104 * detection race. The NLC_EXCLLOCK_IFEXEC handles this case. 
2105 * 2106 * NOTE: We need a flag to separate terminal vnode locking from 2107 * parent locking. O_CREAT needs parent locking, but O_TRUNC 2108 * and O_RDWR only need to lock the terminal vnode exclusively. 2109 */ 2110 nd->nl_flags |= NLC_LOCKVP; 2111 if ((flags & (O_CREAT|O_TRUNC)) == 0) { 2112 nd->nl_flags |= NLC_SHAREDLOCK; 2113 if (flags & O_RDWR) 2114 nd->nl_flags |= NLC_EXCLLOCK_IFEXEC; 2115 } 2116 2117 error = vn_open(nd, fp, flags, cmode); 2118 nlookup_done(nd); 2119 2120 if (error) { 2121 /* 2122 * handle special fdopen() case. bleh. dupfdopen() is 2123 * responsible for dropping the old contents of ofiles[indx] 2124 * if it succeeds. 2125 * 2126 * Note that fsetfd() will add a ref to fp which represents 2127 * the fd_files[] assignment. We must still drop our 2128 * reference. 2129 */ 2130 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 2131 if (fdalloc(p, 0, &indx) == 0) { 2132 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error); 2133 if (error == 0) { 2134 *res = indx; 2135 fdrop(fp); /* our ref */ 2136 return (0); 2137 } 2138 fsetfd(fdp, NULL, indx); 2139 } 2140 } 2141 fdrop(fp); /* our ref */ 2142 if (error == ERESTART) 2143 error = EINTR; 2144 return (error); 2145 } 2146 2147 /* 2148 * ref the vnode for ourselves so it can't be ripped out from under 2149 * is. XXX need an ND flag to request that the vnode be returned 2150 * anyway. 2151 * 2152 * Reserve a file descriptor but do not assign it until the open 2153 * succeeds. 2154 */ 2155 vp = (struct vnode *)fp->f_data; 2156 vref(vp); 2157 if ((error = fdalloc(p, 0, &indx)) != 0) { 2158 fdrop(fp); 2159 vrele(vp); 2160 return (error); 2161 } 2162 2163 /* 2164 * If no error occurs the vp will have been assigned to the file 2165 * pointer. 
2166 */ 2167 lp->lwp_dupfd = 0; 2168 2169 if (flags & (O_EXLOCK | O_SHLOCK)) { 2170 lf.l_whence = SEEK_SET; 2171 lf.l_start = 0; 2172 lf.l_len = 0; 2173 if (flags & O_EXLOCK) 2174 lf.l_type = F_WRLCK; 2175 else 2176 lf.l_type = F_RDLCK; 2177 if (flags & FNONBLOCK) 2178 type = 0; 2179 else 2180 type = F_WAIT; 2181 2182 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2183 /* 2184 * lock request failed. Clean up the reserved 2185 * descriptor. 2186 */ 2187 vrele(vp); 2188 fsetfd(fdp, NULL, indx); 2189 fdrop(fp); 2190 return (error); 2191 } 2192 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2193 } 2194 #if 0 2195 /* 2196 * Assert that all regular file vnodes were created with a object. 2197 */ 2198 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2199 ("open: regular file has no backing object after vn_open")); 2200 #endif 2201 2202 vrele(vp); 2203 2204 /* 2205 * release our private reference, leaving the one associated with the 2206 * descriptor table intact. 2207 */ 2208 if (oflags & O_CLOEXEC) 2209 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2210 fsetfd(fdp, fp, indx); 2211 fdrop(fp); 2212 *res = indx; 2213 2214 return (error); 2215 } 2216 2217 /* 2218 * open_args(char *path, int flags, int mode) 2219 * 2220 * Check permissions, allocate an open file structure, 2221 * and call the device open routine if any. 
2222 */ 2223 int 2224 sys_open(struct sysmsg *sysmsg, const struct open_args *uap) 2225 { 2226 struct nlookupdata nd; 2227 int error; 2228 2229 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2230 if (error == 0) { 2231 error = kern_open(&nd, uap->flags, 2232 uap->mode, &sysmsg->sysmsg_result); 2233 } 2234 nlookup_done(&nd); 2235 return (error); 2236 } 2237 2238 /* 2239 * openat_args(int fd, char *path, int flags, int mode) 2240 */ 2241 int 2242 sys_openat(struct sysmsg *sysmsg, const struct openat_args *uap) 2243 { 2244 struct nlookupdata nd; 2245 int error; 2246 struct file *fp; 2247 2248 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2249 if (error == 0) { 2250 error = kern_open(&nd, uap->flags, uap->mode, 2251 &sysmsg->sysmsg_result); 2252 } 2253 nlookup_done_at(&nd, fp); 2254 return (error); 2255 } 2256 2257 int 2258 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2259 { 2260 struct thread *td = curthread; 2261 struct proc *p = td->td_proc; 2262 struct vnode *vp; 2263 struct vattr vattr; 2264 int error; 2265 int whiteout = 0; 2266 2267 KKASSERT(p); 2268 2269 VATTR_NULL(&vattr); 2270 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2271 vattr.va_rmajor = rmajor; 2272 vattr.va_rminor = rminor; 2273 2274 switch (mode & S_IFMT) { 2275 case S_IFMT: /* used by badsect to flag bad sectors */ 2276 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2277 vattr.va_type = VBAD; 2278 break; 2279 case S_IFCHR: 2280 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2281 vattr.va_type = VCHR; 2282 break; 2283 case S_IFBLK: 2284 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2285 vattr.va_type = VBLK; 2286 break; 2287 case S_IFWHT: 2288 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2289 whiteout = 1; 2290 break; 2291 case S_IFDIR: /* special directories support for HAMMER */ 2292 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2293 vattr.va_type = VDIR; 2294 break; 2295 default: 2296 
error = EINVAL; 2297 break; 2298 } 2299 2300 if (error) 2301 return (error); 2302 2303 bwillinode(1); 2304 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2305 if ((error = nlookup(nd)) != 0) 2306 return (error); 2307 if (nd->nl_nch.ncp->nc_vp) 2308 return (EEXIST); 2309 if (nd->nl_dvp == NULL) 2310 return (EINVAL); 2311 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2312 return (error); 2313 2314 if (whiteout) { 2315 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2316 nd->nl_cred, NAMEI_CREATE); 2317 } else { 2318 vp = NULL; 2319 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2320 &vp, nd->nl_cred, &vattr); 2321 if (error == 0) 2322 vput(vp); 2323 } 2324 return (error); 2325 } 2326 2327 /* 2328 * mknod_args(char *path, int mode, int dev) 2329 * 2330 * Create a special file. 2331 */ 2332 int 2333 sys_mknod(struct sysmsg *sysmsg, const struct mknod_args *uap) 2334 { 2335 struct nlookupdata nd; 2336 int error; 2337 2338 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2339 if (error == 0) { 2340 error = kern_mknod(&nd, uap->mode, 2341 umajor(uap->dev), uminor(uap->dev)); 2342 } 2343 nlookup_done(&nd); 2344 return (error); 2345 } 2346 2347 /* 2348 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2349 * 2350 * Create a special file. The path is relative to the directory associated 2351 * with fd. 
2352 */ 2353 int 2354 sys_mknodat(struct sysmsg *sysmsg, const struct mknodat_args *uap) 2355 { 2356 struct nlookupdata nd; 2357 struct file *fp; 2358 int error; 2359 2360 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2361 if (error == 0) { 2362 error = kern_mknod(&nd, uap->mode, 2363 umajor(uap->dev), uminor(uap->dev)); 2364 } 2365 nlookup_done_at(&nd, fp); 2366 return (error); 2367 } 2368 2369 int 2370 kern_mkfifo(struct nlookupdata *nd, int mode) 2371 { 2372 struct thread *td = curthread; 2373 struct proc *p = td->td_proc; 2374 struct vattr vattr; 2375 struct vnode *vp; 2376 int error; 2377 2378 bwillinode(1); 2379 2380 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2381 if ((error = nlookup(nd)) != 0) 2382 return (error); 2383 if (nd->nl_nch.ncp->nc_vp) 2384 return (EEXIST); 2385 if (nd->nl_dvp == NULL) 2386 return (EINVAL); 2387 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2388 return (error); 2389 2390 VATTR_NULL(&vattr); 2391 vattr.va_type = VFIFO; 2392 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2393 vp = NULL; 2394 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2395 if (error == 0) 2396 vput(vp); 2397 return (error); 2398 } 2399 2400 /* 2401 * mkfifo_args(char *path, int mode) 2402 * 2403 * Create a named pipe. 2404 */ 2405 int 2406 sys_mkfifo(struct sysmsg *sysmsg, const struct mkfifo_args *uap) 2407 { 2408 struct nlookupdata nd; 2409 int error; 2410 2411 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2412 if (error == 0) 2413 error = kern_mkfifo(&nd, uap->mode); 2414 nlookup_done(&nd); 2415 return (error); 2416 } 2417 2418 /* 2419 * mkfifoat_args(int fd, char *path, mode_t mode) 2420 * 2421 * Create a named pipe. The path is relative to the directory associated 2422 * with fd. 
2423 */ 2424 int 2425 sys_mkfifoat(struct sysmsg *sysmsg, const struct mkfifoat_args *uap) 2426 { 2427 struct nlookupdata nd; 2428 struct file *fp; 2429 int error; 2430 2431 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2432 if (error == 0) 2433 error = kern_mkfifo(&nd, uap->mode); 2434 nlookup_done_at(&nd, fp); 2435 return (error); 2436 } 2437 2438 static int hardlink_check_uid = 0; 2439 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2440 &hardlink_check_uid, 0, 2441 "Unprivileged processes cannot create hard links to files owned by other " 2442 "users"); 2443 static int hardlink_check_gid = 0; 2444 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2445 &hardlink_check_gid, 0, 2446 "Unprivileged processes cannot create hard links to files owned by other " 2447 "groups"); 2448 2449 static int 2450 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2451 { 2452 struct vattr va; 2453 int error; 2454 2455 /* 2456 * Shortcut if disabled 2457 */ 2458 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2459 return (0); 2460 2461 /* 2462 * Privileged user can always hardlink 2463 */ 2464 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2465 return (0); 2466 2467 /* 2468 * Otherwise only if the originating file is owned by the 2469 * same user or group. Note that any group is allowed if 2470 * the file is owned by the caller. 2471 */ 2472 error = VOP_GETATTR(vp, &va); 2473 if (error != 0) 2474 return (error); 2475 2476 if (hardlink_check_uid) { 2477 if (cred->cr_uid != va.va_uid) 2478 return (EPERM); 2479 } 2480 2481 if (hardlink_check_gid) { 2482 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2483 return (EPERM); 2484 } 2485 2486 return (0); 2487 } 2488 2489 int 2490 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2491 { 2492 struct thread *td = curthread; 2493 struct vnode *vp; 2494 int error; 2495 2496 /* 2497 * Lookup the source and obtained a locked vnode. 
2498 * 2499 * You may only hardlink a file which you have write permission 2500 * on or which you own. 2501 * 2502 * XXX relookup on vget failure / race ? 2503 */ 2504 bwillinode(1); 2505 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2506 if ((error = nlookup(nd)) != 0) 2507 return (error); 2508 vp = nd->nl_nch.ncp->nc_vp; 2509 KKASSERT(vp != NULL); 2510 if (vp->v_type == VDIR) 2511 return (EPERM); /* POSIX */ 2512 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2513 return (error); 2514 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2515 return (error); 2516 2517 /* 2518 * Unlock the source so we can lookup the target without deadlocking 2519 * (XXX vp is locked already, possible other deadlock?). The target 2520 * must not exist. 2521 */ 2522 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2523 nd->nl_flags &= ~NLC_NCPISLOCKED; 2524 cache_unlock(&nd->nl_nch); 2525 vn_unlock(vp); 2526 2527 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2528 if ((error = nlookup(linknd)) != 0) { 2529 vrele(vp); 2530 return (error); 2531 } 2532 if (linknd->nl_nch.ncp->nc_vp) { 2533 vrele(vp); 2534 return (EEXIST); 2535 } 2536 if (linknd->nl_dvp == NULL) { 2537 vrele(vp); 2538 return (EINVAL); 2539 } 2540 VFS_MODIFYING(vp->v_mount); 2541 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2542 if (error) { 2543 vrele(vp); 2544 return (error); 2545 } 2546 2547 /* 2548 * Finally run the new API VOP. 2549 */ 2550 error = can_hardlink(vp, td, td->td_ucred); 2551 if (error == 0) { 2552 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2553 vp, linknd->nl_cred); 2554 } 2555 vput(vp); 2556 return (error); 2557 } 2558 2559 /* 2560 * link_args(char *path, char *link) 2561 * 2562 * Make a hard file link. 
2563 */ 2564 int 2565 sys_link(struct sysmsg *sysmsg, const struct link_args *uap) 2566 { 2567 struct nlookupdata nd, linknd; 2568 int error; 2569 2570 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2571 if (error == 0) { 2572 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2573 if (error == 0) 2574 error = kern_link(&nd, &linknd); 2575 nlookup_done(&linknd); 2576 } 2577 nlookup_done(&nd); 2578 return (error); 2579 } 2580 2581 /* 2582 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2583 * 2584 * Make a hard file link. The path1 argument is relative to the directory 2585 * associated with fd1, and similarly the path2 argument is relative to 2586 * the directory associated with fd2. 2587 */ 2588 int 2589 sys_linkat(struct sysmsg *sysmsg, const struct linkat_args *uap) 2590 { 2591 struct nlookupdata nd, linknd; 2592 struct file *fp1, *fp2; 2593 int error; 2594 2595 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2596 (uap->flags & AT_SYMLINK_FOLLOW) ? 
NLC_FOLLOW : 0); 2597 if (error == 0) { 2598 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2599 uap->path2, UIO_USERSPACE, 0); 2600 if (error == 0) 2601 error = kern_link(&nd, &linknd); 2602 nlookup_done_at(&linknd, fp2); 2603 } 2604 nlookup_done_at(&nd, fp1); 2605 return (error); 2606 } 2607 2608 int 2609 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2610 { 2611 struct vattr vattr; 2612 struct vnode *vp; 2613 struct vnode *dvp; 2614 int error; 2615 2616 bwillinode(1); 2617 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2618 if ((error = nlookup(nd)) != 0) 2619 return (error); 2620 if (nd->nl_nch.ncp->nc_vp) 2621 return (EEXIST); 2622 if (nd->nl_dvp == NULL) 2623 return (EINVAL); 2624 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2625 return (error); 2626 dvp = nd->nl_dvp; 2627 VATTR_NULL(&vattr); 2628 vattr.va_mode = mode; 2629 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2630 if (error == 0) 2631 vput(vp); 2632 return (error); 2633 } 2634 2635 /* 2636 * symlink(char *path, char *link) 2637 * 2638 * Make a symbolic link. 2639 */ 2640 int 2641 sys_symlink(struct sysmsg *sysmsg, const struct symlink_args *uap) 2642 { 2643 struct thread *td = curthread; 2644 struct nlookupdata nd; 2645 char *path; 2646 int error; 2647 int mode; 2648 2649 path = objcache_get(namei_oc, M_WAITOK); 2650 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2651 if (error == 0) { 2652 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2653 if (error == 0) { 2654 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2655 error = kern_symlink(&nd, path, mode); 2656 } 2657 nlookup_done(&nd); 2658 } 2659 objcache_put(namei_oc, path); 2660 return (error); 2661 } 2662 2663 /* 2664 * symlinkat_args(char *path1, int fd, char *path2) 2665 * 2666 * Make a symbolic link. The path2 argument is relative to the directory 2667 * associated with fd. 
2668 */ 2669 int 2670 sys_symlinkat(struct sysmsg *sysmsg, const struct symlinkat_args *uap) 2671 { 2672 struct thread *td = curthread; 2673 struct nlookupdata nd; 2674 struct file *fp; 2675 char *path1; 2676 int error; 2677 int mode; 2678 2679 path1 = objcache_get(namei_oc, M_WAITOK); 2680 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2681 if (error == 0) { 2682 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2683 UIO_USERSPACE, 0); 2684 if (error == 0) { 2685 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2686 error = kern_symlink(&nd, path1, mode); 2687 } 2688 nlookup_done_at(&nd, fp); 2689 } 2690 objcache_put(namei_oc, path1); 2691 return (error); 2692 } 2693 2694 /* 2695 * undelete_args(char *path) 2696 * 2697 * Delete a whiteout from the filesystem. 2698 */ 2699 int 2700 sys_undelete(struct sysmsg *sysmsg, const struct undelete_args *uap) 2701 { 2702 struct nlookupdata nd; 2703 int error; 2704 2705 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2706 bwillinode(1); 2707 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2708 if (error == 0) 2709 error = nlookup(&nd); 2710 if (error == 0 && nd.nl_dvp == NULL) 2711 error = EINVAL; 2712 if (error == 0) 2713 error = ncp_writechk(&nd.nl_nch); 2714 if (error == 0) { 2715 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2716 NAMEI_DELETE); 2717 } 2718 nlookup_done(&nd); 2719 return (error); 2720 } 2721 2722 int 2723 kern_unlink(struct nlookupdata *nd) 2724 { 2725 int error; 2726 2727 bwillinode(1); 2728 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2729 if ((error = nlookup(nd)) != 0) 2730 return (error); 2731 if (nd->nl_dvp == NULL) 2732 return EINVAL; 2733 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2734 return (error); 2735 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2736 return (error); 2737 } 2738 2739 /* 2740 * unlink_args(char *path) 2741 * 2742 * Delete a name from the filesystem. 
2743 */ 2744 int 2745 sys_unlink(struct sysmsg *sysmsg, const struct unlink_args *uap) 2746 { 2747 struct nlookupdata nd; 2748 int error; 2749 2750 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2751 if (error == 0) 2752 error = kern_unlink(&nd); 2753 nlookup_done(&nd); 2754 return (error); 2755 } 2756 2757 2758 /* 2759 * unlinkat_args(int fd, char *path, int flags) 2760 * 2761 * Delete the file or directory entry pointed to by fd/path. 2762 */ 2763 int 2764 sys_unlinkat(struct sysmsg *sysmsg, const struct unlinkat_args *uap) 2765 { 2766 struct nlookupdata nd; 2767 struct file *fp; 2768 int error; 2769 2770 if (uap->flags & ~AT_REMOVEDIR) 2771 return (EINVAL); 2772 2773 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2774 if (error == 0) { 2775 if (uap->flags & AT_REMOVEDIR) 2776 error = kern_rmdir(&nd); 2777 else 2778 error = kern_unlink(&nd); 2779 } 2780 nlookup_done_at(&nd, fp); 2781 return (error); 2782 } 2783 2784 int 2785 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2786 { 2787 struct thread *td = curthread; 2788 struct file *fp; 2789 struct vnode *vp; 2790 struct vattr_lite lva; 2791 off_t new_offset; 2792 int error; 2793 2794 fp = holdfp(td, fd, -1); 2795 if (fp == NULL) 2796 return (EBADF); 2797 if (fp->f_type != DTYPE_VNODE) { 2798 error = ESPIPE; 2799 goto done; 2800 } 2801 vp = (struct vnode *)fp->f_data; 2802 2803 switch (whence) { 2804 case L_INCR: 2805 spin_lock(&fp->f_spin); 2806 new_offset = fp->f_offset + offset; 2807 error = 0; 2808 break; 2809 case L_XTND: 2810 error = VOP_GETATTR_LITE(vp, &lva); 2811 spin_lock(&fp->f_spin); 2812 new_offset = offset + lva.va_size; 2813 break; 2814 case L_SET: 2815 new_offset = offset; 2816 error = 0; 2817 spin_lock(&fp->f_spin); 2818 break; 2819 default: 2820 new_offset = 0; 2821 error = EINVAL; 2822 spin_lock(&fp->f_spin); 2823 break; 2824 } 2825 2826 /* 2827 * Validate the seek position. Negative offsets are not allowed 2828 * for regular files or directories. 
2829 * 2830 * Normally we would also not want to allow negative offsets for 2831 * character and block-special devices. However kvm addresses 2832 * on 64 bit architectures might appear to be negative and must 2833 * be allowed. 2834 */ 2835 if (error == 0) { 2836 if (new_offset < 0 && 2837 (vp->v_type == VREG || vp->v_type == VDIR)) { 2838 error = EINVAL; 2839 } else { 2840 fp->f_offset = new_offset; 2841 } 2842 } 2843 *res = fp->f_offset; 2844 spin_unlock(&fp->f_spin); 2845 done: 2846 dropfp(td, fd, fp); 2847 2848 return (error); 2849 } 2850 2851 /* 2852 * lseek_args(int fd, int pad, off_t offset, int whence) 2853 * 2854 * Reposition read/write file offset. 2855 */ 2856 int 2857 sys_lseek(struct sysmsg *sysmsg, const struct lseek_args *uap) 2858 { 2859 int error; 2860 2861 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2862 &sysmsg->sysmsg_offset); 2863 2864 return (error); 2865 } 2866 2867 /* 2868 * Check if current process can access given file. amode is a bitmask of *_OK 2869 * access bits. flags is a bitmask of AT_* flags. 2870 */ 2871 int 2872 kern_access(struct nlookupdata *nd, int amode, int flags) 2873 { 2874 struct vnode *vp; 2875 int error, mode; 2876 2877 if (flags & ~AT_EACCESS) 2878 return (EINVAL); 2879 nd->nl_flags |= NLC_SHAREDLOCK; 2880 if ((error = nlookup(nd)) != 0) 2881 return (error); 2882 if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0) 2883 return (error); 2884 retry: 2885 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2886 if (error) 2887 return (error); 2888 2889 /* Flags == 0 means only check for existence. 
*/ 2890 if (amode) { 2891 mode = 0; 2892 if (amode & R_OK) 2893 mode |= VREAD; 2894 if (amode & W_OK) 2895 mode |= VWRITE; 2896 if (amode & X_OK) 2897 mode |= VEXEC; 2898 if ((mode & VWRITE) == 0 || 2899 (error = vn_writechk(vp)) == 0) { 2900 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2901 } 2902 2903 /* 2904 * If the file handle is stale we have to re-resolve the 2905 * entry with the ncp held exclusively. This is a hack 2906 * at the moment. 2907 */ 2908 if (error == ESTALE) { 2909 vput(vp); 2910 cache_unlock(&nd->nl_nch); 2911 cache_lock(&nd->nl_nch); 2912 cache_setunresolved(&nd->nl_nch); 2913 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2914 if (error == 0) { 2915 vp = NULL; 2916 goto retry; 2917 } 2918 return(error); 2919 } 2920 } 2921 vput(vp); 2922 return (error); 2923 } 2924 2925 /* 2926 * access_args(char *path, int flags) 2927 * 2928 * Check access permissions. 2929 */ 2930 int 2931 sys_access(struct sysmsg *sysmsg, const struct access_args *uap) 2932 { 2933 struct nlookupdata nd; 2934 int error; 2935 2936 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2937 if (error == 0) 2938 error = kern_access(&nd, uap->flags, 0); 2939 nlookup_done(&nd); 2940 return (error); 2941 } 2942 2943 2944 /* 2945 * eaccess_args(char *path, int flags) 2946 * 2947 * Check access permissions. 2948 */ 2949 int 2950 sys_eaccess(struct sysmsg *sysmsg, const struct eaccess_args *uap) 2951 { 2952 struct nlookupdata nd; 2953 int error; 2954 2955 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2956 if (error == 0) 2957 error = kern_access(&nd, uap->flags, AT_EACCESS); 2958 nlookup_done(&nd); 2959 return (error); 2960 } 2961 2962 2963 /* 2964 * faccessat_args(int fd, char *path, int amode, int flags) 2965 * 2966 * Check access permissions. 
2967 */ 2968 int 2969 sys_faccessat(struct sysmsg *sysmsg, const struct faccessat_args *uap) 2970 { 2971 struct nlookupdata nd; 2972 struct file *fp; 2973 int error; 2974 2975 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2976 NLC_FOLLOW); 2977 if (error == 0) 2978 error = kern_access(&nd, uap->amode, uap->flags); 2979 nlookup_done_at(&nd, fp); 2980 return (error); 2981 } 2982 2983 int 2984 kern_stat(struct nlookupdata *nd, struct stat *st) 2985 { 2986 int error; 2987 struct vnode *vp; 2988 2989 nd->nl_flags |= NLC_SHAREDLOCK; 2990 if ((error = nlookup(nd)) != 0) 2991 return (error); 2992 again: 2993 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2994 return (ENOENT); 2995 2996 #if 1 2997 error = cache_vref(&nd->nl_nch, NULL, &vp); 2998 #else 2999 error = vget(vp, LK_SHARED); 3000 #endif 3001 if (error) 3002 return (error); 3003 error = vn_stat(vp, st, nd->nl_cred); 3004 3005 /* 3006 * If the file handle is stale we have to re-resolve the 3007 * entry with the ncp held exclusively. This is a hack 3008 * at the moment. 3009 */ 3010 if (error == ESTALE) { 3011 #if 1 3012 vrele(vp); 3013 #else 3014 vput(vp); 3015 #endif 3016 cache_unlock(&nd->nl_nch); 3017 cache_lock(&nd->nl_nch); 3018 cache_setunresolved(&nd->nl_nch); 3019 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 3020 if (error == 0) 3021 goto again; 3022 } else { 3023 #if 1 3024 vrele(vp); 3025 #else 3026 vput(vp); 3027 #endif 3028 } 3029 return (error); 3030 } 3031 3032 /* 3033 * stat_args(char *path, struct stat *ub) 3034 * 3035 * Get file status; this version follows links. 
3036 */ 3037 int 3038 sys_stat(struct sysmsg *sysmsg, const struct stat_args *uap) 3039 { 3040 struct nlookupdata nd; 3041 struct stat st; 3042 int error; 3043 3044 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3045 if (error == 0) { 3046 error = kern_stat(&nd, &st); 3047 if (error == 0) 3048 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3049 } 3050 nlookup_done(&nd); 3051 return (error); 3052 } 3053 3054 /* 3055 * lstat_args(char *path, struct stat *ub) 3056 * 3057 * Get file status; this version does not follow links. 3058 */ 3059 int 3060 sys_lstat(struct sysmsg *sysmsg, const struct lstat_args *uap) 3061 { 3062 struct nlookupdata nd; 3063 struct stat st; 3064 int error; 3065 3066 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3067 if (error == 0) { 3068 error = kern_stat(&nd, &st); 3069 if (error == 0) 3070 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3071 } 3072 nlookup_done(&nd); 3073 return (error); 3074 } 3075 3076 /* 3077 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3078 * 3079 * Get status of file pointed to by fd/path. 3080 */ 3081 int 3082 sys_fstatat(struct sysmsg *sysmsg, const struct fstatat_args *uap) 3083 { 3084 struct nlookupdata nd; 3085 struct stat st; 3086 int error; 3087 int flags; 3088 struct file *fp; 3089 3090 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3091 return (EINVAL); 3092 3093 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3094 3095 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3096 UIO_USERSPACE, flags); 3097 if (error == 0) { 3098 error = kern_stat(&nd, &st); 3099 if (error == 0) 3100 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3101 } 3102 nlookup_done_at(&nd, fp); 3103 return (error); 3104 } 3105 3106 static int 3107 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3108 { 3109 struct nlookupdata nd; 3110 struct vnode *vp; 3111 int error; 3112 3113 vp = NULL; 3114 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3115 if (error == 0) 3116 error = nlookup(&nd); 3117 if (error == 0) 3118 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3119 nlookup_done(&nd); 3120 if (error == 0) { 3121 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3122 vput(vp); 3123 } 3124 return (error); 3125 } 3126 3127 /* 3128 * pathconf_Args(char *path, int name) 3129 * 3130 * Get configurable pathname variables. 3131 */ 3132 int 3133 sys_pathconf(struct sysmsg *sysmsg, const struct pathconf_args *uap) 3134 { 3135 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3136 &sysmsg->sysmsg_reg)); 3137 } 3138 3139 /* 3140 * lpathconf_Args(char *path, int name) 3141 * 3142 * Get configurable pathname variables, but don't follow symlinks. 3143 */ 3144 int 3145 sys_lpathconf(struct sysmsg *sysmsg, const struct lpathconf_args *uap) 3146 { 3147 return (kern_pathconf(uap->path, uap->name, 0, &sysmsg->sysmsg_reg)); 3148 } 3149 3150 /* 3151 * XXX: daver 3152 * kern_readlink isn't properly split yet. There is a copyin burried 3153 * in VOP_READLINK(). 
3154 */ 3155 int 3156 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3157 { 3158 struct thread *td = curthread; 3159 struct vnode *vp; 3160 struct iovec aiov; 3161 struct uio auio; 3162 int error; 3163 3164 nd->nl_flags |= NLC_SHAREDLOCK; 3165 if ((error = nlookup(nd)) != 0) 3166 return (error); 3167 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3168 if (error) 3169 return (error); 3170 if (vp->v_type != VLNK) { 3171 error = EINVAL; 3172 } else { 3173 aiov.iov_base = buf; 3174 aiov.iov_len = count; 3175 auio.uio_iov = &aiov; 3176 auio.uio_iovcnt = 1; 3177 auio.uio_offset = 0; 3178 auio.uio_rw = UIO_READ; 3179 auio.uio_segflg = UIO_USERSPACE; 3180 auio.uio_td = td; 3181 auio.uio_resid = count; 3182 error = VOP_READLINK(vp, &auio, td->td_ucred); 3183 } 3184 vput(vp); 3185 *res = count - auio.uio_resid; 3186 return (error); 3187 } 3188 3189 /* 3190 * readlink_args(char *path, char *buf, int count) 3191 * 3192 * Return target name of a symbolic link. 3193 */ 3194 int 3195 sys_readlink(struct sysmsg *sysmsg, const struct readlink_args *uap) 3196 { 3197 struct nlookupdata nd; 3198 int error; 3199 3200 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3201 if (error == 0) { 3202 error = kern_readlink(&nd, uap->buf, uap->count, 3203 &sysmsg->sysmsg_result); 3204 } 3205 nlookup_done(&nd); 3206 return (error); 3207 } 3208 3209 /* 3210 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3211 * 3212 * Return target name of a symbolic link. The path is relative to the 3213 * directory associated with fd. 
3214 */ 3215 int 3216 sys_readlinkat(struct sysmsg *sysmsg, const struct readlinkat_args *uap) 3217 { 3218 struct nlookupdata nd; 3219 struct file *fp; 3220 int error; 3221 3222 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3223 if (error == 0) { 3224 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3225 &sysmsg->sysmsg_result); 3226 } 3227 nlookup_done_at(&nd, fp); 3228 return (error); 3229 } 3230 3231 static int 3232 setfflags(struct vnode *vp, u_long flags) 3233 { 3234 struct thread *td = curthread; 3235 int error; 3236 struct vattr vattr; 3237 3238 /* 3239 * Prevent non-root users from setting flags on devices. When 3240 * a device is reused, users can retain ownership of the device 3241 * if they are allowed to set flags and programs assume that 3242 * chown can't fail when done as root. 3243 */ 3244 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3245 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3246 return (error); 3247 3248 /* 3249 * note: vget is required for any operation that might mod the vnode 3250 * so VINACTIVE is properly cleared. 3251 */ 3252 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3253 VATTR_NULL(&vattr); 3254 vattr.va_flags = flags; 3255 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3256 vput(vp); 3257 } 3258 return (error); 3259 } 3260 3261 /* 3262 * chflags(const char *path, u_long flags) 3263 * 3264 * Change flags of a file given a path name. 
3265 */ 3266 int 3267 sys_chflags(struct sysmsg *sysmsg, const struct chflags_args *uap) 3268 { 3269 struct nlookupdata nd; 3270 struct vnode *vp; 3271 int error; 3272 3273 vp = NULL; 3274 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3275 if (error == 0) 3276 error = nlookup(&nd); 3277 if (error == 0) 3278 error = ncp_writechk(&nd.nl_nch); 3279 if (error == 0) 3280 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3281 nlookup_done(&nd); 3282 if (error == 0) { 3283 error = setfflags(vp, uap->flags); 3284 vrele(vp); 3285 } 3286 return (error); 3287 } 3288 3289 /* 3290 * lchflags(const char *path, u_long flags) 3291 * 3292 * Change flags of a file given a path name, but don't follow symlinks. 3293 */ 3294 int 3295 sys_lchflags(struct sysmsg *sysmsg, const struct lchflags_args *uap) 3296 { 3297 struct nlookupdata nd; 3298 struct vnode *vp; 3299 int error; 3300 3301 vp = NULL; 3302 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3303 if (error == 0) 3304 error = nlookup(&nd); 3305 if (error == 0) 3306 error = ncp_writechk(&nd.nl_nch); 3307 if (error == 0) 3308 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3309 nlookup_done(&nd); 3310 if (error == 0) { 3311 error = setfflags(vp, uap->flags); 3312 vrele(vp); 3313 } 3314 return (error); 3315 } 3316 3317 /* 3318 * fchflags_args(int fd, u_flags flags) 3319 * 3320 * Change flags of a file given a file descriptor. 
3321 */ 3322 int 3323 sys_fchflags(struct sysmsg *sysmsg, const struct fchflags_args *uap) 3324 { 3325 struct thread *td = curthread; 3326 struct file *fp; 3327 int error; 3328 3329 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3330 return (error); 3331 if (fp->f_nchandle.ncp) 3332 error = ncp_writechk(&fp->f_nchandle); 3333 if (error == 0) 3334 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3335 fdrop(fp); 3336 return (error); 3337 } 3338 3339 /* 3340 * chflagsat_args(int fd, const char *path, u_long flags, int atflags) 3341 * change flags given a pathname relative to a filedescriptor 3342 */ 3343 int 3344 sys_chflagsat(struct sysmsg *sysmsg, const struct chflagsat_args *uap) 3345 { 3346 struct nlookupdata nd; 3347 struct vnode *vp; 3348 struct file *fp; 3349 int error; 3350 int lookupflags; 3351 3352 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3353 return (EINVAL); 3354 3355 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3356 3357 vp = NULL; 3358 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3359 if (error == 0) 3360 error = nlookup(&nd); 3361 if (error == 0) 3362 error = ncp_writechk(&nd.nl_nch); 3363 if (error == 0) 3364 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3365 nlookup_done_at(&nd, fp); 3366 if (error == 0) { 3367 error = setfflags(vp, uap->flags); 3368 vrele(vp); 3369 } 3370 return (error); 3371 } 3372 3373 3374 static int 3375 setfmode(struct vnode *vp, int mode) 3376 { 3377 struct thread *td = curthread; 3378 int error; 3379 struct vattr vattr; 3380 3381 /* 3382 * note: vget is required for any operation that might mod the vnode 3383 * so VINACTIVE is properly cleared. 
3384 */ 3385 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3386 VATTR_NULL(&vattr); 3387 vattr.va_mode = mode & ALLPERMS; 3388 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3389 cache_inval_wxok(vp); 3390 vput(vp); 3391 } 3392 return error; 3393 } 3394 3395 int 3396 kern_chmod(struct nlookupdata *nd, int mode) 3397 { 3398 struct vnode *vp; 3399 int error; 3400 3401 if ((error = nlookup(nd)) != 0) 3402 return (error); 3403 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3404 return (error); 3405 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3406 error = setfmode(vp, mode); 3407 vrele(vp); 3408 return (error); 3409 } 3410 3411 /* 3412 * chmod_args(char *path, int mode) 3413 * 3414 * Change mode of a file given path name. 3415 */ 3416 int 3417 sys_chmod(struct sysmsg *sysmsg, const struct chmod_args *uap) 3418 { 3419 struct nlookupdata nd; 3420 int error; 3421 3422 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3423 if (error == 0) 3424 error = kern_chmod(&nd, uap->mode); 3425 nlookup_done(&nd); 3426 return (error); 3427 } 3428 3429 /* 3430 * lchmod_args(char *path, int mode) 3431 * 3432 * Change mode of a file given path name (don't follow links.) 3433 */ 3434 int 3435 sys_lchmod(struct sysmsg *sysmsg, const struct lchmod_args *uap) 3436 { 3437 struct nlookupdata nd; 3438 int error; 3439 3440 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3441 if (error == 0) 3442 error = kern_chmod(&nd, uap->mode); 3443 nlookup_done(&nd); 3444 return (error); 3445 } 3446 3447 /* 3448 * fchmod_args(int fd, int mode) 3449 * 3450 * Change mode of a file given a file descriptor. 
3451 */ 3452 int 3453 sys_fchmod(struct sysmsg *sysmsg, const struct fchmod_args *uap) 3454 { 3455 struct thread *td = curthread; 3456 struct file *fp; 3457 int error; 3458 3459 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3460 return (error); 3461 if (fp->f_nchandle.ncp) 3462 error = ncp_writechk(&fp->f_nchandle); 3463 if (error == 0) 3464 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3465 fdrop(fp); 3466 return (error); 3467 } 3468 3469 /* 3470 * fchmodat_args(char *path, int mode) 3471 * 3472 * Change mode of a file pointed to by fd/path. 3473 */ 3474 int 3475 sys_fchmodat(struct sysmsg *sysmsg, const struct fchmodat_args *uap) 3476 { 3477 struct nlookupdata nd; 3478 struct file *fp; 3479 int error; 3480 int flags; 3481 3482 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3483 return (EINVAL); 3484 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3485 3486 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3487 UIO_USERSPACE, flags); 3488 if (error == 0) 3489 error = kern_chmod(&nd, uap->mode); 3490 nlookup_done_at(&nd, fp); 3491 return (error); 3492 } 3493 3494 static int 3495 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3496 { 3497 struct thread *td = curthread; 3498 int error; 3499 struct vattr vattr; 3500 uid_t o_uid; 3501 gid_t o_gid; 3502 uint64_t size; 3503 3504 /* 3505 * note: vget is required for any operation that might mod the vnode 3506 * so VINACTIVE is properly cleared. 
3507 */ 3508 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3509 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3510 return error; 3511 o_uid = vattr.va_uid; 3512 o_gid = vattr.va_gid; 3513 size = vattr.va_size; 3514 3515 VATTR_NULL(&vattr); 3516 vattr.va_uid = uid; 3517 vattr.va_gid = gid; 3518 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3519 vput(vp); 3520 } 3521 3522 if (error == 0) { 3523 if (uid == -1) 3524 uid = o_uid; 3525 if (gid == -1) 3526 gid = o_gid; 3527 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3528 VFS_ACCOUNT(mp, uid, gid, size); 3529 } 3530 3531 return error; 3532 } 3533 3534 int 3535 kern_chown(struct nlookupdata *nd, int uid, int gid) 3536 { 3537 struct vnode *vp; 3538 int error; 3539 3540 if ((error = nlookup(nd)) != 0) 3541 return (error); 3542 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3543 return (error); 3544 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3545 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3546 vrele(vp); 3547 return (error); 3548 } 3549 3550 /* 3551 * chown(char *path, int uid, int gid) 3552 * 3553 * Set ownership given a path name. 3554 */ 3555 int 3556 sys_chown(struct sysmsg *sysmsg, const struct chown_args *uap) 3557 { 3558 struct nlookupdata nd; 3559 int error; 3560 3561 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3562 if (error == 0) 3563 error = kern_chown(&nd, uap->uid, uap->gid); 3564 nlookup_done(&nd); 3565 return (error); 3566 } 3567 3568 /* 3569 * lchown_args(char *path, int uid, int gid) 3570 * 3571 * Set ownership given a path name, do not cross symlinks. 
3572 */ 3573 int 3574 sys_lchown(struct sysmsg *sysmsg, const struct lchown_args *uap) 3575 { 3576 struct nlookupdata nd; 3577 int error; 3578 3579 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3580 if (error == 0) 3581 error = kern_chown(&nd, uap->uid, uap->gid); 3582 nlookup_done(&nd); 3583 return (error); 3584 } 3585 3586 /* 3587 * fchown_args(int fd, int uid, int gid) 3588 * 3589 * Set ownership given a file descriptor. 3590 */ 3591 int 3592 sys_fchown(struct sysmsg *sysmsg, const struct fchown_args *uap) 3593 { 3594 struct thread *td = curthread; 3595 struct proc *p = td->td_proc; 3596 struct file *fp; 3597 int error; 3598 3599 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3600 return (error); 3601 if (fp->f_nchandle.ncp) 3602 error = ncp_writechk(&fp->f_nchandle); 3603 if (error == 0) 3604 error = setfown(p->p_fd->fd_ncdir.mount, 3605 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3606 fdrop(fp); 3607 return (error); 3608 } 3609 3610 /* 3611 * fchownat(int fd, char *path, int uid, int gid, int flags) 3612 * 3613 * Set ownership of file pointed to by fd/path. 3614 */ 3615 int 3616 sys_fchownat(struct sysmsg *sysmsg, const struct fchownat_args *uap) 3617 { 3618 struct nlookupdata nd; 3619 struct file *fp; 3620 int error; 3621 int flags; 3622 3623 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3624 return (EINVAL); 3625 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3626 3627 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3628 UIO_USERSPACE, flags); 3629 if (error == 0) 3630 error = kern_chown(&nd, uap->uid, uap->gid); 3631 nlookup_done_at(&nd, fp); 3632 return (error); 3633 } 3634 3635 3636 static int 3637 getutimes(struct timeval *tvp, struct timespec *tsp) 3638 { 3639 struct timeval tv[2]; 3640 int error; 3641 3642 if (tvp == NULL) { 3643 microtime(&tv[0]); 3644 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3645 tsp[1] = tsp[0]; 3646 } else { 3647 if ((error = itimerfix(tvp)) != 0) 3648 return (error); 3649 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3650 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3651 } 3652 return 0; 3653 } 3654 3655 static int 3656 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3657 { 3658 struct timespec tsnow; 3659 int error; 3660 3661 *nullflag = 0; 3662 nanotime(&tsnow); 3663 if (ts == NULL) { 3664 newts[0] = tsnow; 3665 newts[1] = tsnow; 3666 *nullflag = 1; 3667 return (0); 3668 } 3669 3670 newts[0] = ts[0]; 3671 newts[1] = ts[1]; 3672 if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT) 3673 return (0); 3674 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3675 *nullflag = 1; 3676 3677 if (newts[0].tv_nsec == UTIME_OMIT) 3678 newts[0].tv_sec = VNOVAL; 3679 else if (newts[0].tv_nsec == UTIME_NOW) 3680 newts[0] = tsnow; 3681 else if ((error = itimespecfix(&newts[0])) != 0) 3682 return (error); 3683 3684 if (newts[1].tv_nsec == UTIME_OMIT) 3685 newts[1].tv_sec = VNOVAL; 3686 else if (newts[1].tv_nsec == UTIME_NOW) 3687 newts[1] = tsnow; 3688 else if ((error = itimespecfix(&newts[1])) != 0) 3689 return (error); 3690 3691 return (0); 3692 } 3693 3694 static int 3695 setutimes(struct vnode *vp, struct vattr *vattr, 3696 const struct timespec *ts, int nullflag) 3697 { 3698 struct thread *td = curthread; 3699 int error; 3700 3701 VATTR_NULL(vattr); 3702 vattr->va_atime = ts[0]; 3703 vattr->va_mtime = ts[1]; 3704 if (nullflag) 3705 
vattr->va_vaflags |= VA_UTIMES_NULL; 3706 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3707 3708 return error; 3709 } 3710 3711 int 3712 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3713 { 3714 struct timespec ts[2]; 3715 int error; 3716 3717 if (tptr) { 3718 if ((error = getutimes(tptr, ts)) != 0) 3719 return (error); 3720 } 3721 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3722 return (error); 3723 } 3724 3725 /* 3726 * utimes_args(char *path, struct timeval *tptr) 3727 * 3728 * Set the access and modification times of a file. 3729 */ 3730 int 3731 sys_utimes(struct sysmsg *sysmsg, const struct utimes_args *uap) 3732 { 3733 struct timeval tv[2]; 3734 struct nlookupdata nd; 3735 int error; 3736 3737 if (uap->tptr) { 3738 error = copyin(uap->tptr, tv, sizeof(tv)); 3739 if (error) 3740 return (error); 3741 } 3742 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3743 if (error == 0) 3744 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3745 nlookup_done(&nd); 3746 return (error); 3747 } 3748 3749 /* 3750 * lutimes_args(char *path, struct timeval *tptr) 3751 * 3752 * Set the access and modification times of a file. 3753 */ 3754 int 3755 sys_lutimes(struct sysmsg *sysmsg, const struct lutimes_args *uap) 3756 { 3757 struct timeval tv[2]; 3758 struct nlookupdata nd; 3759 int error; 3760 3761 if (uap->tptr) { 3762 error = copyin(uap->tptr, tv, sizeof(tv)); 3763 if (error) 3764 return (error); 3765 } 3766 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3767 if (error == 0) 3768 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3769 nlookup_done(&nd); 3770 return (error); 3771 } 3772 3773 /* 3774 * Set utimes on a file descriptor. The creds used to open the 3775 * file are used to determine whether the operation is allowed 3776 * or not. 
3777 */ 3778 int 3779 kern_futimens(int fd, struct timespec *ts) 3780 { 3781 struct thread *td = curthread; 3782 struct timespec newts[2]; 3783 struct file *fp; 3784 struct vnode *vp; 3785 struct vattr vattr; 3786 struct vattr_lite lva; 3787 int nullflag; 3788 int error; 3789 3790 error = getutimens(ts, newts, &nullflag); 3791 if (error) 3792 return (error); 3793 if ((error = holdvnode(td, fd, &fp)) != 0) 3794 return (error); 3795 if (fp->f_nchandle.ncp) 3796 error = ncp_writechk(&fp->f_nchandle); 3797 if (error == 0) { 3798 vp = fp->f_data; 3799 error = vget(vp, LK_EXCLUSIVE); 3800 if (error == 0) { 3801 error = VOP_GETATTR_FP(vp, &vattr, fp); 3802 if (error == 0) { 3803 lva.va_type = vattr.va_type; 3804 lva.va_nlink = vattr.va_nlink; 3805 lva.va_mode = vattr.va_mode; 3806 lva.va_uid = vattr.va_uid; 3807 lva.va_gid = vattr.va_gid; 3808 lva.va_size = vattr.va_size; 3809 lva.va_flags = vattr.va_flags; 3810 3811 error = naccess_lva(&lva, NLC_OWN | NLC_WRITE, 3812 fp->f_cred); 3813 } 3814 if (error == 0) { 3815 error = setutimes(vp, &vattr, newts, nullflag); 3816 } 3817 vput(vp); 3818 } 3819 } 3820 fdrop(fp); 3821 return (error); 3822 } 3823 3824 /* 3825 * futimens_args(int fd, struct timespec *ts) 3826 * 3827 * Set the access and modification times of a file. 3828 */ 3829 int 3830 sys_futimens(struct sysmsg *sysmsg, const struct futimens_args *uap) 3831 { 3832 struct timespec ts[2]; 3833 int error; 3834 3835 if (uap->ts) { 3836 error = copyin(uap->ts, ts, sizeof(ts)); 3837 if (error) 3838 return (error); 3839 } 3840 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3841 return (error); 3842 } 3843 3844 int 3845 kern_futimes(int fd, struct timeval *tptr) 3846 { 3847 struct timespec ts[2]; 3848 int error; 3849 3850 if (tptr) { 3851 if ((error = getutimes(tptr, ts)) != 0) 3852 return (error); 3853 } 3854 error = kern_futimens(fd, tptr ? 
ts : NULL); 3855 return (error); 3856 } 3857 3858 /* 3859 * futimes_args(int fd, struct timeval *tptr) 3860 * 3861 * Set the access and modification times of a file. 3862 */ 3863 int 3864 sys_futimes(struct sysmsg *sysmsg, const struct futimes_args *uap) 3865 { 3866 struct timeval tv[2]; 3867 int error; 3868 3869 if (uap->tptr) { 3870 error = copyin(uap->tptr, tv, sizeof(tv)); 3871 if (error) 3872 return (error); 3873 } 3874 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3875 return (error); 3876 } 3877 3878 int 3879 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3880 { 3881 struct timespec newts[2]; 3882 struct vnode *vp; 3883 struct vattr vattr; 3884 int nullflag; 3885 int error; 3886 3887 if (flags & ~AT_SYMLINK_NOFOLLOW) 3888 return (EINVAL); 3889 3890 error = getutimens(ts, newts, &nullflag); 3891 if (error) 3892 return (error); 3893 3894 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3895 if ((error = nlookup(nd)) != 0) 3896 return (error); 3897 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3898 return (error); 3899 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3900 return (error); 3901 if ((error = vn_writechk(vp)) == 0) { 3902 error = vget(vp, LK_EXCLUSIVE); 3903 if (error == 0) { 3904 error = setutimes(vp, &vattr, newts, nullflag); 3905 vput(vp); 3906 } 3907 } 3908 vrele(vp); 3909 return (error); 3910 } 3911 3912 /* 3913 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3914 * 3915 * Set file access and modification times of a file. 3916 */ 3917 int 3918 sys_utimensat(struct sysmsg *sysmsg, const struct utimensat_args *uap) 3919 { 3920 struct timespec ts[2]; 3921 struct nlookupdata nd; 3922 struct file *fp; 3923 int error; 3924 int flags; 3925 3926 if (uap->ts) { 3927 error = copyin(uap->ts, ts, sizeof(ts)); 3928 if (error) 3929 return (error); 3930 } 3931 3932 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3933 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3934 UIO_USERSPACE, flags); 3935 if (error == 0) 3936 error = kern_utimensat(&nd, uap->ts ? ts : NULL, uap->flags); 3937 nlookup_done_at(&nd, fp); 3938 return (error); 3939 } 3940 3941 int 3942 kern_truncate(struct nlookupdata *nd, off_t length) 3943 { 3944 struct vnode *vp; 3945 struct vattr vattr; 3946 int error; 3947 uid_t uid = 0; 3948 gid_t gid = 0; 3949 uint64_t old_size = 0; 3950 3951 if (length < 0) 3952 return(EINVAL); 3953 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3954 if ((error = nlookup(nd)) != 0) 3955 return (error); 3956 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3957 return (error); 3958 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3959 return (error); 3960 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3961 if (error) { 3962 vrele(vp); 3963 return (error); 3964 } 3965 if (vp->v_type == VDIR) { 3966 error = EISDIR; 3967 goto done; 3968 } 3969 if (vfs_quota_enabled) { 3970 error = VOP_GETATTR(vp, &vattr); 3971 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3972 uid = vattr.va_uid; 3973 gid = vattr.va_gid; 3974 old_size = vattr.va_size; 3975 } 3976 3977 if ((error = vn_writechk(vp)) == 0) { 3978 VATTR_NULL(&vattr); 3979 vattr.va_size = length; 3980 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3981 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3982 } 3983 done: 3984 vput(vp); 3985 return (error); 3986 } 3987 3988 /* 3989 * truncate(char *path, int pad, off_t length) 3990 * 3991 * Truncate a file given its path name. 
3992 */ 3993 int 3994 sys_truncate(struct sysmsg *sysmsg, const struct truncate_args *uap) 3995 { 3996 struct nlookupdata nd; 3997 int error; 3998 3999 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4000 if (error == 0) 4001 error = kern_truncate(&nd, uap->length); 4002 nlookup_done(&nd); 4003 return error; 4004 } 4005 4006 int 4007 kern_ftruncate(int fd, off_t length) 4008 { 4009 struct thread *td = curthread; 4010 struct vattr vattr; 4011 struct vnode *vp; 4012 struct file *fp; 4013 int error; 4014 uid_t uid = 0; 4015 gid_t gid = 0; 4016 uint64_t old_size = 0; 4017 struct mount *mp; 4018 4019 if (length < 0) 4020 return(EINVAL); 4021 if ((error = holdvnode(td, fd, &fp)) != 0) 4022 return (error); 4023 if (fp->f_nchandle.ncp) { 4024 error = ncp_writechk(&fp->f_nchandle); 4025 if (error) 4026 goto done; 4027 } 4028 if ((fp->f_flag & FWRITE) == 0) { 4029 error = EINVAL; 4030 goto done; 4031 } 4032 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 4033 error = EINVAL; 4034 goto done; 4035 } 4036 vp = (struct vnode *)fp->f_data; 4037 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4038 if (vp->v_type == VDIR) { 4039 error = EISDIR; 4040 vn_unlock(vp); 4041 goto done; 4042 } 4043 4044 if (vfs_quota_enabled) { 4045 error = VOP_GETATTR_FP(vp, &vattr, fp); 4046 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 4047 uid = vattr.va_uid; 4048 gid = vattr.va_gid; 4049 old_size = vattr.va_size; 4050 } 4051 4052 if ((error = vn_writechk(vp)) == 0) { 4053 VATTR_NULL(&vattr); 4054 vattr.va_size = length; 4055 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp); 4056 mp = vq_vptomp(vp); 4057 VFS_ACCOUNT(mp, uid, gid, length - old_size); 4058 } 4059 vn_unlock(vp); 4060 done: 4061 fdrop(fp); 4062 return (error); 4063 } 4064 4065 /* 4066 * ftruncate_args(int fd, int pad, off_t length) 4067 * 4068 * Truncate a file given a file descriptor. 
4069 */ 4070 int 4071 sys_ftruncate(struct sysmsg *sysmsg, const struct ftruncate_args *uap) 4072 { 4073 int error; 4074 4075 error = kern_ftruncate(uap->fd, uap->length); 4076 4077 return (error); 4078 } 4079 4080 /* 4081 * fsync(int fd) 4082 * 4083 * Sync an open file. 4084 */ 4085 int 4086 sys_fsync(struct sysmsg *sysmsg, const struct fsync_args *uap) 4087 { 4088 struct thread *td = curthread; 4089 struct vnode *vp; 4090 struct file *fp; 4091 vm_object_t obj; 4092 int error; 4093 4094 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 4095 return (error); 4096 vp = (struct vnode *)fp->f_data; 4097 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4098 if ((obj = vp->v_object) != NULL) { 4099 if (vp->v_mount == NULL || 4100 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 4101 vm_object_page_clean(obj, 0, 0, 0); 4102 } 4103 } 4104 error = VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp); 4105 if (error == 0 && vp->v_mount) 4106 error = buf_fsync(vp); 4107 vn_unlock(vp); 4108 fdrop(fp); 4109 4110 return (error); 4111 } 4112 4113 int 4114 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 4115 { 4116 struct nchandle fnchd; 4117 struct nchandle tnchd; 4118 struct namecache *ncp; 4119 struct vnode *fdvp; 4120 struct vnode *tdvp; 4121 struct mount *mp; 4122 struct mount *userenlk; 4123 int error; 4124 u_int fncp_gen; 4125 u_int tncp_gen; 4126 4127 bwillinode(1); 4128 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 4129 if ((error = nlookup(fromnd)) != 0) 4130 return (error); 4131 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 4132 return (ENOENT); 4133 fnchd.mount = fromnd->nl_nch.mount; 4134 cache_hold(&fnchd); 4135 4136 /* 4137 * unlock the source nch so we can lookup the target nch without 4138 * deadlocking. The target may or may not exist so we do not check 4139 * for a target vp like kern_mkdir() and other creation functions do. 
4140 * 4141 * The source and target directories are ref'd and rechecked after 4142 * everything is relocked to determine if the source or target file 4143 * has been renamed. 4144 */ 4145 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4146 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4147 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4148 4149 if (fromnd->nl_nch.ncp->nc_vp && 4150 fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4151 userenlk = fnchd.mount; 4152 cache_unlock(&fromnd->nl_nch); 4153 lockmgr(&userenlk->mnt_renlock, LK_EXCLUSIVE); 4154 } else { 4155 userenlk = NULL; 4156 cache_unlock(&fromnd->nl_nch); 4157 } 4158 4159 /* 4160 * Lookup target 4161 */ 4162 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4163 if ((error = nlookup(tond)) != 0) { 4164 cache_drop(&fnchd); 4165 goto done; 4166 } 4167 tncp_gen = tond->nl_nch.ncp->nc_generation; 4168 4169 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4170 cache_drop(&fnchd); 4171 error = ENOENT; 4172 goto done; 4173 } 4174 tnchd.mount = tond->nl_nch.mount; 4175 cache_hold(&tnchd); 4176 4177 /* 4178 * If the source and target are the same there is nothing to do 4179 */ 4180 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4181 cache_drop(&fnchd); 4182 cache_drop(&tnchd); 4183 error = 0; 4184 goto done; 4185 } 4186 4187 /* 4188 * Mount points cannot be renamed or overwritten 4189 */ 4190 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4191 NCF_ISMOUNTPT 4192 ) { 4193 cache_drop(&fnchd); 4194 cache_drop(&tnchd); 4195 error = EINVAL; 4196 goto done; 4197 } 4198 4199 /* 4200 * Lock all four namecache entries. tond is already locked. 4201 */ 4202 cache_lock4_tondlocked(&fnchd, &fromnd->nl_nch, 4203 &tnchd, &tond->nl_nch, 4204 fromnd->nl_cred, tond->nl_cred); 4205 fromnd->nl_flags |= NLC_NCPISLOCKED; 4206 4207 /* 4208 * If the namecache generation changed for either fromnd or tond, 4209 * we must retry. 
4210 */ 4211 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 4212 tond->nl_nch.ncp->nc_generation != tncp_gen) { 4213 krateprintf(&krate_rename, 4214 "kern_rename: retry due to race on: " 4215 "\"%s\" -> \"%s\"\n", 4216 fromnd->nl_nch.ncp->nc_name, 4217 tond->nl_nch.ncp->nc_name); 4218 error = EAGAIN; 4219 goto finish; 4220 } 4221 4222 /* 4223 * If either fromnd or tond are marked destroyed a ripout occured 4224 * out from under us and we must retry. 4225 */ 4226 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 4227 fromnd->nl_nch.ncp->nc_vp == NULL || 4228 (tond->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED))) { 4229 krateprintf(&krate_rename, 4230 "kern_rename: retry due to ripout on: " 4231 "\"%s\" -> \"%s\"\n", 4232 fromnd->nl_nch.ncp->nc_name, 4233 tond->nl_nch.ncp->nc_name); 4234 error = EAGAIN; 4235 goto finish; 4236 } 4237 4238 /* 4239 * Make sure the parent directories linkages are the same. We have 4240 * already checked that fromnd and tond are not mount points so this 4241 * should not loop forever on a cross-mount. 4242 */ 4243 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4244 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4245 error = EAGAIN; 4246 goto finish; 4247 } 4248 4249 /* 4250 * Both the source and target must be within the same filesystem and 4251 * in the same filesystem as their parent directories within the 4252 * namecache topology. 4253 * 4254 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 4255 */ 4256 mp = fnchd.mount; 4257 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4258 mp != tond->nl_nch.mount) { 4259 error = EXDEV; 4260 goto finish; 4261 } 4262 4263 /* 4264 * Make sure the mount point is writable 4265 */ 4266 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4267 goto finish; 4268 } 4269 4270 /* 4271 * If the target exists and either the source or target is a directory, 4272 * then both must be directories. 
4273 * 4274 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4275 * have become NULL. 4276 */ 4277 if (tond->nl_nch.ncp->nc_vp) { 4278 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4279 error = ENOENT; 4280 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4281 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4282 error = ENOTDIR; 4283 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4284 error = EISDIR; 4285 } 4286 } 4287 4288 /* 4289 * You cannot rename a source into itself or a subdirectory of itself. 4290 * We check this by travsersing the target directory upwards looking 4291 * for a match against the source. 4292 * 4293 * Only required when renaming a directory, in which case userenlk is 4294 * non-NULL. 4295 */ 4296 if (__predict_false(userenlk && error == 0)) { 4297 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4298 if (fromnd->nl_nch.ncp == ncp) { 4299 error = EINVAL; 4300 break; 4301 } 4302 } 4303 } 4304 4305 /* 4306 * Even though the namespaces are different, they may still represent 4307 * hardlinks to the same file. The filesystem might have a hard time 4308 * with this so we issue a NREMOVE of the source instead of a NRENAME 4309 * when we detect the situation. 4310 */ 4311 if (error == 0) { 4312 fdvp = fromnd->nl_dvp; 4313 tdvp = tond->nl_dvp; 4314 if (fdvp == NULL || tdvp == NULL) { 4315 error = EPERM; 4316 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4317 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4318 fromnd->nl_cred); 4319 } else { 4320 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4321 fdvp, tdvp, tond->nl_cred); 4322 } 4323 } 4324 finish: 4325 cache_put(&tnchd); 4326 cache_put(&fnchd); 4327 done: 4328 if (userenlk) 4329 lockmgr(&userenlk->mnt_renlock, LK_RELEASE); 4330 return (error); 4331 } 4332 4333 /* 4334 * rename_args(char *from, char *to) 4335 * 4336 * Rename files. Source and destination must either both be directories, 4337 * or both not be directories. 
If target is a directory, it must be empty. 4338 */ 4339 int 4340 sys_rename(struct sysmsg *sysmsg, const struct rename_args *uap) 4341 { 4342 struct nlookupdata fromnd, tond; 4343 int error; 4344 4345 do { 4346 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4347 if (error == 0) { 4348 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4349 if (error == 0) 4350 error = kern_rename(&fromnd, &tond); 4351 nlookup_done(&tond); 4352 } 4353 nlookup_done(&fromnd); 4354 } while (error == EAGAIN); 4355 return (error); 4356 } 4357 4358 /* 4359 * renameat_args(int oldfd, char *old, int newfd, char *new) 4360 * 4361 * Rename files using paths relative to the directories associated with 4362 * oldfd and newfd. Source and destination must either both be directories, 4363 * or both not be directories. If target is a directory, it must be empty. 4364 */ 4365 int 4366 sys_renameat(struct sysmsg *sysmsg, const struct renameat_args *uap) 4367 { 4368 struct nlookupdata oldnd, newnd; 4369 struct file *oldfp, *newfp; 4370 int error; 4371 4372 do { 4373 error = nlookup_init_at(&oldnd, &oldfp, 4374 uap->oldfd, uap->old, 4375 UIO_USERSPACE, 0); 4376 if (error == 0) { 4377 error = nlookup_init_at(&newnd, &newfp, 4378 uap->newfd, uap->new, 4379 UIO_USERSPACE, 0); 4380 if (error == 0) 4381 error = kern_rename(&oldnd, &newnd); 4382 nlookup_done_at(&newnd, newfp); 4383 } 4384 nlookup_done_at(&oldnd, oldfp); 4385 } while (error == EAGAIN); 4386 return (error); 4387 } 4388 4389 int 4390 kern_mkdir(struct nlookupdata *nd, int mode) 4391 { 4392 struct thread *td = curthread; 4393 struct proc *p = td->td_proc; 4394 struct vnode *vp; 4395 struct vattr vattr; 4396 int error; 4397 4398 bwillinode(1); 4399 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4400 if ((error = nlookup(nd)) != 0) 4401 return (error); 4402 4403 if (nd->nl_nch.ncp->nc_vp) 4404 return (EEXIST); 4405 if (nd->nl_dvp == NULL) 4406 return (EINVAL); 4407 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4408 
return (error); 4409 VATTR_NULL(&vattr); 4410 vattr.va_type = VDIR; 4411 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4412 4413 vp = NULL; 4414 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4415 if (error == 0) 4416 vput(vp); 4417 return (error); 4418 } 4419 4420 /* 4421 * mkdir_args(char *path, int mode) 4422 * 4423 * Make a directory file. 4424 */ 4425 int 4426 sys_mkdir(struct sysmsg *sysmsg, const struct mkdir_args *uap) 4427 { 4428 struct nlookupdata nd; 4429 int error; 4430 4431 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4432 if (error == 0) 4433 error = kern_mkdir(&nd, uap->mode); 4434 nlookup_done(&nd); 4435 return (error); 4436 } 4437 4438 /* 4439 * mkdirat_args(int fd, char *path, mode_t mode) 4440 * 4441 * Make a directory file. The path is relative to the directory associated 4442 * with fd. 4443 */ 4444 int 4445 sys_mkdirat(struct sysmsg *sysmsg, const struct mkdirat_args *uap) 4446 { 4447 struct nlookupdata nd; 4448 struct file *fp; 4449 int error; 4450 4451 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4452 if (error == 0) 4453 error = kern_mkdir(&nd, uap->mode); 4454 nlookup_done_at(&nd, fp); 4455 return (error); 4456 } 4457 4458 int 4459 kern_rmdir(struct nlookupdata *nd) 4460 { 4461 int error; 4462 4463 bwillinode(1); 4464 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4465 if ((error = nlookup(nd)) != 0) 4466 return (error); 4467 4468 /* 4469 * Do not allow directories representing mount points to be 4470 * deleted, even if empty. Check write perms on mount point 4471 * in case the vnode is aliased (aka nullfs). 
4472 */ 4473 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4474 return (EBUSY); 4475 if (nd->nl_dvp == NULL) 4476 return (EINVAL); 4477 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4478 return (error); 4479 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4480 return (error); 4481 } 4482 4483 /* 4484 * rmdir_args(char *path) 4485 * 4486 * Remove a directory file. 4487 */ 4488 int 4489 sys_rmdir(struct sysmsg *sysmsg, const struct rmdir_args *uap) 4490 { 4491 struct nlookupdata nd; 4492 int error; 4493 4494 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4495 if (error == 0) 4496 error = kern_rmdir(&nd); 4497 nlookup_done(&nd); 4498 return (error); 4499 } 4500 4501 int 4502 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4503 enum uio_seg direction) 4504 { 4505 struct thread *td = curthread; 4506 struct vnode *vp; 4507 struct file *fp; 4508 struct uio auio; 4509 struct iovec aiov; 4510 off_t loff; 4511 int error, eofflag; 4512 4513 if ((error = holdvnode(td, fd, &fp)) != 0) 4514 return (error); 4515 if ((fp->f_flag & FREAD) == 0) { 4516 error = EBADF; 4517 goto done; 4518 } 4519 vp = (struct vnode *)fp->f_data; 4520 if (vp->v_type != VDIR) { 4521 error = EINVAL; 4522 goto done; 4523 } 4524 aiov.iov_base = buf; 4525 aiov.iov_len = count; 4526 auio.uio_iov = &aiov; 4527 auio.uio_iovcnt = 1; 4528 auio.uio_rw = UIO_READ; 4529 auio.uio_segflg = direction; 4530 auio.uio_td = td; 4531 auio.uio_resid = count; 4532 loff = auio.uio_offset = fp->f_offset; 4533 error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp); 4534 fp->f_offset = auio.uio_offset; 4535 if (error) 4536 goto done; 4537 4538 /* 4539 * WARNING! *basep may not be wide enough to accomodate the 4540 * seek offset. XXX should we hack this to return the upper 32 bits 4541 * for offsets greater then 4G? 
4542 */ 4543 if (basep) { 4544 *basep = (long)loff; 4545 } 4546 *res = count - auio.uio_resid; 4547 done: 4548 fdrop(fp); 4549 return (error); 4550 } 4551 4552 /* 4553 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4554 * 4555 * Read a block of directory entries in a file system independent format. 4556 */ 4557 int 4558 sys_getdirentries(struct sysmsg *sysmsg, const struct getdirentries_args *uap) 4559 { 4560 long base; 4561 int error; 4562 4563 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4564 &sysmsg->sysmsg_result, UIO_USERSPACE); 4565 4566 if (error == 0 && uap->basep) 4567 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4568 return (error); 4569 } 4570 4571 /* 4572 * getdents_args(int fd, char *buf, size_t count) 4573 */ 4574 int 4575 sys_getdents(struct sysmsg *sysmsg, const struct getdents_args *uap) 4576 { 4577 int error; 4578 4579 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4580 &sysmsg->sysmsg_result, UIO_USERSPACE); 4581 4582 return (error); 4583 } 4584 4585 /* 4586 * Set the mode mask for creation of filesystem nodes. 4587 * 4588 * umask(int newmask) 4589 */ 4590 int 4591 sys_umask(struct sysmsg *sysmsg, const struct umask_args *uap) 4592 { 4593 struct thread *td = curthread; 4594 struct proc *p = td->td_proc; 4595 struct filedesc *fdp; 4596 4597 fdp = p->p_fd; 4598 sysmsg->sysmsg_result = fdp->fd_cmask; 4599 fdp->fd_cmask = uap->newmask & ALLPERMS; 4600 return (0); 4601 } 4602 4603 /* 4604 * revoke(char *path) 4605 * 4606 * Void all references to file by ripping underlying filesystem 4607 * away from vnode. 
4608 */ 4609 int 4610 sys_revoke(struct sysmsg *sysmsg, const struct revoke_args *uap) 4611 { 4612 struct nlookupdata nd; 4613 struct vattr vattr; 4614 struct vnode *vp; 4615 struct ucred *cred; 4616 int error; 4617 4618 vp = NULL; 4619 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4620 if (error == 0) 4621 error = nlookup(&nd); 4622 if (error == 0) 4623 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4624 cred = crhold(nd.nl_cred); 4625 nlookup_done(&nd); 4626 if (error == 0) { 4627 if (error == 0) 4628 error = VOP_GETATTR(vp, &vattr); 4629 if (error == 0 && cred->cr_uid != vattr.va_uid) 4630 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4631 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4632 if (vcount(vp) > 0) 4633 error = vrevoke(vp, cred); 4634 } else if (error == 0) { 4635 error = vrevoke(vp, cred); 4636 } 4637 vrele(vp); 4638 } 4639 if (cred) 4640 crfree(cred); 4641 return (error); 4642 } 4643 4644 /* 4645 * getfh_args(char *fname, fhandle_t *fhp) 4646 * 4647 * Get (NFS) file handle 4648 * 4649 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4650 * mount. This allows nullfs mounts to be explicitly exported. 4651 * 4652 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4653 * 4654 * nullfs mounts of subdirectories are not safe. That is, it will 4655 * work, but you do not really have protection against access to 4656 * the related parent directories. 
4657 */ 4658 int 4659 sys_getfh(struct sysmsg *sysmsg, const struct getfh_args *uap) 4660 { 4661 struct thread *td = curthread; 4662 struct nlookupdata nd; 4663 fhandle_t fh; 4664 struct vnode *vp; 4665 struct mount *mp; 4666 int error; 4667 4668 /* 4669 * Must be super user 4670 */ 4671 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4672 return (error); 4673 4674 vp = NULL; 4675 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4676 if (error == 0) 4677 error = nlookup(&nd); 4678 if (error == 0) 4679 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4680 mp = nd.nl_nch.mount; 4681 nlookup_done(&nd); 4682 if (error == 0) { 4683 bzero(&fh, sizeof(fh)); 4684 fh.fh_fsid = mp->mnt_stat.f_fsid; 4685 error = VFS_VPTOFH(vp, &fh.fh_fid); 4686 vput(vp); 4687 if (error == 0) 4688 error = copyout(&fh, uap->fhp, sizeof(fh)); 4689 } 4690 return (error); 4691 } 4692 4693 /* 4694 * fhopen_args(const struct fhandle *u_fhp, int flags) 4695 * 4696 * syscall for the rpc.lockd to use to translate a NFS file handle into 4697 * an open descriptor. 4698 * 4699 * warning: do not remove the priv_check() call or this becomes one giant 4700 * security hole. 4701 */ 4702 int 4703 sys_fhopen(struct sysmsg *sysmsg, const struct fhopen_args *uap) 4704 { 4705 struct thread *td = curthread; 4706 struct filedesc *fdp = td->td_proc->p_fd; 4707 struct mount *mp; 4708 struct vnode *vp; 4709 struct fhandle fhp; 4710 struct vattr vat; 4711 struct vattr *vap = &vat; 4712 struct flock lf; 4713 int fmode, mode, error = 0, type; 4714 struct file *nfp; 4715 struct file *fp; 4716 int indx; 4717 4718 /* 4719 * Must be super user 4720 */ 4721 error = priv_check(td, PRIV_ROOT); 4722 if (error) 4723 return (error); 4724 4725 fmode = FFLAGS(uap->flags); 4726 4727 /* 4728 * Why not allow a non-read/write open for our lockd? 
4729 */ 4730 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4731 return (EINVAL); 4732 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4733 if (error) 4734 return(error); 4735 4736 /* 4737 * Find the mount point 4738 */ 4739 mp = vfs_getvfs(&fhp.fh_fsid); 4740 if (mp == NULL) { 4741 error = ESTALE; 4742 goto done2; 4743 } 4744 /* now give me my vnode, it gets returned to me locked */ 4745 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4746 if (error) 4747 goto done; 4748 /* 4749 * from now on we have to make sure not 4750 * to forget about the vnode 4751 * any error that causes an abort must vput(vp) 4752 * just set error = err and 'goto bad;'. 4753 */ 4754 4755 /* 4756 * from vn_open 4757 */ 4758 if (vp->v_type == VLNK) { 4759 error = EMLINK; 4760 goto bad; 4761 } 4762 if (vp->v_type == VSOCK) { 4763 error = EOPNOTSUPP; 4764 goto bad; 4765 } 4766 mode = 0; 4767 if (fmode & (FWRITE | O_TRUNC)) { 4768 if (vp->v_type == VDIR) { 4769 error = EISDIR; 4770 goto bad; 4771 } 4772 error = vn_writechk(vp); 4773 if (error) 4774 goto bad; 4775 mode |= VWRITE; 4776 } 4777 if (fmode & FREAD) 4778 mode |= VREAD; 4779 if (mode) { 4780 error = VOP_ACCESS(vp, mode, td->td_ucred); 4781 if (error) 4782 goto bad; 4783 } 4784 if (fmode & O_TRUNC) { 4785 vn_unlock(vp); /* XXX */ 4786 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4787 VATTR_NULL(vap); 4788 vap->va_size = 0; 4789 error = VOP_SETATTR(vp, vap, td->td_ucred); 4790 if (error) 4791 goto bad; 4792 } 4793 4794 /* 4795 * VOP_OPEN needs the file pointer so it can potentially override 4796 * it. 4797 * 4798 * WARNING! no f_nchandle will be associated when fhopen()ing a 4799 * directory. XXX 4800 */ 4801 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4802 goto bad; 4803 fp = nfp; 4804 4805 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4806 if (error) { 4807 /* 4808 * setting f_ops this way prevents VOP_CLOSE from being 4809 * called or fdrop() releasing the vp from v_data. 
Since 4810 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4811 */ 4812 fp->f_ops = &badfileops; 4813 fp->f_data = NULL; 4814 goto bad_drop; 4815 } 4816 4817 /* 4818 * The fp is given its own reference, we still have our ref and lock. 4819 * 4820 * Assert that all regular files must be created with a VM object. 4821 */ 4822 if (vp->v_type == VREG && vp->v_object == NULL) { 4823 kprintf("fhopen: regular file did not " 4824 "have VM object: %p\n", 4825 vp); 4826 goto bad_drop; 4827 } 4828 4829 /* 4830 * The open was successful. Handle any locking requirements. 4831 */ 4832 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4833 lf.l_whence = SEEK_SET; 4834 lf.l_start = 0; 4835 lf.l_len = 0; 4836 if (fmode & O_EXLOCK) 4837 lf.l_type = F_WRLCK; 4838 else 4839 lf.l_type = F_RDLCK; 4840 if (fmode & FNONBLOCK) 4841 type = 0; 4842 else 4843 type = F_WAIT; 4844 vn_unlock(vp); 4845 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4846 &lf, type)) != 0) { 4847 /* 4848 * release our private reference. 4849 */ 4850 fsetfd(fdp, NULL, indx); 4851 fdrop(fp); 4852 vrele(vp); 4853 goto done; 4854 } 4855 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4856 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4857 } 4858 4859 /* 4860 * Clean up. Associate the file pointer with the previously 4861 * reserved descriptor and return it. 
4862 */ 4863 vput(vp); 4864 if (uap->flags & O_CLOEXEC) 4865 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4866 fsetfd(fdp, fp, indx); 4867 fdrop(fp); 4868 sysmsg->sysmsg_result = indx; 4869 mount_drop(mp); 4870 4871 return (error); 4872 4873 bad_drop: 4874 fsetfd(fdp, NULL, indx); 4875 fdrop(fp); 4876 bad: 4877 vput(vp); 4878 done: 4879 mount_drop(mp); 4880 done2: 4881 return (error); 4882 } 4883 4884 /* 4885 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4886 */ 4887 int 4888 sys_fhstat(struct sysmsg *sysmsg, const struct fhstat_args *uap) 4889 { 4890 struct thread *td = curthread; 4891 struct stat sb; 4892 fhandle_t fh; 4893 struct mount *mp; 4894 struct vnode *vp; 4895 int error; 4896 4897 /* 4898 * Must be super user 4899 */ 4900 error = priv_check(td, PRIV_ROOT); 4901 if (error) 4902 return (error); 4903 4904 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4905 if (error) 4906 return (error); 4907 4908 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4909 error = ESTALE; 4910 if (error == 0) { 4911 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4912 error = vn_stat(vp, &sb, td->td_ucred); 4913 vput(vp); 4914 } 4915 } 4916 if (error == 0) 4917 error = copyout(&sb, uap->sb, sizeof(sb)); 4918 if (mp) 4919 mount_drop(mp); 4920 4921 return (error); 4922 } 4923 4924 /* 4925 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4926 */ 4927 int 4928 sys_fhstatfs(struct sysmsg *sysmsg, const struct fhstatfs_args *uap) 4929 { 4930 struct thread *td = curthread; 4931 struct proc *p = td->td_proc; 4932 struct statfs *sp; 4933 struct mount *mp; 4934 struct vnode *vp; 4935 struct statfs sb; 4936 char *fullpath, *freepath; 4937 fhandle_t fh; 4938 int error; 4939 4940 /* 4941 * Must be super user 4942 */ 4943 if ((error = priv_check(td, PRIV_ROOT))) 4944 return (error); 4945 4946 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4947 return (error); 4948 4949 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4950 error = ESTALE; 4951 goto 
done; 4952 } 4953 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4954 error = ESTALE; 4955 goto done; 4956 } 4957 4958 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4959 goto done; 4960 mp = vp->v_mount; 4961 sp = &mp->mnt_stat; 4962 vput(vp); 4963 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4964 goto done; 4965 4966 error = mount_path(p, mp, &fullpath, &freepath); 4967 if (error) 4968 goto done; 4969 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4970 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4971 kfree(freepath, M_TEMP); 4972 4973 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4974 if (priv_check(td, PRIV_ROOT)) { 4975 bcopy(sp, &sb, sizeof(sb)); 4976 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4977 sp = &sb; 4978 } 4979 error = copyout(sp, uap->buf, sizeof(*sp)); 4980 done: 4981 if (mp) 4982 mount_drop(mp); 4983 4984 return (error); 4985 } 4986 4987 /* 4988 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4989 */ 4990 int 4991 sys_fhstatvfs(struct sysmsg *sysmsg, const struct fhstatvfs_args *uap) 4992 { 4993 struct thread *td = curthread; 4994 struct proc *p = td->td_proc; 4995 struct statvfs *sp; 4996 struct mount *mp; 4997 struct vnode *vp; 4998 fhandle_t fh; 4999 int error; 5000 5001 /* 5002 * Must be super user 5003 */ 5004 if ((error = priv_check(td, PRIV_ROOT))) 5005 return (error); 5006 5007 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 5008 return (error); 5009 5010 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 5011 error = ESTALE; 5012 goto done; 5013 } 5014 if (p != NULL && !chroot_visible_mnt(mp, p)) { 5015 error = ESTALE; 5016 goto done; 5017 } 5018 5019 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 5020 goto done; 5021 mp = vp->v_mount; 5022 sp = &mp->mnt_vstat; 5023 vput(vp); 5024 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 5025 goto done; 5026 5027 sp->f_flag = 0; 5028 if (mp->mnt_flag & MNT_RDONLY) 5029 sp->f_flag |= ST_RDONLY; 5030 if (mp->mnt_flag & MNT_NOSUID) 
5031 sp->f_flag |= ST_NOSUID; 5032 error = copyout(sp, uap->buf, sizeof(*sp)); 5033 done: 5034 if (mp) 5035 mount_drop(mp); 5036 return (error); 5037 } 5038 5039 5040 /* 5041 * Syscall to push extended attribute configuration information into the 5042 * VFS. Accepts a path, which it converts to a mountpoint, as well as 5043 * a command (int cmd), and attribute name and misc data. For now, the 5044 * attribute name is left in userspace for consumption by the VFS_op. 5045 * It will probably be changed to be copied into sysspace by the 5046 * syscall in the future, once issues with various consumers of the 5047 * attribute code have raised their hands. 5048 * 5049 * Currently this is used only by UFS Extended Attributes. 5050 */ 5051 int 5052 sys_extattrctl(struct sysmsg *sysmsg, const struct extattrctl_args *uap) 5053 { 5054 struct nlookupdata nd; 5055 struct vnode *vp; 5056 char attrname[EXTATTR_MAXNAMELEN]; 5057 int error; 5058 size_t size; 5059 5060 attrname[0] = 0; 5061 vp = NULL; 5062 error = 0; 5063 5064 if (error == 0 && uap->filename) { 5065 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 5066 NLC_FOLLOW); 5067 if (error == 0) 5068 error = nlookup(&nd); 5069 if (error == 0) 5070 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 5071 nlookup_done(&nd); 5072 } 5073 5074 if (error == 0 && uap->attrname) { 5075 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 5076 &size); 5077 } 5078 5079 if (error == 0) { 5080 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5081 if (error == 0) 5082 error = nlookup(&nd); 5083 if (error == 0) 5084 error = ncp_writechk(&nd.nl_nch); 5085 if (error == 0) { 5086 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 5087 uap->attrnamespace, 5088 uap->attrname, nd.nl_cred); 5089 } 5090 nlookup_done(&nd); 5091 } 5092 5093 return (error); 5094 } 5095 5096 /* 5097 * Syscall to get a named extended attribute on a file or directory. 
5098 */ 5099 int 5100 sys_extattr_set_file(struct sysmsg *sysmsg, 5101 const struct extattr_set_file_args *uap) 5102 { 5103 char attrname[EXTATTR_MAXNAMELEN]; 5104 struct nlookupdata nd; 5105 struct vnode *vp; 5106 struct uio auio; 5107 struct iovec aiov; 5108 int error; 5109 5110 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5111 if (error) 5112 return (error); 5113 5114 vp = NULL; 5115 5116 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5117 if (error == 0) 5118 error = nlookup(&nd); 5119 if (error == 0) 5120 error = ncp_writechk(&nd.nl_nch); 5121 if (error == 0) 5122 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5123 if (error) { 5124 nlookup_done(&nd); 5125 return (error); 5126 } 5127 5128 bzero(&auio, sizeof(auio)); 5129 aiov.iov_base = uap->data; 5130 aiov.iov_len = uap->nbytes; 5131 auio.uio_iov = &aiov; 5132 auio.uio_iovcnt = 1; 5133 auio.uio_offset = 0; 5134 auio.uio_resid = uap->nbytes; 5135 auio.uio_rw = UIO_WRITE; 5136 auio.uio_td = curthread; 5137 5138 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 5139 &auio, nd.nl_cred); 5140 5141 vput(vp); 5142 nlookup_done(&nd); 5143 return (error); 5144 } 5145 5146 /* 5147 * Syscall to get a named extended attribute on a file or directory. 
5148 */ 5149 int 5150 sys_extattr_get_file(struct sysmsg *sysmsg, 5151 const struct extattr_get_file_args *uap) 5152 { 5153 char attrname[EXTATTR_MAXNAMELEN]; 5154 struct nlookupdata nd; 5155 struct uio auio; 5156 struct iovec aiov; 5157 struct vnode *vp; 5158 int error; 5159 5160 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5161 if (error) 5162 return (error); 5163 5164 vp = NULL; 5165 5166 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5167 if (error == 0) 5168 error = nlookup(&nd); 5169 if (error == 0) 5170 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5171 if (error) { 5172 nlookup_done(&nd); 5173 return (error); 5174 } 5175 5176 bzero(&auio, sizeof(auio)); 5177 aiov.iov_base = uap->data; 5178 aiov.iov_len = uap->nbytes; 5179 auio.uio_iov = &aiov; 5180 auio.uio_iovcnt = 1; 5181 auio.uio_offset = 0; 5182 auio.uio_resid = uap->nbytes; 5183 auio.uio_rw = UIO_READ; 5184 auio.uio_td = curthread; 5185 5186 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5187 &auio, nd.nl_cred); 5188 sysmsg->sysmsg_result = uap->nbytes - auio.uio_resid; 5189 5190 vput(vp); 5191 nlookup_done(&nd); 5192 return(error); 5193 } 5194 5195 /* 5196 * Syscall to delete a named extended attribute from a file or directory. 5197 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
5198 */ 5199 int 5200 sys_extattr_delete_file(struct sysmsg *sysmsg, 5201 const struct extattr_delete_file_args *uap) 5202 { 5203 char attrname[EXTATTR_MAXNAMELEN]; 5204 struct nlookupdata nd; 5205 struct vnode *vp; 5206 int error; 5207 5208 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5209 if (error) 5210 return(error); 5211 5212 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5213 if (error == 0) 5214 error = nlookup(&nd); 5215 if (error == 0) 5216 error = ncp_writechk(&nd.nl_nch); 5217 if (error == 0) { 5218 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5219 if (error == 0) { 5220 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5221 attrname, NULL, nd.nl_cred); 5222 vput(vp); 5223 } 5224 } 5225 nlookup_done(&nd); 5226 return(error); 5227 } 5228 5229 /* 5230 * Determine if the mount is visible to the process. 5231 */ 5232 static int 5233 chroot_visible_mnt(struct mount *mp, struct proc *p) 5234 { 5235 struct nchandle nch; 5236 5237 /* 5238 * Traverse from the mount point upwards. If we hit the process 5239 * root then the mount point is visible to the process. 5240 */ 5241 nch = mp->mnt_ncmountpt; 5242 while (nch.ncp) { 5243 if (nch.mount == p->p_fd->fd_nrdir.mount && 5244 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5245 return(1); 5246 } 5247 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5248 nch = nch.mount->mnt_ncmounton; 5249 } else { 5250 nch.ncp = nch.ncp->nc_parent; 5251 } 5252 } 5253 5254 /* 5255 * If the mount point is not visible to the process, but the 5256 * process root is in a subdirectory of the mount, return 5257 * TRUE anyway. 
5258 */ 5259 if (p->p_fd->fd_nrdir.mount == mp) 5260 return(1); 5261 5262 return(0); 5263 } 5264 5265 /* Sets priv to PRIV_ROOT in case no matching fs */ 5266 static int 5267 get_fspriv(const char *fsname) 5268 { 5269 5270 if (strncmp("null", fsname, 5) == 0) { 5271 return PRIV_VFS_MOUNT_NULLFS; 5272 } else if (strncmp(fsname, "tmpfs", 6) == 0) { 5273 return PRIV_VFS_MOUNT_TMPFS; 5274 } 5275 5276 return PRIV_ROOT; 5277 } 5278 5279 int 5280 sys___realpath(struct sysmsg *sysmsg, const struct __realpath_args *uap) 5281 { 5282 struct nlookupdata nd; 5283 char *rbuf; 5284 char *fbuf; 5285 ssize_t rlen; 5286 int error; 5287 5288 /* 5289 * Invalid length if less than 0. 0 is allowed 5290 */ 5291 if ((ssize_t)uap->len < 0) 5292 return EINVAL; 5293 5294 rbuf = NULL; 5295 fbuf = NULL; 5296 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5297 if (error) 5298 goto done; 5299 5300 nd.nl_flags |= NLC_SHAREDLOCK; 5301 error = nlookup(&nd); 5302 if (error) 5303 goto done; 5304 5305 if (nd.nl_nch.ncp->nc_vp == NULL) { 5306 error = ENOENT; 5307 goto done; 5308 } 5309 5310 /* 5311 * Shortcut test for existence. 5312 */ 5313 if (uap->len == 0) { 5314 error = ENAMETOOLONG; 5315 goto done; 5316 } 5317 5318 /* 5319 * Obtain the path relative to the process root. The nch must not 5320 * be locked for the cache_fullpath() call. 5321 */ 5322 if (nd.nl_flags & NLC_NCPISLOCKED) { 5323 nd.nl_flags &= ~NLC_NCPISLOCKED; 5324 cache_unlock(&nd.nl_nch); 5325 } 5326 error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0); 5327 if (error) 5328 goto done; 5329 5330 rlen = (ssize_t)strlen(rbuf); 5331 if (rlen >= uap->len) { 5332 error = ENAMETOOLONG; 5333 goto done; 5334 } 5335 error = copyout(rbuf, uap->buf, rlen + 1); 5336 if (error == 0) 5337 sysmsg->sysmsg_szresult = rlen; 5338 done: 5339 nlookup_done(&nd); 5340 if (fbuf) 5341 kfree(fbuf, M_TEMP); 5342 5343 return error; 5344 } 5345