1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysmsg.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
79 __printflike(2, 3); 80 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 81 static int checkvp_chdir (struct vnode *vn, struct thread *td); 82 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 83 static int get_fspriv(const char *); 84 static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp); 85 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 86 static int getutimes (struct timeval *, struct timespec *); 87 static int getutimens (const struct timespec *, struct timespec *, int *); 88 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 89 static int setfmode (struct vnode *, int); 90 static int setfflags (struct vnode *, u_long); 91 static int setutimes (struct vnode *, struct vattr *, 92 const struct timespec *, int); 93 94 static int usermount = 0; /* if 1, non-root can mount fs. */ 95 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 96 "Allow non-root users to mount filesystems"); 97 98 static int debug_unmount = 0; /* if 1 loop until unmount success */ 99 SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0, 100 "Stall failed unmounts in loop"); 101 /* 102 * Virtual File System System Calls 103 */ 104 105 /* 106 * Mount a file system. 
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Implements the mount(2) system call: copy in the filesystem type,
 * validate privileges (vfs.usermount policy, jails, per-fs privs),
 * resolve the mount-on directory, then either update an existing mount
 * (MNT_UPDATE) or allocate a new struct mount, call VFS_MOUNT(), and
 * hook the new mount into the namecache and the mountlist.
 *
 * Returns 0 on success or an errno on failure.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct sysmsg *sysmsg, const struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	int priv = 0;
	int flags = uap->flags;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;

	/* We do not allow user mounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions for jails and nullfs mounts.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0)
		goto done;

	/*
	 * Select the correct priv according to the file system type.
	 */
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (flags & MNT_EXPORTED) {
		error = priv_check(td, priv);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, priv))
		flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this ncp so
	 * we can refuse to stack another mount on top of it below.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (flags & MNT_UPDATE) {
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For a nullfs update we must operate on the nullfs mount
		 * recorded above, not on the underlying filesystem.
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/* saved so the flags can be restored if VFS_MOUNT() fails */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, priv))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |= flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, priv)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp, vfsp->vfc_vfsops);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 */
	if (flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
			  MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
			  MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
			  MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
			  MNT_AUTOMOUNTED);
	mp->mnt_flag |= flags & (MNT_NOSUID | MNT_NOEXEC |
			  MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
			  MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
			  MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
			  MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	if (mp->mnt_flag & MNT_UPDATE) {
		error = VFS_MOUNT(mp, uap->path, uap->data, cred);
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			/* restore the pre-update flags saved above */
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	mp->mnt_ncmounton = nch;
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root. The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount. The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): any error returned by
		 * vfs_allocate_syncvnode() is overwritten by the
		 * VFS_START() result below -- confirm this is intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed: undo the partial setup and free
		 * the mount structure.
		 */
		bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton));
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		if (mp->mnt_cred) {
			crfree(mp->mnt_cred);
			mp->mnt_cred = NULL;
		}
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted. If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
 */
struct checkdirs_info {
	struct nchandle old_nch;	/* covered (old) mount point */
	struct nchandle new_nch;	/* root of the new mount */
	struct vnode *old_vp;		/* NOTE(review): never assigned or
					 * read in this file -- verify */
	struct vnode *new_vp;		/* root vnode of the new mount */
};

static int checkdirs_callback(struct proc *p, void *data);

/*
 * Replace process cwd/root references to the covered mount point with
 * references to the root of the newly mounted filesystem.
 */
static void
checkdirs(struct nchandle *old_nch, struct nchandle *new_nch)
{
	struct checkdirs_info info;
	struct vnode *olddp;
	struct vnode *newdp;
	struct mount *mp;

	/*
	 * If the old mount point's vnode has a usecount of 1, it is not
	 * being held as a descriptor anywhere.
	 */
	olddp = old_nch->ncp->nc_vp;
	if (olddp == NULL || VREFCNT(olddp) == 1)
		return;

	/*
	 * Force the root vnode of the new mount point to be resolved
	 * so we can update any matching processes.
	 */
	mp = new_nch->mount;
	if (VFS_ROOT(mp, &newdp))
		panic("mount: lost mount");
	vn_unlock(newdp);
	cache_lock(new_nch);
	vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY);
	cache_setunresolved(new_nch);
	cache_setvp(new_nch, newdp);
	cache_unlock(new_nch);

	/*
	 * Special handling of the root node
	 */
	if (rootvnode == olddp) {
		vref(newdp);
		vfs_cache_setroot(newdp, cache_hold(new_nch));
	}

	/*
	 * Pass newdp separately so the callback does not have to access
	 * it via new_nch->ncp->nc_vp.
	 */
	info.old_nch = *old_nch;
	info.new_nch = *new_nch;
	info.new_vp = newdp;
	allproc_scan(checkdirs_callback, &info, 0);
	vput(newdp);
}

/*
 * NOTE: callback is not MP safe because the scanned process's filedesc
 * structure can be ripped out from under us, among other things.
 */
static int
checkdirs_callback(struct proc *p, void *data)
{
	struct checkdirs_info *info = data;
	struct filedesc *fdp;
	struct nchandle ncdrop1;
	struct nchandle ncdrop2;
	struct vnode *vprele1;
	struct vnode *vprele2;

	if ((fdp = p->p_fd) != NULL) {
		cache_zero(&ncdrop1);
		cache_zero(&ncdrop2);
		vprele1 = NULL;
		vprele2 = NULL;

		/*
		 * MPUNSAFE - XXX fdp can be pulled out from under a
		 * foreign process.
		 *
		 * A shared filedesc is ok, we don't have to copy it
		 * because we are making this change globally.
		 */
		spin_lock(&fdp->fd_spin);
		if (fdp->fd_ncdir.mount == info->old_nch.mount &&
		    fdp->fd_ncdir.ncp == info->old_nch.ncp) {
			vprele1 = fdp->fd_cdir;
			vref(info->new_vp);
			fdp->fd_cdir = info->new_vp;
			ncdrop1 = fdp->fd_ncdir;
			cache_copy(&info->new_nch, &fdp->fd_ncdir);
		}
		if (fdp->fd_nrdir.mount == info->old_nch.mount &&
		    fdp->fd_nrdir.ncp == info->old_nch.ncp) {
			vprele2 = fdp->fd_rdir;
			vref(info->new_vp);
			fdp->fd_rdir = info->new_vp;
			ncdrop2 = fdp->fd_nrdir;
			cache_copy(&info->new_nch, &fdp->fd_nrdir);
		}
		spin_unlock(&fdp->fd_spin);
		/* drop the displaced references outside the spinlock */
		if (ncdrop1.ncp)
			cache_drop(&ncdrop1);
		if (ncdrop2.ncp)
			cache_drop(&ncdrop2);
		if (vprele1)
			vrele(vprele1);
		if (vprele2)
			vrele(vprele2);
	}
	return(0);
}

/*
 * Unmount a file system.
 *
 * Note: unmount takes a path to the vnode mounted on as argument,
 * not special file (as before).
 *
 * umount_args(char *path, int flags)
 *
 * MPALMOSTSAFE
 */
int
sys_unmount(struct sysmsg *sysmsg, const struct unmount_args *uap)
{
	struct thread *td = curthread;
	struct proc *p __debugvar = td->td_proc;
	struct mount *mp = NULL;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	int priv = 0;
	int error;
	struct ucred *cred;

	cred = td->td_ucred;

	KKASSERT(p);

	/* We do not allow user umounts inside a jail for now */
	if (usermount && jailed(cred)) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE,
			     NLC_FOLLOW | NLC_IGNBADDIR);
	if (error == 0)
		error = nlookup(&nd);
	if (error)
		goto out;

	mp = nd.nl_nch.mount;

	/* Figure out the fsname in order to select proper privs */
	ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name);
	priv = get_fspriv(fstypename);

	if (usermount == 0 && (error = priv_check(td, priv))) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * Only root, or the user that did the original mount is
	 * permitted to unmount this filesystem.
	 */
	if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) &&
	    (error = priv_check(td, priv)))
		goto out;

	/*
	 * Don't allow unmounting the root file system.
	 */
	if (mp->mnt_flag & MNT_ROOTFS) {
		error = EINVAL;
		goto out;
	}

	/*
	 * Must be the root of the filesystem
	 */
	if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) {
		error = EINVAL;
		goto out;
	}

	/* Check if this mount belongs to this prison */
	if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison ||
	    mp->mnt_cred->cr_prison != cred->cr_prison)) {
		kprintf("mountpoint %s does not belong to this jail\n",
		    uap->path);
		error = EPERM;
		goto out;
	}

	/*
	 * If no error try to issue the unmount. We lose our cache
	 * ref when we call nlookup_done so we must hold the mount point
	 * to prevent use-after-free races.
	 */
out:
	if (error == 0) {
		mount_hold(mp);
		nlookup_done(&nd);
		error = dounmount(mp, uap->flags, 0);
		mount_drop(mp);
	} else {
		nlookup_done(&nd);
	}
done:
	return (error);
}

/*
 * Do the actual file system unmount (interlocked against the mountlist
 * token and mp->mnt_token).
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback: release a process's cached p_textnch if it
 * references the mount being force-unmounted.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * The guts of the unmount code. The mount owns one ref and one hold
 * count. If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;
	int hadsyncer = 0;
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}


	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		/* back out the interlock and wake up any waiters */
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * Decommission our special mnt_syncer vnode. This also stops
	 * the vnlru code. If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
		hadsyncer = 1;
	}

	/*
	 * Sync normally-mounted filesystem.
	 */
	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure. Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 */
	for (retry = 0; (retry < 10 || debug_unmount); ++retry) {
		/*
		 * Invalidate the namecache topology under the mount.
		 * nullfs mounts alias a real mount's namecache topology
		 * and it should not be invalidated in that case.
		 */
		if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
			cache_lock(&mp->mnt_ncmountpt);
			cache_inval(&mp->mnt_ncmountpt,
				    CINV_DESTROY|CINV_CHILDREN);
			cache_unlock(&mp->mnt_ncmountpt);
		}

		/*
		 * Clear pcpu caches
		 */
		cache_unmounting(mp);
		if (mp->mnt_refs != 1)
			cache_clearmntcache(mp);

		/*
		 * Break out if we are good. Don't count ncp refs if the
		 * mount is aliased.
		 */
		ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
		      NULL : mp->mnt_ncmountpt.ncp;
		if (mp->mnt_refs == 1 &&
		    (ncp == NULL || (ncp->nc_refs == 1 &&
				     TAILQ_FIRST(&ncp->nc_list) == NULL))) {
			break;
		}

		/*
		 * If forcing the unmount, clean out any p->p_textnch
		 * nchandles that match this mount.
		 */
		if (flags & MNT_FORCE)
			allproc_scan(&unmount_allproc_cb, mp, 0);

		/*
		 * Sleep and retry.
		 */
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
		if ((retry & 15) == 15) {
			mount_warning(mp,
				      "(%p) debug - retry %d, "
				      "%d namecache refs, %d mount refs",
				      mp, retry,
				      (ncp ? ncp->nc_refs - 1 : 0),
				      mp->mnt_refs - 1);
		}
	}

	error = 0;
	ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ?
	      NULL : mp->mnt_ncmountpt.ncp;
	if (mp->mnt_refs != 1 ||
	    (ncp != NULL && (ncp->nc_refs != 1 ||
			     TAILQ_FIRST(&ncp->nc_list)))) {
		mount_warning(mp,
			      "(%p): %d namecache refs, %d mount refs "
			      "still present",
			      mp,
			      (ncp ? ncp->nc_refs - 1 : 0),
			      mp->mnt_refs - 1);
		if (flags & MNT_FORCE) {
			freeok = 0;
			mount_warning(mp, "forcing unmount\n");
		} else {
			error = EBUSY;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if (error == 0 ||		/* no error */
			    error == EOPNOTSUPP ||	/* no sync avail */
			    (flags & MNT_FORCE)) {	/* force anyway */
				error = VFS_UNMOUNT(mp, flags);
			}
		}
		if (error) {
			mount_warning(mp,
				      "(%p) unmount: vfs refused to unmount, "
				      "error %d",
				      mp, error);
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL && hadsyncer)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	if (mp->mnt_cred) {
		crfree(mp->mnt_cred);
		mp->mnt_cred = NULL;
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache(mp);
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache(mp);
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		mp = NULL;
	} else {
		cache_clearmntcache(mp);
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * kprintf() a formatted warning about the mount, prefixed by its
 * resolved path when cache_fullpath() can produce one.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount. In this case if the root mount matches the
 * process root directory's mount we have to specify the process's root
 * directory instead of the mount point, because the mount point might
 * be above the root directory.
 */
static
int
mount_path(struct proc *p, struct mount *mp, char **rb, char **fb)
{
	struct nchandle *nch;

	if (p && p->p_fd->fd_nrdir.mount == mp)
		nch = &p->p_fd->fd_nrdir;
	else
		nch = &mp->mnt_ncmountpt;
	return(cache_fullpath(p, nch, NULL, rb, fb, 0));
}

/*
 * Sync each mounted filesystem.
 */

#ifdef DEBUG
static int syncprt = 0;
SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, "");
#endif /* DEBUG */

static int sync_callback(struct mount *mp, void *data);

/*
 * sync(2): push dirty data out on every read-write mounted filesystem.
 * Always returns success.
 */
int
sys_sync(struct sysmsg *sysmsg, const struct sync_args *uap)
{
	mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD);
	return (0);
}

/*
 * Per-mount worker for sys_sync(): temporarily clear MNT_ASYNC (under
 * the mount token), flush, then restore the flag.
 */
static
int
sync_callback(struct mount *mp, void *data __unused)
{
	int asyncflag;

	if ((mp->mnt_flag & MNT_RDONLY) == 0) {
		lwkt_gettoken(&mp->mnt_token);
		asyncflag = mp->mnt_flag & MNT_ASYNC;
		mp->mnt_flag &= ~MNT_ASYNC;
		lwkt_reltoken(&mp->mnt_token);
		vfs_msync(mp, MNT_NOWAIT);
		VFS_SYNC(mp, MNT_NOWAIT);
		lwkt_gettoken(&mp->mnt_token);
		mp->mnt_flag |= asyncflag;
		lwkt_reltoken(&mp->mnt_token);
	}
	return(0);
}

/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif

/*
 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg)
 *
 * Change filesystem quotas.
 *
 * MPALMOSTSAFE
 */
int
sys_quotactl(struct sysmsg *sysmsg, const struct quotactl_args *uap)
{
	struct nlookupdata nd;
	struct thread *td;
	struct mount *mp;
	int error;

	td = curthread;
	/* quotas are denied inside a prison unless explicitly enabled */
	if (td->td_ucred->cr_prison && !prison_quotas) {
		error = EPERM;
		goto done;
	}

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0) {
		mp = nd.nl_nch.mount;
		error = VFS_QUOTACTL(mp, uap->cmd, uap->uid,
				     uap->arg, nd.nl_cred);
	}
	nlookup_done(&nd);
done:
	return (error);
}

/*
 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen,
 *	    void *buf, int buflen)
 *
 * This function operates on a mount point and executes the specified
 * operation using the specified control data, and possibly returns data.
 *
 * The actual number of bytes stored in the result buffer is returned, 0
 * if none, otherwise an error is returned.
 *
 * MPALMOSTSAFE
 */
int
sys_mountctl(struct sysmsg *sysmsg, const struct mountctl_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	void *ctl = NULL;
	void *buf = NULL;
	char *path = NULL;
	int error;

	/*
	 * Sanity and permissions checks. We must be root.
	 */
	if (td->td_ucred->cr_prison != NULL)
		return (EPERM);
	if ((uap->op != MOUNTCTL_MOUNTFLAGS) &&
	    (error = priv_check(td, PRIV_ROOT)) != 0)
		return (error);

	/*
	 * Argument length checks
	 */
	if (uap->ctllen < 0 || uap->ctllen > 1024)
		return (EINVAL);
	if (uap->buflen < 0 || uap->buflen > 16 * 1024)
		return (EINVAL);
	if (uap->path == NULL)
		return (EINVAL);

	/*
	 * Allocate the necessary buffers and copyin data
	 */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error)
		goto done;

	if (uap->ctllen) {
		ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO);
		error = copyin(uap->ctl, ctl, uap->ctllen);
		if (error)
			goto done;
	}
	if (uap->buflen)
		buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO);

	/*
	 * Validate the descriptor
	 */
	if (uap->fd >= 0) {
		fp = holdfp(td, uap->fd, -1);
		if (fp == NULL) {
			error = EBADF;
			goto done;
		}
	} else {
		fp = NULL;
	}

	/*
	 * Execute the internal kernel function and clean up.
	 */
	error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen,
			      buf, uap->buflen, &sysmsg->sysmsg_result);
	if (fp)
		dropfp(td, uap->fd, fp);
	if (error == 0 && sysmsg->sysmsg_result > 0)
		error = copyout(buf, uap->buf, sysmsg->sysmsg_result);
done:
	/* common cleanup for all exit paths */
	if (path)
		objcache_put(namei_oc, path);
	if (ctl)
		kfree(ctl, M_TEMP);
	if (buf)
		kfree(buf, M_TEMP);
	return (error);
}

/*
 * Execute a mount control operation by resolving the path to a mount point
 * and calling vop_mountctl().
 *
 * Use the mount point from the nch instead of the vnode so nullfs mounts
 * can properly spike the VOP.
 */
int
kern_mountctl(const char *path, int op, struct file *fp,
	      const void *ctl, int ctllen,
	      void *buf, int buflen, int *res)
{
	struct vnode *vp;
	struct nlookupdata nd;
	struct nchandle nch;
	struct mount *mp;
	int error;

	*res = 0;
	vp = NULL;
	error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW);
	if (error)
		return (error);
	error = nlookup(&nd);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}
	error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/*
	 * Yes, all this is needed to use the nch.mount below, because
	 * we must maintain a ref on the mount to avoid ripouts (e.g.
	 * due to heavy mount/unmount use by synth or poudriere).
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	cache_unlock(&nch);
	nlookup_done(&nd);
	vn_unlock(vp);

	mp = nch.mount;

	/*
	 * Must be the root of the filesystem
	 */
	if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) {
		kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n",
			path);
		cache_drop(&nch);
		vrele(vp);
		return (EINVAL);
	}
	error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen,
			     buf, buflen, res);
	vrele(vp);
	cache_drop(&nch);

	return (error);
}

/*
 * Resolve *nd and copy the mount's statfs data into *buf.  The mount-on
 * path is rewritten relative to the caller's root (chroot-aware) and the
 * fsid is zeroed for non-root callers.
 */
int
kern_statfs(struct nlookupdata *nd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_stat;

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	error = VFS_STATFS(mp, sp, nd->nl_cred);
	/* ignore error */

	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		return(error);
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));
	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	return (0);
}

/*
 * statfs_args(char *path, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statfs(struct sysmsg *sysmsg, const struct statfs_args *uap)
{
	struct nlookupdata nd;
	struct statfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Descriptor-based variant of kern_statfs(): stat the filesystem backing
 * file descriptor fd using the file's credentials.
 */
int
kern_fstatfs(int fd, struct statfs *buf)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	struct mount *mp;
	struct statfs *sp;
	char *fullpath, *freepath;
	int error;

	KKASSERT(p);
	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);

	/*
	 * Try to use mount info from any overlays rather than the
	 * mount info for the underlying vnode, otherwise we will
	 * fail when operating on null-mounted paths inside a chroot.
	 */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}

	/*
	 * Ignore refresh error, user should have visibility.
	 * This can happen if a NFS mount goes bad (e.g. server
	 * revokes perms or goes down).
	 */
	sp = &mp->mnt_stat;
	error = VFS_STATFS(mp, sp, fp->f_cred);

	if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	bcopy(sp, buf, sizeof(*buf));

	/* Only root should have access to the fsid's. */
	if (priv_check(td, PRIV_ROOT))
		buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0;
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatfs_args(int fd, struct statfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatfs(struct sysmsg *sysmsg, const struct fstatfs_args *uap)
{
	struct statfs buf;
	int error;

	error = kern_fstatfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Resolve *nd and copy the mount's statvfs data into *buf, synthesizing
 * the ST_RDONLY/ST_NOSUID f_flag bits from the mount flags.
 */
int
kern_statvfs(struct nlookupdata *nd, struct statvfs *buf)
{
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	mp = nd->nl_nch.mount;
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0)
		return (error);

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	bcopy(sp, buf, sizeof(*buf));
	return (0);
}

/*
 * statvfs_args(char *path, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_statvfs(struct sysmsg *sysmsg, const struct statvfs_args *uap)
{
	struct nlookupdata nd;
	struct statvfs buf;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_statvfs(&nd, &buf);
	nlookup_done(&nd);
	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * Descriptor-based variant of kern_statvfs(): statvfs the filesystem
 * backing file descriptor fd using the file's credentials.
 */
int
kern_fstatvfs(int fd, struct statvfs *buf)
{
	struct thread *td = curthread;
	struct file *fp;
	struct mount *mp;
	struct statvfs *sp;
	int error;

	if ((error = holdvnode(td, fd, &fp)) != 0)
		return (error);
	/* Prefer overlay (e.g. nullfs) mount info over the underlying vnode's */
	if ((mp = fp->f_nchandle.mount) == NULL)
		mp = ((struct vnode *)fp->f_data)->v_mount;
	if (mp == NULL) {
		error = EBADF;
		goto done;
	}
	if (fp->f_cred == NULL) {
		error = EINVAL;
		goto done;
	}
	sp = &mp->mnt_vstat;
	if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0)
		goto done;

	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;

	bcopy(sp, buf, sizeof(*buf));
	error = 0;
done:
	fdrop(fp);
	return (error);
}

/*
 * fstatvfs_args(int fd, struct statvfs *buf)
 *
 * Get filesystem statistics.
 */
int
sys_fstatvfs(struct sysmsg *sysmsg, const struct fstatvfs_args *uap)
{
	struct statvfs buf;
	int error;

	error = kern_fstatvfs(uap->fd, &buf);

	if (error == 0)
		error = copyout(&buf, uap->buf, sizeof(*uap->buf));
	return (error);
}

/*
 * getfsstat_args(struct statfs *buf, long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getfsstat_info {
	struct statfs *sfsp;	/* user buffer cursor, NULL = count only */
	long count;		/* number of mounts visited */
	long maxcount;		/* capacity of the user buffer */
	int error;
	int flags;
	struct thread *td;
};

static int getfsstat_callback(struct mount *, void *);

int
sys_getfsstat(struct sysmsg *sysmsg, const struct getfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->bufsize / sizeof(struct statfs);
	info.sfsp = uap->buf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.sfsp && info.count > info.maxcount)
		sysmsg->sysmsg_result = info.maxcount;
	else
		sysmsg->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback for sys_getfsstat().  Skips mounts not visible from
 * the caller's chroot, copies one statfs record out per visible mount
 * while buffer space remains, and always counts visible mounts.
 */
static int
getfsstat_callback(struct mount *mp, void *data)
{
	struct getfsstat_info *info = data;
	struct statfs *sp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->sfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
	}
	info->count++;
	return(0);
}

/*
 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf,
 *		   long bufsize, int flags)
 *
 * Get statistics on all filesystems.
 */

struct getvfsstat_info {
	struct statfs *sfsp;	/* user statfs buffer cursor */
	struct statvfs *vsfsp;	/* user statvfs buffer cursor, NULL = count only */
	long count;		/* number of mounts visited */
	long maxcount;		/* capacity of the user buffers */
	int error;
	int flags;
	struct thread *td;
};

static int getvfsstat_callback(struct mount *, void *);

int
sys_getvfsstat(struct sysmsg *sysmsg, const struct getvfsstat_args *uap)
{
	struct thread *td = curthread;
	struct getvfsstat_info info;

	bzero(&info, sizeof(info));

	info.maxcount = uap->vbufsize / sizeof(struct statvfs);
	info.sfsp = uap->buf;
	info.vsfsp = uap->vbuf;
	info.count = 0;
	info.flags = uap->flags;
	info.td = td;

	mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD);
	if (info.vsfsp && info.count > info.maxcount)
		sysmsg->sysmsg_result = info.maxcount;
	else
		sysmsg->sysmsg_result = info.count;
	return (info.error);
}

/*
 * Per-mount callback for sys_getvfsstat().  Like getfsstat_callback() but
 * copies out both a statfs and a statvfs record per visible mount.
 */
static int
getvfsstat_callback(struct mount *mp, void *data)
{
	struct getvfsstat_info *info = data;
	struct statfs *sp;
	struct statvfs *vsp;
	char *freepath;
	char *fullpath;
	int error;

	if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc))
		return(0);

	if (info->vsfsp && info->count < info->maxcount) {
		sp = &mp->mnt_stat;
		vsp = &mp->mnt_vstat;

		/*
		 * If MNT_NOWAIT or MNT_LAZY is specified, do not
		 * refresh the fsstat cache.  MNT_NOWAIT or MNT_LAZY
		 * overrides MNT_WAIT.
		 *
		 * Ignore refresh error, user should have visibility.
		 * This can happen if a NFS mount goes bad (e.g. server
		 * revokes perms or goes down).
		 */
		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATFS(mp, sp, info->td->td_ucred))) {
			/* ignore error */
		}
		sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;

		if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 ||
		     (info->flags & MNT_WAIT)) &&
		    (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) {
			/* ignore error */
		}
		vsp->f_flag = 0;
		if (mp->mnt_flag & MNT_RDONLY)
			vsp->f_flag |= ST_RDONLY;
		if (mp->mnt_flag & MNT_NOSUID)
			vsp->f_flag |= ST_NOSUID;

		error = mount_path(info->td->td_proc, mp, &fullpath, &freepath);
		if (error) {
			info->error = error;
			return(-1);
		}
		bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
		strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
		kfree(freepath, M_TEMP);

		error = copyout(sp, info->sfsp, sizeof(*sp));
		if (error == 0)
			error = copyout(vsp, info->vsfsp, sizeof(*vsp));
		if (error) {
			info->error = error;
			return (-1);
		}
		++info->sfsp;
		++info->vsfsp;
	}
	info->count++;
	return(0);
}


/*
 * fchdir_args(int fd)
 *
 * Change current working directory to a given file descriptor.
 */
int
sys_fchdir(struct sysmsg *sysmsg, const struct fchdir_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct mount *mp;
	struct file *fp;
	struct nchandle nch, onch, tnch;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	lwkt_gettoken(&p->p_token);
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	if (fp->f_nchandle.ncp == NULL)
		error = ENOTDIR;
	else
		error = checkvp_chdir(vp, td);
	if (error) {
		vput(vp);
		goto done;
	}
	cache_copy(&fp->f_nchandle, &nch);

	/*
	 * If the ncp has become a mount point, traverse through
	 * the mount point.
	 */

	while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	       (mp = cache_findmount(&nch)) != NULL
	) {
		error = nlookup_mp(mp, &tnch);
		if (error == 0) {
			cache_unlock(&tnch);	/* leave ref intact */
			vput(vp);
			vp = tnch.ncp->nc_vp;
			error = vget(vp, LK_SHARED);
			KKASSERT(error == 0);
			cache_drop(&nch);
			nch = tnch;
		}
		cache_dropmount(mp);
	}
	if (error == 0) {
		/* Swap in the new cwd vnode/nch, then release the old ones */
		spin_lock(&fdp->fd_spin);
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		fdp->fd_cdir = vp;
		fdp->fd_ncdir = nch;
		spin_unlock(&fdp->fd_spin);
		vn_unlock(vp);		/* leave ref intact */
		cache_drop(&onch);
		vrele(ovp);
	} else {
		cache_drop(&nch);
		vput(vp);
	}
	fdrop(fp);
done:
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * Resolve *nd and make the result the calling process's current working
 * directory.  On success the nch reference is transferred to fd_ncdir.
 */
int
kern_chdir(struct nlookupdata *nd)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp, *ovp;
	struct nchandle onch;
	int error;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);
	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	lwkt_gettoken(&p->p_token);
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);
	if (error == 0) {
		spin_lock(&fdp->fd_spin);
		ovp = fdp->fd_cdir;
		onch = fdp->fd_ncdir;
		fdp->fd_ncdir = nd->nl_nch;
		fdp->fd_cdir = vp;
		spin_unlock(&fdp->fd_spin);
		cache_unlock(&nd->nl_nch);	/* leave reference intact */
		cache_drop(&onch);
		vrele(ovp);
		cache_zero(&nd->nl_nch);
	} else {
		vrele(vp);
	}
	lwkt_reltoken(&p->p_token);
	return (error);
}

/*
 * chdir_args(char *path)
 *
 * Change current working directory (``.'').
 */
int
sys_chdir(struct sysmsg *sysmsg, const struct chdir_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chdir(&nd);
	nlookup_done(&nd);
	return (error);
}

/*
 * Helper function for raised chroot(2) security function:  Refuse if
 * any filedescriptors are open directories.
 */
static int
chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp)
{
	struct vnode *vp;
	struct file *fp;
	int error;
	int fd;

	for (fd = 0; fd < fdp->fd_nfiles ; fd++) {
		if ((error = holdvnode(td, fd, &fp)) != 0)
			continue;
		vp = (struct vnode *)fp->f_data;
		if (vp->v_type != VDIR) {
			fdrop(fp);
			continue;
		}
		fdrop(fp);
		return(EPERM);
	}
	return (0);
}

/*
 * This sysctl determines if we will allow a process to chroot(2) if it
 * has a directory open:
 *	0: disallowed for all processes.
 *	1: allowed for processes that were not already chroot(2)'ed.
 *	2: allowed for all processes.
 */

static int chroot_allow_open_directories = 1;

SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW,
     &chroot_allow_open_directories, 0, "");

/*
 * chroot to the specified namecache entry.  We obtain the vp from the
 * namecache data.  The passed ncp must be locked and referenced and will
 * remain locked and referenced on return.
 */
int
kern_chroot(struct nchandle *nch)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct vnode *vp;
	int error;

	/*
	 * Only privileged user can chroot
	 */
	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		return (error);

	/*
	 * Disallow open directory descriptors (fchdir() breakouts).
	 */
	if (chroot_allow_open_directories == 0 ||
	    (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) {
		if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0)
			return (error);
	}
	if ((vp = nch->ncp->nc_vp) == NULL)
		return (ENOENT);

	if ((error = vget(vp, LK_SHARED)) != 0)
		return (error);

	/*
	 * Check the validity of vp as a directory to change to and
	 * associate it with rdir/jdir.
	 */
	error = checkvp_chdir(vp, td);
	vn_unlock(vp);			/* leave reference intact */
	if (error == 0) {
		lwkt_gettoken(&p->p_token);
		vrele(fdp->fd_rdir);
		fdp->fd_rdir = vp;	/* reference inherited by fd_rdir */
		cache_drop(&fdp->fd_nrdir);
		cache_copy(nch, &fdp->fd_nrdir);
		if (fdp->fd_jdir == NULL) {
			fdp->fd_jdir = vp;
			vref(fdp->fd_jdir);
			cache_copy(nch, &fdp->fd_njdir);
		}
		if ((p->p_flags & P_DIDCHROOT) == 0) {
			p->p_flags |= P_DIDCHROOT;
			/* cap p_depth to avoid 16-bit overflow */
			if (p->p_depth <= 65535 - 32)
				p->p_depth += 32;
		}
		lwkt_reltoken(&p->p_token);
	} else {
		vrele(vp);
	}
	return (error);
}

/*
 * chroot_args(char *path)
 *
 * Change notion of root (``/'') directory.
 */
int
sys_chroot(struct sysmsg *sysmsg, const struct chroot_args *uap)
{
	struct thread *td __debugvar = curthread;
	struct nlookupdata nd;
	int error;

	KKASSERT(td->td_proc);
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		nd.nl_flags |= NLC_EXEC;
		error = nlookup(&nd);
		if (error == 0)
			error = kern_chroot(&nd.nl_nch);
	}
	nlookup_done(&nd);
	return(error);
}

/*
 * Change the kernel-wide notion of the filesystem root (vfs_cache_setroot)
 * to the specified path.  Requires PRIV_VFS_CHROOT.
 */
int
sys_chroot_kernel(struct sysmsg *sysmsg, const struct chroot_kernel_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct nchandle *nch;
	struct vnode *vp;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error)
		goto error_nond;

	error = nlookup(&nd);
	if (error)
		goto error_out;

	nch = &nd.nl_nch;

	error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0);
	if (error)
		goto error_out;

	if ((vp = nch->ncp->nc_vp) == NULL) {
		error = ENOENT;
		goto error_out;
	}

	if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0)
		goto error_out;

	vfs_cache_setroot(vp, cache_hold(nch));

error_out:
	nlookup_done(&nd);
error_nond:
	return(error);
}

/*
 * Common routine for chroot and chdir.  Given a locked, referenced vnode,
 * determine whether it is legal to chdir to the vnode.  The vnode's state
 * is not changed by this call.
 */
static int
checkvp_chdir(struct vnode *vp, struct thread *td)
{
	int error;

	if (vp->v_type != VDIR)
		error = ENOTDIR;
	else
		error = VOP_EACCESS(vp, VEXEC, td->td_ucred);
	return (error);
}

/*
 * Guts of open(2)/openat(2): allocate a file pointer, perform the vn_open()
 * lookup, reserve a descriptor, apply O_EXLOCK/O_SHLOCK and O_CLOEXEC, and
 * return the new descriptor index in *res.
 */
int
kern_open(struct nlookupdata *nd, int oflags, int mode, int *res)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct lwp *lp = td->td_lwp;
	struct filedesc *fdp = p->p_fd;
	int cmode, flags;
	struct file *nfp;
	struct file *fp;
	struct vnode *vp;
	int type, indx, error = 0;
	struct flock lf;

	if ((oflags & O_ACCMODE) == O_ACCMODE)
		return (EINVAL);
	flags = FFLAGS(oflags);
	error = falloc(lp, &nfp, NULL);
	if (error)
		return (error);
	fp = nfp;
	cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;

	/*
	 * XXX p_dupfd is a real mess.  It allows a device to return a
	 * file descriptor to be duplicated rather then doing the open
	 * itself.
	 */
	lp->lwp_dupfd = -1;

	/*
	 * Call vn_open() to do the lookup and assign the vnode to the
	 * file pointer.  vn_open() does not change the ref count on fp
	 * and the vnode, on success, will be inherited by the file pointer
	 * and unlocked.
	 *
	 * Request a shared lock on the vnode if possible.
	 *
	 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode
	 * lock for O_RDWR opens on executables in order to avoid a VTEXT
	 * detection race.  The NLC_EXCLLOCK_IFEXEC handles this case.
	 *
	 * NOTE: We need a flag to separate terminal vnode locking from
	 *	 parent locking.  O_CREAT needs parent locking, but O_TRUNC
	 *	 and O_RDWR only need to lock the terminal vnode exclusively.
	 */
	nd->nl_flags |= NLC_LOCKVP;
	if ((flags & (O_CREAT|O_TRUNC)) == 0) {
		nd->nl_flags |= NLC_SHAREDLOCK;
		if (flags & O_RDWR)
			nd->nl_flags |= NLC_EXCLLOCK_IFEXEC;
	}

	error = vn_open(nd, fp, flags, cmode);
	nlookup_done(nd);

	if (error) {
		/*
		 * handle special fdopen() case.  bleh.  dupfdopen() is
		 * responsible for dropping the old contents of ofiles[indx]
		 * if it succeeds.
		 *
		 * Note that fsetfd() will add a ref to fp which represents
		 * the fd_files[] assignment.  We must still drop our
		 * reference.
		 */
		if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) {
			if (fdalloc(p, 0, &indx) == 0) {
				error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error);
				if (error == 0) {
					*res = indx;
					fdrop(fp);	/* our ref */
					return (0);
				}
				fsetfd(fdp, NULL, indx);
			}
		}
		fdrop(fp);	/* our ref */
		if (error == ERESTART)
			error = EINTR;
		return (error);
	}

	/*
	 * ref the vnode for ourselves so it can't be ripped out from under
	 * is.  XXX need an ND flag to request that the vnode be returned
	 * anyway.
	 *
	 * Reserve a file descriptor but do not assign it until the open
	 * succeeds.
	 */
	vp = (struct vnode *)fp->f_data;
	vref(vp);
	if ((error = fdalloc(p, 0, &indx)) != 0) {
		fdrop(fp);
		vrele(vp);
		return (error);
	}

	/*
	 * If no error occurs the vp will have been assigned to the file
	 * pointer.
	 */
	lp->lwp_dupfd = 0;

	if (flags & (O_EXLOCK | O_SHLOCK)) {
		lf.l_whence = SEEK_SET;
		lf.l_start = 0;
		lf.l_len = 0;
		if (flags & O_EXLOCK)
			lf.l_type = F_WRLCK;
		else
			lf.l_type = F_RDLCK;
		if (flags & FNONBLOCK)
			type = 0;
		else
			type = F_WAIT;

		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) {
			/*
			 * lock request failed.  Clean up the reserved
			 * descriptor.
			 */
			vrele(vp);
			fsetfd(fdp, NULL, indx);
			fdrop(fp);
			return (error);
		}
		atomic_set_int(&fp->f_flag, FHASLOCK);	/* race ok */
	}
#if 0
	/*
	 * Assert that all regular file vnodes were created with a object.
	 */
	KASSERT(vp->v_type != VREG || vp->v_object != NULL,
		("open: regular file has no backing object after vn_open"));
#endif

	vrele(vp);

	/*
	 * release our private reference, leaving the one associated with the
	 * descriptor table intact.
	 */
	if (oflags & O_CLOEXEC)
		fdp->fd_files[indx].fileflags |= UF_EXCLOSE;
	fsetfd(fdp, fp, indx);
	fdrop(fp);
	*res = indx;

	return (error);
}

/*
 * open_args(char *path, int flags, int mode)
 *
 * Check permissions, allocate an open file structure,
 * and call the device open routine if any.
2219 */ 2220 int 2221 sys_open(struct sysmsg *sysmsg, const struct open_args *uap) 2222 { 2223 struct nlookupdata nd; 2224 int error; 2225 2226 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2227 if (error == 0) { 2228 error = kern_open(&nd, uap->flags, 2229 uap->mode, &sysmsg->sysmsg_result); 2230 } 2231 nlookup_done(&nd); 2232 return (error); 2233 } 2234 2235 /* 2236 * openat_args(int fd, char *path, int flags, int mode) 2237 */ 2238 int 2239 sys_openat(struct sysmsg *sysmsg, const struct openat_args *uap) 2240 { 2241 struct nlookupdata nd; 2242 int error; 2243 struct file *fp; 2244 2245 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2246 if (error == 0) { 2247 error = kern_open(&nd, uap->flags, uap->mode, 2248 &sysmsg->sysmsg_result); 2249 } 2250 nlookup_done_at(&nd, fp); 2251 return (error); 2252 } 2253 2254 int 2255 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2256 { 2257 struct thread *td = curthread; 2258 struct proc *p = td->td_proc; 2259 struct vnode *vp; 2260 struct vattr vattr; 2261 int error; 2262 int whiteout = 0; 2263 2264 KKASSERT(p); 2265 2266 VATTR_NULL(&vattr); 2267 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2268 vattr.va_rmajor = rmajor; 2269 vattr.va_rminor = rminor; 2270 2271 switch (mode & S_IFMT) { 2272 case S_IFMT: /* used by badsect to flag bad sectors */ 2273 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2274 vattr.va_type = VBAD; 2275 break; 2276 case S_IFCHR: 2277 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2278 vattr.va_type = VCHR; 2279 break; 2280 case S_IFBLK: 2281 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2282 vattr.va_type = VBLK; 2283 break; 2284 case S_IFWHT: 2285 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2286 whiteout = 1; 2287 break; 2288 case S_IFDIR: /* special directories support for HAMMER */ 2289 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2290 vattr.va_type = VDIR; 2291 break; 2292 default: 2293 
error = EINVAL; 2294 break; 2295 } 2296 2297 if (error) 2298 return (error); 2299 2300 bwillinode(1); 2301 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2302 if ((error = nlookup(nd)) != 0) 2303 return (error); 2304 if (nd->nl_nch.ncp->nc_vp) 2305 return (EEXIST); 2306 if (nd->nl_dvp == NULL) 2307 return (EINVAL); 2308 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2309 return (error); 2310 2311 if (whiteout) { 2312 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2313 nd->nl_cred, NAMEI_CREATE); 2314 } else { 2315 vp = NULL; 2316 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2317 &vp, nd->nl_cred, &vattr); 2318 if (error == 0) 2319 vput(vp); 2320 } 2321 return (error); 2322 } 2323 2324 /* 2325 * mknod_args(char *path, int mode, int dev) 2326 * 2327 * Create a special file. 2328 */ 2329 int 2330 sys_mknod(struct sysmsg *sysmsg, const struct mknod_args *uap) 2331 { 2332 struct nlookupdata nd; 2333 int error; 2334 2335 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2336 if (error == 0) { 2337 error = kern_mknod(&nd, uap->mode, 2338 umajor(uap->dev), uminor(uap->dev)); 2339 } 2340 nlookup_done(&nd); 2341 return (error); 2342 } 2343 2344 /* 2345 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2346 * 2347 * Create a special file. The path is relative to the directory associated 2348 * with fd. 
2349 */ 2350 int 2351 sys_mknodat(struct sysmsg *sysmsg, const struct mknodat_args *uap) 2352 { 2353 struct nlookupdata nd; 2354 struct file *fp; 2355 int error; 2356 2357 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2358 if (error == 0) { 2359 error = kern_mknod(&nd, uap->mode, 2360 umajor(uap->dev), uminor(uap->dev)); 2361 } 2362 nlookup_done_at(&nd, fp); 2363 return (error); 2364 } 2365 2366 int 2367 kern_mkfifo(struct nlookupdata *nd, int mode) 2368 { 2369 struct thread *td = curthread; 2370 struct proc *p = td->td_proc; 2371 struct vattr vattr; 2372 struct vnode *vp; 2373 int error; 2374 2375 bwillinode(1); 2376 2377 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2378 if ((error = nlookup(nd)) != 0) 2379 return (error); 2380 if (nd->nl_nch.ncp->nc_vp) 2381 return (EEXIST); 2382 if (nd->nl_dvp == NULL) 2383 return (EINVAL); 2384 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2385 return (error); 2386 2387 VATTR_NULL(&vattr); 2388 vattr.va_type = VFIFO; 2389 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2390 vp = NULL; 2391 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2392 if (error == 0) 2393 vput(vp); 2394 return (error); 2395 } 2396 2397 /* 2398 * mkfifo_args(char *path, int mode) 2399 * 2400 * Create a named pipe. 2401 */ 2402 int 2403 sys_mkfifo(struct sysmsg *sysmsg, const struct mkfifo_args *uap) 2404 { 2405 struct nlookupdata nd; 2406 int error; 2407 2408 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2409 if (error == 0) 2410 error = kern_mkfifo(&nd, uap->mode); 2411 nlookup_done(&nd); 2412 return (error); 2413 } 2414 2415 /* 2416 * mkfifoat_args(int fd, char *path, mode_t mode) 2417 * 2418 * Create a named pipe. The path is relative to the directory associated 2419 * with fd. 
 */
int
sys_mkfifoat(struct sysmsg *sysmsg, const struct mkfifoat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_mkfifo(&nd, uap->mode);
	nlookup_done_at(&nd, fp);
	return (error);
}

/* Tunables restricting hard links to files owned by other uids/gids */
static int hardlink_check_uid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Policy check for hard link creation, honoring the
 * security.hardlink_check_{uid,gid} sysctls above.  Returns 0 if the
 * link is permitted, EPERM if denied, or a VOP_GETATTR error.
 */
static int
can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred)
{
	struct vattr va;
	int error;

	/*
	 * Shortcut if disabled
	 */
	if (hardlink_check_uid == 0 && hardlink_check_gid == 0)
		return (0);

	/*
	 * Privileged user can always hardlink
	 */
	if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0)
		return (0);

	/*
	 * Otherwise only if the originating file is owned by the
	 * same user or group.  Note that any group is allowed if
	 * the file is owned by the caller.
	 */
	error = VOP_GETATTR(vp, &va);
	if (error != 0)
		return (error);

	if (hardlink_check_uid) {
		if (cred->cr_uid != va.va_uid)
			return (EPERM);
	}

	if (hardlink_check_gid) {
		if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred))
			return (EPERM);
	}

	return (0);
}

/*
 * Create a hard link: nd describes the existing source file, linknd the
 * new name to create.  The source must not be a directory and the caller
 * must have write permission on (or own) the source.
 */
int
kern_link(struct nlookupdata *nd, struct nlookupdata *linknd)
{
	struct thread *td = curthread;
	struct vnode *vp;
	int error;

	/*
	 * Lookup the source and obtained a locked vnode.
	 *
	 * You may only hardlink a file which you have write permission
	 * on or which you own.
	 *
	 * XXX relookup on vget failure / race ?
	 */
	bwillinode(1);
	nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	vp = nd->nl_nch.ncp->nc_vp;
	KKASSERT(vp != NULL);
	if (vp->v_type == VDIR)
		return (EPERM);		/* POSIX */
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0)
		return (error);

	/*
	 * Unlock the source so we can lookup the target without deadlocking
	 * (XXX vp is locked already, possible other deadlock?).  The target
	 * must not exist.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);
	nd->nl_flags &= ~NLC_NCPISLOCKED;
	cache_unlock(&nd->nl_nch);
	vn_unlock(vp);

	linknd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(linknd)) != 0) {
		vrele(vp);
		return (error);
	}
	if (linknd->nl_nch.ncp->nc_vp) {
		vrele(vp);
		return (EEXIST);
	}
	if (linknd->nl_dvp == NULL) {
		vrele(vp);
		return (EINVAL);
	}
	VFS_MODIFYING(vp->v_mount);
	/* Re-lock the source vnode for the VOP; it was unlocked above */
	error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM);
	if (error) {
		vrele(vp);
		return (error);
	}

	/*
	 * Finally run the new API VOP.
	 */
	error = can_hardlink(vp, td, td->td_ucred);
	if (error == 0) {
		error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp,
				  vp, linknd->nl_cred);
	}
	vput(vp);
	return (error);
}

/*
 * link_args(char *path, char *link)
 *
 * Make a hard file link.
 */
int
sys_link(struct sysmsg *sysmsg, const struct link_args *uap)
{
	struct nlookupdata nd, linknd;
	int error;

	/* Source follows symlinks, the new link name does not */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done(&linknd);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags)
 *
 * Make a hard file link.  The path1 argument is relative to the directory
 * associated with fd1, and similarly the path2 argument is relative to
 * the directory associated with fd2.
 */
int
sys_linkat(struct sysmsg *sysmsg, const struct linkat_args *uap)
{
	struct nlookupdata nd, linknd;
	struct file *fp1, *fp2;
	int error;

	/* AT_SYMLINK_FOLLOW controls symlink traversal of the source only */
	error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE,
	    (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0);
	if (error == 0) {
		error = nlookup_init_at(&linknd, &fp2, uap->fd2,
		    uap->path2, UIO_USERSPACE, 0);
		if (error == 0)
			error = kern_link(&nd, &linknd);
		nlookup_done_at(&linknd, fp2);
	}
	nlookup_done_at(&nd, fp1);
	return (error);
}

/*
 * Create a symbolic link at the name described by nd whose target text
 * is 'path'.  'mode' supplies the permission bits for the new link.
 */
int
kern_symlink(struct nlookupdata *nd, char *path, int mode)
{
	struct vattr vattr;
	struct vnode *vp;
	struct vnode *dvp;
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_CREATE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_nch.ncp->nc_vp)
		return (EEXIST);
	if (nd->nl_dvp == NULL)
		return (EINVAL);
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	dvp = nd->nl_dvp;
	VATTR_NULL(&vattr);
	vattr.va_mode = mode;
	error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path);
	if (error == 0)
		vput(vp);
	return (error);
}

/*
 * symlink(char *path, char *link)
 *
 * Make a symbolic link.
 */
int
sys_symlink(struct sysmsg *sysmsg, const struct symlink_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	char *path;
	int error;
	int mode;

	/* Copy in the link target text via the namei object cache */
	path = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path, path, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path, mode);
		}
		nlookup_done(&nd);
	}
	objcache_put(namei_oc, path);
	return (error);
}

/*
 * symlinkat_args(char *path1, int fd, char *path2)
 *
 * Make a symbolic link.  The path2 argument is relative to the directory
 * associated with fd.
 */
int
sys_symlinkat(struct sysmsg *sysmsg, const struct symlinkat_args *uap)
{
	struct thread *td = curthread;
	struct nlookupdata nd;
	struct file *fp;
	char *path1;
	int error;
	int mode;

	path1 = objcache_get(namei_oc, M_WAITOK);
	error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL);
	if (error == 0) {
		error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2,
		    UIO_USERSPACE, 0);
		if (error == 0) {
			mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask;
			error = kern_symlink(&nd, path1, mode);
		}
		nlookup_done_at(&nd, fp);
	}
	objcache_put(namei_oc, path1);
	return (error);
}

/*
 * undelete_args(char *path)
 *
 * Delete a whiteout from the filesystem.
 */
int
sys_undelete(struct sysmsg *sysmsg, const struct undelete_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	bwillinode(1);
	/*
	 * NOTE(review): nd.nl_flags is touched before the init error is
	 * checked — presumably nlookup_init always leaves nd in a usable
	 * (zeroed) state on failure; confirm against nlookup_init().
	 */
	nd.nl_flags |= NLC_DELETE | NLC_REFDVP;
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0 && nd.nl_dvp == NULL)
		error = EINVAL;
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0) {
		error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred,
				      NAMEI_DELETE);
	}
	nlookup_done(&nd);
	return (error);
}

/*
 * Remove the name described by nd (file unlink, not rmdir).
 * Returns 0 on success or a lookup/permission/VOP error.
 */
int
kern_unlink(struct nlookupdata *nd)
{
	int error;

	bwillinode(1);
	nd->nl_flags |= NLC_DELETE | NLC_REFDVP;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if (nd->nl_dvp == NULL)
		return EINVAL;
	if ((error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
	error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred);
	return (error);
}

/*
 * unlink_args(char *path)
 *
 * Delete a name from the filesystem.
 */
int
sys_unlink(struct sysmsg *sysmsg, const struct unlink_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_unlink(&nd);
	nlookup_done(&nd);
	return (error);
}


/*
 * unlinkat_args(int fd, char *path, int flags)
 *
 * Delete the file or directory entry pointed to by fd/path.
 */
int
sys_unlinkat(struct sysmsg *sysmsg, const struct unlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	/* Only AT_REMOVEDIR is supported */
	if (uap->flags & ~AT_REMOVEDIR)
		return (EINVAL);

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		if (uap->flags & AT_REMOVEDIR)
			error = kern_rmdir(&nd);
		else
			error = kern_unlink(&nd);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Reposition the seek offset of descriptor fd.  whence is L_SET, L_INCR
 * or L_XTND; the resulting offset is returned in *res.  The file's
 * spinlock serializes offset updates against concurrent I/O.
 */
int
kern_lseek(int fd, off_t offset, int whence, off_t *res)
{
	struct thread *td = curthread;
	struct file *fp;
	struct vnode *vp;
	struct vattr vattr;
	off_t new_offset;
	int error;

	fp = holdfp(td, fd, -1);
	if (fp == NULL)
		return (EBADF);
	/* Seeking only makes sense on vnodes (pipes/sockets get ESPIPE) */
	if (fp->f_type != DTYPE_VNODE) {
		error = ESPIPE;
		goto done;
	}
	vp = (struct vnode *)fp->f_data;

	/* Every case acquires f_spin; it is released after the update below */
	switch (whence) {
	case L_INCR:
		spin_lock(&fp->f_spin);
		new_offset = fp->f_offset + offset;
		error = 0;
		break;
	case L_XTND:
		/* GETATTR is done before taking the spinlock (it may block) */
		error = VOP_GETATTR_QUICK(vp, &vattr);
		spin_lock(&fp->f_spin);
		new_offset = offset + vattr.va_size;
		break;
	case L_SET:
		new_offset = offset;
		error = 0;
		spin_lock(&fp->f_spin);
		break;
	default:
		new_offset = 0;
		error = EINVAL;
		spin_lock(&fp->f_spin);
		break;
	}

	/*
	 * Validate the seek position.  Negative offsets are not allowed
	 * for regular files or directories.
	 *
	 * Normally we would also not want to allow negative offsets for
	 * character and block-special devices.  However kvm addresses
	 * on 64 bit architectures might appear to be negative and must
	 * be allowed.
	 */
	if (error == 0) {
		if (new_offset < 0 &&
		    (vp->v_type == VREG || vp->v_type == VDIR)) {
			error = EINVAL;
		} else {
			fp->f_offset = new_offset;
		}
	}
	/* On failure *res still reports the (unchanged) current offset */
	*res = fp->f_offset;
	spin_unlock(&fp->f_spin);
done:
	dropfp(td, fd, fp);

	return (error);
}

/*
 * lseek_args(int fd, int pad, off_t offset, int whence)
 *
 * Reposition read/write file offset.
 */
int
sys_lseek(struct sysmsg *sysmsg, const struct lseek_args *uap)
{
	int error;

	error = kern_lseek(uap->fd, uap->offset, uap->whence,
			   &sysmsg->sysmsg_offset);

	return (error);
}

/*
 * Check if current process can access given file.  amode is a bitmask of *_OK
 * access bits.  flags is a bitmask of AT_* flags.
 */
int
kern_access(struct nlookupdata *nd, int amode, int flags)
{
	struct vnode *vp;
	int error, mode;

	/* Only AT_EACCESS is supported */
	if (flags & ~AT_EACCESS)
		return (EINVAL);
	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0)
		return (error);
retry:
	error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp);
	if (error)
		return (error);

	/* Flags == 0 means only check for existence. */
	if (amode) {
		mode = 0;
		if (amode & R_OK)
			mode |= VREAD;
		if (amode & W_OK)
			mode |= VWRITE;
		if (amode & X_OK)
			mode |= VEXEC;
		if ((mode & VWRITE) == 0 ||
		    (error = vn_writechk(vp)) == 0) {
			error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred);
		}

		/*
		 * If the file handle is stale we have to re-resolve the
		 * entry with the ncp held exclusively.  This is a hack
		 * at the moment.
		 */
		if (error == ESTALE) {
			vput(vp);
			cache_unlock(&nd->nl_nch);
			cache_lock(&nd->nl_nch);
			cache_setunresolved(&nd->nl_nch);
			error = cache_resolve(&nd->nl_nch, nd->nl_cred);
			if (error == 0) {
				vp = NULL;
				goto retry;
			}
			return(error);
		}
	}
	vput(vp);
	return (error);
}

/*
 * access_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_access(struct sysmsg *sysmsg, const struct access_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, 0);
	nlookup_done(&nd);
	return (error);
}


/*
 * eaccess_args(char *path, int flags)
 *
 * Check access permissions.
 */
int
sys_eaccess(struct sysmsg *sysmsg, const struct eaccess_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->flags, AT_EACCESS);
	nlookup_done(&nd);
	return (error);
}


/*
 * faccessat_args(int fd, char *path, int amode, int flags)
 *
 * Check access permissions.
 */
int
sys_faccessat(struct sysmsg *sysmsg, const struct faccessat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE,
				NLC_FOLLOW);
	if (error == 0)
		error = kern_access(&nd, uap->amode, uap->flags);
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Stat the file described by the (already initialized) nlookupdata and
 * fill in *st.  Handles ESTALE by re-resolving the namecache entry with
 * an exclusive lock and retrying.
 */
int
kern_stat(struct nlookupdata *nd, struct stat *st)
{
	int error;
	struct vnode *vp;

	nd->nl_flags |= NLC_SHAREDLOCK;
	if ((error = nlookup(nd)) != 0)
		return (error);
again:
	if ((vp = nd->nl_nch.ncp->nc_vp) == NULL)
		return (ENOENT);

#if 1
	/* ref-only path; the #else alternative takes a shared vnode lock */
	error = cache_vref(&nd->nl_nch, NULL, &vp);
#else
	error = vget(vp, LK_SHARED);
#endif
	if (error)
		return (error);
	error = vn_stat(vp, st, nd->nl_cred);

	/*
	 * If the file handle is stale we have to re-resolve the
	 * entry with the ncp held exclusively.  This is a hack
	 * at the moment.
	 */
	if (error == ESTALE) {
#if 1
		vrele(vp);
#else
		vput(vp);
#endif
		cache_unlock(&nd->nl_nch);
		cache_lock(&nd->nl_nch);
		cache_setunresolved(&nd->nl_nch);
		error = cache_resolve(&nd->nl_nch, nd->nl_cred);
		if (error == 0)
			goto again;
	} else {
#if 1
		vrele(vp);
#else
		vput(vp);
#endif
	}
	return (error);
}

/*
 * stat_args(char *path, struct stat *ub)
 *
 * Get file status; this version follows links.
3033 */ 3034 int 3035 sys_stat(struct sysmsg *sysmsg, const struct stat_args *uap) 3036 { 3037 struct nlookupdata nd; 3038 struct stat st; 3039 int error; 3040 3041 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3042 if (error == 0) { 3043 error = kern_stat(&nd, &st); 3044 if (error == 0) 3045 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3046 } 3047 nlookup_done(&nd); 3048 return (error); 3049 } 3050 3051 /* 3052 * lstat_args(char *path, struct stat *ub) 3053 * 3054 * Get file status; this version does not follow links. 3055 */ 3056 int 3057 sys_lstat(struct sysmsg *sysmsg, const struct lstat_args *uap) 3058 { 3059 struct nlookupdata nd; 3060 struct stat st; 3061 int error; 3062 3063 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3064 if (error == 0) { 3065 error = kern_stat(&nd, &st); 3066 if (error == 0) 3067 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3068 } 3069 nlookup_done(&nd); 3070 return (error); 3071 } 3072 3073 /* 3074 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3075 * 3076 * Get status of file pointed to by fd/path. 3077 */ 3078 int 3079 sys_fstatat(struct sysmsg *sysmsg, const struct fstatat_args *uap) 3080 { 3081 struct nlookupdata nd; 3082 struct stat st; 3083 int error; 3084 int flags; 3085 struct file *fp; 3086 3087 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3088 return (EINVAL); 3089 3090 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3091 3092 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3093 UIO_USERSPACE, flags); 3094 if (error == 0) { 3095 error = kern_stat(&nd, &st); 3096 if (error == 0) 3097 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3098 } 3099 nlookup_done_at(&nd, fp); 3100 return (error); 3101 } 3102 3103 static int 3104 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3105 { 3106 struct nlookupdata nd; 3107 struct vnode *vp; 3108 int error; 3109 3110 vp = NULL; 3111 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3112 if (error == 0) 3113 error = nlookup(&nd); 3114 if (error == 0) 3115 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3116 nlookup_done(&nd); 3117 if (error == 0) { 3118 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3119 vput(vp); 3120 } 3121 return (error); 3122 } 3123 3124 /* 3125 * pathconf_Args(char *path, int name) 3126 * 3127 * Get configurable pathname variables. 3128 */ 3129 int 3130 sys_pathconf(struct sysmsg *sysmsg, const struct pathconf_args *uap) 3131 { 3132 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3133 &sysmsg->sysmsg_reg)); 3134 } 3135 3136 /* 3137 * lpathconf_Args(char *path, int name) 3138 * 3139 * Get configurable pathname variables, but don't follow symlinks. 3140 */ 3141 int 3142 sys_lpathconf(struct sysmsg *sysmsg, const struct lpathconf_args *uap) 3143 { 3144 return (kern_pathconf(uap->path, uap->name, 0, &sysmsg->sysmsg_reg)); 3145 } 3146 3147 /* 3148 * XXX: daver 3149 * kern_readlink isn't properly split yet. There is a copyin burried 3150 * in VOP_READLINK(). 
3151 */ 3152 int 3153 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3154 { 3155 struct thread *td = curthread; 3156 struct vnode *vp; 3157 struct iovec aiov; 3158 struct uio auio; 3159 int error; 3160 3161 nd->nl_flags |= NLC_SHAREDLOCK; 3162 if ((error = nlookup(nd)) != 0) 3163 return (error); 3164 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3165 if (error) 3166 return (error); 3167 if (vp->v_type != VLNK) { 3168 error = EINVAL; 3169 } else { 3170 aiov.iov_base = buf; 3171 aiov.iov_len = count; 3172 auio.uio_iov = &aiov; 3173 auio.uio_iovcnt = 1; 3174 auio.uio_offset = 0; 3175 auio.uio_rw = UIO_READ; 3176 auio.uio_segflg = UIO_USERSPACE; 3177 auio.uio_td = td; 3178 auio.uio_resid = count; 3179 error = VOP_READLINK(vp, &auio, td->td_ucred); 3180 } 3181 vput(vp); 3182 *res = count - auio.uio_resid; 3183 return (error); 3184 } 3185 3186 /* 3187 * readlink_args(char *path, char *buf, int count) 3188 * 3189 * Return target name of a symbolic link. 3190 */ 3191 int 3192 sys_readlink(struct sysmsg *sysmsg, const struct readlink_args *uap) 3193 { 3194 struct nlookupdata nd; 3195 int error; 3196 3197 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3198 if (error == 0) { 3199 error = kern_readlink(&nd, uap->buf, uap->count, 3200 &sysmsg->sysmsg_result); 3201 } 3202 nlookup_done(&nd); 3203 return (error); 3204 } 3205 3206 /* 3207 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3208 * 3209 * Return target name of a symbolic link. The path is relative to the 3210 * directory associated with fd. 
 */
int
sys_readlinkat(struct sysmsg *sysmsg, const struct readlinkat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0);
	if (error == 0) {
		error = kern_readlink(&nd, uap->buf, uap->bufsize,
				      &sysmsg->sysmsg_result);
	}
	nlookup_done_at(&nd, fp);
	return (error);
}

/*
 * Set the file flags (chflags) on a referenced vnode.  Requires
 * PRIV_VFS_CHFLAGS_DEV for device nodes.  The vnode is vget/vput'd
 * around the VOP_SETATTR so VINACTIVE is properly cleared.
 */
static int
setfflags(struct vnode *vp, u_long flags)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * Prevent non-root users from setting flags on devices.  When
	 * a device is reused, users can retain ownership of the device
	 * if they are allowed to set flags and programs assume that
	 * chown can't fail when done as root.
	 */
	if ((vp->v_type == VCHR || vp->v_type == VBLK) &&
	    ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0))
		return (error);

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_flags = flags;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		vput(vp);
	}
	return (error);
}

/*
 * chflags(const char *path, u_long flags)
 *
 * Change flags of a file given a path name.
3262 */ 3263 int 3264 sys_chflags(struct sysmsg *sysmsg, const struct chflags_args *uap) 3265 { 3266 struct nlookupdata nd; 3267 struct vnode *vp; 3268 int error; 3269 3270 vp = NULL; 3271 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3272 if (error == 0) 3273 error = nlookup(&nd); 3274 if (error == 0) 3275 error = ncp_writechk(&nd.nl_nch); 3276 if (error == 0) 3277 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3278 nlookup_done(&nd); 3279 if (error == 0) { 3280 error = setfflags(vp, uap->flags); 3281 vrele(vp); 3282 } 3283 return (error); 3284 } 3285 3286 /* 3287 * lchflags(const char *path, u_long flags) 3288 * 3289 * Change flags of a file given a path name, but don't follow symlinks. 3290 */ 3291 int 3292 sys_lchflags(struct sysmsg *sysmsg, const struct lchflags_args *uap) 3293 { 3294 struct nlookupdata nd; 3295 struct vnode *vp; 3296 int error; 3297 3298 vp = NULL; 3299 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3300 if (error == 0) 3301 error = nlookup(&nd); 3302 if (error == 0) 3303 error = ncp_writechk(&nd.nl_nch); 3304 if (error == 0) 3305 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3306 nlookup_done(&nd); 3307 if (error == 0) { 3308 error = setfflags(vp, uap->flags); 3309 vrele(vp); 3310 } 3311 return (error); 3312 } 3313 3314 /* 3315 * fchflags_args(int fd, u_flags flags) 3316 * 3317 * Change flags of a file given a file descriptor. 
 */
int
sys_fchflags(struct sysmsg *sysmsg, const struct fchflags_args *uap)
{
	struct thread *td = curthread;
	struct file *fp;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	/* Only check writability if the fp has a namecache handle */
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfflags((struct vnode *) fp->f_data, uap->flags);
	fdrop(fp);
	return (error);
}

/*
 * chflagsat_args(int fd, const char *path, u_long flags, int atflags)
 * change flags given a pathname relative to a filedescriptor
 */
int
sys_chflagsat(struct sysmsg *sysmsg, const struct chflagsat_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	struct file *fp;
	int error;
	int lookupflags;

	if (uap->atflags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	vp = NULL;
	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
	nlookup_done_at(&nd, fp);
	if (error == 0) {
		error = setfflags(vp, uap->flags);
		vrele(vp);
	}
	return (error);
}


/*
 * Set the permission bits of a referenced vnode.  The vnode is
 * vget/vput'd around the VOP_SETATTR so VINACTIVE is properly cleared,
 * and cached write/execute-ok state is invalidated on success.
 */
static int
setfmode(struct vnode *vp, int mode)
{
	struct thread *td = curthread;
	int error;
	struct vattr vattr;

	/*
	 * note: vget is required for any operation that might mod the vnode
	 * so VINACTIVE is properly cleared.
	 */
	if ((error = vget(vp, LK_EXCLUSIVE)) == 0) {
		VATTR_NULL(&vattr);
		vattr.va_mode = mode & ALLPERMS;
		error = VOP_SETATTR(vp, &vattr, td->td_ucred);
		/* Drop cached mmap write/exec permissions for the vnode */
		cache_inval_wxok(vp);
		vput(vp);
	}
	return error;
}

/*
 * Resolve nd and change the mode of the resulting file.  The nd must be
 * initialized by the caller; lookup flags (e.g. NLC_FOLLOW) control
 * symlink handling.
 */
int
kern_chmod(struct nlookupdata *nd, int mode)
{
	struct vnode *vp;
	int error;

	if ((error = nlookup(nd)) != 0)
		return (error);
	if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0)
		return (error);
	if ((error = ncp_writechk(&nd->nl_nch)) == 0)
		error = setfmode(vp, mode);
	vrele(vp);
	return (error);
}

/*
 * chmod_args(char *path, int mode)
 *
 * Change mode of a file given path name.
 */
int
sys_chmod(struct sysmsg *sysmsg, const struct chmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * lchmod_args(char *path, int mode)
 *
 * Change mode of a file given path name (don't follow links.)
 */
int
sys_lchmod(struct sysmsg *sysmsg, const struct lchmod_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chmod(&nd, uap->mode);
	nlookup_done(&nd);
	return (error);
}

/*
 * fchmod_args(int fd, int mode)
 *
 * Change mode of a file given a file descriptor.
3448 */ 3449 int 3450 sys_fchmod(struct sysmsg *sysmsg, const struct fchmod_args *uap) 3451 { 3452 struct thread *td = curthread; 3453 struct file *fp; 3454 int error; 3455 3456 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3457 return (error); 3458 if (fp->f_nchandle.ncp) 3459 error = ncp_writechk(&fp->f_nchandle); 3460 if (error == 0) 3461 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3462 fdrop(fp); 3463 return (error); 3464 } 3465 3466 /* 3467 * fchmodat_args(char *path, int mode) 3468 * 3469 * Change mode of a file pointed to by fd/path. 3470 */ 3471 int 3472 sys_fchmodat(struct sysmsg *sysmsg, const struct fchmodat_args *uap) 3473 { 3474 struct nlookupdata nd; 3475 struct file *fp; 3476 int error; 3477 int flags; 3478 3479 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3480 return (EINVAL); 3481 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3482 3483 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3484 UIO_USERSPACE, flags); 3485 if (error == 0) 3486 error = kern_chmod(&nd, uap->mode); 3487 nlookup_done_at(&nd, fp); 3488 return (error); 3489 } 3490 3491 static int 3492 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3493 { 3494 struct thread *td = curthread; 3495 int error; 3496 struct vattr vattr; 3497 uid_t o_uid; 3498 gid_t o_gid; 3499 uint64_t size; 3500 3501 /* 3502 * note: vget is required for any operation that might mod the vnode 3503 * so VINACTIVE is properly cleared. 
3504 */ 3505 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3506 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3507 return error; 3508 o_uid = vattr.va_uid; 3509 o_gid = vattr.va_gid; 3510 size = vattr.va_size; 3511 3512 VATTR_NULL(&vattr); 3513 vattr.va_uid = uid; 3514 vattr.va_gid = gid; 3515 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3516 vput(vp); 3517 } 3518 3519 if (error == 0) { 3520 if (uid == -1) 3521 uid = o_uid; 3522 if (gid == -1) 3523 gid = o_gid; 3524 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3525 VFS_ACCOUNT(mp, uid, gid, size); 3526 } 3527 3528 return error; 3529 } 3530 3531 int 3532 kern_chown(struct nlookupdata *nd, int uid, int gid) 3533 { 3534 struct vnode *vp; 3535 int error; 3536 3537 if ((error = nlookup(nd)) != 0) 3538 return (error); 3539 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3540 return (error); 3541 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3542 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3543 vrele(vp); 3544 return (error); 3545 } 3546 3547 /* 3548 * chown(char *path, int uid, int gid) 3549 * 3550 * Set ownership given a path name. 3551 */ 3552 int 3553 sys_chown(struct sysmsg *sysmsg, const struct chown_args *uap) 3554 { 3555 struct nlookupdata nd; 3556 int error; 3557 3558 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3559 if (error == 0) 3560 error = kern_chown(&nd, uap->uid, uap->gid); 3561 nlookup_done(&nd); 3562 return (error); 3563 } 3564 3565 /* 3566 * lchown_args(char *path, int uid, int gid) 3567 * 3568 * Set ownership given a path name, do not cross symlinks. 
 */
int
sys_lchown(struct sysmsg *sysmsg, const struct lchown_args *uap)
{
	struct nlookupdata nd;
	int error;

	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done(&nd);
	return (error);
}

/*
 * fchown_args(int fd, int uid, int gid)
 *
 * Set ownership given a file descriptor.
 */
int
sys_fchown(struct sysmsg *sysmsg, const struct fchown_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct file *fp;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	if (fp->f_nchandle.ncp)
		error = ncp_writechk(&fp->f_nchandle);
	if (error == 0)
		error = setfown(p->p_fd->fd_ncdir.mount,
			(struct vnode *)fp->f_data, uap->uid, uap->gid);
	fdrop(fp);
	return (error);
}

/*
 * fchownat(int fd, char *path, int uid, int gid, int flags)
 *
 * Set ownership of file pointed to by fd/path.
 */
int
sys_fchownat(struct sysmsg *sysmsg, const struct fchownat_args *uap)
{
	struct nlookupdata nd;
	struct file *fp;
	int error;
	int flags;

	if (uap->flags & ~AT_SYMLINK_NOFOLLOW)
		return (EINVAL);
	flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW;

	error = nlookup_init_at(&nd, &fp, uap->fd, uap->path,
				UIO_USERSPACE, flags);
	if (error == 0)
		error = kern_chown(&nd, uap->uid, uap->gid);
	nlookup_done_at(&nd, fp);
	return (error);
}


/*
 * Convert an optional timeval pair into a timespec pair.  A NULL tvp
 * means "now" for both access and modification times.
 */
static int
getutimes(struct timeval *tvp, struct timespec *tsp)
{
	struct timeval tv[2];
	int error;

	if (tvp == NULL) {
		microtime(&tv[0]);
		TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]);
		tsp[1] = tsp[0];
	} else {
		if ((error = itimerfix(tvp)) != 0)
			return (error);
		TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]);
		TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]);
	}
	return 0;
}

/*
 * Normalize a utimensat-style timespec pair into newts, resolving
 * UTIME_NOW/UTIME_OMIT.  *nullflag is set when both times are "now"
 * (the VA_UTIMES_NULL case, relaxing the permission check).
 */
static int
getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag)
{
	struct timespec tsnow;
	int error;

	*nullflag = 0;
	nanotime(&tsnow);
	if (ts == NULL) {
		newts[0] = tsnow;
		newts[1] = tsnow;
		*nullflag = 1;
		return (0);
	}

	newts[0] = ts[0];
	newts[1] = ts[1];
	/* Both omitted: nothing to do, caller's VOP becomes a no-op */
	if (newts[0].tv_nsec == UTIME_OMIT && newts[1].tv_nsec == UTIME_OMIT)
		return (0);
	if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW)
		*nullflag = 1;

	/* VNOVAL in tv_sec marks an omitted field for VOP_SETATTR */
	if (newts[0].tv_nsec == UTIME_OMIT)
		newts[0].tv_sec = VNOVAL;
	else if (newts[0].tv_nsec == UTIME_NOW)
		newts[0] = tsnow;
	else if ((error = itimespecfix(&newts[0])) != 0)
		return (error);

	if (newts[1].tv_nsec == UTIME_OMIT)
		newts[1].tv_sec = VNOVAL;
	else if (newts[1].tv_nsec == UTIME_NOW)
		newts[1] = tsnow;
	else if ((error = itimespecfix(&newts[1])) != 0)
		return (error);

	return (0);
}

/*
 * Apply access/modification times to a locked vnode via VOP_SETATTR.
 * nullflag sets VA_UTIMES_NULL ("times were defaulted"), which relaxes
 * the ownership requirement to write permission.
 */
static int
setutimes(struct vnode *vp, struct vattr *vattr,
	  const struct timespec *ts, int nullflag)
{
	struct thread *td = curthread;
	int error;

	VATTR_NULL(vattr);
	vattr->va_atime = ts[0];
	vattr->va_mtime = ts[1];
	if (nullflag)
		vattr->va_vaflags |= VA_UTIMES_NULL;
	error = VOP_SETATTR(vp, vattr, td->td_ucred);

	return error;
}

/*
 * Resolve nd and set its file's times from an optional timeval pair
 * (NULL means "now").  Thin wrapper over kern_utimensat().
 */
int
kern_utimes(struct nlookupdata *nd, struct timeval *tptr)
{
	struct timespec ts[2];
	int error;

	if (tptr) {
		if ((error = getutimes(tptr, ts)) != 0)
			return (error);
	}
	error = kern_utimensat(nd, tptr ? ts : NULL, 0);
	return (error);
}

/*
 * utimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_utimes(struct sysmsg *sysmsg, const struct utimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * lutimes_args(char *path, struct timeval *tptr)
 *
 * Set the access and modification times of a file.
 */
int
sys_lutimes(struct sysmsg *sysmsg, const struct lutimes_args *uap)
{
	struct timeval tv[2];
	struct nlookupdata nd;
	int error;

	if (uap->tptr) {
		error = copyin(uap->tptr, tv, sizeof(tv));
		if (error)
			return (error);
	}
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0);
	if (error == 0)
		error = kern_utimes(&nd, uap->tptr ? tv : NULL);
	nlookup_done(&nd);
	return (error);
}

/*
 * Set utimes on a file descriptor.  The creds used to open the
 * file are used to determine whether the operation is allowed
 * or not.
3774 */ 3775 int 3776 kern_futimens(int fd, struct timespec *ts) 3777 { 3778 struct thread *td = curthread; 3779 struct timespec newts[2]; 3780 struct file *fp; 3781 struct vnode *vp; 3782 struct vattr vattr; 3783 int nullflag; 3784 int error; 3785 3786 error = getutimens(ts, newts, &nullflag); 3787 if (error) 3788 return (error); 3789 if ((error = holdvnode(td, fd, &fp)) != 0) 3790 return (error); 3791 if (fp->f_nchandle.ncp) 3792 error = ncp_writechk(&fp->f_nchandle); 3793 if (error == 0) { 3794 vp = fp->f_data; 3795 error = vget(vp, LK_EXCLUSIVE); 3796 if (error == 0) { 3797 error = VOP_GETATTR_FP(vp, &vattr, fp); 3798 if (error == 0) { 3799 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3800 fp->f_cred); 3801 } 3802 if (error == 0) { 3803 error = setutimes(vp, &vattr, newts, nullflag); 3804 } 3805 vput(vp); 3806 } 3807 } 3808 fdrop(fp); 3809 return (error); 3810 } 3811 3812 /* 3813 * futimens_args(int fd, struct timespec *ts) 3814 * 3815 * Set the access and modification times of a file. 3816 */ 3817 int 3818 sys_futimens(struct sysmsg *sysmsg, const struct futimens_args *uap) 3819 { 3820 struct timespec ts[2]; 3821 int error; 3822 3823 if (uap->ts) { 3824 error = copyin(uap->ts, ts, sizeof(ts)); 3825 if (error) 3826 return (error); 3827 } 3828 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3829 return (error); 3830 } 3831 3832 int 3833 kern_futimes(int fd, struct timeval *tptr) 3834 { 3835 struct timespec ts[2]; 3836 int error; 3837 3838 if (tptr) { 3839 if ((error = getutimes(tptr, ts)) != 0) 3840 return (error); 3841 } 3842 error = kern_futimens(fd, tptr ? ts : NULL); 3843 return (error); 3844 } 3845 3846 /* 3847 * futimes_args(int fd, struct timeval *tptr) 3848 * 3849 * Set the access and modification times of a file. 
3850 */ 3851 int 3852 sys_futimes(struct sysmsg *sysmsg, const struct futimes_args *uap) 3853 { 3854 struct timeval tv[2]; 3855 int error; 3856 3857 if (uap->tptr) { 3858 error = copyin(uap->tptr, tv, sizeof(tv)); 3859 if (error) 3860 return (error); 3861 } 3862 error = kern_futimes(uap->fd, uap->tptr ? tv : NULL); 3863 return (error); 3864 } 3865 3866 int 3867 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3868 { 3869 struct timespec newts[2]; 3870 struct vnode *vp; 3871 struct vattr vattr; 3872 int nullflag; 3873 int error; 3874 3875 if (flags & ~AT_SYMLINK_NOFOLLOW) 3876 return (EINVAL); 3877 3878 error = getutimens(ts, newts, &nullflag); 3879 if (error) 3880 return (error); 3881 3882 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3883 if ((error = nlookup(nd)) != 0) 3884 return (error); 3885 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3886 return (error); 3887 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3888 return (error); 3889 if ((error = vn_writechk(vp)) == 0) { 3890 error = vget(vp, LK_EXCLUSIVE); 3891 if (error == 0) { 3892 error = setutimes(vp, &vattr, newts, nullflag); 3893 vput(vp); 3894 } 3895 } 3896 vrele(vp); 3897 return (error); 3898 } 3899 3900 /* 3901 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3902 * 3903 * Set file access and modification times of a file. 3904 */ 3905 int 3906 sys_utimensat(struct sysmsg *sysmsg, const struct utimensat_args *uap) 3907 { 3908 struct timespec ts[2]; 3909 struct nlookupdata nd; 3910 struct file *fp; 3911 int error; 3912 int flags; 3913 3914 if (uap->ts) { 3915 error = copyin(uap->ts, ts, sizeof(ts)); 3916 if (error) 3917 return (error); 3918 } 3919 3920 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3921 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3922 UIO_USERSPACE, flags); 3923 if (error == 0) 3924 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3925 nlookup_done_at(&nd, fp); 3926 return (error); 3927 } 3928 3929 int 3930 kern_truncate(struct nlookupdata *nd, off_t length) 3931 { 3932 struct vnode *vp; 3933 struct vattr vattr; 3934 int error; 3935 uid_t uid = 0; 3936 gid_t gid = 0; 3937 uint64_t old_size = 0; 3938 3939 if (length < 0) 3940 return(EINVAL); 3941 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3942 if ((error = nlookup(nd)) != 0) 3943 return (error); 3944 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3945 return (error); 3946 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3947 return (error); 3948 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3949 if (error) { 3950 vrele(vp); 3951 return (error); 3952 } 3953 if (vp->v_type == VDIR) { 3954 error = EISDIR; 3955 goto done; 3956 } 3957 if (vfs_quota_enabled) { 3958 error = VOP_GETATTR(vp, &vattr); 3959 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3960 uid = vattr.va_uid; 3961 gid = vattr.va_gid; 3962 old_size = vattr.va_size; 3963 } 3964 3965 if ((error = vn_writechk(vp)) == 0) { 3966 VATTR_NULL(&vattr); 3967 vattr.va_size = length; 3968 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3969 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3970 } 3971 done: 3972 vput(vp); 3973 return (error); 3974 } 3975 3976 /* 3977 * truncate(char *path, int pad, off_t length) 3978 * 3979 * Truncate a file given its path name. 
3980 */ 3981 int 3982 sys_truncate(struct sysmsg *sysmsg, const struct truncate_args *uap) 3983 { 3984 struct nlookupdata nd; 3985 int error; 3986 3987 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3988 if (error == 0) 3989 error = kern_truncate(&nd, uap->length); 3990 nlookup_done(&nd); 3991 return error; 3992 } 3993 3994 int 3995 kern_ftruncate(int fd, off_t length) 3996 { 3997 struct thread *td = curthread; 3998 struct vattr vattr; 3999 struct vnode *vp; 4000 struct file *fp; 4001 int error; 4002 uid_t uid = 0; 4003 gid_t gid = 0; 4004 uint64_t old_size = 0; 4005 struct mount *mp; 4006 4007 if (length < 0) 4008 return(EINVAL); 4009 if ((error = holdvnode(td, fd, &fp)) != 0) 4010 return (error); 4011 if (fp->f_nchandle.ncp) { 4012 error = ncp_writechk(&fp->f_nchandle); 4013 if (error) 4014 goto done; 4015 } 4016 if ((fp->f_flag & FWRITE) == 0) { 4017 error = EINVAL; 4018 goto done; 4019 } 4020 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 4021 error = EINVAL; 4022 goto done; 4023 } 4024 vp = (struct vnode *)fp->f_data; 4025 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4026 if (vp->v_type == VDIR) { 4027 error = EISDIR; 4028 vn_unlock(vp); 4029 goto done; 4030 } 4031 4032 if (vfs_quota_enabled) { 4033 error = VOP_GETATTR_FP(vp, &vattr, fp); 4034 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 4035 uid = vattr.va_uid; 4036 gid = vattr.va_gid; 4037 old_size = vattr.va_size; 4038 } 4039 4040 if ((error = vn_writechk(vp)) == 0) { 4041 VATTR_NULL(&vattr); 4042 vattr.va_size = length; 4043 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp); 4044 mp = vq_vptomp(vp); 4045 VFS_ACCOUNT(mp, uid, gid, length - old_size); 4046 } 4047 vn_unlock(vp); 4048 done: 4049 fdrop(fp); 4050 return (error); 4051 } 4052 4053 /* 4054 * ftruncate_args(int fd, int pad, off_t length) 4055 * 4056 * Truncate a file given a file descriptor. 
4057 */ 4058 int 4059 sys_ftruncate(struct sysmsg *sysmsg, const struct ftruncate_args *uap) 4060 { 4061 int error; 4062 4063 error = kern_ftruncate(uap->fd, uap->length); 4064 4065 return (error); 4066 } 4067 4068 /* 4069 * fsync(int fd) 4070 * 4071 * Sync an open file. 4072 */ 4073 int 4074 sys_fsync(struct sysmsg *sysmsg, const struct fsync_args *uap) 4075 { 4076 struct thread *td = curthread; 4077 struct vnode *vp; 4078 struct file *fp; 4079 vm_object_t obj; 4080 int error; 4081 4082 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 4083 return (error); 4084 vp = (struct vnode *)fp->f_data; 4085 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4086 if ((obj = vp->v_object) != NULL) { 4087 if (vp->v_mount == NULL || 4088 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 4089 vm_object_page_clean(obj, 0, 0, 0); 4090 } 4091 } 4092 error = VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp); 4093 if (error == 0 && vp->v_mount) 4094 error = buf_fsync(vp); 4095 vn_unlock(vp); 4096 fdrop(fp); 4097 4098 return (error); 4099 } 4100 4101 int 4102 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 4103 { 4104 struct nchandle fnchd; 4105 struct nchandle tnchd; 4106 struct namecache *ncp; 4107 struct vnode *fdvp; 4108 struct vnode *tdvp; 4109 struct mount *mp; 4110 int error; 4111 u_int fncp_gen; 4112 u_int tncp_gen; 4113 4114 bwillinode(1); 4115 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 4116 if ((error = nlookup(fromnd)) != 0) 4117 return (error); 4118 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 4119 return (ENOENT); 4120 fnchd.mount = fromnd->nl_nch.mount; 4121 cache_hold(&fnchd); 4122 4123 /* 4124 * unlock the source nch so we can lookup the target nch without 4125 * deadlocking. The target may or may not exist so we do not check 4126 * for a target vp like kern_mkdir() and other creation functions do. 
4127 * 4128 * The source and target directories are ref'd and rechecked after 4129 * everything is relocked to determine if the source or target file 4130 * has been renamed. 4131 */ 4132 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4133 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4134 4135 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4136 4137 cache_unlock(&fromnd->nl_nch); 4138 4139 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4140 if ((error = nlookup(tond)) != 0) { 4141 cache_drop(&fnchd); 4142 return (error); 4143 } 4144 tncp_gen = tond->nl_nch.ncp->nc_generation; 4145 4146 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4147 cache_drop(&fnchd); 4148 return (ENOENT); 4149 } 4150 tnchd.mount = tond->nl_nch.mount; 4151 cache_hold(&tnchd); 4152 4153 /* 4154 * If the source and target are the same there is nothing to do 4155 */ 4156 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4157 cache_drop(&fnchd); 4158 cache_drop(&tnchd); 4159 return (0); 4160 } 4161 4162 /* 4163 * Mount points cannot be renamed or overwritten 4164 */ 4165 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4166 NCF_ISMOUNTPT 4167 ) { 4168 cache_drop(&fnchd); 4169 cache_drop(&tnchd); 4170 return (EINVAL); 4171 } 4172 4173 /* 4174 * Relock the source ncp. cache_relock() will deal with any 4175 * deadlocks against the already-locked tond and will also 4176 * make sure both are resolved. 4177 * 4178 * NOTE AFTER RELOCKING: The source or target ncp may have become 4179 * invalid while they were unlocked, nc_vp and nc_mount could 4180 * be NULL. 4181 */ 4182 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 4183 &tond->nl_nch, tond->nl_cred); 4184 fromnd->nl_flags |= NLC_NCPISLOCKED; 4185 4186 /* 4187 * If the namecache generation changed for either fromnd or tond, 4188 * we must retry. 
4189 */ 4190 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 4191 tond->nl_nch.ncp->nc_generation != tncp_gen) { 4192 kprintf("kern_rename: retry due to gen on: " 4193 "\"%s\" -> \"%s\"\n", 4194 fromnd->nl_nch.ncp->nc_name, 4195 tond->nl_nch.ncp->nc_name); 4196 cache_drop(&fnchd); 4197 cache_drop(&tnchd); 4198 return (EAGAIN); 4199 } 4200 4201 /* 4202 * If either fromnd or tond are marked destroyed a ripout occured 4203 * out from under us and we must retry. 4204 */ 4205 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 4206 fromnd->nl_nch.ncp->nc_vp == NULL || 4207 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 4208 kprintf("kern_rename: retry due to ripout on: " 4209 "\"%s\" -> \"%s\"\n", 4210 fromnd->nl_nch.ncp->nc_name, 4211 tond->nl_nch.ncp->nc_name); 4212 cache_drop(&fnchd); 4213 cache_drop(&tnchd); 4214 return (EAGAIN); 4215 } 4216 4217 /* 4218 * Make sure the parent directories linkages are the same. 4219 * XXX shouldn't be needed any more w/ generation check above. 4220 */ 4221 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4222 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4223 cache_drop(&fnchd); 4224 cache_drop(&tnchd); 4225 return (ENOENT); 4226 } 4227 4228 /* 4229 * Both the source and target must be within the same filesystem and 4230 * in the same filesystem as their parent directories within the 4231 * namecache topology. 4232 * 4233 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 4234 */ 4235 mp = fnchd.mount; 4236 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4237 mp != tond->nl_nch.mount) { 4238 cache_drop(&fnchd); 4239 cache_drop(&tnchd); 4240 return (EXDEV); 4241 } 4242 4243 /* 4244 * Make sure the mount point is writable 4245 */ 4246 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4247 cache_drop(&fnchd); 4248 cache_drop(&tnchd); 4249 return (error); 4250 } 4251 4252 /* 4253 * If the target exists and either the source or target is a directory, 4254 * then both must be directories. 
4255 * 4256 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4257 * have become NULL. 4258 */ 4259 if (tond->nl_nch.ncp->nc_vp) { 4260 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4261 error = ENOENT; 4262 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4263 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4264 error = ENOTDIR; 4265 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4266 error = EISDIR; 4267 } 4268 } 4269 4270 /* 4271 * You cannot rename a source into itself or a subdirectory of itself. 4272 * We check this by travsersing the target directory upwards looking 4273 * for a match against the source. 4274 * 4275 * XXX MPSAFE 4276 */ 4277 if (error == 0) { 4278 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4279 if (fromnd->nl_nch.ncp == ncp) { 4280 error = EINVAL; 4281 break; 4282 } 4283 } 4284 } 4285 4286 cache_drop(&fnchd); 4287 cache_drop(&tnchd); 4288 4289 /* 4290 * Even though the namespaces are different, they may still represent 4291 * hardlinks to the same file. The filesystem might have a hard time 4292 * with this so we issue a NREMOVE of the source instead of a NRENAME 4293 * when we detect the situation. 4294 */ 4295 if (error == 0) { 4296 fdvp = fromnd->nl_dvp; 4297 tdvp = tond->nl_dvp; 4298 if (fdvp == NULL || tdvp == NULL) { 4299 error = EPERM; 4300 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4301 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4302 fromnd->nl_cred); 4303 } else { 4304 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4305 fdvp, tdvp, tond->nl_cred); 4306 } 4307 } 4308 return (error); 4309 } 4310 4311 /* 4312 * rename_args(char *from, char *to) 4313 * 4314 * Rename files. Source and destination must either both be directories, 4315 * or both not be directories. If target is a directory, it must be empty. 
4316 */ 4317 int 4318 sys_rename(struct sysmsg *sysmsg, const struct rename_args *uap) 4319 { 4320 struct nlookupdata fromnd, tond; 4321 int error; 4322 4323 do { 4324 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4325 if (error == 0) { 4326 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4327 if (error == 0) 4328 error = kern_rename(&fromnd, &tond); 4329 nlookup_done(&tond); 4330 } 4331 nlookup_done(&fromnd); 4332 } while (error == EAGAIN); 4333 return (error); 4334 } 4335 4336 /* 4337 * renameat_args(int oldfd, char *old, int newfd, char *new) 4338 * 4339 * Rename files using paths relative to the directories associated with 4340 * oldfd and newfd. Source and destination must either both be directories, 4341 * or both not be directories. If target is a directory, it must be empty. 4342 */ 4343 int 4344 sys_renameat(struct sysmsg *sysmsg, const struct renameat_args *uap) 4345 { 4346 struct nlookupdata oldnd, newnd; 4347 struct file *oldfp, *newfp; 4348 int error; 4349 4350 do { 4351 error = nlookup_init_at(&oldnd, &oldfp, 4352 uap->oldfd, uap->old, 4353 UIO_USERSPACE, 0); 4354 if (error == 0) { 4355 error = nlookup_init_at(&newnd, &newfp, 4356 uap->newfd, uap->new, 4357 UIO_USERSPACE, 0); 4358 if (error == 0) 4359 error = kern_rename(&oldnd, &newnd); 4360 nlookup_done_at(&newnd, newfp); 4361 } 4362 nlookup_done_at(&oldnd, oldfp); 4363 } while (error == EAGAIN); 4364 return (error); 4365 } 4366 4367 int 4368 kern_mkdir(struct nlookupdata *nd, int mode) 4369 { 4370 struct thread *td = curthread; 4371 struct proc *p = td->td_proc; 4372 struct vnode *vp; 4373 struct vattr vattr; 4374 int error; 4375 4376 bwillinode(1); 4377 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4378 if ((error = nlookup(nd)) != 0) 4379 return (error); 4380 4381 if (nd->nl_nch.ncp->nc_vp) 4382 return (EEXIST); 4383 if (nd->nl_dvp == NULL) 4384 return (EINVAL); 4385 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4386 return (error); 4387 VATTR_NULL(&vattr); 4388 
vattr.va_type = VDIR; 4389 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4390 4391 vp = NULL; 4392 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4393 if (error == 0) 4394 vput(vp); 4395 return (error); 4396 } 4397 4398 /* 4399 * mkdir_args(char *path, int mode) 4400 * 4401 * Make a directory file. 4402 */ 4403 int 4404 sys_mkdir(struct sysmsg *sysmsg, const struct mkdir_args *uap) 4405 { 4406 struct nlookupdata nd; 4407 int error; 4408 4409 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4410 if (error == 0) 4411 error = kern_mkdir(&nd, uap->mode); 4412 nlookup_done(&nd); 4413 return (error); 4414 } 4415 4416 /* 4417 * mkdirat_args(int fd, char *path, mode_t mode) 4418 * 4419 * Make a directory file. The path is relative to the directory associated 4420 * with fd. 4421 */ 4422 int 4423 sys_mkdirat(struct sysmsg *sysmsg, const struct mkdirat_args *uap) 4424 { 4425 struct nlookupdata nd; 4426 struct file *fp; 4427 int error; 4428 4429 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4430 if (error == 0) 4431 error = kern_mkdir(&nd, uap->mode); 4432 nlookup_done_at(&nd, fp); 4433 return (error); 4434 } 4435 4436 int 4437 kern_rmdir(struct nlookupdata *nd) 4438 { 4439 int error; 4440 4441 bwillinode(1); 4442 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4443 if ((error = nlookup(nd)) != 0) 4444 return (error); 4445 4446 /* 4447 * Do not allow directories representing mount points to be 4448 * deleted, even if empty. Check write perms on mount point 4449 * in case the vnode is aliased (aka nullfs). 4450 */ 4451 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4452 return (EBUSY); 4453 if (nd->nl_dvp == NULL) 4454 return (EINVAL); 4455 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4456 return (error); 4457 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4458 return (error); 4459 } 4460 4461 /* 4462 * rmdir_args(char *path) 4463 * 4464 * Remove a directory file. 
4465 */ 4466 int 4467 sys_rmdir(struct sysmsg *sysmsg, const struct rmdir_args *uap) 4468 { 4469 struct nlookupdata nd; 4470 int error; 4471 4472 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4473 if (error == 0) 4474 error = kern_rmdir(&nd); 4475 nlookup_done(&nd); 4476 return (error); 4477 } 4478 4479 int 4480 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4481 enum uio_seg direction) 4482 { 4483 struct thread *td = curthread; 4484 struct vnode *vp; 4485 struct file *fp; 4486 struct uio auio; 4487 struct iovec aiov; 4488 off_t loff; 4489 int error, eofflag; 4490 4491 if ((error = holdvnode(td, fd, &fp)) != 0) 4492 return (error); 4493 if ((fp->f_flag & FREAD) == 0) { 4494 error = EBADF; 4495 goto done; 4496 } 4497 vp = (struct vnode *)fp->f_data; 4498 if (vp->v_type != VDIR) { 4499 error = EINVAL; 4500 goto done; 4501 } 4502 aiov.iov_base = buf; 4503 aiov.iov_len = count; 4504 auio.uio_iov = &aiov; 4505 auio.uio_iovcnt = 1; 4506 auio.uio_rw = UIO_READ; 4507 auio.uio_segflg = direction; 4508 auio.uio_td = td; 4509 auio.uio_resid = count; 4510 loff = auio.uio_offset = fp->f_offset; 4511 error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp); 4512 fp->f_offset = auio.uio_offset; 4513 if (error) 4514 goto done; 4515 4516 /* 4517 * WARNING! *basep may not be wide enough to accomodate the 4518 * seek offset. XXX should we hack this to return the upper 32 bits 4519 * for offsets greater then 4G? 4520 */ 4521 if (basep) { 4522 *basep = (long)loff; 4523 } 4524 *res = count - auio.uio_resid; 4525 done: 4526 fdrop(fp); 4527 return (error); 4528 } 4529 4530 /* 4531 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4532 * 4533 * Read a block of directory entries in a file system independent format. 
4534 */ 4535 int 4536 sys_getdirentries(struct sysmsg *sysmsg, const struct getdirentries_args *uap) 4537 { 4538 long base; 4539 int error; 4540 4541 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4542 &sysmsg->sysmsg_result, UIO_USERSPACE); 4543 4544 if (error == 0 && uap->basep) 4545 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4546 return (error); 4547 } 4548 4549 /* 4550 * getdents_args(int fd, char *buf, size_t count) 4551 */ 4552 int 4553 sys_getdents(struct sysmsg *sysmsg, const struct getdents_args *uap) 4554 { 4555 int error; 4556 4557 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4558 &sysmsg->sysmsg_result, UIO_USERSPACE); 4559 4560 return (error); 4561 } 4562 4563 /* 4564 * Set the mode mask for creation of filesystem nodes. 4565 * 4566 * umask(int newmask) 4567 */ 4568 int 4569 sys_umask(struct sysmsg *sysmsg, const struct umask_args *uap) 4570 { 4571 struct thread *td = curthread; 4572 struct proc *p = td->td_proc; 4573 struct filedesc *fdp; 4574 4575 fdp = p->p_fd; 4576 sysmsg->sysmsg_result = fdp->fd_cmask; 4577 fdp->fd_cmask = uap->newmask & ALLPERMS; 4578 return (0); 4579 } 4580 4581 /* 4582 * revoke(char *path) 4583 * 4584 * Void all references to file by ripping underlying filesystem 4585 * away from vnode. 
4586 */ 4587 int 4588 sys_revoke(struct sysmsg *sysmsg, const struct revoke_args *uap) 4589 { 4590 struct nlookupdata nd; 4591 struct vattr vattr; 4592 struct vnode *vp; 4593 struct ucred *cred; 4594 int error; 4595 4596 vp = NULL; 4597 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4598 if (error == 0) 4599 error = nlookup(&nd); 4600 if (error == 0) 4601 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4602 cred = crhold(nd.nl_cred); 4603 nlookup_done(&nd); 4604 if (error == 0) { 4605 if (error == 0) 4606 error = VOP_GETATTR(vp, &vattr); 4607 if (error == 0 && cred->cr_uid != vattr.va_uid) 4608 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4609 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4610 if (vcount(vp) > 0) 4611 error = vrevoke(vp, cred); 4612 } else if (error == 0) { 4613 error = vrevoke(vp, cred); 4614 } 4615 vrele(vp); 4616 } 4617 if (cred) 4618 crfree(cred); 4619 return (error); 4620 } 4621 4622 /* 4623 * getfh_args(char *fname, fhandle_t *fhp) 4624 * 4625 * Get (NFS) file handle 4626 * 4627 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4628 * mount. This allows nullfs mounts to be explicitly exported. 4629 * 4630 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4631 * 4632 * nullfs mounts of subdirectories are not safe. That is, it will 4633 * work, but you do not really have protection against access to 4634 * the related parent directories. 
4635 */ 4636 int 4637 sys_getfh(struct sysmsg *sysmsg, const struct getfh_args *uap) 4638 { 4639 struct thread *td = curthread; 4640 struct nlookupdata nd; 4641 fhandle_t fh; 4642 struct vnode *vp; 4643 struct mount *mp; 4644 int error; 4645 4646 /* 4647 * Must be super user 4648 */ 4649 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4650 return (error); 4651 4652 vp = NULL; 4653 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4654 if (error == 0) 4655 error = nlookup(&nd); 4656 if (error == 0) 4657 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4658 mp = nd.nl_nch.mount; 4659 nlookup_done(&nd); 4660 if (error == 0) { 4661 bzero(&fh, sizeof(fh)); 4662 fh.fh_fsid = mp->mnt_stat.f_fsid; 4663 error = VFS_VPTOFH(vp, &fh.fh_fid); 4664 vput(vp); 4665 if (error == 0) 4666 error = copyout(&fh, uap->fhp, sizeof(fh)); 4667 } 4668 return (error); 4669 } 4670 4671 /* 4672 * fhopen_args(const struct fhandle *u_fhp, int flags) 4673 * 4674 * syscall for the rpc.lockd to use to translate a NFS file handle into 4675 * an open descriptor. 4676 * 4677 * warning: do not remove the priv_check() call or this becomes one giant 4678 * security hole. 4679 */ 4680 int 4681 sys_fhopen(struct sysmsg *sysmsg, const struct fhopen_args *uap) 4682 { 4683 struct thread *td = curthread; 4684 struct filedesc *fdp = td->td_proc->p_fd; 4685 struct mount *mp; 4686 struct vnode *vp; 4687 struct fhandle fhp; 4688 struct vattr vat; 4689 struct vattr *vap = &vat; 4690 struct flock lf; 4691 int fmode, mode, error = 0, type; 4692 struct file *nfp; 4693 struct file *fp; 4694 int indx; 4695 4696 /* 4697 * Must be super user 4698 */ 4699 error = priv_check(td, PRIV_ROOT); 4700 if (error) 4701 return (error); 4702 4703 fmode = FFLAGS(uap->flags); 4704 4705 /* 4706 * Why not allow a non-read/write open for our lockd? 
4707 */ 4708 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4709 return (EINVAL); 4710 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4711 if (error) 4712 return(error); 4713 4714 /* 4715 * Find the mount point 4716 */ 4717 mp = vfs_getvfs(&fhp.fh_fsid); 4718 if (mp == NULL) { 4719 error = ESTALE; 4720 goto done2; 4721 } 4722 /* now give me my vnode, it gets returned to me locked */ 4723 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4724 if (error) 4725 goto done; 4726 /* 4727 * from now on we have to make sure not 4728 * to forget about the vnode 4729 * any error that causes an abort must vput(vp) 4730 * just set error = err and 'goto bad;'. 4731 */ 4732 4733 /* 4734 * from vn_open 4735 */ 4736 if (vp->v_type == VLNK) { 4737 error = EMLINK; 4738 goto bad; 4739 } 4740 if (vp->v_type == VSOCK) { 4741 error = EOPNOTSUPP; 4742 goto bad; 4743 } 4744 mode = 0; 4745 if (fmode & (FWRITE | O_TRUNC)) { 4746 if (vp->v_type == VDIR) { 4747 error = EISDIR; 4748 goto bad; 4749 } 4750 error = vn_writechk(vp); 4751 if (error) 4752 goto bad; 4753 mode |= VWRITE; 4754 } 4755 if (fmode & FREAD) 4756 mode |= VREAD; 4757 if (mode) { 4758 error = VOP_ACCESS(vp, mode, td->td_ucred); 4759 if (error) 4760 goto bad; 4761 } 4762 if (fmode & O_TRUNC) { 4763 vn_unlock(vp); /* XXX */ 4764 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4765 VATTR_NULL(vap); 4766 vap->va_size = 0; 4767 error = VOP_SETATTR(vp, vap, td->td_ucred); 4768 if (error) 4769 goto bad; 4770 } 4771 4772 /* 4773 * VOP_OPEN needs the file pointer so it can potentially override 4774 * it. 4775 * 4776 * WARNING! no f_nchandle will be associated when fhopen()ing a 4777 * directory. XXX 4778 */ 4779 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4780 goto bad; 4781 fp = nfp; 4782 4783 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4784 if (error) { 4785 /* 4786 * setting f_ops this way prevents VOP_CLOSE from being 4787 * called or fdrop() releasing the vp from v_data. 
Since 4788 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4789 */ 4790 fp->f_ops = &badfileops; 4791 fp->f_data = NULL; 4792 goto bad_drop; 4793 } 4794 4795 /* 4796 * The fp is given its own reference, we still have our ref and lock. 4797 * 4798 * Assert that all regular files must be created with a VM object. 4799 */ 4800 if (vp->v_type == VREG && vp->v_object == NULL) { 4801 kprintf("fhopen: regular file did not " 4802 "have VM object: %p\n", 4803 vp); 4804 goto bad_drop; 4805 } 4806 4807 /* 4808 * The open was successful. Handle any locking requirements. 4809 */ 4810 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4811 lf.l_whence = SEEK_SET; 4812 lf.l_start = 0; 4813 lf.l_len = 0; 4814 if (fmode & O_EXLOCK) 4815 lf.l_type = F_WRLCK; 4816 else 4817 lf.l_type = F_RDLCK; 4818 if (fmode & FNONBLOCK) 4819 type = 0; 4820 else 4821 type = F_WAIT; 4822 vn_unlock(vp); 4823 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4824 &lf, type)) != 0) { 4825 /* 4826 * release our private reference. 4827 */ 4828 fsetfd(fdp, NULL, indx); 4829 fdrop(fp); 4830 vrele(vp); 4831 goto done; 4832 } 4833 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4834 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4835 } 4836 4837 /* 4838 * Clean up. Associate the file pointer with the previously 4839 * reserved descriptor and return it. 
4840 */ 4841 vput(vp); 4842 if (uap->flags & O_CLOEXEC) 4843 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4844 fsetfd(fdp, fp, indx); 4845 fdrop(fp); 4846 sysmsg->sysmsg_result = indx; 4847 mount_drop(mp); 4848 4849 return (error); 4850 4851 bad_drop: 4852 fsetfd(fdp, NULL, indx); 4853 fdrop(fp); 4854 bad: 4855 vput(vp); 4856 done: 4857 mount_drop(mp); 4858 done2: 4859 return (error); 4860 } 4861 4862 /* 4863 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4864 */ 4865 int 4866 sys_fhstat(struct sysmsg *sysmsg, const struct fhstat_args *uap) 4867 { 4868 struct thread *td = curthread; 4869 struct stat sb; 4870 fhandle_t fh; 4871 struct mount *mp; 4872 struct vnode *vp; 4873 int error; 4874 4875 /* 4876 * Must be super user 4877 */ 4878 error = priv_check(td, PRIV_ROOT); 4879 if (error) 4880 return (error); 4881 4882 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4883 if (error) 4884 return (error); 4885 4886 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4887 error = ESTALE; 4888 if (error == 0) { 4889 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4890 error = vn_stat(vp, &sb, td->td_ucred); 4891 vput(vp); 4892 } 4893 } 4894 if (error == 0) 4895 error = copyout(&sb, uap->sb, sizeof(sb)); 4896 if (mp) 4897 mount_drop(mp); 4898 4899 return (error); 4900 } 4901 4902 /* 4903 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4904 */ 4905 int 4906 sys_fhstatfs(struct sysmsg *sysmsg, const struct fhstatfs_args *uap) 4907 { 4908 struct thread *td = curthread; 4909 struct proc *p = td->td_proc; 4910 struct statfs *sp; 4911 struct mount *mp; 4912 struct vnode *vp; 4913 struct statfs sb; 4914 char *fullpath, *freepath; 4915 fhandle_t fh; 4916 int error; 4917 4918 /* 4919 * Must be super user 4920 */ 4921 if ((error = priv_check(td, PRIV_ROOT))) 4922 return (error); 4923 4924 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4925 return (error); 4926 4927 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4928 error = ESTALE; 4929 goto 
done;
	}
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	/*
	 * Convert the file handle to a vnode; the vnode is released
	 * before the VFS_STATFS call.
	 */
	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0)
		goto done;
	mp = vp->v_mount;	/* NOTE(review): reassigns mp; mount_drop
				 * below releases this mp, presumably the
				 * same mount vfs_getvfs returned -- verify.
				 */
	sp = &mp->mnt_stat;
	vput(vp);
	if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	/* Rewrite f_mntonname relative to the caller's (chroot) root. */
	error = mount_path(p, mp, &fullpath, &freepath);
	if (error)
		goto done;
	bzero(sp->f_mntonname, sizeof(sp->f_mntonname));
	strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname));
	kfree(freepath, M_TEMP);

	sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK;
	/* Non-root callers get a zeroed fsid copy so they cannot probe it */
	if (priv_check(td, PRIV_ROOT)) {
		bcopy(sp, &sb, sizeof(sb));
		sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0;
		sp = &sb;
	}
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	if (mp)
		mount_drop(mp);

	return (error);
}

/*
 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf)
 *
 * statvfs-format variant of fhstatfs().  Converts a file handle to a
 * mount and copies its mnt_vstat out to userspace.  Super-user only.
 */
int
sys_fhstatvfs(struct sysmsg *sysmsg, const struct fhstatvfs_args *uap)
{
	struct thread *td = curthread;
	struct proc *p = td->td_proc;
	struct statvfs *sp;
	struct mount *mp;
	struct vnode *vp;
	fhandle_t fh;
	int error;

	/*
	 * Must be super user
	 */
	if ((error = priv_check(td, PRIV_ROOT)))
		return (error);

	if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0)
		return (error);

	/* vfs_getvfs returns a held mount; dropped at done: */
	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) {
		error = ESTALE;
		goto done;
	}
	/* Hide mounts that are outside the process's chroot */
	if (p != NULL && !chroot_visible_mnt(mp, p)) {
		error = ESTALE;
		goto done;
	}

	if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)))
		goto done;
	mp = vp->v_mount;	/* NOTE(review): same reassign-before-drop
				 * pattern as fhstatfs above -- verify. */
	sp = &mp->mnt_vstat;
	vput(vp);
	if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0)
		goto done;

	/* Synthesize the statvfs f_flag bits from the mount flags */
	sp->f_flag = 0;
	if (mp->mnt_flag & MNT_RDONLY)
		sp->f_flag |= ST_RDONLY;
	if (mp->mnt_flag & MNT_NOSUID)
		sp->f_flag |= ST_NOSUID;
	error = copyout(sp, uap->buf, sizeof(*sp));
done:
	if (mp)
		mount_drop(mp);
	return (error);
}


/*
 * Syscall to push extended attribute configuration information into the
 * VFS.  Accepts a path, which it converts to a mountpoint, as well as
 * a command (int cmd), and attribute name and misc data.  For now, the
 * attribute name is left in userspace for consumption by the VFS_op.
 * It will probably be changed to be copied into sysspace by the
 * syscall in the future, once issues with various consumers of the
 * attribute code have raised their hands.
 *
 * Currently this is used only by UFS Extended Attributes.
 */
int
sys_extattrctl(struct sysmsg *sysmsg, const struct extattrctl_args *uap)
{
	struct nlookupdata nd;
	struct vnode *vp;
	char attrname[EXTATTR_MAXNAMELEN];
	int error;
	size_t size;

	attrname[0] = 0;
	vp = NULL;
	error = 0;

	/* Optional filename argument: resolve it to a referenced vnode */
	if (error == 0 && uap->filename) {
		error = nlookup_init(&nd, uap->filename, UIO_USERSPACE,
				     NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp);
		nlookup_done(&nd);
	}

	/*
	 * Optional attribute name.  NOTE(review): attrname[] is copied in
	 * here but the VFS_EXTATTRCTL call below passes the userspace
	 * pointer uap->attrname instead (see the function comment).
	 */
	if (error == 0 && uap->attrname) {
		error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN,
				  &size);
	}

	if (error == 0) {
		error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
		if (error == 0)
			error = nlookup(&nd);
		if (error == 0)
			error = ncp_writechk(&nd.nl_nch);
		if (error == 0) {
			/*
			 * NOTE(review): the cache_vref reference on vp is
			 * not released in this function; presumably
			 * VFS_EXTATTRCTL consumes it -- verify.
			 */
			error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp,
					       uap->attrnamespace,
					       uap->attrname, nd.nl_cred);
		}
		nlookup_done(&nd);
	}

	return (error);
}

/*
 * Syscall to set a named extended attribute on a file or directory.
 */
int
sys_extattr_set_file(struct sysmsg *sysmsg,
		     const struct extattr_set_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct vnode *vp;
	struct uio auio;
	struct iovec aiov;
	int error;

	/* Copy in the attribute name before doing any lookups */
	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	/*
	 * Resolve the path, verify the target is writable, and obtain an
	 * exclusively-locked, referenced vnode.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = ncp_writechk(&nd.nl_nch);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* Build a single-segment uio describing the user data buffer */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = curthread;

	error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);

	vput(vp);
	nlookup_done(&nd);
	return (error);
}

/*
 * Syscall to get a named extended attribute on a file or directory.
 */
int
sys_extattr_get_file(struct sysmsg *sysmsg,
		     const struct extattr_get_file_args *uap)
{
	char attrname[EXTATTR_MAXNAMELEN];
	struct nlookupdata nd;
	struct uio auio;
	struct iovec aiov;
	struct vnode *vp;
	int error;

	/* Copy in the attribute name before doing any lookups */
	error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN);
	if (error)
		return (error);

	vp = NULL;

	/* Read-only access: a shared vnode lock is sufficient */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0)
		error = nlookup(&nd);
	if (error == 0)
		error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp);
	if (error) {
		nlookup_done(&nd);
		return (error);
	}

	/* Build a single-segment uio describing the user receive buffer */
	bzero(&auio, sizeof(auio));
	aiov.iov_base = uap->data;
	aiov.iov_len = uap->nbytes;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = 0;
	auio.uio_resid = uap->nbytes;
	auio.uio_rw = UIO_READ;
	auio.uio_td = curthread;

	error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname,
			       &auio, nd.nl_cred);
	/*
	 * Report bytes transferred.  NOTE(review): this is assigned even
	 * when VOP_GETEXTATTR failed; presumably the result is ignored
	 * when error is nonzero -- verify against the syscall return path.
	 */
	sysmsg->sysmsg_result = uap->nbytes - auio.uio_resid;

	vput(vp);
	nlookup_done(&nd);
	return(error);
}

/*
 * Syscall to delete a named extended attribute from a file or directory.
 * Accepts attribute name. The real work happens in VOP_SETEXTATTR().
5176 */ 5177 int 5178 sys_extattr_delete_file(struct sysmsg *sysmsg, 5179 const struct extattr_delete_file_args *uap) 5180 { 5181 char attrname[EXTATTR_MAXNAMELEN]; 5182 struct nlookupdata nd; 5183 struct vnode *vp; 5184 int error; 5185 5186 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5187 if (error) 5188 return(error); 5189 5190 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5191 if (error == 0) 5192 error = nlookup(&nd); 5193 if (error == 0) 5194 error = ncp_writechk(&nd.nl_nch); 5195 if (error == 0) { 5196 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5197 if (error == 0) { 5198 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5199 attrname, NULL, nd.nl_cred); 5200 vput(vp); 5201 } 5202 } 5203 nlookup_done(&nd); 5204 return(error); 5205 } 5206 5207 /* 5208 * Determine if the mount is visible to the process. 5209 */ 5210 static int 5211 chroot_visible_mnt(struct mount *mp, struct proc *p) 5212 { 5213 struct nchandle nch; 5214 5215 /* 5216 * Traverse from the mount point upwards. If we hit the process 5217 * root then the mount point is visible to the process. 5218 */ 5219 nch = mp->mnt_ncmountpt; 5220 while (nch.ncp) { 5221 if (nch.mount == p->p_fd->fd_nrdir.mount && 5222 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5223 return(1); 5224 } 5225 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5226 nch = nch.mount->mnt_ncmounton; 5227 } else { 5228 nch.ncp = nch.ncp->nc_parent; 5229 } 5230 } 5231 5232 /* 5233 * If the mount point is not visible to the process, but the 5234 * process root is in a subdirectory of the mount, return 5235 * TRUE anyway. 
5236 */ 5237 if (p->p_fd->fd_nrdir.mount == mp) 5238 return(1); 5239 5240 return(0); 5241 } 5242 5243 /* Sets priv to PRIV_ROOT in case no matching fs */ 5244 static int 5245 get_fspriv(const char *fsname) 5246 { 5247 5248 if (strncmp("null", fsname, 5) == 0) { 5249 return PRIV_VFS_MOUNT_NULLFS; 5250 } else if (strncmp(fsname, "tmpfs", 6) == 0) { 5251 return PRIV_VFS_MOUNT_TMPFS; 5252 } 5253 5254 return PRIV_ROOT; 5255 } 5256 5257 int 5258 sys___realpath(struct sysmsg *sysmsg, const struct __realpath_args *uap) 5259 { 5260 struct nlookupdata nd; 5261 char *rbuf; 5262 char *fbuf; 5263 ssize_t rlen; 5264 int error; 5265 5266 /* 5267 * Invalid length if less than 0. 0 is allowed 5268 */ 5269 if ((ssize_t)uap->len < 0) 5270 return EINVAL; 5271 5272 rbuf = NULL; 5273 fbuf = NULL; 5274 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5275 if (error) 5276 goto done; 5277 5278 nd.nl_flags |= NLC_SHAREDLOCK; 5279 error = nlookup(&nd); 5280 if (error) 5281 goto done; 5282 5283 if (nd.nl_nch.ncp->nc_vp == NULL) { 5284 error = ENOENT; 5285 goto done; 5286 } 5287 5288 /* 5289 * Shortcut test for existence. 5290 */ 5291 if (uap->len == 0) { 5292 error = ENAMETOOLONG; 5293 goto done; 5294 } 5295 5296 /* 5297 * Obtain the path relative to the process root. The nch must not 5298 * be locked for the cache_fullpath() call. 5299 */ 5300 if (nd.nl_flags & NLC_NCPISLOCKED) { 5301 nd.nl_flags &= ~NLC_NCPISLOCKED; 5302 cache_unlock(&nd.nl_nch); 5303 } 5304 error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0); 5305 if (error) 5306 goto done; 5307 5308 rlen = (ssize_t)strlen(rbuf); 5309 if (rlen >= uap->len) { 5310 error = ENAMETOOLONG; 5311 goto done; 5312 } 5313 error = copyout(rbuf, uap->buf, rlen + 1); 5314 if (error == 0) 5315 sysmsg->sysmsg_szresult = rlen; 5316 done: 5317 nlookup_done(&nd); 5318 if (fbuf) 5319 kfree(fbuf, M_TEMP); 5320 5321 return error; 5322 } 5323