1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
79 __printflike(2, 3); 80 static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb); 81 static int checkvp_chdir (struct vnode *vn, struct thread *td); 82 static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch); 83 static int get_fspriv(const char *); 84 static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp); 85 static int chroot_visible_mnt(struct mount *mp, struct proc *p); 86 static int getutimes (struct timeval *, struct timespec *); 87 static int getutimens (const struct timespec *, struct timespec *, int *); 88 static int setfown (struct mount *, struct vnode *, uid_t, gid_t); 89 static int setfmode (struct vnode *, int); 90 static int setfflags (struct vnode *, u_long); 91 static int setutimes (struct vnode *, struct vattr *, 92 const struct timespec *, int); 93 94 static int usermount = 0; /* if 1, non-root can mount fs. */ 95 SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, 96 "Allow non-root users to mount filesystems"); 97 98 static int debug_unmount = 0; /* if 1 loop until unmount success */ 99 SYSCTL_INT(_vfs, OID_AUTO, debug_unmount, CTLFLAG_RW, &debug_unmount, 0, 100 "Stall failed unmounts in loop"); 101 /* 102 * Virtual File System System Calls 103 */ 104 105 /* 106 * Mount a file system. 107 * 108 * mount_args(char *type, char *path, int flags, caddr_t data) 109 * 110 * MPALMOSTSAFE 111 */ 112 int 113 sys_mount(struct mount_args *uap) 114 { 115 struct thread *td = curthread; 116 struct vnode *vp; 117 struct nchandle nch; 118 struct mount *mp, *nullmp; 119 struct vfsconf *vfsp; 120 int error, flag = 0, flag2 = 0; 121 int hasmount; 122 int priv = 0; 123 struct vattr va; 124 struct nlookupdata nd; 125 char fstypename[MFSNAMELEN]; 126 struct ucred *cred; 127 128 cred = td->td_ucred; 129 130 /* We do not allow user mounts inside a jail for now */ 131 if (usermount && jailed(cred)) { 132 error = EPERM; 133 goto done; 134 } 135 136 /* 137 * Extract the file system type. 
We need to know this early, to take 138 * appropriate actions for jails and nullfs mounts. 139 */ 140 if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) 141 goto done; 142 143 /* 144 * Select the correct priv according to the file system type. 145 */ 146 priv = get_fspriv(fstypename); 147 148 if (usermount == 0 && (error = priv_check(td, priv))) 149 goto done; 150 151 /* 152 * Do not allow NFS export by non-root users. 153 */ 154 if (uap->flags & MNT_EXPORTED) { 155 error = priv_check(td, priv); 156 if (error) 157 goto done; 158 } 159 /* 160 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users 161 */ 162 if (priv_check(td, priv)) 163 uap->flags |= MNT_NOSUID | MNT_NODEV; 164 165 /* 166 * Lookup the requested path and extract the nch and vnode. 167 */ 168 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 169 if (error == 0) { 170 if ((error = nlookup(&nd)) == 0) { 171 if (nd.nl_nch.ncp->nc_vp == NULL) 172 error = ENOENT; 173 } 174 } 175 if (error) { 176 nlookup_done(&nd); 177 goto done; 178 } 179 180 /* 181 * If the target filesystem is resolved via a nullfs mount, then 182 * nd.nl_nch.mount will be pointing to the nullfs mount structure 183 * instead of the target file system. We need it in case we are 184 * doing an update. 185 */ 186 nullmp = nd.nl_nch.mount; 187 188 /* 189 * Extract the locked+refd ncp and cleanup the nd structure 190 */ 191 nch = nd.nl_nch; 192 cache_zero(&nd.nl_nch); 193 nlookup_done(&nd); 194 195 if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 196 (mp = cache_findmount(&nch)) != NULL) { 197 cache_dropmount(mp); 198 hasmount = 1; 199 } else { 200 hasmount = 0; 201 } 202 203 204 /* 205 * now we have the locked ref'd nch and unreferenced vnode. 
206 */ 207 vp = nch.ncp->nc_vp; 208 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) { 209 cache_put(&nch); 210 goto done; 211 } 212 cache_unlock(&nch); 213 214 /* 215 * Now we have an unlocked ref'd nch and a locked ref'd vp 216 */ 217 if (uap->flags & MNT_UPDATE) { 218 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 219 cache_drop(&nch); 220 vput(vp); 221 error = EINVAL; 222 goto done; 223 } 224 225 if (strncmp(fstypename, "null", 5) == 0) { 226 KKASSERT(nullmp); 227 mp = nullmp; 228 } else { 229 mp = vp->v_mount; 230 } 231 232 flag = mp->mnt_flag; 233 flag2 = mp->mnt_kern_flag; 234 /* 235 * We only allow the filesystem to be reloaded if it 236 * is currently mounted read-only. 237 */ 238 if ((uap->flags & MNT_RELOAD) && 239 ((mp->mnt_flag & MNT_RDONLY) == 0)) { 240 cache_drop(&nch); 241 vput(vp); 242 error = EOPNOTSUPP; /* Needs translation */ 243 goto done; 244 } 245 /* 246 * Only root, or the user that did the original mount is 247 * permitted to update it. 248 */ 249 if (mp->mnt_stat.f_owner != cred->cr_uid && 250 (error = priv_check(td, priv))) { 251 cache_drop(&nch); 252 vput(vp); 253 goto done; 254 } 255 if (vfs_busy(mp, LK_NOWAIT)) { 256 cache_drop(&nch); 257 vput(vp); 258 error = EBUSY; 259 goto done; 260 } 261 if (hasmount) { 262 cache_drop(&nch); 263 vfs_unbusy(mp); 264 vput(vp); 265 error = EBUSY; 266 goto done; 267 } 268 mp->mnt_flag |= 269 uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE); 270 lwkt_gettoken(&mp->mnt_token); 271 vn_unlock(vp); 272 vfsp = mp->mnt_vfc; 273 goto update; 274 } 275 276 /* 277 * If the user is not root, ensure that they own the directory 278 * onto which we are attempting to mount. 
279 */ 280 if ((error = VOP_GETATTR(vp, &va)) || 281 (va.va_uid != cred->cr_uid && 282 (error = priv_check(td, priv)))) { 283 cache_drop(&nch); 284 vput(vp); 285 goto done; 286 } 287 if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) { 288 cache_drop(&nch); 289 vput(vp); 290 goto done; 291 } 292 if (vp->v_type != VDIR) { 293 cache_drop(&nch); 294 vput(vp); 295 error = ENOTDIR; 296 goto done; 297 } 298 if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) { 299 cache_drop(&nch); 300 vput(vp); 301 error = EPERM; 302 goto done; 303 } 304 vfsp = vfsconf_find_by_name(fstypename); 305 if (vfsp == NULL) { 306 linker_file_t lf; 307 308 /* Only load modules for root (very important!) */ 309 if ((error = priv_check(td, PRIV_ROOT)) != 0) { 310 cache_drop(&nch); 311 vput(vp); 312 goto done; 313 } 314 error = linker_load_file(fstypename, &lf); 315 if (error || lf == NULL) { 316 cache_drop(&nch); 317 vput(vp); 318 if (lf == NULL) 319 error = ENODEV; 320 goto done; 321 } 322 lf->userrefs++; 323 /* lookup again, see if the VFS was loaded */ 324 vfsp = vfsconf_find_by_name(fstypename); 325 if (vfsp == NULL) { 326 lf->userrefs--; 327 linker_file_unload(lf); 328 cache_drop(&nch); 329 vput(vp); 330 error = ENODEV; 331 goto done; 332 } 333 } 334 if (hasmount) { 335 cache_drop(&nch); 336 vput(vp); 337 error = EBUSY; 338 goto done; 339 } 340 341 /* 342 * Allocate and initialize the filesystem. 343 */ 344 mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK); 345 mount_init(mp, vfsp->vfc_vfsops); 346 vfs_busy(mp, LK_NOWAIT); 347 mp->mnt_vfc = vfsp; 348 mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT; 349 vfsp->vfc_refcount++; 350 mp->mnt_stat.f_type = vfsp->vfc_typenum; 351 mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; 352 strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN); 353 mp->mnt_stat.f_owner = cred->cr_uid; 354 lwkt_gettoken(&mp->mnt_token); 355 vn_unlock(vp); 356 update: 357 /* 358 * (per-mount token acquired at this point) 359 * 360 * Set the mount level flags. 
361 */ 362 if (uap->flags & MNT_RDONLY) 363 mp->mnt_flag |= MNT_RDONLY; 364 else if (mp->mnt_flag & MNT_RDONLY) 365 mp->mnt_kern_flag |= MNTK_WANTRDWR; 366 mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | 367 MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME | 368 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 369 MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 370 MNT_AUTOMOUNTED); 371 mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC | 372 MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE | 373 MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM | 374 MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR | 375 MNT_AUTOMOUNTED); 376 377 /* 378 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf. 379 * This way the initial VFS_MOUNT() call will also be MPSAFE. 380 */ 381 if (vfsp->vfc_flags & VFCF_MPSAFE) 382 mp->mnt_kern_flag |= MNTK_ALL_MPSAFE; 383 384 /* 385 * Mount the filesystem. 386 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they 387 * get. 388 */ 389 if (mp->mnt_flag & MNT_UPDATE) { 390 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 391 if (mp->mnt_kern_flag & MNTK_WANTRDWR) 392 mp->mnt_flag &= ~MNT_RDONLY; 393 mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE); 394 mp->mnt_kern_flag &=~ MNTK_WANTRDWR; 395 if (error) { 396 mp->mnt_flag = flag; 397 mp->mnt_kern_flag = flag2; 398 } 399 lwkt_reltoken(&mp->mnt_token); 400 vfs_unbusy(mp); 401 vrele(vp); 402 cache_drop(&nch); 403 goto done; 404 } 405 mp->mnt_ncmounton = nch; 406 error = VFS_MOUNT(mp, uap->path, uap->data, cred); 407 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 408 409 /* 410 * Put the new filesystem on the mount list after root. The mount 411 * point gets its own mnt_ncmountpt (unless the VFS already set one 412 * up) which represents the root of the mount. The lookup code 413 * detects the mount point going forward and checks the root of 414 * the mount going backwards. 
415 * 416 * It is not necessary to invalidate or purge the vnode underneath 417 * because elements under the mount will be given their own glue 418 * namecache record. 419 */ 420 if (!error) { 421 if (mp->mnt_ncmountpt.ncp == NULL) { 422 /* 423 * Allocate, then unlock, but leave the ref intact. 424 * This is the mnt_refs (1) that we will retain 425 * through to the unmount. 426 */ 427 cache_allocroot(&mp->mnt_ncmountpt, mp, NULL); 428 cache_unlock(&mp->mnt_ncmountpt); 429 } 430 vn_unlock(vp); 431 cache_lock(&nch); 432 nch.ncp->nc_flag |= NCF_ISMOUNTPT; 433 cache_unlock(&nch); 434 cache_ismounting(mp); 435 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 436 437 mountlist_insert(mp, MNTINS_LAST); 438 vn_unlock(vp); 439 checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt); 440 error = vfs_allocate_syncvnode(mp); 441 lwkt_reltoken(&mp->mnt_token); 442 vfs_unbusy(mp); 443 error = VFS_START(mp, 0); 444 vrele(vp); 445 KNOTE(&fs_klist, VQ_MOUNT); 446 } else { 447 bzero(&mp->mnt_ncmounton, sizeof(mp->mnt_ncmounton)); 448 vn_syncer_thr_stop(mp); 449 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 450 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 451 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 452 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 453 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 454 if (mp->mnt_cred) { 455 crfree(mp->mnt_cred); 456 mp->mnt_cred = NULL; 457 } 458 mp->mnt_vfc->vfc_refcount--; 459 lwkt_reltoken(&mp->mnt_token); 460 vfs_unbusy(mp); 461 kfree(mp, M_MOUNT); 462 cache_drop(&nch); 463 vput(vp); 464 } 465 done: 466 return (error); 467 } 468 469 /* 470 * Scan all active processes to see if any of them have a current 471 * or root directory onto which the new filesystem has just been 472 * mounted. If so, replace them with the new mount point. 473 * 474 * Both old_nch and new_nch are ref'd on call but not locked. 475 * new_nch must be temporarily locked so it can be associated with the 476 * vnode representing the root of the mount point. 
477 */ 478 struct checkdirs_info { 479 struct nchandle old_nch; 480 struct nchandle new_nch; 481 struct vnode *old_vp; 482 struct vnode *new_vp; 483 }; 484 485 static int checkdirs_callback(struct proc *p, void *data); 486 487 static void 488 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 489 { 490 struct checkdirs_info info; 491 struct vnode *olddp; 492 struct vnode *newdp; 493 struct mount *mp; 494 495 /* 496 * If the old mount point's vnode has a usecount of 1, it is not 497 * being held as a descriptor anywhere. 498 */ 499 olddp = old_nch->ncp->nc_vp; 500 if (olddp == NULL || VREFCNT(olddp) == 1) 501 return; 502 503 /* 504 * Force the root vnode of the new mount point to be resolved 505 * so we can update any matching processes. 506 */ 507 mp = new_nch->mount; 508 if (VFS_ROOT(mp, &newdp)) 509 panic("mount: lost mount"); 510 vn_unlock(newdp); 511 cache_lock(new_nch); 512 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 513 cache_setunresolved(new_nch); 514 cache_setvp(new_nch, newdp); 515 cache_unlock(new_nch); 516 517 /* 518 * Special handling of the root node 519 */ 520 if (rootvnode == olddp) { 521 vref(newdp); 522 vfs_cache_setroot(newdp, cache_hold(new_nch)); 523 } 524 525 /* 526 * Pass newdp separately so the callback does not have to access 527 * it via new_nch->ncp->nc_vp. 528 */ 529 info.old_nch = *old_nch; 530 info.new_nch = *new_nch; 531 info.new_vp = newdp; 532 allproc_scan(checkdirs_callback, &info, 0); 533 vput(newdp); 534 } 535 536 /* 537 * NOTE: callback is not MP safe because the scanned process's filedesc 538 * structure can be ripped out from under us, amoung other things. 
539 */ 540 static int 541 checkdirs_callback(struct proc *p, void *data) 542 { 543 struct checkdirs_info *info = data; 544 struct filedesc *fdp; 545 struct nchandle ncdrop1; 546 struct nchandle ncdrop2; 547 struct vnode *vprele1; 548 struct vnode *vprele2; 549 550 if ((fdp = p->p_fd) != NULL) { 551 cache_zero(&ncdrop1); 552 cache_zero(&ncdrop2); 553 vprele1 = NULL; 554 vprele2 = NULL; 555 556 /* 557 * MPUNSAFE - XXX fdp can be pulled out from under a 558 * foreign process. 559 * 560 * A shared filedesc is ok, we don't have to copy it 561 * because we are making this change globally. 562 */ 563 spin_lock(&fdp->fd_spin); 564 if (fdp->fd_ncdir.mount == info->old_nch.mount && 565 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 566 vprele1 = fdp->fd_cdir; 567 vref(info->new_vp); 568 fdp->fd_cdir = info->new_vp; 569 ncdrop1 = fdp->fd_ncdir; 570 cache_copy(&info->new_nch, &fdp->fd_ncdir); 571 } 572 if (fdp->fd_nrdir.mount == info->old_nch.mount && 573 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 574 vprele2 = fdp->fd_rdir; 575 vref(info->new_vp); 576 fdp->fd_rdir = info->new_vp; 577 ncdrop2 = fdp->fd_nrdir; 578 cache_copy(&info->new_nch, &fdp->fd_nrdir); 579 } 580 spin_unlock(&fdp->fd_spin); 581 if (ncdrop1.ncp) 582 cache_drop(&ncdrop1); 583 if (ncdrop2.ncp) 584 cache_drop(&ncdrop2); 585 if (vprele1) 586 vrele(vprele1); 587 if (vprele2) 588 vrele(vprele2); 589 } 590 return(0); 591 } 592 593 /* 594 * Unmount a file system. 595 * 596 * Note: unmount takes a path to the vnode mounted on as argument, 597 * not special file (as before). 
598 * 599 * umount_args(char *path, int flags) 600 * 601 * MPALMOSTSAFE 602 */ 603 int 604 sys_unmount(struct unmount_args *uap) 605 { 606 struct thread *td = curthread; 607 struct proc *p __debugvar = td->td_proc; 608 struct mount *mp = NULL; 609 struct nlookupdata nd; 610 char fstypename[MFSNAMELEN]; 611 int priv = 0; 612 int error; 613 struct ucred *cred; 614 615 cred = td->td_ucred; 616 617 KKASSERT(p); 618 619 /* We do not allow user umounts inside a jail for now */ 620 if (usermount && jailed(cred)) { 621 error = EPERM; 622 goto done; 623 } 624 625 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 626 NLC_FOLLOW | NLC_IGNBADDIR); 627 if (error == 0) 628 error = nlookup(&nd); 629 if (error) 630 goto out; 631 632 mp = nd.nl_nch.mount; 633 634 /* Figure out the fsname in order to select proper privs */ 635 ksnprintf(fstypename, MFSNAMELEN, "%s", mp->mnt_vfc->vfc_name); 636 priv = get_fspriv(fstypename); 637 638 if (usermount == 0 && (error = priv_check(td, priv))) { 639 nlookup_done(&nd); 640 goto done; 641 } 642 643 /* 644 * Only root, or the user that did the original mount is 645 * permitted to unmount this filesystem. 646 */ 647 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 648 (error = priv_check(td, priv))) 649 goto out; 650 651 /* 652 * Don't allow unmounting the root file system. 653 */ 654 if (mp->mnt_flag & MNT_ROOTFS) { 655 error = EINVAL; 656 goto out; 657 } 658 659 /* 660 * Must be the root of the filesystem 661 */ 662 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 663 error = EINVAL; 664 goto out; 665 } 666 667 /* Check if this mount belongs to this prison */ 668 if (jailed(cred) && mp->mnt_cred && (!mp->mnt_cred->cr_prison || 669 mp->mnt_cred->cr_prison != cred->cr_prison)) { 670 kprintf("mountpoint %s does not belong to this jail\n", 671 uap->path); 672 error = EPERM; 673 goto out; 674 } 675 676 /* 677 * If no error try to issue the unmount. 
We lose our cache 678 * ref when we call nlookup_done so we must hold the mount point 679 * to prevent use-after-free races. 680 */ 681 out: 682 if (error == 0) { 683 mount_hold(mp); 684 nlookup_done(&nd); 685 error = dounmount(mp, uap->flags, 0); 686 mount_drop(mp); 687 } else { 688 nlookup_done(&nd); 689 } 690 done: 691 return (error); 692 } 693 694 /* 695 * Do the actual file system unmount (interlocked against the mountlist 696 * token and mp->mnt_token). 697 */ 698 static int 699 dounmount_interlock(struct mount *mp) 700 { 701 if (mp->mnt_kern_flag & MNTK_UNMOUNT) 702 return (EBUSY); 703 mp->mnt_kern_flag |= MNTK_UNMOUNT; 704 return(0); 705 } 706 707 static int 708 unmount_allproc_cb(struct proc *p, void *arg) 709 { 710 struct mount *mp; 711 712 if (p->p_textnch.ncp == NULL) 713 return 0; 714 715 mp = (struct mount *)arg; 716 if (p->p_textnch.mount == mp) 717 cache_drop(&p->p_textnch); 718 719 return 0; 720 } 721 722 /* 723 * The guts of the unmount code. The mount owns one ref and one hold 724 * count. If we successfully interlock the unmount, those refs are ours. 725 * (The ref is from mnt_ncmountpt). 726 * 727 * When halting we shortcut certain mount types such as devfs by not actually 728 * issuing the VFS_SYNC() or VFS_UNMOUNT(). They are still disconnected 729 * from the mountlist so higher-level filesytems can unmount cleanly. 730 * 731 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs. 732 */ 733 int 734 dounmount(struct mount *mp, int flags, int halting) 735 { 736 struct namecache *ncp; 737 struct nchandle nch; 738 struct vnode *vp; 739 int error; 740 int async_flag; 741 int lflags; 742 int freeok = 1; 743 int hadsyncer = 0; 744 int retry; 745 int quickhalt; 746 747 lwkt_gettoken(&mp->mnt_token); 748 749 /* 750 * When halting, certain mount points can essentially just 751 * be unhooked and otherwise ignored. 
752 */ 753 if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) { 754 quickhalt = 1; 755 freeok = 0; 756 } else { 757 quickhalt = 0; 758 } 759 760 761 /* 762 * Exclusive access for unmounting purposes. 763 */ 764 if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0) 765 goto out; 766 767 /* 768 * We now 'own' the last mp->mnt_refs 769 * 770 * Allow filesystems to detect that a forced unmount is in progress. 771 */ 772 if (flags & MNT_FORCE) 773 mp->mnt_kern_flag |= MNTK_UNMOUNTF; 774 lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK); 775 error = lockmgr(&mp->mnt_lock, lflags); 776 if (error) { 777 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 778 if (mp->mnt_kern_flag & MNTK_MWAIT) { 779 mp->mnt_kern_flag &= ~MNTK_MWAIT; 780 wakeup(mp); 781 } 782 goto out; 783 } 784 785 if (mp->mnt_flag & MNT_EXPUBLIC) 786 vfs_setpublicfs(NULL, NULL, NULL); 787 788 vfs_msync(mp, MNT_WAIT); 789 async_flag = mp->mnt_flag & MNT_ASYNC; 790 mp->mnt_flag &=~ MNT_ASYNC; 791 792 /* 793 * Decomission our special mnt_syncer vnode. This also stops 794 * the vnlru code. If we are unable to unmount we recommission 795 * the vnode. 796 * 797 * Then sync the filesystem. 798 */ 799 if ((vp = mp->mnt_syncer) != NULL) { 800 mp->mnt_syncer = NULL; 801 atomic_set_int(&vp->v_refcnt, VREF_FINALIZE); 802 vrele(vp); 803 hadsyncer = 1; 804 } 805 806 /* 807 * Sync normally-mounted filesystem. 808 */ 809 if (quickhalt == 0) { 810 if ((mp->mnt_flag & MNT_RDONLY) == 0) 811 VFS_SYNC(mp, MNT_WAIT); 812 } 813 814 /* 815 * nchandle records ref the mount structure. Expect a count of 1 816 * (our mount->mnt_ncmountpt). 817 * 818 * Scans can get temporary refs on a mountpoint (thought really 819 * heavy duty stuff like cache_findmount() do not). 820 */ 821 for (retry = 0; (retry < 10 || debug_unmount); ++retry) { 822 /* 823 * Invalidate the namecache topology under the mount. 
824 * nullfs mounts alias a real mount's namecache topology 825 * and it should not be invalidated in that case. 826 */ 827 if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) { 828 cache_lock(&mp->mnt_ncmountpt); 829 cache_inval(&mp->mnt_ncmountpt, 830 CINV_DESTROY|CINV_CHILDREN); 831 cache_unlock(&mp->mnt_ncmountpt); 832 } 833 834 /* 835 * Clear pcpu caches 836 */ 837 cache_unmounting(mp); 838 if (mp->mnt_refs != 1) 839 cache_clearmntcache(mp); 840 841 /* 842 * Break out if we are good. Don't count ncp refs if the 843 * mount is aliased. 844 */ 845 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 846 NULL : mp->mnt_ncmountpt.ncp; 847 if (mp->mnt_refs == 1 && 848 (ncp == NULL || (ncp->nc_refs == 1 && 849 TAILQ_FIRST(&ncp->nc_list) == NULL))) { 850 break; 851 } 852 853 /* 854 * If forcing the unmount, clean out any p->p_textnch 855 * nchandles that match this mount. 856 */ 857 if (flags & MNT_FORCE) 858 allproc_scan(&unmount_allproc_cb, mp, 0); 859 860 /* 861 * Sleep and retry. 862 */ 863 tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1); 864 if ((retry & 15) == 15) { 865 mount_warning(mp, 866 "(%p) debug - retry %d, " 867 "%d namecache refs, %d mount refs", 868 mp, retry, 869 (ncp ? ncp->nc_refs - 1 : 0), 870 mp->mnt_refs - 1); 871 } 872 } 873 874 error = 0; 875 ncp = (mp->mnt_kern_flag & MNTK_NCALIASED) ? 876 NULL : mp->mnt_ncmountpt.ncp; 877 if (mp->mnt_refs != 1 || 878 (ncp != NULL && (ncp->nc_refs != 1 || 879 TAILQ_FIRST(&ncp->nc_list)))) { 880 mount_warning(mp, 881 "(%p): %d namecache refs, %d mount refs " 882 "still present", 883 mp, 884 (ncp ? ncp->nc_refs - 1 : 0), 885 mp->mnt_refs - 1); 886 if (flags & MNT_FORCE) { 887 freeok = 0; 888 mount_warning(mp, "forcing unmount\n"); 889 } else { 890 error = EBUSY; 891 } 892 } 893 894 /* 895 * So far so good, sync the filesystem once more and 896 * call the VFS unmount code if the sync succeeds. 
897 */ 898 if (error == 0 && quickhalt == 0) { 899 if (mp->mnt_flag & MNT_RDONLY) { 900 error = VFS_UNMOUNT(mp, flags); 901 } else { 902 error = VFS_SYNC(mp, MNT_WAIT); 903 if (error == 0 || /* no error */ 904 error == EOPNOTSUPP || /* no sync avail */ 905 (flags & MNT_FORCE)) { /* force anyway */ 906 error = VFS_UNMOUNT(mp, flags); 907 } 908 } 909 if (error) { 910 mount_warning(mp, 911 "(%p) unmount: vfs refused to unmount, " 912 "error %d", 913 mp, error); 914 } 915 } 916 917 /* 918 * If an error occurred we can still recover, restoring the 919 * syncer vnode and misc flags. 920 */ 921 if (error) { 922 if (mp->mnt_syncer == NULL && hadsyncer) 923 vfs_allocate_syncvnode(mp); 924 mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); 925 mp->mnt_flag |= async_flag; 926 lockmgr(&mp->mnt_lock, LK_RELEASE); 927 if (mp->mnt_kern_flag & MNTK_MWAIT) { 928 mp->mnt_kern_flag &= ~MNTK_MWAIT; 929 wakeup(mp); 930 } 931 goto out; 932 } 933 /* 934 * Clean up any journals still associated with the mount after 935 * filesystem activity has ceased. 936 */ 937 journal_remove_all_journals(mp, 938 ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0)); 939 940 mountlist_remove(mp); 941 942 /* 943 * Remove any installed vnode ops here so the individual VFSs don't 944 * have to. 945 * 946 * mnt_refs should go to zero when we scrap mnt_ncmountpt. 947 * 948 * When quickhalting we have to keep these intact because the 949 * underlying vnodes have not been destroyed, and some might be 950 * dirty. 
951 */ 952 if (quickhalt == 0) { 953 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops); 954 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops); 955 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops); 956 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops); 957 vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops); 958 } 959 960 if (mp->mnt_ncmountpt.ncp != NULL) { 961 nch = mp->mnt_ncmountpt; 962 cache_zero(&mp->mnt_ncmountpt); 963 cache_clrmountpt(&nch); 964 cache_drop(&nch); 965 } 966 if (mp->mnt_ncmounton.ncp != NULL) { 967 cache_unmounting(mp); 968 nch = mp->mnt_ncmounton; 969 cache_zero(&mp->mnt_ncmounton); 970 cache_clrmountpt(&nch); 971 cache_drop(&nch); 972 } 973 974 if (mp->mnt_cred) { 975 crfree(mp->mnt_cred); 976 mp->mnt_cred = NULL; 977 } 978 979 mp->mnt_vfc->vfc_refcount--; 980 981 /* 982 * If not quickhalting the mount, we expect there to be no 983 * vnodes left. 984 */ 985 if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist)) 986 panic("unmount: dangling vnode"); 987 988 /* 989 * Release the lock 990 */ 991 lockmgr(&mp->mnt_lock, LK_RELEASE); 992 if (mp->mnt_kern_flag & MNTK_MWAIT) { 993 mp->mnt_kern_flag &= ~MNTK_MWAIT; 994 wakeup(mp); 995 } 996 997 /* 998 * If we reach here and freeok != 0 we must free the mount. 999 * mnt_refs should already have dropped to 0, so if it is not 1000 * zero we must cycle the caches and wait. 1001 * 1002 * When we are satisfied that the mount has disconnected we can 1003 * drop the hold on the mp that represented the mount (though the 1004 * caller might actually have another, so the caller's drop may 1005 * do the actual free). 
1006 */ 1007 if (freeok) { 1008 if (mp->mnt_refs > 0) 1009 cache_clearmntcache(mp); 1010 while (mp->mnt_refs > 0) { 1011 cache_unmounting(mp); 1012 wakeup(mp); 1013 tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1); 1014 cache_clearmntcache(mp); 1015 } 1016 lwkt_reltoken(&mp->mnt_token); 1017 mount_drop(mp); 1018 mp = NULL; 1019 } else { 1020 cache_clearmntcache(mp); 1021 } 1022 error = 0; 1023 KNOTE(&fs_klist, VQ_UNMOUNT); 1024 out: 1025 if (mp) 1026 lwkt_reltoken(&mp->mnt_token); 1027 return (error); 1028 } 1029 1030 static 1031 void 1032 mount_warning(struct mount *mp, const char *ctl, ...) 1033 { 1034 char *ptr; 1035 char *buf; 1036 __va_list va; 1037 1038 __va_start(va, ctl); 1039 if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL, 1040 &ptr, &buf, 0) == 0) { 1041 kprintf("unmount(%s): ", ptr); 1042 kvprintf(ctl, va); 1043 kprintf("\n"); 1044 kfree(buf, M_TEMP); 1045 } else { 1046 kprintf("unmount(%p", mp); 1047 if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name) 1048 kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name); 1049 kprintf("): "); 1050 kvprintf(ctl, va); 1051 kprintf("\n"); 1052 } 1053 __va_end(va); 1054 } 1055 1056 /* 1057 * Shim cache_fullpath() to handle the case where a process is chrooted into 1058 * a subdirectory of a mount. In this case if the root mount matches the 1059 * process root directory's mount we have to specify the process's root 1060 * directory instead of the mount point, because the mount point might 1061 * be above the root directory. 1062 */ 1063 static 1064 int 1065 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 1066 { 1067 struct nchandle *nch; 1068 1069 if (p && p->p_fd->fd_nrdir.mount == mp) 1070 nch = &p->p_fd->fd_nrdir; 1071 else 1072 nch = &mp->mnt_ncmountpt; 1073 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1074 } 1075 1076 /* 1077 * Sync each mounted filesystem. 
1078 */ 1079 1080 #ifdef DEBUG 1081 static int syncprt = 0; 1082 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1083 #endif /* DEBUG */ 1084 1085 static int sync_callback(struct mount *mp, void *data); 1086 1087 int 1088 sys_sync(struct sync_args *uap) 1089 { 1090 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1091 return (0); 1092 } 1093 1094 static 1095 int 1096 sync_callback(struct mount *mp, void *data __unused) 1097 { 1098 int asyncflag; 1099 1100 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1101 lwkt_gettoken(&mp->mnt_token); 1102 asyncflag = mp->mnt_flag & MNT_ASYNC; 1103 mp->mnt_flag &= ~MNT_ASYNC; 1104 lwkt_reltoken(&mp->mnt_token); 1105 vfs_msync(mp, MNT_NOWAIT); 1106 VFS_SYNC(mp, MNT_NOWAIT); 1107 lwkt_gettoken(&mp->mnt_token); 1108 mp->mnt_flag |= asyncflag; 1109 lwkt_reltoken(&mp->mnt_token); 1110 } 1111 return(0); 1112 } 1113 1114 /* XXX PRISON: could be per prison flag */ 1115 static int prison_quotas; 1116 #if 0 1117 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1118 #endif 1119 1120 /* 1121 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1122 * 1123 * Change filesystem quotas. 
1124 * 1125 * MPALMOSTSAFE 1126 */ 1127 int 1128 sys_quotactl(struct quotactl_args *uap) 1129 { 1130 struct nlookupdata nd; 1131 struct thread *td; 1132 struct mount *mp; 1133 int error; 1134 1135 td = curthread; 1136 if (td->td_ucred->cr_prison && !prison_quotas) { 1137 error = EPERM; 1138 goto done; 1139 } 1140 1141 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1142 if (error == 0) 1143 error = nlookup(&nd); 1144 if (error == 0) { 1145 mp = nd.nl_nch.mount; 1146 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 1147 uap->arg, nd.nl_cred); 1148 } 1149 nlookup_done(&nd); 1150 done: 1151 return (error); 1152 } 1153 1154 /* 1155 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1156 * void *buf, int buflen) 1157 * 1158 * This function operates on a mount point and executes the specified 1159 * operation using the specified control data, and possibly returns data. 1160 * 1161 * The actual number of bytes stored in the result buffer is returned, 0 1162 * if none, otherwise an error is returned. 1163 * 1164 * MPALMOSTSAFE 1165 */ 1166 int 1167 sys_mountctl(struct mountctl_args *uap) 1168 { 1169 struct thread *td = curthread; 1170 struct file *fp; 1171 void *ctl = NULL; 1172 void *buf = NULL; 1173 char *path = NULL; 1174 int error; 1175 1176 /* 1177 * Sanity and permissions checks. We must be root. 
1178 */ 1179 if (td->td_ucred->cr_prison != NULL) 1180 return (EPERM); 1181 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1182 (error = priv_check(td, PRIV_ROOT)) != 0) 1183 return (error); 1184 1185 /* 1186 * Argument length checks 1187 */ 1188 if (uap->ctllen < 0 || uap->ctllen > 1024) 1189 return (EINVAL); 1190 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1191 return (EINVAL); 1192 if (uap->path == NULL) 1193 return (EINVAL); 1194 1195 /* 1196 * Allocate the necessary buffers and copyin data 1197 */ 1198 path = objcache_get(namei_oc, M_WAITOK); 1199 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1200 if (error) 1201 goto done; 1202 1203 if (uap->ctllen) { 1204 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1205 error = copyin(uap->ctl, ctl, uap->ctllen); 1206 if (error) 1207 goto done; 1208 } 1209 if (uap->buflen) 1210 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1211 1212 /* 1213 * Validate the descriptor 1214 */ 1215 if (uap->fd >= 0) { 1216 fp = holdfp(td, uap->fd, -1); 1217 if (fp == NULL) { 1218 error = EBADF; 1219 goto done; 1220 } 1221 } else { 1222 fp = NULL; 1223 } 1224 1225 /* 1226 * Execute the internal kernel function and clean up. 1227 */ 1228 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 1229 buf, uap->buflen, &uap->sysmsg_result); 1230 if (fp) 1231 dropfp(td, uap->fd, fp); 1232 if (error == 0 && uap->sysmsg_result > 0) 1233 error = copyout(buf, uap->buf, uap->sysmsg_result); 1234 done: 1235 if (path) 1236 objcache_put(namei_oc, path); 1237 if (ctl) 1238 kfree(ctl, M_TEMP); 1239 if (buf) 1240 kfree(buf, M_TEMP); 1241 return (error); 1242 } 1243 1244 /* 1245 * Execute a mount control operation by resolving the path to a mount point 1246 * and calling vop_mountctl(). 1247 * 1248 * Use the mount point from the nch instead of the vnode so nullfs mounts 1249 * can properly spike the VOP. 
1250 */ 1251 int 1252 kern_mountctl(const char *path, int op, struct file *fp, 1253 const void *ctl, int ctllen, 1254 void *buf, int buflen, int *res) 1255 { 1256 struct vnode *vp; 1257 struct nlookupdata nd; 1258 struct nchandle nch; 1259 struct mount *mp; 1260 int error; 1261 1262 *res = 0; 1263 vp = NULL; 1264 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1265 if (error) 1266 return (error); 1267 error = nlookup(&nd); 1268 if (error) { 1269 nlookup_done(&nd); 1270 return (error); 1271 } 1272 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1273 if (error) { 1274 nlookup_done(&nd); 1275 return (error); 1276 } 1277 1278 /* 1279 * Yes, all this is needed to use the nch.mount below, because 1280 * we must maintain a ref on the mount to avoid ripouts (e.g. 1281 * due to heavy mount/unmount use by synth or poudriere). 1282 */ 1283 nch = nd.nl_nch; 1284 cache_zero(&nd.nl_nch); 1285 cache_unlock(&nch); 1286 nlookup_done(&nd); 1287 vn_unlock(vp); 1288 1289 mp = nch.mount; 1290 1291 /* 1292 * Must be the root of the filesystem 1293 */ 1294 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1295 cache_drop(&nch); 1296 vrele(vp); 1297 return (EINVAL); 1298 } 1299 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1300 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1301 path); 1302 cache_drop(&nch); 1303 vrele(vp); 1304 return (EINVAL); 1305 } 1306 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1307 buf, buflen, res); 1308 vrele(vp); 1309 cache_drop(&nch); 1310 1311 return (error); 1312 } 1313 1314 int 1315 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1316 { 1317 struct thread *td = curthread; 1318 struct proc *p = td->td_proc; 1319 struct mount *mp; 1320 struct statfs *sp; 1321 char *fullpath, *freepath; 1322 int error; 1323 1324 if ((error = nlookup(nd)) != 0) 1325 return (error); 1326 mp = nd->nl_nch.mount; 1327 sp = &mp->mnt_stat; 1328 1329 /* 1330 * Ignore refresh error, user should have visibility. 
1331 * This can happen if a NFS mount goes bad (e.g. server 1332 * revokes perms or goes down). 1333 */ 1334 error = VFS_STATFS(mp, sp, nd->nl_cred); 1335 /* ignore error */ 1336 1337 error = mount_path(p, mp, &fullpath, &freepath); 1338 if (error) 1339 return(error); 1340 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1341 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1342 kfree(freepath, M_TEMP); 1343 1344 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1345 bcopy(sp, buf, sizeof(*buf)); 1346 /* Only root should have access to the fsid's. */ 1347 if (priv_check(td, PRIV_ROOT)) 1348 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1349 return (0); 1350 } 1351 1352 /* 1353 * statfs_args(char *path, struct statfs *buf) 1354 * 1355 * Get filesystem statistics. 1356 */ 1357 int 1358 sys_statfs(struct statfs_args *uap) 1359 { 1360 struct nlookupdata nd; 1361 struct statfs buf; 1362 int error; 1363 1364 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1365 if (error == 0) 1366 error = kern_statfs(&nd, &buf); 1367 nlookup_done(&nd); 1368 if (error == 0) 1369 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1370 return (error); 1371 } 1372 1373 int 1374 kern_fstatfs(int fd, struct statfs *buf) 1375 { 1376 struct thread *td = curthread; 1377 struct proc *p = td->td_proc; 1378 struct file *fp; 1379 struct mount *mp; 1380 struct statfs *sp; 1381 char *fullpath, *freepath; 1382 int error; 1383 1384 KKASSERT(p); 1385 if ((error = holdvnode(td, fd, &fp)) != 0) 1386 return (error); 1387 1388 /* 1389 * Try to use mount info from any overlays rather than the 1390 * mount info for the underlying vnode, otherwise we will 1391 * fail when operating on null-mounted paths inside a chroot. 
1392 */ 1393 if ((mp = fp->f_nchandle.mount) == NULL) 1394 mp = ((struct vnode *)fp->f_data)->v_mount; 1395 if (mp == NULL) { 1396 error = EBADF; 1397 goto done; 1398 } 1399 if (fp->f_cred == NULL) { 1400 error = EINVAL; 1401 goto done; 1402 } 1403 1404 /* 1405 * Ignore refresh error, user should have visibility. 1406 * This can happen if a NFS mount goes bad (e.g. server 1407 * revokes perms or goes down). 1408 */ 1409 sp = &mp->mnt_stat; 1410 error = VFS_STATFS(mp, sp, fp->f_cred); 1411 1412 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1413 goto done; 1414 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1415 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1416 kfree(freepath, M_TEMP); 1417 1418 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1419 bcopy(sp, buf, sizeof(*buf)); 1420 1421 /* Only root should have access to the fsid's. */ 1422 if (priv_check(td, PRIV_ROOT)) 1423 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1424 error = 0; 1425 done: 1426 fdrop(fp); 1427 return (error); 1428 } 1429 1430 /* 1431 * fstatfs_args(int fd, struct statfs *buf) 1432 * 1433 * Get filesystem statistics. 
1434 */ 1435 int 1436 sys_fstatfs(struct fstatfs_args *uap) 1437 { 1438 struct statfs buf; 1439 int error; 1440 1441 error = kern_fstatfs(uap->fd, &buf); 1442 1443 if (error == 0) 1444 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1445 return (error); 1446 } 1447 1448 int 1449 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1450 { 1451 struct mount *mp; 1452 struct statvfs *sp; 1453 int error; 1454 1455 if ((error = nlookup(nd)) != 0) 1456 return (error); 1457 mp = nd->nl_nch.mount; 1458 sp = &mp->mnt_vstat; 1459 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1460 return (error); 1461 1462 sp->f_flag = 0; 1463 if (mp->mnt_flag & MNT_RDONLY) 1464 sp->f_flag |= ST_RDONLY; 1465 if (mp->mnt_flag & MNT_NOSUID) 1466 sp->f_flag |= ST_NOSUID; 1467 bcopy(sp, buf, sizeof(*buf)); 1468 return (0); 1469 } 1470 1471 /* 1472 * statfs_args(char *path, struct statfs *buf) 1473 * 1474 * Get filesystem statistics. 1475 */ 1476 int 1477 sys_statvfs(struct statvfs_args *uap) 1478 { 1479 struct nlookupdata nd; 1480 struct statvfs buf; 1481 int error; 1482 1483 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1484 if (error == 0) 1485 error = kern_statvfs(&nd, &buf); 1486 nlookup_done(&nd); 1487 if (error == 0) 1488 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1489 return (error); 1490 } 1491 1492 int 1493 kern_fstatvfs(int fd, struct statvfs *buf) 1494 { 1495 struct thread *td = curthread; 1496 struct file *fp; 1497 struct mount *mp; 1498 struct statvfs *sp; 1499 int error; 1500 1501 if ((error = holdvnode(td, fd, &fp)) != 0) 1502 return (error); 1503 if ((mp = fp->f_nchandle.mount) == NULL) 1504 mp = ((struct vnode *)fp->f_data)->v_mount; 1505 if (mp == NULL) { 1506 error = EBADF; 1507 goto done; 1508 } 1509 if (fp->f_cred == NULL) { 1510 error = EINVAL; 1511 goto done; 1512 } 1513 sp = &mp->mnt_vstat; 1514 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1515 goto done; 1516 1517 sp->f_flag = 0; 1518 if (mp->mnt_flag & MNT_RDONLY) 1519 
sp->f_flag |= ST_RDONLY; 1520 if (mp->mnt_flag & MNT_NOSUID) 1521 sp->f_flag |= ST_NOSUID; 1522 1523 bcopy(sp, buf, sizeof(*buf)); 1524 error = 0; 1525 done: 1526 fdrop(fp); 1527 return (error); 1528 } 1529 1530 /* 1531 * fstatfs_args(int fd, struct statfs *buf) 1532 * 1533 * Get filesystem statistics. 1534 */ 1535 int 1536 sys_fstatvfs(struct fstatvfs_args *uap) 1537 { 1538 struct statvfs buf; 1539 int error; 1540 1541 error = kern_fstatvfs(uap->fd, &buf); 1542 1543 if (error == 0) 1544 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1545 return (error); 1546 } 1547 1548 /* 1549 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1550 * 1551 * Get statistics on all filesystems. 1552 */ 1553 1554 struct getfsstat_info { 1555 struct statfs *sfsp; 1556 long count; 1557 long maxcount; 1558 int error; 1559 int flags; 1560 struct thread *td; 1561 }; 1562 1563 static int getfsstat_callback(struct mount *, void *); 1564 1565 int 1566 sys_getfsstat(struct getfsstat_args *uap) 1567 { 1568 struct thread *td = curthread; 1569 struct getfsstat_info info; 1570 1571 bzero(&info, sizeof(info)); 1572 1573 info.maxcount = uap->bufsize / sizeof(struct statfs); 1574 info.sfsp = uap->buf; 1575 info.count = 0; 1576 info.flags = uap->flags; 1577 info.td = td; 1578 1579 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1580 if (info.sfsp && info.count > info.maxcount) 1581 uap->sysmsg_result = info.maxcount; 1582 else 1583 uap->sysmsg_result = info.count; 1584 return (info.error); 1585 } 1586 1587 static int 1588 getfsstat_callback(struct mount *mp, void *data) 1589 { 1590 struct getfsstat_info *info = data; 1591 struct statfs *sp; 1592 char *freepath; 1593 char *fullpath; 1594 int error; 1595 1596 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1597 return(0); 1598 1599 if (info->sfsp && info->count < info->maxcount) { 1600 sp = &mp->mnt_stat; 1601 1602 /* 1603 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1604 * refresh the fsstat cache. 
MNT_NOWAIT or MNT_LAZY 1605 * overrides MNT_WAIT. 1606 * 1607 * Ignore refresh error, user should have visibility. 1608 * This can happen if a NFS mount goes bad (e.g. server 1609 * revokes perms or goes down). 1610 */ 1611 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1612 (info->flags & MNT_WAIT)) && 1613 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1614 /* ignore error */ 1615 } 1616 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1617 1618 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1619 if (error) { 1620 info->error = error; 1621 return(-1); 1622 } 1623 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1624 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1625 kfree(freepath, M_TEMP); 1626 1627 error = copyout(sp, info->sfsp, sizeof(*sp)); 1628 if (error) { 1629 info->error = error; 1630 return (-1); 1631 } 1632 ++info->sfsp; 1633 } 1634 info->count++; 1635 return(0); 1636 } 1637 1638 /* 1639 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1640 long bufsize, int flags) 1641 * 1642 * Get statistics on all filesystems. 
1643 */ 1644 1645 struct getvfsstat_info { 1646 struct statfs *sfsp; 1647 struct statvfs *vsfsp; 1648 long count; 1649 long maxcount; 1650 int error; 1651 int flags; 1652 struct thread *td; 1653 }; 1654 1655 static int getvfsstat_callback(struct mount *, void *); 1656 1657 int 1658 sys_getvfsstat(struct getvfsstat_args *uap) 1659 { 1660 struct thread *td = curthread; 1661 struct getvfsstat_info info; 1662 1663 bzero(&info, sizeof(info)); 1664 1665 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1666 info.sfsp = uap->buf; 1667 info.vsfsp = uap->vbuf; 1668 info.count = 0; 1669 info.flags = uap->flags; 1670 info.td = td; 1671 1672 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1673 if (info.vsfsp && info.count > info.maxcount) 1674 uap->sysmsg_result = info.maxcount; 1675 else 1676 uap->sysmsg_result = info.count; 1677 return (info.error); 1678 } 1679 1680 static int 1681 getvfsstat_callback(struct mount *mp, void *data) 1682 { 1683 struct getvfsstat_info *info = data; 1684 struct statfs *sp; 1685 struct statvfs *vsp; 1686 char *freepath; 1687 char *fullpath; 1688 int error; 1689 1690 if (info->td->td_proc && !chroot_visible_mnt(mp, info->td->td_proc)) 1691 return(0); 1692 1693 if (info->vsfsp && info->count < info->maxcount) { 1694 sp = &mp->mnt_stat; 1695 vsp = &mp->mnt_vstat; 1696 1697 /* 1698 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1699 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1700 * overrides MNT_WAIT. 1701 * 1702 * Ignore refresh error, user should have visibility. 1703 * This can happen if a NFS mount goes bad (e.g. server 1704 * revokes perms or goes down). 
1705 */ 1706 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1707 (info->flags & MNT_WAIT)) && 1708 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1709 /* ignore error */ 1710 } 1711 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1712 1713 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1714 (info->flags & MNT_WAIT)) && 1715 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1716 /* ignore error */ 1717 } 1718 vsp->f_flag = 0; 1719 if (mp->mnt_flag & MNT_RDONLY) 1720 vsp->f_flag |= ST_RDONLY; 1721 if (mp->mnt_flag & MNT_NOSUID) 1722 vsp->f_flag |= ST_NOSUID; 1723 1724 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1725 if (error) { 1726 info->error = error; 1727 return(-1); 1728 } 1729 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1730 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1731 kfree(freepath, M_TEMP); 1732 1733 error = copyout(sp, info->sfsp, sizeof(*sp)); 1734 if (error == 0) 1735 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1736 if (error) { 1737 info->error = error; 1738 return (-1); 1739 } 1740 ++info->sfsp; 1741 ++info->vsfsp; 1742 } 1743 info->count++; 1744 return(0); 1745 } 1746 1747 1748 /* 1749 * fchdir_args(int fd) 1750 * 1751 * Change current working directory to a given file descriptor. 
1752 */ 1753 int 1754 sys_fchdir(struct fchdir_args *uap) 1755 { 1756 struct thread *td = curthread; 1757 struct proc *p = td->td_proc; 1758 struct filedesc *fdp = p->p_fd; 1759 struct vnode *vp, *ovp; 1760 struct mount *mp; 1761 struct file *fp; 1762 struct nchandle nch, onch, tnch; 1763 int error; 1764 1765 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1766 return (error); 1767 lwkt_gettoken(&p->p_token); 1768 vp = (struct vnode *)fp->f_data; 1769 vref(vp); 1770 vn_lock(vp, LK_SHARED | LK_RETRY); 1771 if (fp->f_nchandle.ncp == NULL) 1772 error = ENOTDIR; 1773 else 1774 error = checkvp_chdir(vp, td); 1775 if (error) { 1776 vput(vp); 1777 goto done; 1778 } 1779 cache_copy(&fp->f_nchandle, &nch); 1780 1781 /* 1782 * If the ncp has become a mount point, traverse through 1783 * the mount point. 1784 */ 1785 1786 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1787 (mp = cache_findmount(&nch)) != NULL 1788 ) { 1789 error = nlookup_mp(mp, &tnch); 1790 if (error == 0) { 1791 cache_unlock(&tnch); /* leave ref intact */ 1792 vput(vp); 1793 vp = tnch.ncp->nc_vp; 1794 error = vget(vp, LK_SHARED); 1795 KKASSERT(error == 0); 1796 cache_drop(&nch); 1797 nch = tnch; 1798 } 1799 cache_dropmount(mp); 1800 } 1801 if (error == 0) { 1802 spin_lock(&fdp->fd_spin); 1803 ovp = fdp->fd_cdir; 1804 onch = fdp->fd_ncdir; 1805 fdp->fd_cdir = vp; 1806 fdp->fd_ncdir = nch; 1807 spin_unlock(&fdp->fd_spin); 1808 vn_unlock(vp); /* leave ref intact */ 1809 cache_drop(&onch); 1810 vrele(ovp); 1811 } else { 1812 cache_drop(&nch); 1813 vput(vp); 1814 } 1815 fdrop(fp); 1816 done: 1817 lwkt_reltoken(&p->p_token); 1818 return (error); 1819 } 1820 1821 int 1822 kern_chdir(struct nlookupdata *nd) 1823 { 1824 struct thread *td = curthread; 1825 struct proc *p = td->td_proc; 1826 struct filedesc *fdp = p->p_fd; 1827 struct vnode *vp, *ovp; 1828 struct nchandle onch; 1829 int error; 1830 1831 nd->nl_flags |= NLC_SHAREDLOCK; 1832 if ((error = nlookup(nd)) != 0) 1833 return (error); 1834 if ((vp = 
nd->nl_nch.ncp->nc_vp) == NULL) 1835 return (ENOENT); 1836 if ((error = vget(vp, LK_SHARED)) != 0) 1837 return (error); 1838 1839 lwkt_gettoken(&p->p_token); 1840 error = checkvp_chdir(vp, td); 1841 vn_unlock(vp); 1842 if (error == 0) { 1843 spin_lock(&fdp->fd_spin); 1844 ovp = fdp->fd_cdir; 1845 onch = fdp->fd_ncdir; 1846 fdp->fd_ncdir = nd->nl_nch; 1847 fdp->fd_cdir = vp; 1848 spin_unlock(&fdp->fd_spin); 1849 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1850 cache_drop(&onch); 1851 vrele(ovp); 1852 cache_zero(&nd->nl_nch); 1853 } else { 1854 vrele(vp); 1855 } 1856 lwkt_reltoken(&p->p_token); 1857 return (error); 1858 } 1859 1860 /* 1861 * chdir_args(char *path) 1862 * 1863 * Change current working directory (``.''). 1864 */ 1865 int 1866 sys_chdir(struct chdir_args *uap) 1867 { 1868 struct nlookupdata nd; 1869 int error; 1870 1871 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1872 if (error == 0) 1873 error = kern_chdir(&nd); 1874 nlookup_done(&nd); 1875 return (error); 1876 } 1877 1878 /* 1879 * Helper function for raised chroot(2) security function: Refuse if 1880 * any filedescriptors are open directories. 1881 */ 1882 static int 1883 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1884 { 1885 struct vnode *vp; 1886 struct file *fp; 1887 int error; 1888 int fd; 1889 1890 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1891 if ((error = holdvnode(td, fd, &fp)) != 0) 1892 continue; 1893 vp = (struct vnode *)fp->f_data; 1894 if (vp->v_type != VDIR) { 1895 fdrop(fp); 1896 continue; 1897 } 1898 fdrop(fp); 1899 return(EPERM); 1900 } 1901 return (0); 1902 } 1903 1904 /* 1905 * This sysctl determines if we will allow a process to chroot(2) if it 1906 * has a directory open: 1907 * 0: disallowed for all processes. 1908 * 1: allowed for processes that were not already chroot(2)'ed. 1909 * 2: allowed for all processes. 
1910 */ 1911 1912 static int chroot_allow_open_directories = 1; 1913 1914 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1915 &chroot_allow_open_directories, 0, ""); 1916 1917 /* 1918 * chroot to the specified namecache entry. We obtain the vp from the 1919 * namecache data. The passed ncp must be locked and referenced and will 1920 * remain locked and referenced on return. 1921 */ 1922 int 1923 kern_chroot(struct nchandle *nch) 1924 { 1925 struct thread *td = curthread; 1926 struct proc *p = td->td_proc; 1927 struct filedesc *fdp = p->p_fd; 1928 struct vnode *vp; 1929 int error; 1930 1931 /* 1932 * Only privileged user can chroot 1933 */ 1934 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1935 if (error) 1936 return (error); 1937 1938 /* 1939 * Disallow open directory descriptors (fchdir() breakouts). 1940 */ 1941 if (chroot_allow_open_directories == 0 || 1942 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1943 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 1944 return (error); 1945 } 1946 if ((vp = nch->ncp->nc_vp) == NULL) 1947 return (ENOENT); 1948 1949 if ((error = vget(vp, LK_SHARED)) != 0) 1950 return (error); 1951 1952 /* 1953 * Check the validity of vp as a directory to change to and 1954 * associate it with rdir/jdir. 
1955 */ 1956 error = checkvp_chdir(vp, td); 1957 vn_unlock(vp); /* leave reference intact */ 1958 if (error == 0) { 1959 lwkt_gettoken(&p->p_token); 1960 vrele(fdp->fd_rdir); 1961 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1962 cache_drop(&fdp->fd_nrdir); 1963 cache_copy(nch, &fdp->fd_nrdir); 1964 if (fdp->fd_jdir == NULL) { 1965 fdp->fd_jdir = vp; 1966 vref(fdp->fd_jdir); 1967 cache_copy(nch, &fdp->fd_njdir); 1968 } 1969 if ((p->p_flags & P_DIDCHROOT) == 0) { 1970 p->p_flags |= P_DIDCHROOT; 1971 if (p->p_depth <= 65535 - 32) 1972 p->p_depth += 32; 1973 } 1974 lwkt_reltoken(&p->p_token); 1975 } else { 1976 vrele(vp); 1977 } 1978 return (error); 1979 } 1980 1981 /* 1982 * chroot_args(char *path) 1983 * 1984 * Change notion of root (``/'') directory. 1985 */ 1986 int 1987 sys_chroot(struct chroot_args *uap) 1988 { 1989 struct thread *td __debugvar = curthread; 1990 struct nlookupdata nd; 1991 int error; 1992 1993 KKASSERT(td->td_proc); 1994 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1995 if (error == 0) { 1996 nd.nl_flags |= NLC_EXEC; 1997 error = nlookup(&nd); 1998 if (error == 0) 1999 error = kern_chroot(&nd.nl_nch); 2000 } 2001 nlookup_done(&nd); 2002 return(error); 2003 } 2004 2005 int 2006 sys_chroot_kernel(struct chroot_kernel_args *uap) 2007 { 2008 struct thread *td = curthread; 2009 struct nlookupdata nd; 2010 struct nchandle *nch; 2011 struct vnode *vp; 2012 int error; 2013 2014 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2015 if (error) 2016 goto error_nond; 2017 2018 error = nlookup(&nd); 2019 if (error) 2020 goto error_out; 2021 2022 nch = &nd.nl_nch; 2023 2024 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 2025 if (error) 2026 goto error_out; 2027 2028 if ((vp = nch->ncp->nc_vp) == NULL) { 2029 error = ENOENT; 2030 goto error_out; 2031 } 2032 2033 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 2034 goto error_out; 2035 2036 vfs_cache_setroot(vp, cache_hold(nch)); 2037 2038 
error_out: 2039 nlookup_done(&nd); 2040 error_nond: 2041 return(error); 2042 } 2043 2044 /* 2045 * Common routine for chroot and chdir. Given a locked, referenced vnode, 2046 * determine whether it is legal to chdir to the vnode. The vnode's state 2047 * is not changed by this call. 2048 */ 2049 static int 2050 checkvp_chdir(struct vnode *vp, struct thread *td) 2051 { 2052 int error; 2053 2054 if (vp->v_type != VDIR) 2055 error = ENOTDIR; 2056 else 2057 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 2058 return (error); 2059 } 2060 2061 int 2062 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 2063 { 2064 struct thread *td = curthread; 2065 struct proc *p = td->td_proc; 2066 struct lwp *lp = td->td_lwp; 2067 struct filedesc *fdp = p->p_fd; 2068 int cmode, flags; 2069 struct file *nfp; 2070 struct file *fp; 2071 struct vnode *vp; 2072 int type, indx, error = 0; 2073 struct flock lf; 2074 2075 if ((oflags & O_ACCMODE) == O_ACCMODE) 2076 return (EINVAL); 2077 flags = FFLAGS(oflags); 2078 error = falloc(lp, &nfp, NULL); 2079 if (error) 2080 return (error); 2081 fp = nfp; 2082 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 2083 2084 /* 2085 * XXX p_dupfd is a real mess. It allows a device to return a 2086 * file descriptor to be duplicated rather then doing the open 2087 * itself. 2088 */ 2089 lp->lwp_dupfd = -1; 2090 2091 /* 2092 * Call vn_open() to do the lookup and assign the vnode to the 2093 * file pointer. vn_open() does not change the ref count on fp 2094 * and the vnode, on success, will be inherited by the file pointer 2095 * and unlocked. 2096 * 2097 * Request a shared lock on the vnode if possible. 2098 * 2099 * When NLC_SHAREDLOCK is set we may still need an exclusive vnode 2100 * lock for O_RDWR opens on executables in order to avoid a VTEXT 2101 * detection race. The NLC_EXCLLOCK_IFEXEC handles this case. 2102 * 2103 * NOTE: We need a flag to separate terminal vnode locking from 2104 * parent locking. 
O_CREAT needs parent locking, but O_TRUNC 2105 * and O_RDWR only need to lock the terminal vnode exclusively. 2106 */ 2107 nd->nl_flags |= NLC_LOCKVP; 2108 if ((flags & (O_CREAT|O_TRUNC)) == 0) { 2109 nd->nl_flags |= NLC_SHAREDLOCK; 2110 if (flags & O_RDWR) 2111 nd->nl_flags |= NLC_EXCLLOCK_IFEXEC; 2112 } 2113 2114 error = vn_open(nd, fp, flags, cmode); 2115 nlookup_done(nd); 2116 2117 if (error) { 2118 /* 2119 * handle special fdopen() case. bleh. dupfdopen() is 2120 * responsible for dropping the old contents of ofiles[indx] 2121 * if it succeeds. 2122 * 2123 * Note that fsetfd() will add a ref to fp which represents 2124 * the fd_files[] assignment. We must still drop our 2125 * reference. 2126 */ 2127 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 2128 if (fdalloc(p, 0, &indx) == 0) { 2129 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error); 2130 if (error == 0) { 2131 *res = indx; 2132 fdrop(fp); /* our ref */ 2133 return (0); 2134 } 2135 fsetfd(fdp, NULL, indx); 2136 } 2137 } 2138 fdrop(fp); /* our ref */ 2139 if (error == ERESTART) 2140 error = EINTR; 2141 return (error); 2142 } 2143 2144 /* 2145 * ref the vnode for ourselves so it can't be ripped out from under 2146 * is. XXX need an ND flag to request that the vnode be returned 2147 * anyway. 2148 * 2149 * Reserve a file descriptor but do not assign it until the open 2150 * succeeds. 2151 */ 2152 vp = (struct vnode *)fp->f_data; 2153 vref(vp); 2154 if ((error = fdalloc(p, 0, &indx)) != 0) { 2155 fdrop(fp); 2156 vrele(vp); 2157 return (error); 2158 } 2159 2160 /* 2161 * If no error occurs the vp will have been assigned to the file 2162 * pointer. 
2163 */ 2164 lp->lwp_dupfd = 0; 2165 2166 if (flags & (O_EXLOCK | O_SHLOCK)) { 2167 lf.l_whence = SEEK_SET; 2168 lf.l_start = 0; 2169 lf.l_len = 0; 2170 if (flags & O_EXLOCK) 2171 lf.l_type = F_WRLCK; 2172 else 2173 lf.l_type = F_RDLCK; 2174 if (flags & FNONBLOCK) 2175 type = 0; 2176 else 2177 type = F_WAIT; 2178 2179 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2180 /* 2181 * lock request failed. Clean up the reserved 2182 * descriptor. 2183 */ 2184 vrele(vp); 2185 fsetfd(fdp, NULL, indx); 2186 fdrop(fp); 2187 return (error); 2188 } 2189 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2190 } 2191 #if 0 2192 /* 2193 * Assert that all regular file vnodes were created with a object. 2194 */ 2195 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2196 ("open: regular file has no backing object after vn_open")); 2197 #endif 2198 2199 vrele(vp); 2200 2201 /* 2202 * release our private reference, leaving the one associated with the 2203 * descriptor table intact. 2204 */ 2205 if (oflags & O_CLOEXEC) 2206 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2207 fsetfd(fdp, fp, indx); 2208 fdrop(fp); 2209 *res = indx; 2210 2211 return (error); 2212 } 2213 2214 /* 2215 * open_args(char *path, int flags, int mode) 2216 * 2217 * Check permissions, allocate an open file structure, 2218 * and call the device open routine if any. 
2219 */ 2220 int 2221 sys_open(struct open_args *uap) 2222 { 2223 struct nlookupdata nd; 2224 int error; 2225 2226 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2227 if (error == 0) { 2228 error = kern_open(&nd, uap->flags, 2229 uap->mode, &uap->sysmsg_result); 2230 } 2231 nlookup_done(&nd); 2232 return (error); 2233 } 2234 2235 /* 2236 * openat_args(int fd, char *path, int flags, int mode) 2237 */ 2238 int 2239 sys_openat(struct openat_args *uap) 2240 { 2241 struct nlookupdata nd; 2242 int error; 2243 struct file *fp; 2244 2245 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2246 if (error == 0) { 2247 error = kern_open(&nd, uap->flags, uap->mode, 2248 &uap->sysmsg_result); 2249 } 2250 nlookup_done_at(&nd, fp); 2251 return (error); 2252 } 2253 2254 int 2255 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2256 { 2257 struct thread *td = curthread; 2258 struct proc *p = td->td_proc; 2259 struct vnode *vp; 2260 struct vattr vattr; 2261 int error; 2262 int whiteout = 0; 2263 2264 KKASSERT(p); 2265 2266 VATTR_NULL(&vattr); 2267 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2268 vattr.va_rmajor = rmajor; 2269 vattr.va_rminor = rminor; 2270 2271 switch (mode & S_IFMT) { 2272 case S_IFMT: /* used by badsect to flag bad sectors */ 2273 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2274 vattr.va_type = VBAD; 2275 break; 2276 case S_IFCHR: 2277 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2278 vattr.va_type = VCHR; 2279 break; 2280 case S_IFBLK: 2281 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2282 vattr.va_type = VBLK; 2283 break; 2284 case S_IFWHT: 2285 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2286 whiteout = 1; 2287 break; 2288 case S_IFDIR: /* special directories support for HAMMER */ 2289 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2290 vattr.va_type = VDIR; 2291 break; 2292 default: 2293 error = EINVAL; 2294 break; 2295 } 2296 2297 if (error) 2298 
return (error); 2299 2300 bwillinode(1); 2301 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2302 if ((error = nlookup(nd)) != 0) 2303 return (error); 2304 if (nd->nl_nch.ncp->nc_vp) 2305 return (EEXIST); 2306 if (nd->nl_dvp == NULL) 2307 return (EINVAL); 2308 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2309 return (error); 2310 2311 if (whiteout) { 2312 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2313 nd->nl_cred, NAMEI_CREATE); 2314 } else { 2315 vp = NULL; 2316 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2317 &vp, nd->nl_cred, &vattr); 2318 if (error == 0) 2319 vput(vp); 2320 } 2321 return (error); 2322 } 2323 2324 /* 2325 * mknod_args(char *path, int mode, int dev) 2326 * 2327 * Create a special file. 2328 */ 2329 int 2330 sys_mknod(struct mknod_args *uap) 2331 { 2332 struct nlookupdata nd; 2333 int error; 2334 2335 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2336 if (error == 0) { 2337 error = kern_mknod(&nd, uap->mode, 2338 umajor(uap->dev), uminor(uap->dev)); 2339 } 2340 nlookup_done(&nd); 2341 return (error); 2342 } 2343 2344 /* 2345 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2346 * 2347 * Create a special file. The path is relative to the directory associated 2348 * with fd. 
2349 */ 2350 int 2351 sys_mknodat(struct mknodat_args *uap) 2352 { 2353 struct nlookupdata nd; 2354 struct file *fp; 2355 int error; 2356 2357 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2358 if (error == 0) { 2359 error = kern_mknod(&nd, uap->mode, 2360 umajor(uap->dev), uminor(uap->dev)); 2361 } 2362 nlookup_done_at(&nd, fp); 2363 return (error); 2364 } 2365 2366 int 2367 kern_mkfifo(struct nlookupdata *nd, int mode) 2368 { 2369 struct thread *td = curthread; 2370 struct proc *p = td->td_proc; 2371 struct vattr vattr; 2372 struct vnode *vp; 2373 int error; 2374 2375 bwillinode(1); 2376 2377 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2378 if ((error = nlookup(nd)) != 0) 2379 return (error); 2380 if (nd->nl_nch.ncp->nc_vp) 2381 return (EEXIST); 2382 if (nd->nl_dvp == NULL) 2383 return (EINVAL); 2384 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2385 return (error); 2386 2387 VATTR_NULL(&vattr); 2388 vattr.va_type = VFIFO; 2389 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2390 vp = NULL; 2391 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2392 if (error == 0) 2393 vput(vp); 2394 return (error); 2395 } 2396 2397 /* 2398 * mkfifo_args(char *path, int mode) 2399 * 2400 * Create a named pipe. 2401 */ 2402 int 2403 sys_mkfifo(struct mkfifo_args *uap) 2404 { 2405 struct nlookupdata nd; 2406 int error; 2407 2408 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2409 if (error == 0) 2410 error = kern_mkfifo(&nd, uap->mode); 2411 nlookup_done(&nd); 2412 return (error); 2413 } 2414 2415 /* 2416 * mkfifoat_args(int fd, char *path, mode_t mode) 2417 * 2418 * Create a named pipe. The path is relative to the directory associated 2419 * with fd. 
2420 */ 2421 int 2422 sys_mkfifoat(struct mkfifoat_args *uap) 2423 { 2424 struct nlookupdata nd; 2425 struct file *fp; 2426 int error; 2427 2428 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2429 if (error == 0) 2430 error = kern_mkfifo(&nd, uap->mode); 2431 nlookup_done_at(&nd, fp); 2432 return (error); 2433 } 2434 2435 static int hardlink_check_uid = 0; 2436 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2437 &hardlink_check_uid, 0, 2438 "Unprivileged processes cannot create hard links to files owned by other " 2439 "users"); 2440 static int hardlink_check_gid = 0; 2441 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2442 &hardlink_check_gid, 0, 2443 "Unprivileged processes cannot create hard links to files owned by other " 2444 "groups"); 2445 2446 static int 2447 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2448 { 2449 struct vattr va; 2450 int error; 2451 2452 /* 2453 * Shortcut if disabled 2454 */ 2455 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2456 return (0); 2457 2458 /* 2459 * Privileged user can always hardlink 2460 */ 2461 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2462 return (0); 2463 2464 /* 2465 * Otherwise only if the originating file is owned by the 2466 * same user or group. Note that any group is allowed if 2467 * the file is owned by the caller. 2468 */ 2469 error = VOP_GETATTR(vp, &va); 2470 if (error != 0) 2471 return (error); 2472 2473 if (hardlink_check_uid) { 2474 if (cred->cr_uid != va.va_uid) 2475 return (EPERM); 2476 } 2477 2478 if (hardlink_check_gid) { 2479 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2480 return (EPERM); 2481 } 2482 2483 return (0); 2484 } 2485 2486 int 2487 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2488 { 2489 struct thread *td = curthread; 2490 struct vnode *vp; 2491 int error; 2492 2493 /* 2494 * Lookup the source and obtained a locked vnode. 
2495 * 2496 * You may only hardlink a file which you have write permission 2497 * on or which you own. 2498 * 2499 * XXX relookup on vget failure / race ? 2500 */ 2501 bwillinode(1); 2502 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2503 if ((error = nlookup(nd)) != 0) 2504 return (error); 2505 vp = nd->nl_nch.ncp->nc_vp; 2506 KKASSERT(vp != NULL); 2507 if (vp->v_type == VDIR) 2508 return (EPERM); /* POSIX */ 2509 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2510 return (error); 2511 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2512 return (error); 2513 2514 /* 2515 * Unlock the source so we can lookup the target without deadlocking 2516 * (XXX vp is locked already, possible other deadlock?). The target 2517 * must not exist. 2518 */ 2519 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2520 nd->nl_flags &= ~NLC_NCPISLOCKED; 2521 cache_unlock(&nd->nl_nch); 2522 vn_unlock(vp); 2523 2524 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2525 if ((error = nlookup(linknd)) != 0) { 2526 vrele(vp); 2527 return (error); 2528 } 2529 if (linknd->nl_nch.ncp->nc_vp) { 2530 vrele(vp); 2531 return (EEXIST); 2532 } 2533 if (linknd->nl_dvp == NULL) { 2534 vrele(vp); 2535 return (EINVAL); 2536 } 2537 VFS_MODIFYING(vp->v_mount); 2538 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2539 if (error) { 2540 vrele(vp); 2541 return (error); 2542 } 2543 2544 /* 2545 * Finally run the new API VOP. 2546 */ 2547 error = can_hardlink(vp, td, td->td_ucred); 2548 if (error == 0) { 2549 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2550 vp, linknd->nl_cred); 2551 } 2552 vput(vp); 2553 return (error); 2554 } 2555 2556 /* 2557 * link_args(char *path, char *link) 2558 * 2559 * Make a hard file link. 
2560 */ 2561 int 2562 sys_link(struct link_args *uap) 2563 { 2564 struct nlookupdata nd, linknd; 2565 int error; 2566 2567 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2568 if (error == 0) { 2569 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2570 if (error == 0) 2571 error = kern_link(&nd, &linknd); 2572 nlookup_done(&linknd); 2573 } 2574 nlookup_done(&nd); 2575 return (error); 2576 } 2577 2578 /* 2579 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2580 * 2581 * Make a hard file link. The path1 argument is relative to the directory 2582 * associated with fd1, and similarly the path2 argument is relative to 2583 * the directory associated with fd2. 2584 */ 2585 int 2586 sys_linkat(struct linkat_args *uap) 2587 { 2588 struct nlookupdata nd, linknd; 2589 struct file *fp1, *fp2; 2590 int error; 2591 2592 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2593 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2594 if (error == 0) { 2595 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2596 uap->path2, UIO_USERSPACE, 0); 2597 if (error == 0) 2598 error = kern_link(&nd, &linknd); 2599 nlookup_done_at(&linknd, fp2); 2600 } 2601 nlookup_done_at(&nd, fp1); 2602 return (error); 2603 } 2604 2605 int 2606 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2607 { 2608 struct vattr vattr; 2609 struct vnode *vp; 2610 struct vnode *dvp; 2611 int error; 2612 2613 bwillinode(1); 2614 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2615 if ((error = nlookup(nd)) != 0) 2616 return (error); 2617 if (nd->nl_nch.ncp->nc_vp) 2618 return (EEXIST); 2619 if (nd->nl_dvp == NULL) 2620 return (EINVAL); 2621 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2622 return (error); 2623 dvp = nd->nl_dvp; 2624 VATTR_NULL(&vattr); 2625 vattr.va_mode = mode; 2626 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2627 if (error == 0) 2628 vput(vp); 2629 return (error); 2630 } 2631 2632 /* 2633 * symlink(char 
*path, char *link) 2634 * 2635 * Make a symbolic link. 2636 */ 2637 int 2638 sys_symlink(struct symlink_args *uap) 2639 { 2640 struct thread *td = curthread; 2641 struct nlookupdata nd; 2642 char *path; 2643 int error; 2644 int mode; 2645 2646 path = objcache_get(namei_oc, M_WAITOK); 2647 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2648 if (error == 0) { 2649 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2650 if (error == 0) { 2651 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2652 error = kern_symlink(&nd, path, mode); 2653 } 2654 nlookup_done(&nd); 2655 } 2656 objcache_put(namei_oc, path); 2657 return (error); 2658 } 2659 2660 /* 2661 * symlinkat_args(char *path1, int fd, char *path2) 2662 * 2663 * Make a symbolic link. The path2 argument is relative to the directory 2664 * associated with fd. 2665 */ 2666 int 2667 sys_symlinkat(struct symlinkat_args *uap) 2668 { 2669 struct thread *td = curthread; 2670 struct nlookupdata nd; 2671 struct file *fp; 2672 char *path1; 2673 int error; 2674 int mode; 2675 2676 path1 = objcache_get(namei_oc, M_WAITOK); 2677 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2678 if (error == 0) { 2679 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2680 UIO_USERSPACE, 0); 2681 if (error == 0) { 2682 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2683 error = kern_symlink(&nd, path1, mode); 2684 } 2685 nlookup_done_at(&nd, fp); 2686 } 2687 objcache_put(namei_oc, path1); 2688 return (error); 2689 } 2690 2691 /* 2692 * undelete_args(char *path) 2693 * 2694 * Delete a whiteout from the filesystem. 
2695 */ 2696 int 2697 sys_undelete(struct undelete_args *uap) 2698 { 2699 struct nlookupdata nd; 2700 int error; 2701 2702 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2703 bwillinode(1); 2704 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2705 if (error == 0) 2706 error = nlookup(&nd); 2707 if (error == 0 && nd.nl_dvp == NULL) 2708 error = EINVAL; 2709 if (error == 0) 2710 error = ncp_writechk(&nd.nl_nch); 2711 if (error == 0) { 2712 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2713 NAMEI_DELETE); 2714 } 2715 nlookup_done(&nd); 2716 return (error); 2717 } 2718 2719 int 2720 kern_unlink(struct nlookupdata *nd) 2721 { 2722 int error; 2723 2724 bwillinode(1); 2725 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2726 if ((error = nlookup(nd)) != 0) 2727 return (error); 2728 if (nd->nl_dvp == NULL) 2729 return EINVAL; 2730 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2731 return (error); 2732 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2733 return (error); 2734 } 2735 2736 /* 2737 * unlink_args(char *path) 2738 * 2739 * Delete a name from the filesystem. 2740 */ 2741 int 2742 sys_unlink(struct unlink_args *uap) 2743 { 2744 struct nlookupdata nd; 2745 int error; 2746 2747 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2748 if (error == 0) 2749 error = kern_unlink(&nd); 2750 nlookup_done(&nd); 2751 return (error); 2752 } 2753 2754 2755 /* 2756 * unlinkat_args(int fd, char *path, int flags) 2757 * 2758 * Delete the file or directory entry pointed to by fd/path. 
2759 */ 2760 int 2761 sys_unlinkat(struct unlinkat_args *uap) 2762 { 2763 struct nlookupdata nd; 2764 struct file *fp; 2765 int error; 2766 2767 if (uap->flags & ~AT_REMOVEDIR) 2768 return (EINVAL); 2769 2770 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2771 if (error == 0) { 2772 if (uap->flags & AT_REMOVEDIR) 2773 error = kern_rmdir(&nd); 2774 else 2775 error = kern_unlink(&nd); 2776 } 2777 nlookup_done_at(&nd, fp); 2778 return (error); 2779 } 2780 2781 int 2782 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2783 { 2784 struct thread *td = curthread; 2785 struct file *fp; 2786 struct vnode *vp; 2787 struct vattr vattr; 2788 off_t new_offset; 2789 int error; 2790 2791 fp = holdfp(td, fd, -1); 2792 if (fp == NULL) 2793 return (EBADF); 2794 if (fp->f_type != DTYPE_VNODE) { 2795 error = ESPIPE; 2796 goto done; 2797 } 2798 vp = (struct vnode *)fp->f_data; 2799 2800 switch (whence) { 2801 case L_INCR: 2802 spin_lock(&fp->f_spin); 2803 new_offset = fp->f_offset + offset; 2804 error = 0; 2805 break; 2806 case L_XTND: 2807 error = VOP_GETATTR_FP(vp, &vattr, fp); 2808 spin_lock(&fp->f_spin); 2809 new_offset = offset + vattr.va_size; 2810 break; 2811 case L_SET: 2812 new_offset = offset; 2813 error = 0; 2814 spin_lock(&fp->f_spin); 2815 break; 2816 default: 2817 new_offset = 0; 2818 error = EINVAL; 2819 spin_lock(&fp->f_spin); 2820 break; 2821 } 2822 2823 /* 2824 * Validate the seek position. Negative offsets are not allowed 2825 * for regular files or directories. 2826 * 2827 * Normally we would also not want to allow negative offsets for 2828 * character and block-special devices. However kvm addresses 2829 * on 64 bit architectures might appear to be negative and must 2830 * be allowed. 
2831 */ 2832 if (error == 0) { 2833 if (new_offset < 0 && 2834 (vp->v_type == VREG || vp->v_type == VDIR)) { 2835 error = EINVAL; 2836 } else { 2837 fp->f_offset = new_offset; 2838 } 2839 } 2840 *res = fp->f_offset; 2841 spin_unlock(&fp->f_spin); 2842 done: 2843 dropfp(td, fd, fp); 2844 2845 return (error); 2846 } 2847 2848 /* 2849 * lseek_args(int fd, int pad, off_t offset, int whence) 2850 * 2851 * Reposition read/write file offset. 2852 */ 2853 int 2854 sys_lseek(struct lseek_args *uap) 2855 { 2856 int error; 2857 2858 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2859 &uap->sysmsg_offset); 2860 2861 return (error); 2862 } 2863 2864 /* 2865 * Check if current process can access given file. amode is a bitmask of *_OK 2866 * access bits. flags is a bitmask of AT_* flags. 2867 */ 2868 int 2869 kern_access(struct nlookupdata *nd, int amode, int flags) 2870 { 2871 struct vnode *vp; 2872 int error, mode; 2873 2874 if (flags & ~AT_EACCESS) 2875 return (EINVAL); 2876 nd->nl_flags |= NLC_SHAREDLOCK; 2877 if ((error = nlookup(nd)) != 0) 2878 return (error); 2879 if ((amode & W_OK) && (error = ncp_writechk(&nd->nl_nch)) != 0) 2880 return (error); 2881 retry: 2882 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2883 if (error) 2884 return (error); 2885 2886 /* Flags == 0 means only check for existence. */ 2887 if (amode) { 2888 mode = 0; 2889 if (amode & R_OK) 2890 mode |= VREAD; 2891 if (amode & W_OK) 2892 mode |= VWRITE; 2893 if (amode & X_OK) 2894 mode |= VEXEC; 2895 if ((mode & VWRITE) == 0 || 2896 (error = vn_writechk(vp)) == 0) { 2897 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2898 } 2899 2900 /* 2901 * If the file handle is stale we have to re-resolve the 2902 * entry with the ncp held exclusively. This is a hack 2903 * at the moment. 
2904 */ 2905 if (error == ESTALE) { 2906 vput(vp); 2907 cache_unlock(&nd->nl_nch); 2908 cache_lock(&nd->nl_nch); 2909 cache_setunresolved(&nd->nl_nch); 2910 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2911 if (error == 0) { 2912 vp = NULL; 2913 goto retry; 2914 } 2915 return(error); 2916 } 2917 } 2918 vput(vp); 2919 return (error); 2920 } 2921 2922 /* 2923 * access_args(char *path, int flags) 2924 * 2925 * Check access permissions. 2926 */ 2927 int 2928 sys_access(struct access_args *uap) 2929 { 2930 struct nlookupdata nd; 2931 int error; 2932 2933 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2934 if (error == 0) 2935 error = kern_access(&nd, uap->flags, 0); 2936 nlookup_done(&nd); 2937 return (error); 2938 } 2939 2940 2941 /* 2942 * eaccess_args(char *path, int flags) 2943 * 2944 * Check access permissions. 2945 */ 2946 int 2947 sys_eaccess(struct eaccess_args *uap) 2948 { 2949 struct nlookupdata nd; 2950 int error; 2951 2952 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2953 if (error == 0) 2954 error = kern_access(&nd, uap->flags, AT_EACCESS); 2955 nlookup_done(&nd); 2956 return (error); 2957 } 2958 2959 2960 /* 2961 * faccessat_args(int fd, char *path, int amode, int flags) 2962 * 2963 * Check access permissions. 
2964 */ 2965 int 2966 sys_faccessat(struct faccessat_args *uap) 2967 { 2968 struct nlookupdata nd; 2969 struct file *fp; 2970 int error; 2971 2972 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2973 NLC_FOLLOW); 2974 if (error == 0) 2975 error = kern_access(&nd, uap->amode, uap->flags); 2976 nlookup_done_at(&nd, fp); 2977 return (error); 2978 } 2979 2980 int 2981 kern_stat(struct nlookupdata *nd, struct stat *st) 2982 { 2983 int error; 2984 struct vnode *vp; 2985 2986 nd->nl_flags |= NLC_SHAREDLOCK; 2987 if ((error = nlookup(nd)) != 0) 2988 return (error); 2989 again: 2990 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2991 return (ENOENT); 2992 2993 #if 1 2994 error = cache_vref(&nd->nl_nch, NULL, &vp); 2995 #else 2996 error = vget(vp, LK_SHARED); 2997 #endif 2998 if (error) 2999 return (error); 3000 error = vn_stat(vp, st, nd->nl_cred); 3001 3002 /* 3003 * If the file handle is stale we have to re-resolve the 3004 * entry with the ncp held exclusively. This is a hack 3005 * at the moment. 3006 */ 3007 if (error == ESTALE) { 3008 #if 1 3009 vrele(vp); 3010 #else 3011 vput(vp); 3012 #endif 3013 cache_unlock(&nd->nl_nch); 3014 cache_lock(&nd->nl_nch); 3015 cache_setunresolved(&nd->nl_nch); 3016 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 3017 if (error == 0) 3018 goto again; 3019 } else { 3020 #if 1 3021 vrele(vp); 3022 #else 3023 vput(vp); 3024 #endif 3025 } 3026 return (error); 3027 } 3028 3029 /* 3030 * stat_args(char *path, struct stat *ub) 3031 * 3032 * Get file status; this version follows links. 
3033 */ 3034 int 3035 sys_stat(struct stat_args *uap) 3036 { 3037 struct nlookupdata nd; 3038 struct stat st; 3039 int error; 3040 3041 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3042 if (error == 0) { 3043 error = kern_stat(&nd, &st); 3044 if (error == 0) 3045 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3046 } 3047 nlookup_done(&nd); 3048 return (error); 3049 } 3050 3051 /* 3052 * lstat_args(char *path, struct stat *ub) 3053 * 3054 * Get file status; this version does not follow links. 3055 */ 3056 int 3057 sys_lstat(struct lstat_args *uap) 3058 { 3059 struct nlookupdata nd; 3060 struct stat st; 3061 int error; 3062 3063 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3064 if (error == 0) { 3065 error = kern_stat(&nd, &st); 3066 if (error == 0) 3067 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 3068 } 3069 nlookup_done(&nd); 3070 return (error); 3071 } 3072 3073 /* 3074 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 3075 * 3076 * Get status of file pointed to by fd/path. 3077 */ 3078 int 3079 sys_fstatat(struct fstatat_args *uap) 3080 { 3081 struct nlookupdata nd; 3082 struct stat st; 3083 int error; 3084 int flags; 3085 struct file *fp; 3086 3087 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3088 return (EINVAL); 3089 3090 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 
0 : NLC_FOLLOW; 3091 3092 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3093 UIO_USERSPACE, flags); 3094 if (error == 0) { 3095 error = kern_stat(&nd, &st); 3096 if (error == 0) 3097 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 3098 } 3099 nlookup_done_at(&nd, fp); 3100 return (error); 3101 } 3102 3103 static int 3104 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 3105 { 3106 struct nlookupdata nd; 3107 struct vnode *vp; 3108 int error; 3109 3110 vp = NULL; 3111 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 3112 if (error == 0) 3113 error = nlookup(&nd); 3114 if (error == 0) 3115 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 3116 nlookup_done(&nd); 3117 if (error == 0) { 3118 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3119 vput(vp); 3120 } 3121 return (error); 3122 } 3123 3124 /* 3125 * pathconf_Args(char *path, int name) 3126 * 3127 * Get configurable pathname variables. 3128 */ 3129 int 3130 sys_pathconf(struct pathconf_args *uap) 3131 { 3132 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3133 &uap->sysmsg_reg)); 3134 } 3135 3136 /* 3137 * lpathconf_Args(char *path, int name) 3138 * 3139 * Get configurable pathname variables, but don't follow symlinks. 3140 */ 3141 int 3142 sys_lpathconf(struct lpathconf_args *uap) 3143 { 3144 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3145 } 3146 3147 /* 3148 * XXX: daver 3149 * kern_readlink isn't properly split yet. There is a copyin burried 3150 * in VOP_READLINK(). 
3151 */ 3152 int 3153 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3154 { 3155 struct thread *td = curthread; 3156 struct vnode *vp; 3157 struct iovec aiov; 3158 struct uio auio; 3159 int error; 3160 3161 nd->nl_flags |= NLC_SHAREDLOCK; 3162 if ((error = nlookup(nd)) != 0) 3163 return (error); 3164 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3165 if (error) 3166 return (error); 3167 if (vp->v_type != VLNK) { 3168 error = EINVAL; 3169 } else { 3170 aiov.iov_base = buf; 3171 aiov.iov_len = count; 3172 auio.uio_iov = &aiov; 3173 auio.uio_iovcnt = 1; 3174 auio.uio_offset = 0; 3175 auio.uio_rw = UIO_READ; 3176 auio.uio_segflg = UIO_USERSPACE; 3177 auio.uio_td = td; 3178 auio.uio_resid = count; 3179 error = VOP_READLINK(vp, &auio, td->td_ucred); 3180 } 3181 vput(vp); 3182 *res = count - auio.uio_resid; 3183 return (error); 3184 } 3185 3186 /* 3187 * readlink_args(char *path, char *buf, int count) 3188 * 3189 * Return target name of a symbolic link. 3190 */ 3191 int 3192 sys_readlink(struct readlink_args *uap) 3193 { 3194 struct nlookupdata nd; 3195 int error; 3196 3197 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3198 if (error == 0) { 3199 error = kern_readlink(&nd, uap->buf, uap->count, 3200 &uap->sysmsg_result); 3201 } 3202 nlookup_done(&nd); 3203 return (error); 3204 } 3205 3206 /* 3207 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3208 * 3209 * Return target name of a symbolic link. The path is relative to the 3210 * directory associated with fd. 
3211 */ 3212 int 3213 sys_readlinkat(struct readlinkat_args *uap) 3214 { 3215 struct nlookupdata nd; 3216 struct file *fp; 3217 int error; 3218 3219 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3220 if (error == 0) { 3221 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3222 &uap->sysmsg_result); 3223 } 3224 nlookup_done_at(&nd, fp); 3225 return (error); 3226 } 3227 3228 static int 3229 setfflags(struct vnode *vp, u_long flags) 3230 { 3231 struct thread *td = curthread; 3232 int error; 3233 struct vattr vattr; 3234 3235 /* 3236 * Prevent non-root users from setting flags on devices. When 3237 * a device is reused, users can retain ownership of the device 3238 * if they are allowed to set flags and programs assume that 3239 * chown can't fail when done as root. 3240 */ 3241 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3242 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3243 return (error); 3244 3245 /* 3246 * note: vget is required for any operation that might mod the vnode 3247 * so VINACTIVE is properly cleared. 3248 */ 3249 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3250 VATTR_NULL(&vattr); 3251 vattr.va_flags = flags; 3252 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3253 vput(vp); 3254 } 3255 return (error); 3256 } 3257 3258 /* 3259 * chflags(const char *path, u_long flags) 3260 * 3261 * Change flags of a file given a path name. 
3262 */ 3263 int 3264 sys_chflags(struct chflags_args *uap) 3265 { 3266 struct nlookupdata nd; 3267 struct vnode *vp; 3268 int error; 3269 3270 vp = NULL; 3271 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3272 if (error == 0) 3273 error = nlookup(&nd); 3274 if (error == 0) 3275 error = ncp_writechk(&nd.nl_nch); 3276 if (error == 0) 3277 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3278 nlookup_done(&nd); 3279 if (error == 0) { 3280 error = setfflags(vp, uap->flags); 3281 vrele(vp); 3282 } 3283 return (error); 3284 } 3285 3286 /* 3287 * lchflags(const char *path, u_long flags) 3288 * 3289 * Change flags of a file given a path name, but don't follow symlinks. 3290 */ 3291 int 3292 sys_lchflags(struct lchflags_args *uap) 3293 { 3294 struct nlookupdata nd; 3295 struct vnode *vp; 3296 int error; 3297 3298 vp = NULL; 3299 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3300 if (error == 0) 3301 error = nlookup(&nd); 3302 if (error == 0) 3303 error = ncp_writechk(&nd.nl_nch); 3304 if (error == 0) 3305 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3306 nlookup_done(&nd); 3307 if (error == 0) { 3308 error = setfflags(vp, uap->flags); 3309 vrele(vp); 3310 } 3311 return (error); 3312 } 3313 3314 /* 3315 * fchflags_args(int fd, u_flags flags) 3316 * 3317 * Change flags of a file given a file descriptor. 
3318 */ 3319 int 3320 sys_fchflags(struct fchflags_args *uap) 3321 { 3322 struct thread *td = curthread; 3323 struct file *fp; 3324 int error; 3325 3326 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3327 return (error); 3328 if (fp->f_nchandle.ncp) 3329 error = ncp_writechk(&fp->f_nchandle); 3330 if (error == 0) 3331 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3332 fdrop(fp); 3333 return (error); 3334 } 3335 3336 /* 3337 * chflagsat_args(int fd, const char *path, u_long flags, int atflags) 3338 * change flags given a pathname relative to a filedescriptor 3339 */ 3340 int sys_chflagsat(struct chflagsat_args *uap) 3341 { 3342 struct nlookupdata nd; 3343 struct vnode *vp; 3344 struct file *fp; 3345 int error; 3346 int lookupflags; 3347 3348 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3349 return (EINVAL); 3350 3351 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3352 3353 vp = NULL; 3354 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3355 if (error == 0) 3356 error = nlookup(&nd); 3357 if (error == 0) 3358 error = ncp_writechk(&nd.nl_nch); 3359 if (error == 0) 3360 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3361 nlookup_done_at(&nd, fp); 3362 if (error == 0) { 3363 error = setfflags(vp, uap->flags); 3364 vrele(vp); 3365 } 3366 return (error); 3367 } 3368 3369 3370 static int 3371 setfmode(struct vnode *vp, int mode) 3372 { 3373 struct thread *td = curthread; 3374 int error; 3375 struct vattr vattr; 3376 3377 /* 3378 * note: vget is required for any operation that might mod the vnode 3379 * so VINACTIVE is properly cleared. 
3380 */ 3381 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3382 VATTR_NULL(&vattr); 3383 vattr.va_mode = mode & ALLPERMS; 3384 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3385 cache_inval_wxok(vp); 3386 vput(vp); 3387 } 3388 return error; 3389 } 3390 3391 int 3392 kern_chmod(struct nlookupdata *nd, int mode) 3393 { 3394 struct vnode *vp; 3395 int error; 3396 3397 if ((error = nlookup(nd)) != 0) 3398 return (error); 3399 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3400 return (error); 3401 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3402 error = setfmode(vp, mode); 3403 vrele(vp); 3404 return (error); 3405 } 3406 3407 /* 3408 * chmod_args(char *path, int mode) 3409 * 3410 * Change mode of a file given path name. 3411 */ 3412 int 3413 sys_chmod(struct chmod_args *uap) 3414 { 3415 struct nlookupdata nd; 3416 int error; 3417 3418 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3419 if (error == 0) 3420 error = kern_chmod(&nd, uap->mode); 3421 nlookup_done(&nd); 3422 return (error); 3423 } 3424 3425 /* 3426 * lchmod_args(char *path, int mode) 3427 * 3428 * Change mode of a file given path name (don't follow links.) 3429 */ 3430 int 3431 sys_lchmod(struct lchmod_args *uap) 3432 { 3433 struct nlookupdata nd; 3434 int error; 3435 3436 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3437 if (error == 0) 3438 error = kern_chmod(&nd, uap->mode); 3439 nlookup_done(&nd); 3440 return (error); 3441 } 3442 3443 /* 3444 * fchmod_args(int fd, int mode) 3445 * 3446 * Change mode of a file given a file descriptor. 
3447 */ 3448 int 3449 sys_fchmod(struct fchmod_args *uap) 3450 { 3451 struct thread *td = curthread; 3452 struct file *fp; 3453 int error; 3454 3455 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3456 return (error); 3457 if (fp->f_nchandle.ncp) 3458 error = ncp_writechk(&fp->f_nchandle); 3459 if (error == 0) 3460 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3461 fdrop(fp); 3462 return (error); 3463 } 3464 3465 /* 3466 * fchmodat_args(char *path, int mode) 3467 * 3468 * Change mode of a file pointed to by fd/path. 3469 */ 3470 int 3471 sys_fchmodat(struct fchmodat_args *uap) 3472 { 3473 struct nlookupdata nd; 3474 struct file *fp; 3475 int error; 3476 int flags; 3477 3478 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3479 return (EINVAL); 3480 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3481 3482 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3483 UIO_USERSPACE, flags); 3484 if (error == 0) 3485 error = kern_chmod(&nd, uap->mode); 3486 nlookup_done_at(&nd, fp); 3487 return (error); 3488 } 3489 3490 static int 3491 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3492 { 3493 struct thread *td = curthread; 3494 int error; 3495 struct vattr vattr; 3496 uid_t o_uid; 3497 gid_t o_gid; 3498 uint64_t size; 3499 3500 /* 3501 * note: vget is required for any operation that might mod the vnode 3502 * so VINACTIVE is properly cleared. 
3503 */ 3504 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3505 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3506 return error; 3507 o_uid = vattr.va_uid; 3508 o_gid = vattr.va_gid; 3509 size = vattr.va_size; 3510 3511 VATTR_NULL(&vattr); 3512 vattr.va_uid = uid; 3513 vattr.va_gid = gid; 3514 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3515 vput(vp); 3516 } 3517 3518 if (error == 0) { 3519 if (uid == -1) 3520 uid = o_uid; 3521 if (gid == -1) 3522 gid = o_gid; 3523 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3524 VFS_ACCOUNT(mp, uid, gid, size); 3525 } 3526 3527 return error; 3528 } 3529 3530 int 3531 kern_chown(struct nlookupdata *nd, int uid, int gid) 3532 { 3533 struct vnode *vp; 3534 int error; 3535 3536 if ((error = nlookup(nd)) != 0) 3537 return (error); 3538 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3539 return (error); 3540 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3541 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3542 vrele(vp); 3543 return (error); 3544 } 3545 3546 /* 3547 * chown(char *path, int uid, int gid) 3548 * 3549 * Set ownership given a path name. 3550 */ 3551 int 3552 sys_chown(struct chown_args *uap) 3553 { 3554 struct nlookupdata nd; 3555 int error; 3556 3557 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3558 if (error == 0) 3559 error = kern_chown(&nd, uap->uid, uap->gid); 3560 nlookup_done(&nd); 3561 return (error); 3562 } 3563 3564 /* 3565 * lchown_args(char *path, int uid, int gid) 3566 * 3567 * Set ownership given a path name, do not cross symlinks. 3568 */ 3569 int 3570 sys_lchown(struct lchown_args *uap) 3571 { 3572 struct nlookupdata nd; 3573 int error; 3574 3575 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3576 if (error == 0) 3577 error = kern_chown(&nd, uap->uid, uap->gid); 3578 nlookup_done(&nd); 3579 return (error); 3580 } 3581 3582 /* 3583 * fchown_args(int fd, int uid, int gid) 3584 * 3585 * Set ownership given a file descriptor. 
3586 */ 3587 int 3588 sys_fchown(struct fchown_args *uap) 3589 { 3590 struct thread *td = curthread; 3591 struct proc *p = td->td_proc; 3592 struct file *fp; 3593 int error; 3594 3595 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3596 return (error); 3597 if (fp->f_nchandle.ncp) 3598 error = ncp_writechk(&fp->f_nchandle); 3599 if (error == 0) 3600 error = setfown(p->p_fd->fd_ncdir.mount, 3601 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3602 fdrop(fp); 3603 return (error); 3604 } 3605 3606 /* 3607 * fchownat(int fd, char *path, int uid, int gid, int flags) 3608 * 3609 * Set ownership of file pointed to by fd/path. 3610 */ 3611 int 3612 sys_fchownat(struct fchownat_args *uap) 3613 { 3614 struct nlookupdata nd; 3615 struct file *fp; 3616 int error; 3617 int flags; 3618 3619 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3620 return (EINVAL); 3621 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3622 3623 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3624 UIO_USERSPACE, flags); 3625 if (error == 0) 3626 error = kern_chown(&nd, uap->uid, uap->gid); 3627 nlookup_done_at(&nd, fp); 3628 return (error); 3629 } 3630 3631 3632 static int 3633 getutimes(struct timeval *tvp, struct timespec *tsp) 3634 { 3635 struct timeval tv[2]; 3636 int error; 3637 3638 if (tvp == NULL) { 3639 microtime(&tv[0]); 3640 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3641 tsp[1] = tsp[0]; 3642 } else { 3643 if ((error = itimerfix(tvp)) != 0) 3644 return (error); 3645 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3646 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3647 } 3648 return 0; 3649 } 3650 3651 static int 3652 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3653 { 3654 struct timespec tsnow; 3655 int error; 3656 3657 *nullflag = 0; 3658 nanotime(&tsnow); 3659 if (ts == NULL) { 3660 newts[0] = tsnow; 3661 newts[1] = tsnow; 3662 *nullflag = 1; 3663 return (0); 3664 } 3665 3666 newts[0] = ts[0]; 3667 newts[1] = ts[1]; 3668 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT) 3669 return (0); 3670 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3671 *nullflag = 1; 3672 3673 if (newts[0].tv_nsec == UTIME_OMIT) 3674 newts[0].tv_sec = VNOVAL; 3675 else if (newts[0].tv_nsec == UTIME_NOW) 3676 newts[0] = tsnow; 3677 else if ((error = itimespecfix(&newts[0])) != 0) 3678 return (error); 3679 3680 if (newts[1].tv_nsec == UTIME_OMIT) 3681 newts[1].tv_sec = VNOVAL; 3682 else if (newts[1].tv_nsec == UTIME_NOW) 3683 newts[1] = tsnow; 3684 else if ((error = itimespecfix(&newts[1])) != 0) 3685 return (error); 3686 3687 return (0); 3688 } 3689 3690 static int 3691 setutimes(struct vnode *vp, struct vattr *vattr, 3692 const struct timespec *ts, int nullflag) 3693 { 3694 struct thread *td = curthread; 3695 int error; 3696 3697 VATTR_NULL(vattr); 3698 vattr->va_atime = ts[0]; 3699 vattr->va_mtime = ts[1]; 3700 if (nullflag) 3701 vattr->va_vaflags |= VA_UTIMES_NULL; 3702 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3703 3704 return error; 3705 } 3706 3707 int 3708 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3709 { 3710 struct timespec ts[2]; 3711 int error; 3712 3713 if (tptr) { 3714 if ((error = getutimes(tptr, ts)) != 0) 3715 return (error); 3716 } 3717 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3718 return (error); 3719 } 3720 3721 /* 3722 * utimes_args(char *path, struct timeval *tptr) 3723 * 3724 * Set the access and modification times of a file. 3725 */ 3726 int 3727 sys_utimes(struct utimes_args *uap) 3728 { 3729 struct timeval tv[2]; 3730 struct nlookupdata nd; 3731 int error; 3732 3733 if (uap->tptr) { 3734 error = copyin(uap->tptr, tv, sizeof(tv)); 3735 if (error) 3736 return (error); 3737 } 3738 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3739 if (error == 0) 3740 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3741 nlookup_done(&nd); 3742 return (error); 3743 } 3744 3745 /* 3746 * lutimes_args(char *path, struct timeval *tptr) 3747 * 3748 * Set the access and modification times of a file. 3749 */ 3750 int 3751 sys_lutimes(struct lutimes_args *uap) 3752 { 3753 struct timeval tv[2]; 3754 struct nlookupdata nd; 3755 int error; 3756 3757 if (uap->tptr) { 3758 error = copyin(uap->tptr, tv, sizeof(tv)); 3759 if (error) 3760 return (error); 3761 } 3762 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3763 if (error == 0) 3764 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3765 nlookup_done(&nd); 3766 return (error); 3767 } 3768 3769 /* 3770 * Set utimes on a file descriptor. The creds used to open the 3771 * file are used to determine whether the operation is allowed 3772 * or not. 3773 */ 3774 int 3775 kern_futimens(int fd, struct timespec *ts) 3776 { 3777 struct thread *td = curthread; 3778 struct timespec newts[2]; 3779 struct file *fp; 3780 struct vnode *vp; 3781 struct vattr vattr; 3782 int nullflag; 3783 int error; 3784 3785 error = getutimens(ts, newts, &nullflag); 3786 if (error) 3787 return (error); 3788 if ((error = holdvnode(td, fd, &fp)) != 0) 3789 return (error); 3790 if (fp->f_nchandle.ncp) 3791 error = ncp_writechk(&fp->f_nchandle); 3792 if (error == 0) { 3793 vp = fp->f_data; 3794 error = vget(vp, LK_EXCLUSIVE); 3795 if (error == 0) { 3796 error = VOP_GETATTR_FP(vp, &vattr, fp); 3797 if (error == 0) { 3798 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3799 fp->f_cred); 3800 } 3801 if (error == 0) { 3802 error = setutimes(vp, &vattr, newts, nullflag); 3803 } 3804 vput(vp); 3805 } 3806 } 3807 fdrop(fp); 3808 return (error); 3809 } 3810 3811 /* 3812 * futimens_args(int fd, struct timespec *ts) 3813 * 3814 * Set the access and modification times of a file. 
3815 */ 3816 int 3817 sys_futimens(struct futimens_args *uap) 3818 { 3819 struct timespec ts[2]; 3820 int error; 3821 3822 if (uap->ts) { 3823 error = copyin(uap->ts, ts, sizeof(ts)); 3824 if (error) 3825 return (error); 3826 } 3827 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3828 return (error); 3829 } 3830 3831 int 3832 kern_futimes(int fd, struct timeval *tptr) 3833 { 3834 struct timespec ts[2]; 3835 int error; 3836 3837 if (tptr) { 3838 if ((error = getutimes(tptr, ts)) != 0) 3839 return (error); 3840 } 3841 error = kern_futimens(fd, tptr ? ts : NULL); 3842 return (error); 3843 } 3844 3845 /* 3846 * futimes_args(int fd, struct timeval *tptr) 3847 * 3848 * Set the access and modification times of a file. 3849 */ 3850 int 3851 sys_futimes(struct futimes_args *uap) 3852 { 3853 struct timeval tv[2]; 3854 int error; 3855 3856 if (uap->tptr) { 3857 error = copyin(uap->tptr, tv, sizeof(tv)); 3858 if (error) 3859 return (error); 3860 } 3861 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3862 return (error); 3863 } 3864 3865 int 3866 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3867 { 3868 struct timespec newts[2]; 3869 struct vnode *vp; 3870 struct vattr vattr; 3871 int nullflag; 3872 int error; 3873 3874 if (flags & ~AT_SYMLINK_NOFOLLOW) 3875 return (EINVAL); 3876 3877 error = getutimens(ts, newts, &nullflag); 3878 if (error) 3879 return (error); 3880 3881 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3882 if ((error = nlookup(nd)) != 0) 3883 return (error); 3884 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3885 return (error); 3886 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3887 return (error); 3888 if ((error = vn_writechk(vp)) == 0) { 3889 error = vget(vp, LK_EXCLUSIVE); 3890 if (error == 0) { 3891 error = setutimes(vp, &vattr, newts, nullflag); 3892 vput(vp); 3893 } 3894 } 3895 vrele(vp); 3896 return (error); 3897 } 3898 3899 /* 3900 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3901 * 3902 * Set file access and modification times of a file. 3903 */ 3904 int 3905 sys_utimensat(struct utimensat_args *uap) 3906 { 3907 struct timespec ts[2]; 3908 struct nlookupdata nd; 3909 struct file *fp; 3910 int error; 3911 int flags; 3912 3913 if (uap->ts) { 3914 error = copyin(uap->ts, ts, sizeof(ts)); 3915 if (error) 3916 return (error); 3917 } 3918 3919 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3920 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3921 UIO_USERSPACE, flags); 3922 if (error == 0) 3923 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3924 nlookup_done_at(&nd, fp); 3925 return (error); 3926 } 3927 3928 int 3929 kern_truncate(struct nlookupdata *nd, off_t length) 3930 { 3931 struct vnode *vp; 3932 struct vattr vattr; 3933 int error; 3934 uid_t uid = 0; 3935 gid_t gid = 0; 3936 uint64_t old_size = 0; 3937 3938 if (length < 0) 3939 return(EINVAL); 3940 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3941 if ((error = nlookup(nd)) != 0) 3942 return (error); 3943 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3944 return (error); 3945 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3946 return (error); 3947 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3948 if (error) { 3949 vrele(vp); 3950 return (error); 3951 } 3952 if (vp->v_type == VDIR) { 3953 error = EISDIR; 3954 goto done; 3955 } 3956 if (vfs_quota_enabled) { 3957 error = VOP_GETATTR(vp, &vattr); 3958 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3959 uid = vattr.va_uid; 3960 gid = vattr.va_gid; 3961 old_size = vattr.va_size; 3962 } 3963 3964 if ((error = vn_writechk(vp)) == 0) { 3965 VATTR_NULL(&vattr); 3966 vattr.va_size = length; 3967 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3968 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3969 } 3970 done: 3971 vput(vp); 3972 return (error); 3973 } 3974 3975 /* 3976 * truncate(char *path, int pad, off_t length) 3977 * 3978 * Truncate a file given its path name. 
3979 */ 3980 int 3981 sys_truncate(struct truncate_args *uap) 3982 { 3983 struct nlookupdata nd; 3984 int error; 3985 3986 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3987 if (error == 0) 3988 error = kern_truncate(&nd, uap->length); 3989 nlookup_done(&nd); 3990 return error; 3991 } 3992 3993 int 3994 kern_ftruncate(int fd, off_t length) 3995 { 3996 struct thread *td = curthread; 3997 struct vattr vattr; 3998 struct vnode *vp; 3999 struct file *fp; 4000 int error; 4001 uid_t uid = 0; 4002 gid_t gid = 0; 4003 uint64_t old_size = 0; 4004 struct mount *mp; 4005 4006 if (length < 0) 4007 return(EINVAL); 4008 if ((error = holdvnode(td, fd, &fp)) != 0) 4009 return (error); 4010 if (fp->f_nchandle.ncp) { 4011 error = ncp_writechk(&fp->f_nchandle); 4012 if (error) 4013 goto done; 4014 } 4015 if ((fp->f_flag & FWRITE) == 0) { 4016 error = EINVAL; 4017 goto done; 4018 } 4019 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 4020 error = EINVAL; 4021 goto done; 4022 } 4023 vp = (struct vnode *)fp->f_data; 4024 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4025 if (vp->v_type == VDIR) { 4026 error = EISDIR; 4027 vn_unlock(vp); 4028 goto done; 4029 } 4030 4031 if (vfs_quota_enabled) { 4032 error = VOP_GETATTR_FP(vp, &vattr, fp); 4033 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 4034 uid = vattr.va_uid; 4035 gid = vattr.va_gid; 4036 old_size = vattr.va_size; 4037 } 4038 4039 if ((error = vn_writechk(vp)) == 0) { 4040 VATTR_NULL(&vattr); 4041 vattr.va_size = length; 4042 error = VOP_SETATTR_FP(vp, &vattr, fp->f_cred, fp); 4043 mp = vq_vptomp(vp); 4044 VFS_ACCOUNT(mp, uid, gid, length - old_size); 4045 } 4046 vn_unlock(vp); 4047 done: 4048 fdrop(fp); 4049 return (error); 4050 } 4051 4052 /* 4053 * ftruncate_args(int fd, int pad, off_t length) 4054 * 4055 * Truncate a file given a file descriptor. 
/*
 * ftruncate_args(int fd, int pad, off_t length)
 *
 * Truncate a file given a file descriptor.  Thin wrapper around
 * kern_ftruncate().
 */
int
sys_ftruncate(struct ftruncate_args *uap)
{
	int error;

	error = kern_ftruncate(uap->fd, uap->length);

	return (error);
}

/*
 * fsync(int fd)
 *
 * Sync an open file.
 *
 * With the vnode exclusively locked, the VM object's pages (if any) are
 * cleaned first unless the mount is flagged MNTK_NOMSYNC, then the
 * filesystem's fsync is invoked, and finally buf_fsync() is run when the
 * vnode still has a mount and no error occurred.
 */
int
sys_fsync(struct fsync_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct file *fp;
	vm_object_t obj;
	int error;

	if ((error = holdvnode(td, uap->fd, &fp)) != 0)
		return (error);
	vp = (struct vnode *)fp->f_data;
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
	if ((obj = vp->v_object) != NULL) {
		/* A vnode without a mount is treated like a msync-able one. */
		if (vp->v_mount == NULL ||
		    (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) {
			vm_object_page_clean(obj, 0, 0, 0);
		}
	}
	error = VOP_FSYNC_FP(vp, MNT_WAIT, VOP_FSYNC_SYSCALL, fp);
	if (error == 0 && vp->v_mount)
		error = buf_fsync(vp);
	vn_unlock(vp);
	fdrop(fp);

	return (error);
}

/*
 * Rename the entry described by fromnd to the name described by tond.
 *
 * Both nlookupdata structures must be initialized by the caller; the
 * lookups themselves are performed here.  Returns 0 on success or an
 * errno.  EAGAIN is returned when the source or target namecache entry
 * changed while the source was unlocked; the sys_rename() and
 * sys_renameat() callers in this file retry on EAGAIN.
 *
 * The parent namecache handles (fnchd, tnchd) are held for the duration
 * of the validation steps and dropped on every exit path.
 */
int
kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond)
{
	struct nchandle fnchd;
	struct nchandle tnchd;
	struct namecache *ncp;
	struct vnode *fdvp;
	struct vnode *tdvp;
	struct mount *mp;
	int error;
	u_int fncp_gen;
	u_int tncp_gen;

	bwillinode(1);
	fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC;
	if ((error = nlookup(fromnd)) != 0)
		return (error);
	if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL)
		return (ENOENT);
	fnchd.mount = fromnd->nl_nch.mount;
	cache_hold(&fnchd);

	/*
	 * unlock the source nch so we can lookup the target nch without
	 * deadlocking.  The target may or may not exist so we do not check
	 * for a target vp like kern_mkdir() and other creation functions do.
	 *
	 * The source and target directories are ref'd and rechecked after
	 * everything is relocked to determine if the source or target file
	 * has been renamed.
	 */
	KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED);
	fromnd->nl_flags &= ~NLC_NCPISLOCKED;

	/* Snapshot the generation so a change while unlocked is detectable. */
	fncp_gen = fromnd->nl_nch.ncp->nc_generation;

	cache_unlock(&fromnd->nl_nch);

	tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP;
	if ((error = nlookup(tond)) != 0) {
		cache_drop(&fnchd);
		return (error);
	}
	tncp_gen = tond->nl_nch.ncp->nc_generation;

	if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) {
		cache_drop(&fnchd);
		return (ENOENT);
	}
	tnchd.mount = tond->nl_nch.mount;
	cache_hold(&tnchd);

	/*
	 * If the source and target are the same there is nothing to do
	 */
	if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (0);
	}

	/*
	 * Mount points cannot be renamed or overwritten
	 */
	if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) &
	    NCF_ISMOUNTPT
	) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EINVAL);
	}

	/*
	 * Relock the source ncp.  cache_relock() will deal with any
	 * deadlocks against the already-locked tond and will also
	 * make sure both are resolved.
	 *
	 * NOTE AFTER RELOCKING: The source or target ncp may have become
	 * invalid while they were unlocked, nc_vp and nc_mount could
	 * be NULL.
	 */
	cache_relock(&fromnd->nl_nch, fromnd->nl_cred,
		     &tond->nl_nch, tond->nl_cred);
	fromnd->nl_flags |= NLC_NCPISLOCKED;

	/*
	 * If the namecache generation changed for either fromnd or tond,
	 * we must retry.
	 */
	if (fromnd->nl_nch.ncp->nc_generation != fncp_gen ||
	    tond->nl_nch.ncp->nc_generation != tncp_gen) {
		kprintf("kern_rename: retry due to gen on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * If either fromnd or tond are marked destroyed a ripout occurred
	 * out from under us and we must retry.
	 */
	if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) ||
	    fromnd->nl_nch.ncp->nc_vp == NULL ||
	    (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) {
		kprintf("kern_rename: retry due to ripout on: "
			"\"%s\" -> \"%s\"\n",
			fromnd->nl_nch.ncp->nc_name,
			tond->nl_nch.ncp->nc_name);
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EAGAIN);
	}

	/*
	 * Make sure the parent directories linkages are the same.
	 * XXX shouldn't be needed any more w/ generation check above.
	 */
	if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent ||
	    tnchd.ncp != tond->nl_nch.ncp->nc_parent) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (ENOENT);
	}

	/*
	 * Both the source and target must be within the same filesystem and
	 * in the same filesystem as their parent directories within the
	 * namecache topology.
	 *
	 * NOTE: fromnd's nc_mount or nc_vp could be NULL.
	 */
	mp = fnchd.mount;
	if (mp != tnchd.mount || mp != fromnd->nl_nch.mount ||
	    mp != tond->nl_nch.mount) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (EXDEV);
	}

	/*
	 * Make sure the mount point is writable
	 */
	if ((error = ncp_writechk(&tond->nl_nch)) != 0) {
		cache_drop(&fnchd);
		cache_drop(&tnchd);
		return (error);
	}

	/*
	 * If the target exists and either the source or target is a directory,
	 * then both must be directories.
	 *
	 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might
	 * have become NULL.
	 *
	 * error is 0 on entry to this test (set by ncp_writechk() above), so
	 * it only becomes non-zero if one of the checks below fails.
	 */
	if (tond->nl_nch.ncp->nc_vp) {
		if (fromnd->nl_nch.ncp->nc_vp == NULL) {
			error = ENOENT;
		} else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) {
			if (tond->nl_nch.ncp->nc_vp->v_type != VDIR)
				error = ENOTDIR;
		} else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) {
			error = EISDIR;
		}
	}

	/*
	 * You cannot rename a source into itself or a subdirectory of itself.
	 * We check this by traversing the target directory upwards looking
	 * for a match against the source.
	 *
	 * XXX MPSAFE
	 */
	if (error == 0) {
		for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) {
			if (fromnd->nl_nch.ncp == ncp) {
				error = EINVAL;
				break;
			}
		}
	}

	/* Validation is finished; the parent holds are no longer needed. */
	cache_drop(&fnchd);
	cache_drop(&tnchd);

	/*
	 * Even though the namespaces are different, they may still represent
	 * hardlinks to the same file.  The filesystem might have a hard time
	 * with this so we issue a NREMOVE of the source instead of a NRENAME
	 * when we detect the situation.
	 */
	if (error == 0) {
		fdvp = fromnd->nl_dvp;
		tdvp = tond->nl_dvp;
		if (fdvp == NULL || tdvp == NULL) {
			error = EPERM;
		} else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) {
			error = VOP_NREMOVE(&fromnd->nl_nch, fdvp,
					    fromnd->nl_cred);
		} else {
			error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch,
					    fdvp, tdvp, tond->nl_cred);
		}
	}
	return (error);
}
4315 */ 4316 int 4317 sys_rename(struct rename_args *uap) 4318 { 4319 struct nlookupdata fromnd, tond; 4320 int error; 4321 4322 do { 4323 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4324 if (error == 0) { 4325 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4326 if (error == 0) 4327 error = kern_rename(&fromnd, &tond); 4328 nlookup_done(&tond); 4329 } 4330 nlookup_done(&fromnd); 4331 } while (error == EAGAIN); 4332 return (error); 4333 } 4334 4335 /* 4336 * renameat_args(int oldfd, char *old, int newfd, char *new) 4337 * 4338 * Rename files using paths relative to the directories associated with 4339 * oldfd and newfd. Source and destination must either both be directories, 4340 * or both not be directories. If target is a directory, it must be empty. 4341 */ 4342 int 4343 sys_renameat(struct renameat_args *uap) 4344 { 4345 struct nlookupdata oldnd, newnd; 4346 struct file *oldfp, *newfp; 4347 int error; 4348 4349 do { 4350 error = nlookup_init_at(&oldnd, &oldfp, 4351 uap->oldfd, uap->old, 4352 UIO_USERSPACE, 0); 4353 if (error == 0) { 4354 error = nlookup_init_at(&newnd, &newfp, 4355 uap->newfd, uap->new, 4356 UIO_USERSPACE, 0); 4357 if (error == 0) 4358 error = kern_rename(&oldnd, &newnd); 4359 nlookup_done_at(&newnd, newfp); 4360 } 4361 nlookup_done_at(&oldnd, oldfp); 4362 } while (error == EAGAIN); 4363 return (error); 4364 } 4365 4366 int 4367 kern_mkdir(struct nlookupdata *nd, int mode) 4368 { 4369 struct thread *td = curthread; 4370 struct proc *p = td->td_proc; 4371 struct vnode *vp; 4372 struct vattr vattr; 4373 int error; 4374 4375 bwillinode(1); 4376 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4377 if ((error = nlookup(nd)) != 0) 4378 return (error); 4379 4380 if (nd->nl_nch.ncp->nc_vp) 4381 return (EEXIST); 4382 if (nd->nl_dvp == NULL) 4383 return (EINVAL); 4384 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4385 return (error); 4386 VATTR_NULL(&vattr); 4387 vattr.va_type = VDIR; 4388 vattr.va_mode = (mode & 
ACCESSPERMS) &~ p->p_fd->fd_cmask; 4389 4390 vp = NULL; 4391 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4392 if (error == 0) 4393 vput(vp); 4394 return (error); 4395 } 4396 4397 /* 4398 * mkdir_args(char *path, int mode) 4399 * 4400 * Make a directory file. 4401 */ 4402 int 4403 sys_mkdir(struct mkdir_args *uap) 4404 { 4405 struct nlookupdata nd; 4406 int error; 4407 4408 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4409 if (error == 0) 4410 error = kern_mkdir(&nd, uap->mode); 4411 nlookup_done(&nd); 4412 return (error); 4413 } 4414 4415 /* 4416 * mkdirat_args(int fd, char *path, mode_t mode) 4417 * 4418 * Make a directory file. The path is relative to the directory associated 4419 * with fd. 4420 */ 4421 int 4422 sys_mkdirat(struct mkdirat_args *uap) 4423 { 4424 struct nlookupdata nd; 4425 struct file *fp; 4426 int error; 4427 4428 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4429 if (error == 0) 4430 error = kern_mkdir(&nd, uap->mode); 4431 nlookup_done_at(&nd, fp); 4432 return (error); 4433 } 4434 4435 int 4436 kern_rmdir(struct nlookupdata *nd) 4437 { 4438 int error; 4439 4440 bwillinode(1); 4441 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4442 if ((error = nlookup(nd)) != 0) 4443 return (error); 4444 4445 /* 4446 * Do not allow directories representing mount points to be 4447 * deleted, even if empty. Check write perms on mount point 4448 * in case the vnode is aliased (aka nullfs). 4449 */ 4450 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4451 return (EBUSY); 4452 if (nd->nl_dvp == NULL) 4453 return (EINVAL); 4454 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4455 return (error); 4456 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4457 return (error); 4458 } 4459 4460 /* 4461 * rmdir_args(char *path) 4462 * 4463 * Remove a directory file. 
4464 */ 4465 int 4466 sys_rmdir(struct rmdir_args *uap) 4467 { 4468 struct nlookupdata nd; 4469 int error; 4470 4471 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4472 if (error == 0) 4473 error = kern_rmdir(&nd); 4474 nlookup_done(&nd); 4475 return (error); 4476 } 4477 4478 int 4479 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4480 enum uio_seg direction) 4481 { 4482 struct thread *td = curthread; 4483 struct vnode *vp; 4484 struct file *fp; 4485 struct uio auio; 4486 struct iovec aiov; 4487 off_t loff; 4488 int error, eofflag; 4489 4490 if ((error = holdvnode(td, fd, &fp)) != 0) 4491 return (error); 4492 if ((fp->f_flag & FREAD) == 0) { 4493 error = EBADF; 4494 goto done; 4495 } 4496 vp = (struct vnode *)fp->f_data; 4497 if (vp->v_type != VDIR) { 4498 error = EINVAL; 4499 goto done; 4500 } 4501 aiov.iov_base = buf; 4502 aiov.iov_len = count; 4503 auio.uio_iov = &aiov; 4504 auio.uio_iovcnt = 1; 4505 auio.uio_rw = UIO_READ; 4506 auio.uio_segflg = direction; 4507 auio.uio_td = td; 4508 auio.uio_resid = count; 4509 loff = auio.uio_offset = fp->f_offset; 4510 error = VOP_READDIR_FP(vp, &auio, fp->f_cred, &eofflag, NULL, NULL, fp); 4511 fp->f_offset = auio.uio_offset; 4512 if (error) 4513 goto done; 4514 4515 /* 4516 * WARNING! *basep may not be wide enough to accomodate the 4517 * seek offset. XXX should we hack this to return the upper 32 bits 4518 * for offsets greater then 4G? 4519 */ 4520 if (basep) { 4521 *basep = (long)loff; 4522 } 4523 *res = count - auio.uio_resid; 4524 done: 4525 fdrop(fp); 4526 return (error); 4527 } 4528 4529 /* 4530 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4531 * 4532 * Read a block of directory entries in a file system independent format. 
4533 */ 4534 int 4535 sys_getdirentries(struct getdirentries_args *uap) 4536 { 4537 long base; 4538 int error; 4539 4540 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4541 &uap->sysmsg_result, UIO_USERSPACE); 4542 4543 if (error == 0 && uap->basep) 4544 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4545 return (error); 4546 } 4547 4548 /* 4549 * getdents_args(int fd, char *buf, size_t count) 4550 */ 4551 int 4552 sys_getdents(struct getdents_args *uap) 4553 { 4554 int error; 4555 4556 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4557 &uap->sysmsg_result, UIO_USERSPACE); 4558 4559 return (error); 4560 } 4561 4562 /* 4563 * Set the mode mask for creation of filesystem nodes. 4564 * 4565 * umask(int newmask) 4566 */ 4567 int 4568 sys_umask(struct umask_args *uap) 4569 { 4570 struct thread *td = curthread; 4571 struct proc *p = td->td_proc; 4572 struct filedesc *fdp; 4573 4574 fdp = p->p_fd; 4575 uap->sysmsg_result = fdp->fd_cmask; 4576 fdp->fd_cmask = uap->newmask & ALLPERMS; 4577 return (0); 4578 } 4579 4580 /* 4581 * revoke(char *path) 4582 * 4583 * Void all references to file by ripping underlying filesystem 4584 * away from vnode. 
4585 */ 4586 int 4587 sys_revoke(struct revoke_args *uap) 4588 { 4589 struct nlookupdata nd; 4590 struct vattr vattr; 4591 struct vnode *vp; 4592 struct ucred *cred; 4593 int error; 4594 4595 vp = NULL; 4596 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4597 if (error == 0) 4598 error = nlookup(&nd); 4599 if (error == 0) 4600 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4601 cred = crhold(nd.nl_cred); 4602 nlookup_done(&nd); 4603 if (error == 0) { 4604 if (error == 0) 4605 error = VOP_GETATTR(vp, &vattr); 4606 if (error == 0 && cred->cr_uid != vattr.va_uid) 4607 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4608 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4609 if (vcount(vp) > 0) 4610 error = vrevoke(vp, cred); 4611 } else if (error == 0) { 4612 error = vrevoke(vp, cred); 4613 } 4614 vrele(vp); 4615 } 4616 if (cred) 4617 crfree(cred); 4618 return (error); 4619 } 4620 4621 /* 4622 * getfh_args(char *fname, fhandle_t *fhp) 4623 * 4624 * Get (NFS) file handle 4625 * 4626 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4627 * mount. This allows nullfs mounts to be explicitly exported. 4628 * 4629 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4630 * 4631 * nullfs mounts of subdirectories are not safe. That is, it will 4632 * work, but you do not really have protection against access to 4633 * the related parent directories. 
4634 */ 4635 int 4636 sys_getfh(struct getfh_args *uap) 4637 { 4638 struct thread *td = curthread; 4639 struct nlookupdata nd; 4640 fhandle_t fh; 4641 struct vnode *vp; 4642 struct mount *mp; 4643 int error; 4644 4645 /* 4646 * Must be super user 4647 */ 4648 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4649 return (error); 4650 4651 vp = NULL; 4652 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4653 if (error == 0) 4654 error = nlookup(&nd); 4655 if (error == 0) 4656 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4657 mp = nd.nl_nch.mount; 4658 nlookup_done(&nd); 4659 if (error == 0) { 4660 bzero(&fh, sizeof(fh)); 4661 fh.fh_fsid = mp->mnt_stat.f_fsid; 4662 error = VFS_VPTOFH(vp, &fh.fh_fid); 4663 vput(vp); 4664 if (error == 0) 4665 error = copyout(&fh, uap->fhp, sizeof(fh)); 4666 } 4667 return (error); 4668 } 4669 4670 /* 4671 * fhopen_args(const struct fhandle *u_fhp, int flags) 4672 * 4673 * syscall for the rpc.lockd to use to translate a NFS file handle into 4674 * an open descriptor. 4675 * 4676 * warning: do not remove the priv_check() call or this becomes one giant 4677 * security hole. 4678 */ 4679 int 4680 sys_fhopen(struct fhopen_args *uap) 4681 { 4682 struct thread *td = curthread; 4683 struct filedesc *fdp = td->td_proc->p_fd; 4684 struct mount *mp; 4685 struct vnode *vp; 4686 struct fhandle fhp; 4687 struct vattr vat; 4688 struct vattr *vap = &vat; 4689 struct flock lf; 4690 int fmode, mode, error = 0, type; 4691 struct file *nfp; 4692 struct file *fp; 4693 int indx; 4694 4695 /* 4696 * Must be super user 4697 */ 4698 error = priv_check(td, PRIV_ROOT); 4699 if (error) 4700 return (error); 4701 4702 fmode = FFLAGS(uap->flags); 4703 4704 /* 4705 * Why not allow a non-read/write open for our lockd? 
4706 */ 4707 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4708 return (EINVAL); 4709 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4710 if (error) 4711 return(error); 4712 4713 /* 4714 * Find the mount point 4715 */ 4716 mp = vfs_getvfs(&fhp.fh_fsid); 4717 if (mp == NULL) { 4718 error = ESTALE; 4719 goto done2; 4720 } 4721 /* now give me my vnode, it gets returned to me locked */ 4722 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4723 if (error) 4724 goto done; 4725 /* 4726 * from now on we have to make sure not 4727 * to forget about the vnode 4728 * any error that causes an abort must vput(vp) 4729 * just set error = err and 'goto bad;'. 4730 */ 4731 4732 /* 4733 * from vn_open 4734 */ 4735 if (vp->v_type == VLNK) { 4736 error = EMLINK; 4737 goto bad; 4738 } 4739 if (vp->v_type == VSOCK) { 4740 error = EOPNOTSUPP; 4741 goto bad; 4742 } 4743 mode = 0; 4744 if (fmode & (FWRITE | O_TRUNC)) { 4745 if (vp->v_type == VDIR) { 4746 error = EISDIR; 4747 goto bad; 4748 } 4749 error = vn_writechk(vp); 4750 if (error) 4751 goto bad; 4752 mode |= VWRITE; 4753 } 4754 if (fmode & FREAD) 4755 mode |= VREAD; 4756 if (mode) { 4757 error = VOP_ACCESS(vp, mode, td->td_ucred); 4758 if (error) 4759 goto bad; 4760 } 4761 if (fmode & O_TRUNC) { 4762 vn_unlock(vp); /* XXX */ 4763 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4764 VATTR_NULL(vap); 4765 vap->va_size = 0; 4766 error = VOP_SETATTR(vp, vap, td->td_ucred); 4767 if (error) 4768 goto bad; 4769 } 4770 4771 /* 4772 * VOP_OPEN needs the file pointer so it can potentially override 4773 * it. 4774 * 4775 * WARNING! no f_nchandle will be associated when fhopen()ing a 4776 * directory. XXX 4777 */ 4778 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4779 goto bad; 4780 fp = nfp; 4781 4782 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4783 if (error) { 4784 /* 4785 * setting f_ops this way prevents VOP_CLOSE from being 4786 * called or fdrop() releasing the vp from v_data. 
Since 4787 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4788 */ 4789 fp->f_ops = &badfileops; 4790 fp->f_data = NULL; 4791 goto bad_drop; 4792 } 4793 4794 /* 4795 * The fp is given its own reference, we still have our ref and lock. 4796 * 4797 * Assert that all regular files must be created with a VM object. 4798 */ 4799 if (vp->v_type == VREG && vp->v_object == NULL) { 4800 kprintf("fhopen: regular file did not " 4801 "have VM object: %p\n", 4802 vp); 4803 goto bad_drop; 4804 } 4805 4806 /* 4807 * The open was successful. Handle any locking requirements. 4808 */ 4809 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4810 lf.l_whence = SEEK_SET; 4811 lf.l_start = 0; 4812 lf.l_len = 0; 4813 if (fmode & O_EXLOCK) 4814 lf.l_type = F_WRLCK; 4815 else 4816 lf.l_type = F_RDLCK; 4817 if (fmode & FNONBLOCK) 4818 type = 0; 4819 else 4820 type = F_WAIT; 4821 vn_unlock(vp); 4822 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4823 &lf, type)) != 0) { 4824 /* 4825 * release our private reference. 4826 */ 4827 fsetfd(fdp, NULL, indx); 4828 fdrop(fp); 4829 vrele(vp); 4830 goto done; 4831 } 4832 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4833 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4834 } 4835 4836 /* 4837 * Clean up. Associate the file pointer with the previously 4838 * reserved descriptor and return it. 
4839 */ 4840 vput(vp); 4841 if (uap->flags & O_CLOEXEC) 4842 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4843 fsetfd(fdp, fp, indx); 4844 fdrop(fp); 4845 uap->sysmsg_result = indx; 4846 mount_drop(mp); 4847 4848 return (error); 4849 4850 bad_drop: 4851 fsetfd(fdp, NULL, indx); 4852 fdrop(fp); 4853 bad: 4854 vput(vp); 4855 done: 4856 mount_drop(mp); 4857 done2: 4858 return (error); 4859 } 4860 4861 /* 4862 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4863 */ 4864 int 4865 sys_fhstat(struct fhstat_args *uap) 4866 { 4867 struct thread *td = curthread; 4868 struct stat sb; 4869 fhandle_t fh; 4870 struct mount *mp; 4871 struct vnode *vp; 4872 int error; 4873 4874 /* 4875 * Must be super user 4876 */ 4877 error = priv_check(td, PRIV_ROOT); 4878 if (error) 4879 return (error); 4880 4881 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4882 if (error) 4883 return (error); 4884 4885 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4886 error = ESTALE; 4887 if (error == 0) { 4888 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4889 error = vn_stat(vp, &sb, td->td_ucred); 4890 vput(vp); 4891 } 4892 } 4893 if (error == 0) 4894 error = copyout(&sb, uap->sb, sizeof(sb)); 4895 if (mp) 4896 mount_drop(mp); 4897 4898 return (error); 4899 } 4900 4901 /* 4902 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4903 */ 4904 int 4905 sys_fhstatfs(struct fhstatfs_args *uap) 4906 { 4907 struct thread *td = curthread; 4908 struct proc *p = td->td_proc; 4909 struct statfs *sp; 4910 struct mount *mp; 4911 struct vnode *vp; 4912 struct statfs sb; 4913 char *fullpath, *freepath; 4914 fhandle_t fh; 4915 int error; 4916 4917 /* 4918 * Must be super user 4919 */ 4920 if ((error = priv_check(td, PRIV_ROOT))) 4921 return (error); 4922 4923 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4924 return (error); 4925 4926 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4927 error = ESTALE; 4928 goto done; 4929 } 4930 if (p != NULL && !chroot_visible_mnt(mp, p)) { 
4931 error = ESTALE; 4932 goto done; 4933 } 4934 4935 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4936 goto done; 4937 mp = vp->v_mount; 4938 sp = &mp->mnt_stat; 4939 vput(vp); 4940 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4941 goto done; 4942 4943 error = mount_path(p, mp, &fullpath, &freepath); 4944 if (error) 4945 goto done; 4946 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4947 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4948 kfree(freepath, M_TEMP); 4949 4950 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4951 if (priv_check(td, PRIV_ROOT)) { 4952 bcopy(sp, &sb, sizeof(sb)); 4953 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4954 sp = &sb; 4955 } 4956 error = copyout(sp, uap->buf, sizeof(*sp)); 4957 done: 4958 if (mp) 4959 mount_drop(mp); 4960 4961 return (error); 4962 } 4963 4964 /* 4965 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4966 */ 4967 int 4968 sys_fhstatvfs(struct fhstatvfs_args *uap) 4969 { 4970 struct thread *td = curthread; 4971 struct proc *p = td->td_proc; 4972 struct statvfs *sp; 4973 struct mount *mp; 4974 struct vnode *vp; 4975 fhandle_t fh; 4976 int error; 4977 4978 /* 4979 * Must be super user 4980 */ 4981 if ((error = priv_check(td, PRIV_ROOT))) 4982 return (error); 4983 4984 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4985 return (error); 4986 4987 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4988 error = ESTALE; 4989 goto done; 4990 } 4991 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4992 error = ESTALE; 4993 goto done; 4994 } 4995 4996 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4997 goto done; 4998 mp = vp->v_mount; 4999 sp = &mp->mnt_vstat; 5000 vput(vp); 5001 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 5002 goto done; 5003 5004 sp->f_flag = 0; 5005 if (mp->mnt_flag & MNT_RDONLY) 5006 sp->f_flag |= ST_RDONLY; 5007 if (mp->mnt_flag & MNT_NOSUID) 5008 sp->f_flag |= ST_NOSUID; 5009 error = copyout(sp, uap->buf, sizeof(*sp)); 5010 done: 5011 
if (mp) 5012 mount_drop(mp); 5013 return (error); 5014 } 5015 5016 5017 /* 5018 * Syscall to push extended attribute configuration information into the 5019 * VFS. Accepts a path, which it converts to a mountpoint, as well as 5020 * a command (int cmd), and attribute name and misc data. For now, the 5021 * attribute name is left in userspace for consumption by the VFS_op. 5022 * It will probably be changed to be copied into sysspace by the 5023 * syscall in the future, once issues with various consumers of the 5024 * attribute code have raised their hands. 5025 * 5026 * Currently this is used only by UFS Extended Attributes. 5027 */ 5028 int 5029 sys_extattrctl(struct extattrctl_args *uap) 5030 { 5031 struct nlookupdata nd; 5032 struct vnode *vp; 5033 char attrname[EXTATTR_MAXNAMELEN]; 5034 int error; 5035 size_t size; 5036 5037 attrname[0] = 0; 5038 vp = NULL; 5039 error = 0; 5040 5041 if (error == 0 && uap->filename) { 5042 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 5043 NLC_FOLLOW); 5044 if (error == 0) 5045 error = nlookup(&nd); 5046 if (error == 0) 5047 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 5048 nlookup_done(&nd); 5049 } 5050 5051 if (error == 0 && uap->attrname) { 5052 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 5053 &size); 5054 } 5055 5056 if (error == 0) { 5057 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5058 if (error == 0) 5059 error = nlookup(&nd); 5060 if (error == 0) 5061 error = ncp_writechk(&nd.nl_nch); 5062 if (error == 0) { 5063 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 5064 uap->attrnamespace, 5065 uap->attrname, nd.nl_cred); 5066 } 5067 nlookup_done(&nd); 5068 } 5069 5070 return (error); 5071 } 5072 5073 /* 5074 * Syscall to get a named extended attribute on a file or directory. 
5075 */ 5076 int 5077 sys_extattr_set_file(struct extattr_set_file_args *uap) 5078 { 5079 char attrname[EXTATTR_MAXNAMELEN]; 5080 struct nlookupdata nd; 5081 struct vnode *vp; 5082 struct uio auio; 5083 struct iovec aiov; 5084 int error; 5085 5086 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5087 if (error) 5088 return (error); 5089 5090 vp = NULL; 5091 5092 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5093 if (error == 0) 5094 error = nlookup(&nd); 5095 if (error == 0) 5096 error = ncp_writechk(&nd.nl_nch); 5097 if (error == 0) 5098 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5099 if (error) { 5100 nlookup_done(&nd); 5101 return (error); 5102 } 5103 5104 bzero(&auio, sizeof(auio)); 5105 aiov.iov_base = uap->data; 5106 aiov.iov_len = uap->nbytes; 5107 auio.uio_iov = &aiov; 5108 auio.uio_iovcnt = 1; 5109 auio.uio_offset = 0; 5110 auio.uio_resid = uap->nbytes; 5111 auio.uio_rw = UIO_WRITE; 5112 auio.uio_td = curthread; 5113 5114 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 5115 &auio, nd.nl_cred); 5116 5117 vput(vp); 5118 nlookup_done(&nd); 5119 return (error); 5120 } 5121 5122 /* 5123 * Syscall to get a named extended attribute on a file or directory. 
5124 */ 5125 int 5126 sys_extattr_get_file(struct extattr_get_file_args *uap) 5127 { 5128 char attrname[EXTATTR_MAXNAMELEN]; 5129 struct nlookupdata nd; 5130 struct uio auio; 5131 struct iovec aiov; 5132 struct vnode *vp; 5133 int error; 5134 5135 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5136 if (error) 5137 return (error); 5138 5139 vp = NULL; 5140 5141 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5142 if (error == 0) 5143 error = nlookup(&nd); 5144 if (error == 0) 5145 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5146 if (error) { 5147 nlookup_done(&nd); 5148 return (error); 5149 } 5150 5151 bzero(&auio, sizeof(auio)); 5152 aiov.iov_base = uap->data; 5153 aiov.iov_len = uap->nbytes; 5154 auio.uio_iov = &aiov; 5155 auio.uio_iovcnt = 1; 5156 auio.uio_offset = 0; 5157 auio.uio_resid = uap->nbytes; 5158 auio.uio_rw = UIO_READ; 5159 auio.uio_td = curthread; 5160 5161 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5162 &auio, nd.nl_cred); 5163 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 5164 5165 vput(vp); 5166 nlookup_done(&nd); 5167 return(error); 5168 } 5169 5170 /* 5171 * Syscall to delete a named extended attribute from a file or directory. 5172 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
5173 */ 5174 int 5175 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 5176 { 5177 char attrname[EXTATTR_MAXNAMELEN]; 5178 struct nlookupdata nd; 5179 struct vnode *vp; 5180 int error; 5181 5182 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5183 if (error) 5184 return(error); 5185 5186 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5187 if (error == 0) 5188 error = nlookup(&nd); 5189 if (error == 0) 5190 error = ncp_writechk(&nd.nl_nch); 5191 if (error == 0) { 5192 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5193 if (error == 0) { 5194 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5195 attrname, NULL, nd.nl_cred); 5196 vput(vp); 5197 } 5198 } 5199 nlookup_done(&nd); 5200 return(error); 5201 } 5202 5203 /* 5204 * Determine if the mount is visible to the process. 5205 */ 5206 static int 5207 chroot_visible_mnt(struct mount *mp, struct proc *p) 5208 { 5209 struct nchandle nch; 5210 5211 /* 5212 * Traverse from the mount point upwards. If we hit the process 5213 * root then the mount point is visible to the process. 5214 */ 5215 nch = mp->mnt_ncmountpt; 5216 while (nch.ncp) { 5217 if (nch.mount == p->p_fd->fd_nrdir.mount && 5218 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5219 return(1); 5220 } 5221 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5222 nch = nch.mount->mnt_ncmounton; 5223 } else { 5224 nch.ncp = nch.ncp->nc_parent; 5225 } 5226 } 5227 5228 /* 5229 * If the mount point is not visible to the process, but the 5230 * process root is in a subdirectory of the mount, return 5231 * TRUE anyway. 
5232 */ 5233 if (p->p_fd->fd_nrdir.mount == mp) 5234 return(1); 5235 5236 return(0); 5237 } 5238 5239 /* Sets priv to PRIV_ROOT in case no matching fs */ 5240 static int 5241 get_fspriv(const char *fsname) 5242 { 5243 5244 if (strncmp("null", fsname, 5) == 0) { 5245 return PRIV_VFS_MOUNT_NULLFS; 5246 } else if (strncmp(fsname, "tmpfs", 6) == 0) { 5247 return PRIV_VFS_MOUNT_TMPFS; 5248 } 5249 5250 return PRIV_ROOT; 5251 } 5252 5253 int 5254 sys___realpath(struct __realpath_args *uap) 5255 { 5256 struct nlookupdata nd; 5257 char *rbuf; 5258 char *fbuf; 5259 ssize_t rlen; 5260 int error; 5261 5262 /* 5263 * Invalid length if less than 0. 0 is allowed 5264 */ 5265 if ((ssize_t)uap->len < 0) 5266 return EINVAL; 5267 5268 rbuf = NULL; 5269 fbuf = NULL; 5270 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5271 if (error) 5272 goto done; 5273 5274 nd.nl_flags |= NLC_SHAREDLOCK; 5275 error = nlookup(&nd); 5276 if (error) 5277 goto done; 5278 5279 if (nd.nl_nch.ncp->nc_vp == NULL) { 5280 error = ENOENT; 5281 goto done; 5282 } 5283 5284 /* 5285 * Shortcut test for existence. 5286 */ 5287 if (uap->len == 0) { 5288 error = ENAMETOOLONG; 5289 goto done; 5290 } 5291 5292 /* 5293 * Obtain the path relative to the process root. The nch must not 5294 * be locked for the cache_fullpath() call. 5295 */ 5296 if (nd.nl_flags & NLC_NCPISLOCKED) { 5297 nd.nl_flags &= ~NLC_NCPISLOCKED; 5298 cache_unlock(&nd.nl_nch); 5299 } 5300 error = cache_fullpath(curproc, &nd.nl_nch, NULL, &rbuf, &fbuf, 0); 5301 if (error) 5302 goto done; 5303 5304 rlen = (ssize_t)strlen(rbuf); 5305 if (rlen >= uap->len) { 5306 error = ENAMETOOLONG; 5307 goto done; 5308 } 5309 error = copyout(rbuf, uap->buf, rlen + 1); 5310 if (error == 0) 5311 uap->sysmsg_szresult = rlen; 5312 done: 5313 nlookup_done(&nd); 5314 if (fbuf) 5315 kfree(fbuf, M_TEMP); 5316 5317 return error; 5318 } 5319