1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 * 34 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 35 * $FreeBSD: src/sys/kern/vfs_syscalls.c,v 1.151.2.18 2003/04/04 20:35:58 tegge Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/buf.h> 41 #include <sys/conf.h> 42 #include <sys/sysent.h> 43 #include <sys/malloc.h> 44 #include <sys/mount.h> 45 #include <sys/mountctl.h> 46 #include <sys/sysproto.h> 47 #include <sys/filedesc.h> 48 #include <sys/kernel.h> 49 #include <sys/fcntl.h> 50 #include <sys/file.h> 51 #include <sys/linker.h> 52 #include <sys/stat.h> 53 #include <sys/unistd.h> 54 #include <sys/vnode.h> 55 #include <sys/proc.h> 56 #include <sys/priv.h> 57 #include <sys/jail.h> 58 #include <sys/namei.h> 59 #include <sys/nlookup.h> 60 #include <sys/dirent.h> 61 #include <sys/extattr.h> 62 #include <sys/spinlock.h> 63 #include <sys/kern_syscall.h> 64 #include <sys/objcache.h> 65 #include <sys/sysctl.h> 66 67 #include <sys/buf2.h> 68 #include <sys/file2.h> 69 #include <sys/spinlock2.h> 70 71 #include <vm/vm.h> 72 #include <vm/vm_object.h> 73 #include <vm/vm_page.h> 74 75 #include <machine/limits.h> 76 #include <machine/stdarg.h> 77 78 static void mount_warning(struct mount *mp, const char *ctl, ...) 
__printflike(2, 3);
static int mount_path(struct proc *p, struct mount *mp, char **rb, char **fb);
static int checkvp_chdir (struct vnode *vn, struct thread *td);
static void checkdirs (struct nchandle *old_nch, struct nchandle *new_nch);
static int chroot_refuse_vdir_fds (thread_t td, struct filedesc *fdp);
static int chroot_visible_mnt(struct mount *mp, struct proc *p);
static int getutimes (struct timeval *, struct timespec *);
static int getutimens (const struct timespec *, struct timespec *, int *);
static int setfown (struct mount *, struct vnode *, uid_t, gid_t);
static int setfmode (struct vnode *, int);
static int setfflags (struct vnode *, int);
static int setutimes (struct vnode *, struct vattr *,
			const struct timespec *, int);
static int usermount = 0;	/* if 1, non-root can mount fs. */

SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0,
    "Allow non-root users to mount filesystems");

/*
 * Virtual File System System Calls
 */

/*
 * Mount a file system.
 *
 * mount_args(char *type, char *path, int flags, caddr_t data)
 *
 * Handles both brand new mounts and MNT_UPDATE requests against an
 * existing mount.
 *
 * Permission checks performed here:
 *  - credentials for which jailed() is true are refused with EPERM;
 *  - unless the vfs.usermount sysctl is non-zero the caller must pass
 *    priv_check(td, PRIV_ROOT);
 *  - MNT_EXPORTED always requires PRIV_ROOT;
 *  - callers failing the PRIV_ROOT check get MNT_NOSUID | MNT_NODEV
 *    or'd into uap->flags;
 *  - for MNT_UPDATE the caller must be the recorded f_owner of the
 *    mount or pass PRIV_ROOT; for a new mount the caller must own the
 *    covered directory (va_uid) or pass PRIV_ROOT.
 *
 * Every early-exit path below releases exactly what is held at that
 * point (nch ref/lock, vp ref/lock, busy count, mount token) before
 * jumping to "done".
 *
 * Returns 0 on success, otherwise an errno value.
 *
 * MPALMOSTSAFE
 */
int
sys_mount(struct mount_args *uap)
{
	struct thread *td = curthread;
	struct vnode *vp;
	struct nchandle nch;
	struct mount *mp, *nullmp;
	struct vfsconf *vfsp;
	int error, flag = 0, flag2 = 0;
	int hasmount;
	struct vattr va;
	struct nlookupdata nd;
	char fstypename[MFSNAMELEN];
	struct ucred *cred;

	cred = td->td_ucred;
	if (jailed(cred)) {
		error = EPERM;
		goto done;
	}
	if (usermount == 0 && (error = priv_check(td, PRIV_ROOT)))
		goto done;

	/*
	 * Do not allow NFS export by non-root users.
	 */
	if (uap->flags & MNT_EXPORTED) {
		error = priv_check(td, PRIV_ROOT);
		if (error)
			goto done;
	}
	/*
	 * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users
	 */
	if (priv_check(td, PRIV_ROOT))
		uap->flags |= MNT_NOSUID | MNT_NODEV;

	/*
	 * Lookup the requested path and extract the nch and vnode.
	 * A path that resolves to a namecache entry without a vnode is
	 * reported as ENOENT.
	 */
	error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW);
	if (error == 0) {
		if ((error = nlookup(&nd)) == 0) {
			if (nd.nl_nch.ncp->nc_vp == NULL)
				error = ENOENT;
		}
	}
	if (error) {
		nlookup_done(&nd);
		goto done;
	}

	/*
	 * If the target filesystem is resolved via a nullfs mount, then
	 * nd.nl_nch.mount will be pointing to the nullfs mount structure
	 * instead of the target file system. We need it in case we are
	 * doing an update.
	 */
	nullmp = nd.nl_nch.mount;

	/*
	 * Extract the locked+refd ncp and cleanup the nd structure.
	 * Zeroing nd.nl_nch first transfers the handle to our local copy
	 * so that nlookup_done() does not release it.
	 */
	nch = nd.nl_nch;
	cache_zero(&nd.nl_nch);
	nlookup_done(&nd);

	/*
	 * Remember whether something is already mounted on this namecache
	 * entry.  Both the update path and the new-mount path below fail
	 * with EBUSY when hasmount is set.
	 */
	if ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (mp = cache_findmount(&nch)) != NULL) {
		cache_dropmount(mp);
		hasmount = 1;
	} else {
		hasmount = 0;
	}


	/*
	 * now we have the locked ref'd nch and unreferenced vnode.
	 */
	vp = nch.ncp->nc_vp;
	if ((error = vget(vp, LK_EXCLUSIVE)) != 0) {
		cache_put(&nch);
		goto done;
	}
	cache_unlock(&nch);

	/*
	 * Extract the file system type. We need to know this early, to take
	 * appropriate actions if we are dealing with a nullfs.
	 */
	if ((error = copyinstr(uap->type, fstypename, MFSNAMELEN, NULL)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}

	/*
	 * Now we have an unlocked ref'd nch and a locked ref'd vp
	 */
	if (uap->flags & MNT_UPDATE) {
		/* Updates are only accepted on the root vnode of a mount. */
		if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) {
			cache_drop(&nch);
			vput(vp);
			error = EINVAL;
			goto done;
		}

		/*
		 * For the "null" fstype operate on the mount recorded by
		 * the lookup; every other type uses the vnode's mount.
		 */
		if (strncmp(fstypename, "null", 5) == 0) {
			KKASSERT(nullmp);
			mp = nullmp;
		} else {
			mp = vp->v_mount;
		}

		/*
		 * Snapshot both flag words so they can be restored if
		 * VFS_MOUNT() fails (see the MNT_UPDATE handling after the
		 * "update" label).
		 */
		flag = mp->mnt_flag;
		flag2 = mp->mnt_kern_flag;
		/*
		 * We only allow the filesystem to be reloaded if it
		 * is currently mounted read-only.
		 */
		if ((uap->flags & MNT_RELOAD) &&
		    ((mp->mnt_flag & MNT_RDONLY) == 0)) {
			cache_drop(&nch);
			vput(vp);
			error = EOPNOTSUPP;	/* Needs translation */
			goto done;
		}
		/*
		 * Only root, or the user that did the original mount is
		 * permitted to update it.
		 */
		if (mp->mnt_stat.f_owner != cred->cr_uid &&
		    (error = priv_check(td, PRIV_ROOT))) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		if (vfs_busy(mp, LK_NOWAIT)) {
			cache_drop(&nch);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		if (hasmount) {
			cache_drop(&nch);
			vfs_unbusy(mp);
			vput(vp);
			error = EBUSY;
			goto done;
		}
		mp->mnt_flag |=
		    uap->flags & (MNT_RELOAD | MNT_FORCE | MNT_UPDATE);
		lwkt_gettoken(&mp->mnt_token);
		vn_unlock(vp);
		vfsp = mp->mnt_vfc;
		goto update;
	}

	/*
	 * If the user is not root, ensure that they own the directory
	 * onto which we are attempting to mount.
	 */
	if ((error = VOP_GETATTR(vp, &va)) ||
	    (va.va_uid != cred->cr_uid &&
	     (error = priv_check(td, PRIV_ROOT)))) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if ((error = vinvalbuf(vp, V_SAVE, 0, 0)) != 0) {
		cache_drop(&nch);
		vput(vp);
		goto done;
	}
	if (vp->v_type != VDIR) {
		cache_drop(&nch);
		vput(vp);
		error = ENOTDIR;
		goto done;
	}
	/* The covered filesystem may refuse to have mounts stacked on it. */
	if (vp->v_mount->mnt_kern_flag & MNTK_NOSTKMNT) {
		cache_drop(&nch);
		vput(vp);
		error = EPERM;
		goto done;
	}
	vfsp = vfsconf_find_by_name(fstypename);
	if (vfsp == NULL) {
		linker_file_t lf;

		/* Only load modules for root (very important!) */
		if ((error = priv_check(td, PRIV_ROOT)) != 0) {
			cache_drop(&nch);
			vput(vp);
			goto done;
		}
		/*
		 * NOTE(review): lf is not initialized before this call and
		 * is read below even when linker_load_file() reports an
		 * error.  This is only safe if linker_load_file() always
		 * stores to lf on failure -- confirm against its
		 * implementation.
		 */
		error = linker_load_file(fstypename, &lf);
		if (error || lf == NULL) {
			cache_drop(&nch);
			vput(vp);
			if (lf == NULL)
				error = ENODEV;
			goto done;
		}
		lf->userrefs++;
		/* lookup again, see if the VFS was loaded */
		vfsp = vfsconf_find_by_name(fstypename);
		if (vfsp == NULL) {
			lf->userrefs--;
			linker_file_unload(lf);
			cache_drop(&nch);
			vput(vp);
			error = ENODEV;
			goto done;
		}
	}
	if (hasmount) {
		cache_drop(&nch);
		vput(vp);
		error = EBUSY;
		goto done;
	}

	/*
	 * Allocate and initialize the filesystem.
	 *
	 * NOTE(review): the vfs_busy() return value is ignored here;
	 * presumably it cannot fail on a mount nobody else can see yet --
	 * confirm.
	 */
	mp = kmalloc(sizeof(struct mount), M_MOUNT, M_ZERO|M_WAITOK);
	mount_init(mp);
	vfs_busy(mp, LK_NOWAIT);
	mp->mnt_op = vfsp->vfc_vfsops;
	mp->mnt_vfc = vfsp;
	mp->mnt_pbuf_count = nswbuf_kva / NSWBUF_SPLIT;
	vfsp->vfc_refcount++;
	mp->mnt_stat.f_type = vfsp->vfc_typenum;
	mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK;
	strncpy(mp->mnt_stat.f_fstypename, vfsp->vfc_name, MFSNAMELEN);
	mp->mnt_stat.f_owner = cred->cr_uid;
	lwkt_gettoken(&mp->mnt_token);
	vn_unlock(vp);
update:
	/*
	 * (per-mount token acquired at this point)
	 *
	 * Set the mount level flags.
	 *
	 * A read-only mount that is not being asked to stay read-only is
	 * tagged MNTK_WANTRDWR; MNT_RDONLY itself is only cleared after
	 * VFS_MOUNT() has run (update path below).
	 */
	if (uap->flags & MNT_RDONLY)
		mp->mnt_flag |= MNT_RDONLY;
	else if (mp->mnt_flag & MNT_RDONLY)
		mp->mnt_kern_flag |= MNTK_WANTRDWR;
	mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV |
	    MNT_SYNCHRONOUS | MNT_ASYNC | MNT_NOATIME |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);
	mp->mnt_flag |= uap->flags & (MNT_NOSUID | MNT_NOEXEC |
	    MNT_NODEV | MNT_SYNCHRONOUS | MNT_ASYNC | MNT_FORCE |
	    MNT_NOSYMFOLLOW | MNT_IGNORE | MNT_TRIM |
	    MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR |
	    MNT_AUTOMOUNTED);

	/*
	 * Pre-set the mount's ALL_MPSAFE flags if specified in the vfsconf.
	 * This way the initial VFS_MOUNT() call will also be MPSAFE.
	 */
	if (vfsp->vfc_flags & VFCF_MPSAFE)
		mp->mnt_kern_flag |= MNTK_ALL_MPSAFE;

	/*
	 * Mount the filesystem.
	 * XXX The final recipients of VFS_MOUNT just overwrite the ndp they
	 * get.
	 */
	error = VFS_MOUNT(mp, uap->path, uap->data, cred);
	if (mp->mnt_flag & MNT_UPDATE) {
		/*
		 * Update path: strip the transient request flags, and on
		 * failure put both flag words back to the snapshot taken
		 * before the update started.
		 */
		if (mp->mnt_kern_flag & MNTK_WANTRDWR)
			mp->mnt_flag &= ~MNT_RDONLY;
		mp->mnt_flag &=~ (MNT_UPDATE | MNT_RELOAD | MNT_FORCE);
		mp->mnt_kern_flag &=~ MNTK_WANTRDWR;
		if (error) {
			mp->mnt_flag = flag;
			mp->mnt_kern_flag = flag2;
		}
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		vrele(vp);
		cache_drop(&nch);
		goto done;
	}
	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

	/*
	 * Put the new filesystem on the mount list after root.  The mount
	 * point gets its own mnt_ncmountpt (unless the VFS already set one
	 * up) which represents the root of the mount.  The lookup code
	 * detects the mount point going forward and checks the root of
	 * the mount going backwards.
	 *
	 * It is not necessary to invalidate or purge the vnode underneath
	 * because elements under the mount will be given their own glue
	 * namecache record.
	 */
	if (!error) {
		if (mp->mnt_ncmountpt.ncp == NULL) {
			/*
			 * Allocate, then unlock, but leave the ref intact.
			 * This is the mnt_refs (1) that we will retain
			 * through to the unmount.
			 */
			cache_allocroot(&mp->mnt_ncmountpt, mp, NULL);
			cache_unlock(&mp->mnt_ncmountpt);
		}
		vn_unlock(vp);
		mp->mnt_ncmounton = nch;		/* inherits ref */
		cache_lock(&nch);
		nch.ncp->nc_flag |= NCF_ISMOUNTPT;
		cache_unlock(&nch);
		cache_ismounting(mp);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);

		mountlist_insert(mp, MNTINS_LAST);
		vn_unlock(vp);
		checkdirs(&mp->mnt_ncmounton, &mp->mnt_ncmountpt);
		/*
		 * NOTE(review): the result of vfs_allocate_syncvnode() is
		 * overwritten by VFS_START() two statements later, so a
		 * syncer allocation failure is never reported -- confirm
		 * whether that is intentional.
		 */
		error = vfs_allocate_syncvnode(mp);
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		error = VFS_START(mp, 0);
		vrele(vp);
		KNOTE(&fs_klist, VQ_MOUNT);
	} else {
		/*
		 * VFS_MOUNT() failed for a new mount: undo everything done
		 * since the mount structure was allocated and free it.
		 */
		vn_syncer_thr_stop(mp);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
		mp->mnt_vfc->vfc_refcount--;
		lwkt_reltoken(&mp->mnt_token);
		vfs_unbusy(mp);
		kfree(mp, M_MOUNT);
		cache_drop(&nch);
		vput(vp);
	}
done:
	return (error);
}

/*
 * Scan all active processes to see if any of them have a current
 * or root directory onto which the new filesystem has just been
 * mounted.  If so, replace them with the new mount point.
 *
 * Both old_nch and new_nch are ref'd on call but not locked.
 * new_nch must be temporarily locked so it can be associated with the
 * vnode representing the root of the mount point.
462 */ 463 struct checkdirs_info { 464 struct nchandle old_nch; 465 struct nchandle new_nch; 466 struct vnode *old_vp; 467 struct vnode *new_vp; 468 }; 469 470 static int checkdirs_callback(struct proc *p, void *data); 471 472 static void 473 checkdirs(struct nchandle *old_nch, struct nchandle *new_nch) 474 { 475 struct checkdirs_info info; 476 struct vnode *olddp; 477 struct vnode *newdp; 478 struct mount *mp; 479 480 /* 481 * If the old mount point's vnode has a usecount of 1, it is not 482 * being held as a descriptor anywhere. 483 */ 484 olddp = old_nch->ncp->nc_vp; 485 if (olddp == NULL || VREFCNT(olddp) == 1) 486 return; 487 488 /* 489 * Force the root vnode of the new mount point to be resolved 490 * so we can update any matching processes. 491 */ 492 mp = new_nch->mount; 493 if (VFS_ROOT(mp, &newdp)) 494 panic("mount: lost mount"); 495 vn_unlock(newdp); 496 cache_lock(new_nch); 497 vn_lock(newdp, LK_EXCLUSIVE | LK_RETRY); 498 cache_setunresolved(new_nch); 499 cache_setvp(new_nch, newdp); 500 cache_unlock(new_nch); 501 502 /* 503 * Special handling of the root node 504 */ 505 if (rootvnode == olddp) { 506 vref(newdp); 507 vfs_cache_setroot(newdp, cache_hold(new_nch)); 508 } 509 510 /* 511 * Pass newdp separately so the callback does not have to access 512 * it via new_nch->ncp->nc_vp. 513 */ 514 info.old_nch = *old_nch; 515 info.new_nch = *new_nch; 516 info.new_vp = newdp; 517 allproc_scan(checkdirs_callback, &info, 0); 518 vput(newdp); 519 } 520 521 /* 522 * NOTE: callback is not MP safe because the scanned process's filedesc 523 * structure can be ripped out from under us, amoung other things. 
524 */ 525 static int 526 checkdirs_callback(struct proc *p, void *data) 527 { 528 struct checkdirs_info *info = data; 529 struct filedesc *fdp; 530 struct nchandle ncdrop1; 531 struct nchandle ncdrop2; 532 struct vnode *vprele1; 533 struct vnode *vprele2; 534 535 if ((fdp = p->p_fd) != NULL) { 536 cache_zero(&ncdrop1); 537 cache_zero(&ncdrop2); 538 vprele1 = NULL; 539 vprele2 = NULL; 540 541 /* 542 * MPUNSAFE - XXX fdp can be pulled out from under a 543 * foreign process. 544 * 545 * A shared filedesc is ok, we don't have to copy it 546 * because we are making this change globally. 547 */ 548 spin_lock(&fdp->fd_spin); 549 if (fdp->fd_ncdir.mount == info->old_nch.mount && 550 fdp->fd_ncdir.ncp == info->old_nch.ncp) { 551 vprele1 = fdp->fd_cdir; 552 vref(info->new_vp); 553 fdp->fd_cdir = info->new_vp; 554 ncdrop1 = fdp->fd_ncdir; 555 cache_copy(&info->new_nch, &fdp->fd_ncdir); 556 } 557 if (fdp->fd_nrdir.mount == info->old_nch.mount && 558 fdp->fd_nrdir.ncp == info->old_nch.ncp) { 559 vprele2 = fdp->fd_rdir; 560 vref(info->new_vp); 561 fdp->fd_rdir = info->new_vp; 562 ncdrop2 = fdp->fd_nrdir; 563 cache_copy(&info->new_nch, &fdp->fd_nrdir); 564 } 565 spin_unlock(&fdp->fd_spin); 566 if (ncdrop1.ncp) 567 cache_drop(&ncdrop1); 568 if (ncdrop2.ncp) 569 cache_drop(&ncdrop2); 570 if (vprele1) 571 vrele(vprele1); 572 if (vprele2) 573 vrele(vprele2); 574 } 575 return(0); 576 } 577 578 /* 579 * Unmount a file system. 580 * 581 * Note: unmount takes a path to the vnode mounted on as argument, 582 * not special file (as before). 
583 * 584 * umount_args(char *path, int flags) 585 * 586 * MPALMOSTSAFE 587 */ 588 int 589 sys_unmount(struct unmount_args *uap) 590 { 591 struct thread *td = curthread; 592 struct proc *p __debugvar = td->td_proc; 593 struct mount *mp = NULL; 594 struct nlookupdata nd; 595 int error; 596 597 KKASSERT(p); 598 if (td->td_ucred->cr_prison != NULL) { 599 error = EPERM; 600 goto done; 601 } 602 if (usermount == 0 && (error = priv_check(td, PRIV_ROOT))) 603 goto done; 604 605 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 606 if (error == 0) 607 error = nlookup(&nd); 608 if (error) 609 goto out; 610 611 mp = nd.nl_nch.mount; 612 613 /* 614 * Only root, or the user that did the original mount is 615 * permitted to unmount this filesystem. 616 */ 617 if ((mp->mnt_stat.f_owner != td->td_ucred->cr_uid) && 618 (error = priv_check(td, PRIV_ROOT))) 619 goto out; 620 621 /* 622 * Don't allow unmounting the root file system. 623 */ 624 if (mp->mnt_flag & MNT_ROOTFS) { 625 error = EINVAL; 626 goto out; 627 } 628 629 /* 630 * Must be the root of the filesystem 631 */ 632 if (nd.nl_nch.ncp != mp->mnt_ncmountpt.ncp) { 633 error = EINVAL; 634 goto out; 635 } 636 637 /* 638 * If no error try to issue the unmount. We lose our cache 639 * ref when we call nlookup_done so we must hold the mount point 640 * to prevent use-after-free races. 641 */ 642 out: 643 if (error == 0) { 644 mount_hold(mp); 645 nlookup_done(&nd); 646 error = dounmount(mp, uap->flags, 0); 647 mount_drop(mp); 648 } else { 649 nlookup_done(&nd); 650 } 651 done: 652 return (error); 653 } 654 655 /* 656 * Do the actual file system unmount (interlocked against the mountlist 657 * token and mp->mnt_token). 
*/
/*
 * Callback passed to mountlist_interlock() by dounmount().
 *
 * Returns EBUSY if MNTK_UNMOUNT is already set on the mount, otherwise
 * sets MNTK_UNMOUNT and returns 0.
 */
static int
dounmount_interlock(struct mount *mp)
{
	if (mp->mnt_kern_flag & MNTK_UNMOUNT)
		return (EBUSY);
	mp->mnt_kern_flag |= MNTK_UNMOUNT;
	return(0);
}

/*
 * allproc_scan() callback used by dounmount(); arg is the mount being
 * unmounted.  Drops the process's p_textnch handle when that handle
 * refers to the mount.  Always returns 0.
 */
static int
unmount_allproc_cb(struct proc *p, void *arg)
{
	struct mount *mp;

	if (p->p_textnch.ncp == NULL)
		return 0;

	mp = (struct mount *)arg;
	if (p->p_textnch.mount == mp)
		cache_drop(&p->p_textnch);

	return 0;
}

/*
 * The guts of the unmount code.  The mount owns one ref and one hold
 * count.  If we successfully interlock the unmount, those refs are ours.
 * (The ref is from mnt_ncmountpt).
 *
 * When halting we shortcut certain mount types such as devfs by not actually
 * issuing the VFS_SYNC() or VFS_UNMOUNT().  They are still disconnected
 * from the mountlist so higher-level filesystems can unmount cleanly.
 *
 * The mount types that allow QUICKHALT are: devfs, tmpfs, procfs.
 *
 * mp      - mount to tear down
 * flags   - unmount flags; MNT_FORCE is examined here and the whole
 *	     word is passed on to VFS_UNMOUNT()
 * halting - non-zero enables the MNTK_QUICKHALT shortcut described above
 *
 * Returns 0 on success.  Failure returns are: EBUSY from
 * dounmount_interlock() when another unmount is in progress; the
 * lockmgr() error when mnt_lock cannot be obtained; EBUSY when
 * namecache or mount references remain and MNT_FORCE was not given; or
 * the error from VFS_SYNC()/VFS_UNMOUNT().  On the failure paths taken
 * after the interlock succeeded, MNTK_UNMOUNT/MNTK_UNMOUNTF are cleared
 * again.
 *
 * mp->mnt_token is acquired on entry and released on every return path.
 */
int
dounmount(struct mount *mp, int flags, int halting)
{
	struct namecache *ncp;
	struct nchandle nch;
	struct vnode *vp;
	int error;
	int async_flag;
	int lflags;
	int freeok = 1;		/* cleared when mp must not be dropped here */
	int retry;
	int quickhalt;

	lwkt_gettoken(&mp->mnt_token);

	/*
	 * When halting, certain mount points can essentially just
	 * be unhooked and otherwise ignored.
	 */
	if (halting && (mp->mnt_kern_flag & MNTK_QUICKHALT)) {
		quickhalt = 1;
		freeok = 0;
	} else {
		quickhalt = 0;
	}


	/*
	 * Exclusive access for unmounting purposes.
	 */
	if ((error = mountlist_interlock(dounmount_interlock, mp)) != 0)
		goto out;

	/*
	 * We now 'own' the last mp->mnt_refs
	 *
	 * Allow filesystems to detect that a forced unmount is in progress.
	 *
	 * A non-forced unmount adds LK_TIMELOCK to the lock request; a
	 * forced one does not.
	 */
	if (flags & MNT_FORCE)
		mp->mnt_kern_flag |= MNTK_UNMOUNTF;
	lflags = LK_EXCLUSIVE | ((flags & MNT_FORCE) ? 0 : LK_TIMELOCK);
	error = lockmgr(&mp->mnt_lock, lflags);
	if (error) {
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}

	if (mp->mnt_flag & MNT_EXPUBLIC)
		vfs_setpublicfs(NULL, NULL, NULL);

	/*
	 * MNT_ASYNC is masked off for the duration of the unmount; the
	 * saved bit is or'd back in on the error-recovery path below.
	 */
	vfs_msync(mp, MNT_WAIT);
	async_flag = mp->mnt_flag & MNT_ASYNC;
	mp->mnt_flag &=~ MNT_ASYNC;

	/*
	 * If this filesystem isn't aliasing other filesystems,
	 * try to invalidate any remaining namecache entries and
	 * check the count afterwards.
	 *
	 * We own the last mnt_refs by owning mnt_ncmountpt.
	 */
	if ((mp->mnt_kern_flag & MNTK_NCALIASED) == 0) {
		cache_lock(&mp->mnt_ncmountpt);
		cache_inval(&mp->mnt_ncmountpt, CINV_DESTROY|CINV_CHILDREN);
		cache_unlock(&mp->mnt_ncmountpt);

		/*
		 * First pass: if references remain, drop any process
		 * p_textnch handles that point into this mount.
		 */
		cache_clearmntcache();
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {
			allproc_scan(&unmount_allproc_cb, mp, 0);
		}

		/*
		 * Second pass: anything still referenced is either fatal
		 * (EBUSY) or, when forcing, prevents us from freeing mp.
		 * Note that an EBUSY set here does not return immediately;
		 * it is acted on by the error-recovery code further down.
		 */
		cache_clearmntcache();
		if ((ncp = mp->mnt_ncmountpt.ncp) != NULL &&
		    (ncp->nc_refs != 1 || TAILQ_FIRST(&ncp->nc_list))) {

			if ((flags & MNT_FORCE) == 0) {
				error = EBUSY;
				mount_warning(mp, "Cannot unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
			} else {
				mount_warning(mp, "Forced unmount: "
						  "%d namecache "
						  "references still "
						  "present",
						  ncp->nc_refs - 1);
				freeok = 0;
			}
		}
	}

	/*
	 * Decommission our special mnt_syncer vnode.  This also stops
	 * the vnlru code.  If we are unable to unmount we recommission
	 * the vnode.
	 *
	 * Then sync the filesystem.
	 */
	if ((vp = mp->mnt_syncer) != NULL) {
		mp->mnt_syncer = NULL;
		atomic_set_int(&vp->v_refcnt, VREF_FINALIZE);
		vrele(vp);
	}

	if (quickhalt == 0) {
		if ((mp->mnt_flag & MNT_RDONLY) == 0)
			VFS_SYNC(mp, MNT_WAIT);
	}

	/*
	 * nchandle records ref the mount structure.  Expect a count of 1
	 * (our mount->mnt_ncmountpt).
	 *
	 * Scans can get temporary refs on a mountpoint (though really
	 * heavy duty stuff like cache_findmount() do not).
	 *
	 * Retry up to 10 times, sleeping hz / 10 + 1 ticks per attempt.
	 */
	if (mp->mnt_refs != 1)
		cache_clearmntcache();
	for (retry = 0; retry < 10 && mp->mnt_refs != 1; ++retry) {
		cache_unmounting(mp);
		tsleep(&mp->mnt_refs, 0, "mntbsy", hz / 10 + 1);
		cache_clearmntcache();
	}
	if (mp->mnt_refs != 1) {
		if ((flags & MNT_FORCE) == 0) {
			mount_warning(mp, "Cannot unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs - 1);
			error = EBUSY;
		} else {
			mount_warning(mp, "Forced unmount: "
					  "%d mount refs still present",
					  mp->mnt_refs - 1);
			freeok = 0;
		}
	}

	/*
	 * So far so good, sync the filesystem once more and
	 * call the VFS unmount code if the sync succeeds.
	 *
	 * A sync failure is ignored when it is EOPNOTSUPP or when the
	 * unmount is forced.
	 */
	if (error == 0 && quickhalt == 0) {
		if (mp->mnt_flag & MNT_RDONLY) {
			error = VFS_UNMOUNT(mp, flags);
		} else {
			error = VFS_SYNC(mp, MNT_WAIT);
			if ((error == 0) ||
			    (error == EOPNOTSUPP) || /* No sync */
			    (flags & MNT_FORCE)) {
				error = VFS_UNMOUNT(mp, flags);
			}
		}
	}

	/*
	 * If an error occurred we can still recover, restoring the
	 * syncer vnode and misc flags.
	 */
	if (error) {
		if (mp->mnt_syncer == NULL)
			vfs_allocate_syncvnode(mp);
		mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF);
		mp->mnt_flag |= async_flag;
		lockmgr(&mp->mnt_lock, LK_RELEASE);
		if (mp->mnt_kern_flag & MNTK_MWAIT) {
			mp->mnt_kern_flag &= ~MNTK_MWAIT;
			wakeup(mp);
		}
		goto out;
	}
	/*
	 * Clean up any journals still associated with the mount after
	 * filesystem activity has ceased.
	 */
	journal_remove_all_journals(mp,
	    ((flags & MNT_FORCE) ? MC_JOURNAL_STOP_IMM : 0));

	mountlist_remove(mp);

	/*
	 * Remove any installed vnode ops here so the individual VFSs don't
	 * have to.
	 *
	 * mnt_refs should go to zero when we scrap mnt_ncmountpt.
	 *
	 * When quickhalting we have to keep these intact because the
	 * underlying vnodes have not been destroyed, and some might be
	 * dirty.
	 */
	if (quickhalt == 0) {
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_coherency_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_journal_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_norm_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_spec_ops);
		vfs_rm_vnodeops(mp, NULL, &mp->mnt_vn_fifo_ops);
	}

	/*
	 * Detach both namecache handles from the mount.  Each handle is
	 * copied to a local and zeroed in the mount before it is dropped.
	 */
	if (mp->mnt_ncmountpt.ncp != NULL) {
		nch = mp->mnt_ncmountpt;
		cache_zero(&mp->mnt_ncmountpt);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}
	if (mp->mnt_ncmounton.ncp != NULL) {
		cache_unmounting(mp);
		nch = mp->mnt_ncmounton;
		cache_zero(&mp->mnt_ncmounton);
		cache_clrmountpt(&nch);
		cache_drop(&nch);
	}

	mp->mnt_vfc->vfc_refcount--;

	/*
	 * If not quickhalting the mount, we expect there to be no
	 * vnodes left.
	 */
	if (quickhalt == 0 && !TAILQ_EMPTY(&mp->mnt_nvnodelist))
		panic("unmount: dangling vnode");

	/*
	 * Release the lock
	 */
	lockmgr(&mp->mnt_lock, LK_RELEASE);
	if (mp->mnt_kern_flag & MNTK_MWAIT) {
		mp->mnt_kern_flag &= ~MNTK_MWAIT;
		wakeup(mp);
	}

	/*
	 * If we reach here and freeok != 0 we must free the mount.
	 * mnt_refs should already have dropped to 0, so if it is not
	 * zero we must cycle the caches and wait.
	 *
	 * When we are satisfied that the mount has disconnected we can
	 * drop the hold on the mp that represented the mount (though the
	 * caller might actually have another, so the caller's drop may
	 * do the actual free).
	 *
	 * mp is set to NULL after the drop so the common exit code does
	 * not release the token a second time.
	 */
	if (freeok) {
		if (mp->mnt_refs > 0)
			cache_clearmntcache();
		while (mp->mnt_refs > 0) {
			cache_unmounting(mp);
			wakeup(mp);
			tsleep(&mp->mnt_refs, 0, "umntrwait", hz / 10 + 1);
			cache_clearmntcache();
		}
		lwkt_reltoken(&mp->mnt_token);
		mount_drop(mp);
		mp = NULL;
	} else {
		cache_clearmntcache();
	}
	error = 0;
	KNOTE(&fs_klist, VQ_UNMOUNT);
out:
	if (mp)
		lwkt_reltoken(&mp->mnt_token);
	return (error);
}

/*
 * Print an unmount diagnostic on the console via kprintf().
 *
 * ctl and the variadic arguments form a printf-style message.  The
 * message is prefixed with "unmount(<path>): " where <path> is obtained
 * from cache_fullpath() on mp->mnt_ncmounton.  If that lookup fails the
 * prefix is built from the mount pointer and, when available, the
 * nc_name of mnt_ncmounton instead.  A newline is always appended.
 */
static
void
mount_warning(struct mount *mp, const char *ctl, ...)
{
	char *ptr;
	char *buf;
	__va_list va;

	__va_start(va, ctl);
	if (cache_fullpath(NULL, &mp->mnt_ncmounton, NULL,
			   &ptr, &buf, 0) == 0) {
		kprintf("unmount(%s): ", ptr);
		kvprintf(ctl, va);
		kprintf("\n");
		/* buf is the allocation returned by cache_fullpath() */
		kfree(buf, M_TEMP);
	} else {
		kprintf("unmount(%p", mp);
		if (mp->mnt_ncmounton.ncp && mp->mnt_ncmounton.ncp->nc_name)
			kprintf(",%s", mp->mnt_ncmounton.ncp->nc_name);
		kprintf("): ");
		kvprintf(ctl, va);
		kprintf("\n");
	}
	__va_end(va);
}

/*
 * Shim cache_fullpath() to handle the case where a process is chrooted into
 * a subdirectory of a mount.
In this case if the root mount matches the 992 * process root directory's mount we have to specify the process's root 993 * directory instead of the mount point, because the mount point might 994 * be above the root directory. 995 */ 996 static 997 int 998 mount_path(struct proc *p, struct mount *mp, char **rb, char **fb) 999 { 1000 struct nchandle *nch; 1001 1002 if (p && p->p_fd->fd_nrdir.mount == mp) 1003 nch = &p->p_fd->fd_nrdir; 1004 else 1005 nch = &mp->mnt_ncmountpt; 1006 return(cache_fullpath(p, nch, NULL, rb, fb, 0)); 1007 } 1008 1009 /* 1010 * Sync each mounted filesystem. 1011 */ 1012 1013 #ifdef DEBUG 1014 static int syncprt = 0; 1015 SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); 1016 #endif /* DEBUG */ 1017 1018 static int sync_callback(struct mount *mp, void *data); 1019 1020 int 1021 sys_sync(struct sync_args *uap) 1022 { 1023 mountlist_scan(sync_callback, NULL, MNTSCAN_FORWARD); 1024 return (0); 1025 } 1026 1027 static 1028 int 1029 sync_callback(struct mount *mp, void *data __unused) 1030 { 1031 int asyncflag; 1032 1033 if ((mp->mnt_flag & MNT_RDONLY) == 0) { 1034 lwkt_gettoken(&mp->mnt_token); 1035 asyncflag = mp->mnt_flag & MNT_ASYNC; 1036 mp->mnt_flag &= ~MNT_ASYNC; 1037 lwkt_reltoken(&mp->mnt_token); 1038 vfs_msync(mp, MNT_NOWAIT); 1039 VFS_SYNC(mp, MNT_NOWAIT); 1040 lwkt_gettoken(&mp->mnt_token); 1041 mp->mnt_flag |= asyncflag; 1042 lwkt_reltoken(&mp->mnt_token); 1043 } 1044 return(0); 1045 } 1046 1047 /* XXX PRISON: could be per prison flag */ 1048 static int prison_quotas; 1049 #if 0 1050 SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); 1051 #endif 1052 1053 /* 1054 * quotactl_args(char *path, int fcmd, int uid, caddr_t arg) 1055 * 1056 * Change filesystem quotas. 
1057 * 1058 * MPALMOSTSAFE 1059 */ 1060 int 1061 sys_quotactl(struct quotactl_args *uap) 1062 { 1063 struct nlookupdata nd; 1064 struct thread *td; 1065 struct mount *mp; 1066 int error; 1067 1068 td = curthread; 1069 if (td->td_ucred->cr_prison && !prison_quotas) { 1070 error = EPERM; 1071 goto done; 1072 } 1073 1074 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1075 if (error == 0) 1076 error = nlookup(&nd); 1077 if (error == 0) { 1078 mp = nd.nl_nch.mount; 1079 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, 1080 uap->arg, nd.nl_cred); 1081 } 1082 nlookup_done(&nd); 1083 done: 1084 return (error); 1085 } 1086 1087 /* 1088 * mountctl(char *path, int op, int fd, const void *ctl, int ctllen, 1089 * void *buf, int buflen) 1090 * 1091 * This function operates on a mount point and executes the specified 1092 * operation using the specified control data, and possibly returns data. 1093 * 1094 * The actual number of bytes stored in the result buffer is returned, 0 1095 * if none, otherwise an error is returned. 1096 * 1097 * MPALMOSTSAFE 1098 */ 1099 int 1100 sys_mountctl(struct mountctl_args *uap) 1101 { 1102 struct thread *td = curthread; 1103 struct file *fp; 1104 void *ctl = NULL; 1105 void *buf = NULL; 1106 char *path = NULL; 1107 int error; 1108 1109 /* 1110 * Sanity and permissions checks. We must be root. 
1111 */ 1112 if (td->td_ucred->cr_prison != NULL) 1113 return (EPERM); 1114 if ((uap->op != MOUNTCTL_MOUNTFLAGS) && 1115 (error = priv_check(td, PRIV_ROOT)) != 0) 1116 return (error); 1117 1118 /* 1119 * Argument length checks 1120 */ 1121 if (uap->ctllen < 0 || uap->ctllen > 1024) 1122 return (EINVAL); 1123 if (uap->buflen < 0 || uap->buflen > 16 * 1024) 1124 return (EINVAL); 1125 if (uap->path == NULL) 1126 return (EINVAL); 1127 1128 /* 1129 * Allocate the necessary buffers and copyin data 1130 */ 1131 path = objcache_get(namei_oc, M_WAITOK); 1132 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 1133 if (error) 1134 goto done; 1135 1136 if (uap->ctllen) { 1137 ctl = kmalloc(uap->ctllen + 1, M_TEMP, M_WAITOK|M_ZERO); 1138 error = copyin(uap->ctl, ctl, uap->ctllen); 1139 if (error) 1140 goto done; 1141 } 1142 if (uap->buflen) 1143 buf = kmalloc(uap->buflen + 1, M_TEMP, M_WAITOK|M_ZERO); 1144 1145 /* 1146 * Validate the descriptor 1147 */ 1148 if (uap->fd >= 0) { 1149 fp = holdfp(td, uap->fd, -1); 1150 if (fp == NULL) { 1151 error = EBADF; 1152 goto done; 1153 } 1154 } else { 1155 fp = NULL; 1156 } 1157 1158 /* 1159 * Execute the internal kernel function and clean up. 1160 */ 1161 error = kern_mountctl(path, uap->op, fp, ctl, uap->ctllen, 1162 buf, uap->buflen, &uap->sysmsg_result); 1163 if (fp) 1164 dropfp(td, uap->fd, fp); 1165 if (error == 0 && uap->sysmsg_result > 0) 1166 error = copyout(buf, uap->buf, uap->sysmsg_result); 1167 done: 1168 if (path) 1169 objcache_put(namei_oc, path); 1170 if (ctl) 1171 kfree(ctl, M_TEMP); 1172 if (buf) 1173 kfree(buf, M_TEMP); 1174 return (error); 1175 } 1176 1177 /* 1178 * Execute a mount control operation by resolving the path to a mount point 1179 * and calling vop_mountctl(). 1180 * 1181 * Use the mount point from the nch instead of the vnode so nullfs mounts 1182 * can properly spike the VOP. 
1183 */ 1184 int 1185 kern_mountctl(const char *path, int op, struct file *fp, 1186 const void *ctl, int ctllen, 1187 void *buf, int buflen, int *res) 1188 { 1189 struct vnode *vp; 1190 struct nlookupdata nd; 1191 struct nchandle nch; 1192 struct mount *mp; 1193 int error; 1194 1195 *res = 0; 1196 vp = NULL; 1197 error = nlookup_init(&nd, path, UIO_SYSSPACE, NLC_FOLLOW); 1198 if (error) 1199 return (error); 1200 error = nlookup(&nd); 1201 if (error) { 1202 nlookup_done(&nd); 1203 return (error); 1204 } 1205 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 1206 if (error) { 1207 nlookup_done(&nd); 1208 return (error); 1209 } 1210 1211 /* 1212 * Yes, all this is needed to use the nch.mount below, because 1213 * we must maintain a ref on the mount to avoid ripouts (e.g. 1214 * due to heavy mount/unmount use by synth or poudriere). 1215 */ 1216 nch = nd.nl_nch; 1217 cache_zero(&nd.nl_nch); 1218 cache_unlock(&nch); 1219 nlookup_done(&nd); 1220 vn_unlock(vp); 1221 1222 mp = nch.mount; 1223 1224 /* 1225 * Must be the root of the filesystem 1226 */ 1227 if ((vp->v_flag & (VROOT|VPFSROOT)) == 0) { 1228 cache_drop(&nch); 1229 vrele(vp); 1230 return (EINVAL); 1231 } 1232 if (mp == NULL || mp->mnt_kern_flag & MNTK_UNMOUNT) { 1233 kprintf("kern_mountctl: Warning, \"%s\" racing unmount\n", 1234 path); 1235 cache_drop(&nch); 1236 vrele(vp); 1237 return (EINVAL); 1238 } 1239 error = vop_mountctl(mp->mnt_vn_use_ops, vp, op, fp, ctl, ctllen, 1240 buf, buflen, res); 1241 vrele(vp); 1242 cache_drop(&nch); 1243 1244 return (error); 1245 } 1246 1247 int 1248 kern_statfs(struct nlookupdata *nd, struct statfs *buf) 1249 { 1250 struct thread *td = curthread; 1251 struct proc *p = td->td_proc; 1252 struct mount *mp; 1253 struct statfs *sp; 1254 char *fullpath, *freepath; 1255 int error; 1256 1257 if ((error = nlookup(nd)) != 0) 1258 return (error); 1259 mp = nd->nl_nch.mount; 1260 sp = &mp->mnt_stat; 1261 if ((error = VFS_STATFS(mp, sp, nd->nl_cred)) != 0) 1262 return (error); 
1263 1264 error = mount_path(p, mp, &fullpath, &freepath); 1265 if (error) 1266 return(error); 1267 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1268 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1269 kfree(freepath, M_TEMP); 1270 1271 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1272 bcopy(sp, buf, sizeof(*buf)); 1273 /* Only root should have access to the fsid's. */ 1274 if (priv_check(td, PRIV_ROOT)) 1275 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1276 return (0); 1277 } 1278 1279 /* 1280 * statfs_args(char *path, struct statfs *buf) 1281 * 1282 * Get filesystem statistics. 1283 */ 1284 int 1285 sys_statfs(struct statfs_args *uap) 1286 { 1287 struct nlookupdata nd; 1288 struct statfs buf; 1289 int error; 1290 1291 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1292 if (error == 0) 1293 error = kern_statfs(&nd, &buf); 1294 nlookup_done(&nd); 1295 if (error == 0) 1296 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1297 return (error); 1298 } 1299 1300 int 1301 kern_fstatfs(int fd, struct statfs *buf) 1302 { 1303 struct thread *td = curthread; 1304 struct proc *p = td->td_proc; 1305 struct file *fp; 1306 struct mount *mp; 1307 struct statfs *sp; 1308 char *fullpath, *freepath; 1309 int error; 1310 1311 KKASSERT(p); 1312 if ((error = holdvnode(td, fd, &fp)) != 0) 1313 return (error); 1314 1315 /* 1316 * Try to use mount info from any overlays rather than the 1317 * mount info for the underlying vnode, otherwise we will 1318 * fail when operating on null-mounted paths inside a chroot. 
1319 */ 1320 if ((mp = fp->f_nchandle.mount) == NULL) 1321 mp = ((struct vnode *)fp->f_data)->v_mount; 1322 if (mp == NULL) { 1323 error = EBADF; 1324 goto done; 1325 } 1326 if (fp->f_cred == NULL) { 1327 error = EINVAL; 1328 goto done; 1329 } 1330 sp = &mp->mnt_stat; 1331 if ((error = VFS_STATFS(mp, sp, fp->f_cred)) != 0) 1332 goto done; 1333 1334 if ((error = mount_path(p, mp, &fullpath, &freepath)) != 0) 1335 goto done; 1336 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1337 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1338 kfree(freepath, M_TEMP); 1339 1340 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1341 bcopy(sp, buf, sizeof(*buf)); 1342 1343 /* Only root should have access to the fsid's. */ 1344 if (priv_check(td, PRIV_ROOT)) 1345 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 1346 error = 0; 1347 done: 1348 fdrop(fp); 1349 return (error); 1350 } 1351 1352 /* 1353 * fstatfs_args(int fd, struct statfs *buf) 1354 * 1355 * Get filesystem statistics. 1356 */ 1357 int 1358 sys_fstatfs(struct fstatfs_args *uap) 1359 { 1360 struct statfs buf; 1361 int error; 1362 1363 error = kern_fstatfs(uap->fd, &buf); 1364 1365 if (error == 0) 1366 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1367 return (error); 1368 } 1369 1370 int 1371 kern_statvfs(struct nlookupdata *nd, struct statvfs *buf) 1372 { 1373 struct mount *mp; 1374 struct statvfs *sp; 1375 int error; 1376 1377 if ((error = nlookup(nd)) != 0) 1378 return (error); 1379 mp = nd->nl_nch.mount; 1380 sp = &mp->mnt_vstat; 1381 if ((error = VFS_STATVFS(mp, sp, nd->nl_cred)) != 0) 1382 return (error); 1383 1384 sp->f_flag = 0; 1385 if (mp->mnt_flag & MNT_RDONLY) 1386 sp->f_flag |= ST_RDONLY; 1387 if (mp->mnt_flag & MNT_NOSUID) 1388 sp->f_flag |= ST_NOSUID; 1389 bcopy(sp, buf, sizeof(*buf)); 1390 return (0); 1391 } 1392 1393 /* 1394 * statfs_args(char *path, struct statfs *buf) 1395 * 1396 * Get filesystem statistics. 
1397 */ 1398 int 1399 sys_statvfs(struct statvfs_args *uap) 1400 { 1401 struct nlookupdata nd; 1402 struct statvfs buf; 1403 int error; 1404 1405 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1406 if (error == 0) 1407 error = kern_statvfs(&nd, &buf); 1408 nlookup_done(&nd); 1409 if (error == 0) 1410 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1411 return (error); 1412 } 1413 1414 int 1415 kern_fstatvfs(int fd, struct statvfs *buf) 1416 { 1417 struct thread *td = curthread; 1418 struct file *fp; 1419 struct mount *mp; 1420 struct statvfs *sp; 1421 int error; 1422 1423 if ((error = holdvnode(td, fd, &fp)) != 0) 1424 return (error); 1425 if ((mp = fp->f_nchandle.mount) == NULL) 1426 mp = ((struct vnode *)fp->f_data)->v_mount; 1427 if (mp == NULL) { 1428 error = EBADF; 1429 goto done; 1430 } 1431 if (fp->f_cred == NULL) { 1432 error = EINVAL; 1433 goto done; 1434 } 1435 sp = &mp->mnt_vstat; 1436 if ((error = VFS_STATVFS(mp, sp, fp->f_cred)) != 0) 1437 goto done; 1438 1439 sp->f_flag = 0; 1440 if (mp->mnt_flag & MNT_RDONLY) 1441 sp->f_flag |= ST_RDONLY; 1442 if (mp->mnt_flag & MNT_NOSUID) 1443 sp->f_flag |= ST_NOSUID; 1444 1445 bcopy(sp, buf, sizeof(*buf)); 1446 error = 0; 1447 done: 1448 fdrop(fp); 1449 return (error); 1450 } 1451 1452 /* 1453 * fstatfs_args(int fd, struct statfs *buf) 1454 * 1455 * Get filesystem statistics. 1456 */ 1457 int 1458 sys_fstatvfs(struct fstatvfs_args *uap) 1459 { 1460 struct statvfs buf; 1461 int error; 1462 1463 error = kern_fstatvfs(uap->fd, &buf); 1464 1465 if (error == 0) 1466 error = copyout(&buf, uap->buf, sizeof(*uap->buf)); 1467 return (error); 1468 } 1469 1470 /* 1471 * getfsstat_args(struct statfs *buf, long bufsize, int flags) 1472 * 1473 * Get statistics on all filesystems. 
1474 */ 1475 1476 struct getfsstat_info { 1477 struct statfs *sfsp; 1478 long count; 1479 long maxcount; 1480 int error; 1481 int flags; 1482 struct thread *td; 1483 }; 1484 1485 static int getfsstat_callback(struct mount *, void *); 1486 1487 int 1488 sys_getfsstat(struct getfsstat_args *uap) 1489 { 1490 struct thread *td = curthread; 1491 struct getfsstat_info info; 1492 1493 bzero(&info, sizeof(info)); 1494 1495 info.maxcount = uap->bufsize / sizeof(struct statfs); 1496 info.sfsp = uap->buf; 1497 info.count = 0; 1498 info.flags = uap->flags; 1499 info.td = td; 1500 1501 mountlist_scan(getfsstat_callback, &info, MNTSCAN_FORWARD); 1502 if (info.sfsp && info.count > info.maxcount) 1503 uap->sysmsg_result = info.maxcount; 1504 else 1505 uap->sysmsg_result = info.count; 1506 return (info.error); 1507 } 1508 1509 static int 1510 getfsstat_callback(struct mount *mp, void *data) 1511 { 1512 struct getfsstat_info *info = data; 1513 struct statfs *sp; 1514 char *freepath; 1515 char *fullpath; 1516 int error; 1517 1518 if (info->sfsp && info->count < info->maxcount) { 1519 if (info->td->td_proc && 1520 !chroot_visible_mnt(mp, info->td->td_proc)) { 1521 return(0); 1522 } 1523 sp = &mp->mnt_stat; 1524 1525 /* 1526 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1527 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1528 * overrides MNT_WAIT. 
1529 */ 1530 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1531 (info->flags & MNT_WAIT)) && 1532 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1533 return(0); 1534 } 1535 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1536 1537 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1538 if (error) { 1539 info->error = error; 1540 return(-1); 1541 } 1542 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1543 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1544 kfree(freepath, M_TEMP); 1545 1546 error = copyout(sp, info->sfsp, sizeof(*sp)); 1547 if (error) { 1548 info->error = error; 1549 return (-1); 1550 } 1551 ++info->sfsp; 1552 } 1553 info->count++; 1554 return(0); 1555 } 1556 1557 /* 1558 * getvfsstat_args(struct statfs *buf, struct statvfs *vbuf, 1559 long bufsize, int flags) 1560 * 1561 * Get statistics on all filesystems. 1562 */ 1563 1564 struct getvfsstat_info { 1565 struct statfs *sfsp; 1566 struct statvfs *vsfsp; 1567 long count; 1568 long maxcount; 1569 int error; 1570 int flags; 1571 struct thread *td; 1572 }; 1573 1574 static int getvfsstat_callback(struct mount *, void *); 1575 1576 int 1577 sys_getvfsstat(struct getvfsstat_args *uap) 1578 { 1579 struct thread *td = curthread; 1580 struct getvfsstat_info info; 1581 1582 bzero(&info, sizeof(info)); 1583 1584 info.maxcount = uap->vbufsize / sizeof(struct statvfs); 1585 info.sfsp = uap->buf; 1586 info.vsfsp = uap->vbuf; 1587 info.count = 0; 1588 info.flags = uap->flags; 1589 info.td = td; 1590 1591 mountlist_scan(getvfsstat_callback, &info, MNTSCAN_FORWARD); 1592 if (info.vsfsp && info.count > info.maxcount) 1593 uap->sysmsg_result = info.maxcount; 1594 else 1595 uap->sysmsg_result = info.count; 1596 return (info.error); 1597 } 1598 1599 static int 1600 getvfsstat_callback(struct mount *mp, void *data) 1601 { 1602 struct getvfsstat_info *info = data; 1603 struct statfs *sp; 1604 struct statvfs *vsp; 1605 char *freepath; 1606 char *fullpath; 1607 int error; 1608 1609 if 
(info->vsfsp && info->count < info->maxcount) { 1610 if (info->td->td_proc && 1611 !chroot_visible_mnt(mp, info->td->td_proc)) { 1612 return(0); 1613 } 1614 sp = &mp->mnt_stat; 1615 vsp = &mp->mnt_vstat; 1616 1617 /* 1618 * If MNT_NOWAIT or MNT_LAZY is specified, do not 1619 * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY 1620 * overrides MNT_WAIT. 1621 */ 1622 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1623 (info->flags & MNT_WAIT)) && 1624 (error = VFS_STATFS(mp, sp, info->td->td_ucred))) { 1625 return(0); 1626 } 1627 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 1628 1629 if (((info->flags & (MNT_LAZY|MNT_NOWAIT)) == 0 || 1630 (info->flags & MNT_WAIT)) && 1631 (error = VFS_STATVFS(mp, vsp, info->td->td_ucred))) { 1632 return(0); 1633 } 1634 vsp->f_flag = 0; 1635 if (mp->mnt_flag & MNT_RDONLY) 1636 vsp->f_flag |= ST_RDONLY; 1637 if (mp->mnt_flag & MNT_NOSUID) 1638 vsp->f_flag |= ST_NOSUID; 1639 1640 error = mount_path(info->td->td_proc, mp, &fullpath, &freepath); 1641 if (error) { 1642 info->error = error; 1643 return(-1); 1644 } 1645 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 1646 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 1647 kfree(freepath, M_TEMP); 1648 1649 error = copyout(sp, info->sfsp, sizeof(*sp)); 1650 if (error == 0) 1651 error = copyout(vsp, info->vsfsp, sizeof(*vsp)); 1652 if (error) { 1653 info->error = error; 1654 return (-1); 1655 } 1656 ++info->sfsp; 1657 ++info->vsfsp; 1658 } 1659 info->count++; 1660 return(0); 1661 } 1662 1663 1664 /* 1665 * fchdir_args(int fd) 1666 * 1667 * Change current working directory to a given file descriptor. 
1668 */ 1669 int 1670 sys_fchdir(struct fchdir_args *uap) 1671 { 1672 struct thread *td = curthread; 1673 struct proc *p = td->td_proc; 1674 struct filedesc *fdp = p->p_fd; 1675 struct vnode *vp, *ovp; 1676 struct mount *mp; 1677 struct file *fp; 1678 struct nchandle nch, onch, tnch; 1679 int error; 1680 1681 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 1682 return (error); 1683 lwkt_gettoken(&p->p_token); 1684 vp = (struct vnode *)fp->f_data; 1685 vref(vp); 1686 vn_lock(vp, LK_SHARED | LK_RETRY); 1687 if (fp->f_nchandle.ncp == NULL) 1688 error = ENOTDIR; 1689 else 1690 error = checkvp_chdir(vp, td); 1691 if (error) { 1692 vput(vp); 1693 goto done; 1694 } 1695 cache_copy(&fp->f_nchandle, &nch); 1696 1697 /* 1698 * If the ncp has become a mount point, traverse through 1699 * the mount point. 1700 */ 1701 1702 while (!error && (nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1703 (mp = cache_findmount(&nch)) != NULL 1704 ) { 1705 error = nlookup_mp(mp, &tnch); 1706 if (error == 0) { 1707 cache_unlock(&tnch); /* leave ref intact */ 1708 vput(vp); 1709 vp = tnch.ncp->nc_vp; 1710 error = vget(vp, LK_SHARED); 1711 KKASSERT(error == 0); 1712 cache_drop(&nch); 1713 nch = tnch; 1714 } 1715 cache_dropmount(mp); 1716 } 1717 if (error == 0) { 1718 spin_lock(&fdp->fd_spin); 1719 ovp = fdp->fd_cdir; 1720 onch = fdp->fd_ncdir; 1721 fdp->fd_cdir = vp; 1722 fdp->fd_ncdir = nch; 1723 spin_unlock(&fdp->fd_spin); 1724 vn_unlock(vp); /* leave ref intact */ 1725 cache_drop(&onch); 1726 vrele(ovp); 1727 } else { 1728 cache_drop(&nch); 1729 vput(vp); 1730 } 1731 fdrop(fp); 1732 done: 1733 lwkt_reltoken(&p->p_token); 1734 return (error); 1735 } 1736 1737 int 1738 kern_chdir(struct nlookupdata *nd) 1739 { 1740 struct thread *td = curthread; 1741 struct proc *p = td->td_proc; 1742 struct filedesc *fdp = p->p_fd; 1743 struct vnode *vp, *ovp; 1744 struct nchandle onch; 1745 int error; 1746 1747 nd->nl_flags |= NLC_SHAREDLOCK; 1748 if ((error = nlookup(nd)) != 0) 1749 return (error); 1750 if ((vp = 
nd->nl_nch.ncp->nc_vp) == NULL) 1751 return (ENOENT); 1752 if ((error = vget(vp, LK_SHARED)) != 0) 1753 return (error); 1754 1755 lwkt_gettoken(&p->p_token); 1756 error = checkvp_chdir(vp, td); 1757 vn_unlock(vp); 1758 if (error == 0) { 1759 spin_lock(&fdp->fd_spin); 1760 ovp = fdp->fd_cdir; 1761 onch = fdp->fd_ncdir; 1762 fdp->fd_ncdir = nd->nl_nch; 1763 fdp->fd_cdir = vp; 1764 spin_unlock(&fdp->fd_spin); 1765 cache_unlock(&nd->nl_nch); /* leave reference intact */ 1766 cache_drop(&onch); 1767 vrele(ovp); 1768 cache_zero(&nd->nl_nch); 1769 } else { 1770 vrele(vp); 1771 } 1772 lwkt_reltoken(&p->p_token); 1773 return (error); 1774 } 1775 1776 /* 1777 * chdir_args(char *path) 1778 * 1779 * Change current working directory (``.''). 1780 */ 1781 int 1782 sys_chdir(struct chdir_args *uap) 1783 { 1784 struct nlookupdata nd; 1785 int error; 1786 1787 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1788 if (error == 0) 1789 error = kern_chdir(&nd); 1790 nlookup_done(&nd); 1791 return (error); 1792 } 1793 1794 /* 1795 * Helper function for raised chroot(2) security function: Refuse if 1796 * any filedescriptors are open directories. 1797 */ 1798 static int 1799 chroot_refuse_vdir_fds(thread_t td, struct filedesc *fdp) 1800 { 1801 struct vnode *vp; 1802 struct file *fp; 1803 int error; 1804 int fd; 1805 1806 for (fd = 0; fd < fdp->fd_nfiles ; fd++) { 1807 if ((error = holdvnode(td, fd, &fp)) != 0) 1808 continue; 1809 vp = (struct vnode *)fp->f_data; 1810 if (vp->v_type != VDIR) { 1811 fdrop(fp); 1812 continue; 1813 } 1814 fdrop(fp); 1815 return(EPERM); 1816 } 1817 return (0); 1818 } 1819 1820 /* 1821 * This sysctl determines if we will allow a process to chroot(2) if it 1822 * has a directory open: 1823 * 0: disallowed for all processes. 1824 * 1: allowed for processes that were not already chroot(2)'ed. 1825 * 2: allowed for all processes. 
1826 */ 1827 1828 static int chroot_allow_open_directories = 1; 1829 1830 SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, 1831 &chroot_allow_open_directories, 0, ""); 1832 1833 /* 1834 * chroot to the specified namecache entry. We obtain the vp from the 1835 * namecache data. The passed ncp must be locked and referenced and will 1836 * remain locked and referenced on return. 1837 */ 1838 int 1839 kern_chroot(struct nchandle *nch) 1840 { 1841 struct thread *td = curthread; 1842 struct proc *p = td->td_proc; 1843 struct filedesc *fdp = p->p_fd; 1844 struct vnode *vp; 1845 int error; 1846 1847 /* 1848 * Only privileged user can chroot 1849 */ 1850 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1851 if (error) 1852 return (error); 1853 1854 /* 1855 * Disallow open directory descriptors (fchdir() breakouts). 1856 */ 1857 if (chroot_allow_open_directories == 0 || 1858 (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { 1859 if ((error = chroot_refuse_vdir_fds(td, fdp)) != 0) 1860 return (error); 1861 } 1862 if ((vp = nch->ncp->nc_vp) == NULL) 1863 return (ENOENT); 1864 1865 if ((error = vget(vp, LK_SHARED)) != 0) 1866 return (error); 1867 1868 /* 1869 * Check the validity of vp as a directory to change to and 1870 * associate it with rdir/jdir. 
1871 */ 1872 error = checkvp_chdir(vp, td); 1873 vn_unlock(vp); /* leave reference intact */ 1874 if (error == 0) { 1875 lwkt_gettoken(&p->p_token); 1876 vrele(fdp->fd_rdir); 1877 fdp->fd_rdir = vp; /* reference inherited by fd_rdir */ 1878 cache_drop(&fdp->fd_nrdir); 1879 cache_copy(nch, &fdp->fd_nrdir); 1880 if (fdp->fd_jdir == NULL) { 1881 fdp->fd_jdir = vp; 1882 vref(fdp->fd_jdir); 1883 cache_copy(nch, &fdp->fd_njdir); 1884 } 1885 if ((p->p_flags & P_DIDCHROOT) == 0) { 1886 p->p_flags |= P_DIDCHROOT; 1887 if (p->p_depth <= 65535 - 32) 1888 p->p_depth += 32; 1889 } 1890 lwkt_reltoken(&p->p_token); 1891 } else { 1892 vrele(vp); 1893 } 1894 return (error); 1895 } 1896 1897 /* 1898 * chroot_args(char *path) 1899 * 1900 * Change notion of root (``/'') directory. 1901 */ 1902 int 1903 sys_chroot(struct chroot_args *uap) 1904 { 1905 struct thread *td __debugvar = curthread; 1906 struct nlookupdata nd; 1907 int error; 1908 1909 KKASSERT(td->td_proc); 1910 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1911 if (error == 0) { 1912 nd.nl_flags |= NLC_EXEC; 1913 error = nlookup(&nd); 1914 if (error == 0) 1915 error = kern_chroot(&nd.nl_nch); 1916 } 1917 nlookup_done(&nd); 1918 return(error); 1919 } 1920 1921 int 1922 sys_chroot_kernel(struct chroot_kernel_args *uap) 1923 { 1924 struct thread *td = curthread; 1925 struct nlookupdata nd; 1926 struct nchandle *nch; 1927 struct vnode *vp; 1928 int error; 1929 1930 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 1931 if (error) 1932 goto error_nond; 1933 1934 error = nlookup(&nd); 1935 if (error) 1936 goto error_out; 1937 1938 nch = &nd.nl_nch; 1939 1940 error = priv_check_cred(td->td_ucred, PRIV_VFS_CHROOT, 0); 1941 if (error) 1942 goto error_out; 1943 1944 if ((vp = nch->ncp->nc_vp) == NULL) { 1945 error = ENOENT; 1946 goto error_out; 1947 } 1948 1949 if ((error = cache_vref(nch, nd.nl_cred, &vp)) != 0) 1950 goto error_out; 1951 1952 kprintf("chroot_kernel: set new rootnch/rootvnode to 
%s\n", uap->path); 1953 vfs_cache_setroot(vp, cache_hold(nch)); 1954 1955 error_out: 1956 nlookup_done(&nd); 1957 error_nond: 1958 return(error); 1959 } 1960 1961 /* 1962 * Common routine for chroot and chdir. Given a locked, referenced vnode, 1963 * determine whether it is legal to chdir to the vnode. The vnode's state 1964 * is not changed by this call. 1965 */ 1966 static int 1967 checkvp_chdir(struct vnode *vp, struct thread *td) 1968 { 1969 int error; 1970 1971 if (vp->v_type != VDIR) 1972 error = ENOTDIR; 1973 else 1974 error = VOP_EACCESS(vp, VEXEC, td->td_ucred); 1975 return (error); 1976 } 1977 1978 int 1979 kern_open(struct nlookupdata *nd, int oflags, int mode, int *res) 1980 { 1981 struct thread *td = curthread; 1982 struct proc *p = td->td_proc; 1983 struct lwp *lp = td->td_lwp; 1984 struct filedesc *fdp = p->p_fd; 1985 int cmode, flags; 1986 struct file *nfp; 1987 struct file *fp; 1988 struct vnode *vp; 1989 int type, indx, error = 0; 1990 struct flock lf; 1991 1992 if ((oflags & O_ACCMODE) == O_ACCMODE) 1993 return (EINVAL); 1994 flags = FFLAGS(oflags); 1995 error = falloc(lp, &nfp, NULL); 1996 if (error) 1997 return (error); 1998 fp = nfp; 1999 cmode = ((mode &~ fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 2000 2001 /* 2002 * XXX p_dupfd is a real mess. It allows a device to return a 2003 * file descriptor to be duplicated rather then doing the open 2004 * itself. 2005 */ 2006 lp->lwp_dupfd = -1; 2007 2008 /* 2009 * Call vn_open() to do the lookup and assign the vnode to the 2010 * file pointer. vn_open() does not change the ref count on fp 2011 * and the vnode, on success, will be inherited by the file pointer 2012 * and unlocked. 2013 * 2014 * Request a shared lock on the vnode if possible. 2015 * 2016 * Executable binaries can race VTEXT against O_RDWR opens, so 2017 * use an exclusive lock for O_RDWR opens as well. 2018 * 2019 * NOTE: We need a flag to separate terminal vnode locking from 2020 * parent locking. 
O_CREAT needs parent locking, but O_TRUNC 2021 * and O_RDWR only need to lock the terminal vnode exclusively. 2022 */ 2023 nd->nl_flags |= NLC_LOCKVP; 2024 if ((flags & (O_CREAT|O_TRUNC|O_RDWR)) == 0) 2025 nd->nl_flags |= NLC_SHAREDLOCK; 2026 2027 error = vn_open(nd, fp, flags, cmode); 2028 nlookup_done(nd); 2029 2030 if (error) { 2031 /* 2032 * handle special fdopen() case. bleh. dupfdopen() is 2033 * responsible for dropping the old contents of ofiles[indx] 2034 * if it succeeds. 2035 * 2036 * Note that fsetfd() will add a ref to fp which represents 2037 * the fd_files[] assignment. We must still drop our 2038 * reference. 2039 */ 2040 if ((error == ENODEV || error == ENXIO) && lp->lwp_dupfd >= 0) { 2041 if (fdalloc(p, 0, &indx) == 0) { 2042 error = dupfdopen(td, indx, lp->lwp_dupfd, flags, error); 2043 if (error == 0) { 2044 *res = indx; 2045 fdrop(fp); /* our ref */ 2046 return (0); 2047 } 2048 fsetfd(fdp, NULL, indx); 2049 } 2050 } 2051 fdrop(fp); /* our ref */ 2052 if (error == ERESTART) 2053 error = EINTR; 2054 return (error); 2055 } 2056 2057 /* 2058 * ref the vnode for ourselves so it can't be ripped out from under 2059 * is. XXX need an ND flag to request that the vnode be returned 2060 * anyway. 2061 * 2062 * Reserve a file descriptor but do not assign it until the open 2063 * succeeds. 2064 */ 2065 vp = (struct vnode *)fp->f_data; 2066 vref(vp); 2067 if ((error = fdalloc(p, 0, &indx)) != 0) { 2068 fdrop(fp); 2069 vrele(vp); 2070 return (error); 2071 } 2072 2073 /* 2074 * If no error occurs the vp will have been assigned to the file 2075 * pointer. 
2076 */ 2077 lp->lwp_dupfd = 0; 2078 2079 if (flags & (O_EXLOCK | O_SHLOCK)) { 2080 lf.l_whence = SEEK_SET; 2081 lf.l_start = 0; 2082 lf.l_len = 0; 2083 if (flags & O_EXLOCK) 2084 lf.l_type = F_WRLCK; 2085 else 2086 lf.l_type = F_RDLCK; 2087 if (flags & FNONBLOCK) 2088 type = 0; 2089 else 2090 type = F_WAIT; 2091 2092 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { 2093 /* 2094 * lock request failed. Clean up the reserved 2095 * descriptor. 2096 */ 2097 vrele(vp); 2098 fsetfd(fdp, NULL, indx); 2099 fdrop(fp); 2100 return (error); 2101 } 2102 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 2103 } 2104 #if 0 2105 /* 2106 * Assert that all regular file vnodes were created with a object. 2107 */ 2108 KASSERT(vp->v_type != VREG || vp->v_object != NULL, 2109 ("open: regular file has no backing object after vn_open")); 2110 #endif 2111 2112 vrele(vp); 2113 2114 /* 2115 * release our private reference, leaving the one associated with the 2116 * descriptor table intact. 2117 */ 2118 if (oflags & O_CLOEXEC) 2119 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 2120 fsetfd(fdp, fp, indx); 2121 fdrop(fp); 2122 *res = indx; 2123 2124 return (error); 2125 } 2126 2127 /* 2128 * open_args(char *path, int flags, int mode) 2129 * 2130 * Check permissions, allocate an open file structure, 2131 * and call the device open routine if any. 
2132 */ 2133 int 2134 sys_open(struct open_args *uap) 2135 { 2136 struct nlookupdata nd; 2137 int error; 2138 2139 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2140 if (error == 0) { 2141 error = kern_open(&nd, uap->flags, 2142 uap->mode, &uap->sysmsg_result); 2143 } 2144 nlookup_done(&nd); 2145 return (error); 2146 } 2147 2148 /* 2149 * openat_args(int fd, char *path, int flags, int mode) 2150 */ 2151 int 2152 sys_openat(struct openat_args *uap) 2153 { 2154 struct nlookupdata nd; 2155 int error; 2156 struct file *fp; 2157 2158 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2159 if (error == 0) { 2160 error = kern_open(&nd, uap->flags, uap->mode, 2161 &uap->sysmsg_result); 2162 } 2163 nlookup_done_at(&nd, fp); 2164 return (error); 2165 } 2166 2167 int 2168 kern_mknod(struct nlookupdata *nd, int mode, int rmajor, int rminor) 2169 { 2170 struct thread *td = curthread; 2171 struct proc *p = td->td_proc; 2172 struct vnode *vp; 2173 struct vattr vattr; 2174 int error; 2175 int whiteout = 0; 2176 2177 KKASSERT(p); 2178 2179 VATTR_NULL(&vattr); 2180 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2181 vattr.va_rmajor = rmajor; 2182 vattr.va_rminor = rminor; 2183 2184 switch (mode & S_IFMT) { 2185 case S_IFMT: /* used by badsect to flag bad sectors */ 2186 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_BAD, 0); 2187 vattr.va_type = VBAD; 2188 break; 2189 case S_IFCHR: 2190 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2191 vattr.va_type = VCHR; 2192 break; 2193 case S_IFBLK: 2194 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 2195 vattr.va_type = VBLK; 2196 break; 2197 case S_IFWHT: 2198 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_WHT, 0); 2199 whiteout = 1; 2200 break; 2201 case S_IFDIR: /* special directories support for HAMMER */ 2202 error = priv_check_cred(td->td_ucred, PRIV_VFS_MKNOD_DIR, 0); 2203 vattr.va_type = VDIR; 2204 break; 2205 default: 2206 error = EINVAL; 2207 break; 2208 } 2209 2210 if (error) 2211 
return (error); 2212 2213 bwillinode(1); 2214 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2215 if ((error = nlookup(nd)) != 0) 2216 return (error); 2217 if (nd->nl_nch.ncp->nc_vp) 2218 return (EEXIST); 2219 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2220 return (error); 2221 2222 if (whiteout) { 2223 error = VOP_NWHITEOUT(&nd->nl_nch, nd->nl_dvp, 2224 nd->nl_cred, NAMEI_CREATE); 2225 } else { 2226 vp = NULL; 2227 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, 2228 &vp, nd->nl_cred, &vattr); 2229 if (error == 0) 2230 vput(vp); 2231 } 2232 return (error); 2233 } 2234 2235 /* 2236 * mknod_args(char *path, int mode, int dev) 2237 * 2238 * Create a special file. 2239 */ 2240 int 2241 sys_mknod(struct mknod_args *uap) 2242 { 2243 struct nlookupdata nd; 2244 int error; 2245 2246 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2247 if (error == 0) { 2248 error = kern_mknod(&nd, uap->mode, 2249 umajor(uap->dev), uminor(uap->dev)); 2250 } 2251 nlookup_done(&nd); 2252 return (error); 2253 } 2254 2255 /* 2256 * mknodat_args(int fd, char *path, mode_t mode, dev_t dev) 2257 * 2258 * Create a special file. The path is relative to the directory associated 2259 * with fd. 
2260 */ 2261 int 2262 sys_mknodat(struct mknodat_args *uap) 2263 { 2264 struct nlookupdata nd; 2265 struct file *fp; 2266 int error; 2267 2268 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2269 if (error == 0) { 2270 error = kern_mknod(&nd, uap->mode, 2271 umajor(uap->dev), uminor(uap->dev)); 2272 } 2273 nlookup_done_at(&nd, fp); 2274 return (error); 2275 } 2276 2277 int 2278 kern_mkfifo(struct nlookupdata *nd, int mode) 2279 { 2280 struct thread *td = curthread; 2281 struct proc *p = td->td_proc; 2282 struct vattr vattr; 2283 struct vnode *vp; 2284 int error; 2285 2286 bwillinode(1); 2287 2288 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2289 if ((error = nlookup(nd)) != 0) 2290 return (error); 2291 if (nd->nl_nch.ncp->nc_vp) 2292 return (EEXIST); 2293 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2294 return (error); 2295 2296 VATTR_NULL(&vattr); 2297 vattr.va_type = VFIFO; 2298 vattr.va_mode = (mode & ALLPERMS) &~ p->p_fd->fd_cmask; 2299 vp = NULL; 2300 error = VOP_NMKNOD(&nd->nl_nch, nd->nl_dvp, &vp, nd->nl_cred, &vattr); 2301 if (error == 0) 2302 vput(vp); 2303 return (error); 2304 } 2305 2306 /* 2307 * mkfifo_args(char *path, int mode) 2308 * 2309 * Create a named pipe. 2310 */ 2311 int 2312 sys_mkfifo(struct mkfifo_args *uap) 2313 { 2314 struct nlookupdata nd; 2315 int error; 2316 2317 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2318 if (error == 0) 2319 error = kern_mkfifo(&nd, uap->mode); 2320 nlookup_done(&nd); 2321 return (error); 2322 } 2323 2324 /* 2325 * mkfifoat_args(int fd, char *path, mode_t mode) 2326 * 2327 * Create a named pipe. The path is relative to the directory associated 2328 * with fd. 
2329 */ 2330 int 2331 sys_mkfifoat(struct mkfifoat_args *uap) 2332 { 2333 struct nlookupdata nd; 2334 struct file *fp; 2335 int error; 2336 2337 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2338 if (error == 0) 2339 error = kern_mkfifo(&nd, uap->mode); 2340 nlookup_done_at(&nd, fp); 2341 return (error); 2342 } 2343 2344 static int hardlink_check_uid = 0; 2345 SYSCTL_INT(_security, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 2346 &hardlink_check_uid, 0, 2347 "Unprivileged processes cannot create hard links to files owned by other " 2348 "users"); 2349 static int hardlink_check_gid = 0; 2350 SYSCTL_INT(_security, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 2351 &hardlink_check_gid, 0, 2352 "Unprivileged processes cannot create hard links to files owned by other " 2353 "groups"); 2354 2355 static int 2356 can_hardlink(struct vnode *vp, struct thread *td, struct ucred *cred) 2357 { 2358 struct vattr va; 2359 int error; 2360 2361 /* 2362 * Shortcut if disabled 2363 */ 2364 if (hardlink_check_uid == 0 && hardlink_check_gid == 0) 2365 return (0); 2366 2367 /* 2368 * Privileged user can always hardlink 2369 */ 2370 if (priv_check_cred(cred, PRIV_VFS_LINK, 0) == 0) 2371 return (0); 2372 2373 /* 2374 * Otherwise only if the originating file is owned by the 2375 * same user or group. Note that any group is allowed if 2376 * the file is owned by the caller. 2377 */ 2378 error = VOP_GETATTR(vp, &va); 2379 if (error != 0) 2380 return (error); 2381 2382 if (hardlink_check_uid) { 2383 if (cred->cr_uid != va.va_uid) 2384 return (EPERM); 2385 } 2386 2387 if (hardlink_check_gid) { 2388 if (cred->cr_uid != va.va_uid && !groupmember(va.va_gid, cred)) 2389 return (EPERM); 2390 } 2391 2392 return (0); 2393 } 2394 2395 int 2396 kern_link(struct nlookupdata *nd, struct nlookupdata *linknd) 2397 { 2398 struct thread *td = curthread; 2399 struct vnode *vp; 2400 int error; 2401 2402 /* 2403 * Lookup the source and obtained a locked vnode. 
2404 * 2405 * You may only hardlink a file which you have write permission 2406 * on or which you own. 2407 * 2408 * XXX relookup on vget failure / race ? 2409 */ 2410 bwillinode(1); 2411 nd->nl_flags |= NLC_WRITE | NLC_OWN | NLC_HLINK; 2412 if ((error = nlookup(nd)) != 0) 2413 return (error); 2414 vp = nd->nl_nch.ncp->nc_vp; 2415 KKASSERT(vp != NULL); 2416 if (vp->v_type == VDIR) 2417 return (EPERM); /* POSIX */ 2418 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2419 return (error); 2420 if ((error = vget(vp, LK_EXCLUSIVE)) != 0) 2421 return (error); 2422 2423 /* 2424 * Unlock the source so we can lookup the target without deadlocking 2425 * (XXX vp is locked already, possible other deadlock?). The target 2426 * must not exist. 2427 */ 2428 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 2429 nd->nl_flags &= ~NLC_NCPISLOCKED; 2430 cache_unlock(&nd->nl_nch); 2431 vn_unlock(vp); 2432 2433 linknd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2434 if ((error = nlookup(linknd)) != 0) { 2435 vrele(vp); 2436 return (error); 2437 } 2438 if (linknd->nl_nch.ncp->nc_vp) { 2439 vrele(vp); 2440 return (EEXIST); 2441 } 2442 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 2443 if (error) { 2444 vrele(vp); 2445 return (error); 2446 } 2447 2448 /* 2449 * Finally run the new API VOP. 2450 */ 2451 error = can_hardlink(vp, td, td->td_ucred); 2452 if (error == 0) { 2453 error = VOP_NLINK(&linknd->nl_nch, linknd->nl_dvp, 2454 vp, linknd->nl_cred); 2455 } 2456 vput(vp); 2457 return (error); 2458 } 2459 2460 /* 2461 * link_args(char *path, char *link) 2462 * 2463 * Make a hard file link. 
2464 */ 2465 int 2466 sys_link(struct link_args *uap) 2467 { 2468 struct nlookupdata nd, linknd; 2469 int error; 2470 2471 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2472 if (error == 0) { 2473 error = nlookup_init(&linknd, uap->link, UIO_USERSPACE, 0); 2474 if (error == 0) 2475 error = kern_link(&nd, &linknd); 2476 nlookup_done(&linknd); 2477 } 2478 nlookup_done(&nd); 2479 return (error); 2480 } 2481 2482 /* 2483 * linkat_args(int fd1, char *path1, int fd2, char *path2, int flags) 2484 * 2485 * Make a hard file link. The path1 argument is relative to the directory 2486 * associated with fd1, and similarly the path2 argument is relative to 2487 * the directory associated with fd2. 2488 */ 2489 int 2490 sys_linkat(struct linkat_args *uap) 2491 { 2492 struct nlookupdata nd, linknd; 2493 struct file *fp1, *fp2; 2494 int error; 2495 2496 error = nlookup_init_at(&nd, &fp1, uap->fd1, uap->path1, UIO_USERSPACE, 2497 (uap->flags & AT_SYMLINK_FOLLOW) ? NLC_FOLLOW : 0); 2498 if (error == 0) { 2499 error = nlookup_init_at(&linknd, &fp2, uap->fd2, 2500 uap->path2, UIO_USERSPACE, 0); 2501 if (error == 0) 2502 error = kern_link(&nd, &linknd); 2503 nlookup_done_at(&linknd, fp2); 2504 } 2505 nlookup_done_at(&nd, fp1); 2506 return (error); 2507 } 2508 2509 int 2510 kern_symlink(struct nlookupdata *nd, char *path, int mode) 2511 { 2512 struct vattr vattr; 2513 struct vnode *vp; 2514 struct vnode *dvp; 2515 int error; 2516 2517 bwillinode(1); 2518 nd->nl_flags |= NLC_CREATE | NLC_REFDVP; 2519 if ((error = nlookup(nd)) != 0) 2520 return (error); 2521 if (nd->nl_nch.ncp->nc_vp) 2522 return (EEXIST); 2523 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2524 return (error); 2525 dvp = nd->nl_dvp; 2526 VATTR_NULL(&vattr); 2527 vattr.va_mode = mode; 2528 error = VOP_NSYMLINK(&nd->nl_nch, dvp, &vp, nd->nl_cred, &vattr, path); 2529 if (error == 0) 2530 vput(vp); 2531 return (error); 2532 } 2533 2534 /* 2535 * symlink(char *path, char *link) 2536 * 2537 * Make a symbolic 
link. 2538 */ 2539 int 2540 sys_symlink(struct symlink_args *uap) 2541 { 2542 struct thread *td = curthread; 2543 struct nlookupdata nd; 2544 char *path; 2545 int error; 2546 int mode; 2547 2548 path = objcache_get(namei_oc, M_WAITOK); 2549 error = copyinstr(uap->path, path, MAXPATHLEN, NULL); 2550 if (error == 0) { 2551 error = nlookup_init(&nd, uap->link, UIO_USERSPACE, 0); 2552 if (error == 0) { 2553 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2554 error = kern_symlink(&nd, path, mode); 2555 } 2556 nlookup_done(&nd); 2557 } 2558 objcache_put(namei_oc, path); 2559 return (error); 2560 } 2561 2562 /* 2563 * symlinkat_args(char *path1, int fd, char *path2) 2564 * 2565 * Make a symbolic link. The path2 argument is relative to the directory 2566 * associated with fd. 2567 */ 2568 int 2569 sys_symlinkat(struct symlinkat_args *uap) 2570 { 2571 struct thread *td = curthread; 2572 struct nlookupdata nd; 2573 struct file *fp; 2574 char *path1; 2575 int error; 2576 int mode; 2577 2578 path1 = objcache_get(namei_oc, M_WAITOK); 2579 error = copyinstr(uap->path1, path1, MAXPATHLEN, NULL); 2580 if (error == 0) { 2581 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path2, 2582 UIO_USERSPACE, 0); 2583 if (error == 0) { 2584 mode = ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask; 2585 error = kern_symlink(&nd, path1, mode); 2586 } 2587 nlookup_done_at(&nd, fp); 2588 } 2589 objcache_put(namei_oc, path1); 2590 return (error); 2591 } 2592 2593 /* 2594 * undelete_args(char *path) 2595 * 2596 * Delete a whiteout from the filesystem. 
2597 */ 2598 int 2599 sys_undelete(struct undelete_args *uap) 2600 { 2601 struct nlookupdata nd; 2602 int error; 2603 2604 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2605 bwillinode(1); 2606 nd.nl_flags |= NLC_DELETE | NLC_REFDVP; 2607 if (error == 0) 2608 error = nlookup(&nd); 2609 if (error == 0) 2610 error = ncp_writechk(&nd.nl_nch); 2611 if (error == 0) { 2612 error = VOP_NWHITEOUT(&nd.nl_nch, nd.nl_dvp, nd.nl_cred, 2613 NAMEI_DELETE); 2614 } 2615 nlookup_done(&nd); 2616 return (error); 2617 } 2618 2619 int 2620 kern_unlink(struct nlookupdata *nd) 2621 { 2622 int error; 2623 2624 bwillinode(1); 2625 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 2626 if ((error = nlookup(nd)) != 0) 2627 return (error); 2628 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 2629 return (error); 2630 error = VOP_NREMOVE(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 2631 return (error); 2632 } 2633 2634 /* 2635 * unlink_args(char *path) 2636 * 2637 * Delete a name from the filesystem. 2638 */ 2639 int 2640 sys_unlink(struct unlink_args *uap) 2641 { 2642 struct nlookupdata nd; 2643 int error; 2644 2645 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2646 if (error == 0) 2647 error = kern_unlink(&nd); 2648 nlookup_done(&nd); 2649 return (error); 2650 } 2651 2652 2653 /* 2654 * unlinkat_args(int fd, char *path, int flags) 2655 * 2656 * Delete the file or directory entry pointed to by fd/path. 
2657 */ 2658 int 2659 sys_unlinkat(struct unlinkat_args *uap) 2660 { 2661 struct nlookupdata nd; 2662 struct file *fp; 2663 int error; 2664 2665 if (uap->flags & ~AT_REMOVEDIR) 2666 return (EINVAL); 2667 2668 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 2669 if (error == 0) { 2670 if (uap->flags & AT_REMOVEDIR) 2671 error = kern_rmdir(&nd); 2672 else 2673 error = kern_unlink(&nd); 2674 } 2675 nlookup_done_at(&nd, fp); 2676 return (error); 2677 } 2678 2679 int 2680 kern_lseek(int fd, off_t offset, int whence, off_t *res) 2681 { 2682 struct thread *td = curthread; 2683 struct file *fp; 2684 struct vnode *vp; 2685 struct vattr vattr; 2686 off_t new_offset; 2687 int error; 2688 2689 fp = holdfp(td, fd, -1); 2690 if (fp == NULL) 2691 return (EBADF); 2692 if (fp->f_type != DTYPE_VNODE) { 2693 error = ESPIPE; 2694 goto done; 2695 } 2696 vp = (struct vnode *)fp->f_data; 2697 2698 switch (whence) { 2699 case L_INCR: 2700 spin_lock(&fp->f_spin); 2701 new_offset = fp->f_offset + offset; 2702 error = 0; 2703 break; 2704 case L_XTND: 2705 error = VOP_GETATTR(vp, &vattr); 2706 spin_lock(&fp->f_spin); 2707 new_offset = offset + vattr.va_size; 2708 break; 2709 case L_SET: 2710 new_offset = offset; 2711 error = 0; 2712 spin_lock(&fp->f_spin); 2713 break; 2714 default: 2715 new_offset = 0; 2716 error = EINVAL; 2717 spin_lock(&fp->f_spin); 2718 break; 2719 } 2720 2721 /* 2722 * Validate the seek position. Negative offsets are not allowed 2723 * for regular files or directories. 2724 * 2725 * Normally we would also not want to allow negative offsets for 2726 * character and block-special devices. However kvm addresses 2727 * on 64 bit architectures might appear to be negative and must 2728 * be allowed. 
2729 */ 2730 if (error == 0) { 2731 if (new_offset < 0 && 2732 (vp->v_type == VREG || vp->v_type == VDIR)) { 2733 error = EINVAL; 2734 } else { 2735 fp->f_offset = new_offset; 2736 } 2737 } 2738 *res = fp->f_offset; 2739 spin_unlock(&fp->f_spin); 2740 done: 2741 dropfp(td, fd, fp); 2742 2743 return (error); 2744 } 2745 2746 /* 2747 * lseek_args(int fd, int pad, off_t offset, int whence) 2748 * 2749 * Reposition read/write file offset. 2750 */ 2751 int 2752 sys_lseek(struct lseek_args *uap) 2753 { 2754 int error; 2755 2756 error = kern_lseek(uap->fd, uap->offset, uap->whence, 2757 &uap->sysmsg_offset); 2758 2759 return (error); 2760 } 2761 2762 /* 2763 * Check if current process can access given file. amode is a bitmask of *_OK 2764 * access bits. flags is a bitmask of AT_* flags. 2765 */ 2766 int 2767 kern_access(struct nlookupdata *nd, int amode, int flags) 2768 { 2769 struct vnode *vp; 2770 int error, mode; 2771 2772 if (flags & ~AT_EACCESS) 2773 return (EINVAL); 2774 nd->nl_flags |= NLC_SHAREDLOCK; 2775 if ((error = nlookup(nd)) != 0) 2776 return (error); 2777 retry: 2778 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 2779 if (error) 2780 return (error); 2781 2782 /* Flags == 0 means only check for existence. */ 2783 if (amode) { 2784 mode = 0; 2785 if (amode & R_OK) 2786 mode |= VREAD; 2787 if (amode & W_OK) 2788 mode |= VWRITE; 2789 if (amode & X_OK) 2790 mode |= VEXEC; 2791 if ((mode & VWRITE) == 0 || 2792 (error = vn_writechk(vp, &nd->nl_nch)) == 0) 2793 error = VOP_ACCESS_FLAGS(vp, mode, flags, nd->nl_cred); 2794 2795 /* 2796 * If the file handle is stale we have to re-resolve the 2797 * entry with the ncp held exclusively. This is a hack 2798 * at the moment. 
2799 */ 2800 if (error == ESTALE) { 2801 vput(vp); 2802 cache_unlock(&nd->nl_nch); 2803 cache_lock(&nd->nl_nch); 2804 cache_setunresolved(&nd->nl_nch); 2805 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2806 if (error == 0) { 2807 vp = NULL; 2808 goto retry; 2809 } 2810 return(error); 2811 } 2812 } 2813 vput(vp); 2814 return (error); 2815 } 2816 2817 /* 2818 * access_args(char *path, int flags) 2819 * 2820 * Check access permissions. 2821 */ 2822 int 2823 sys_access(struct access_args *uap) 2824 { 2825 struct nlookupdata nd; 2826 int error; 2827 2828 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2829 if (error == 0) 2830 error = kern_access(&nd, uap->flags, 0); 2831 nlookup_done(&nd); 2832 return (error); 2833 } 2834 2835 2836 /* 2837 * eaccess_args(char *path, int flags) 2838 * 2839 * Check access permissions. 2840 */ 2841 int 2842 sys_eaccess(struct eaccess_args *uap) 2843 { 2844 struct nlookupdata nd; 2845 int error; 2846 2847 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2848 if (error == 0) 2849 error = kern_access(&nd, uap->flags, AT_EACCESS); 2850 nlookup_done(&nd); 2851 return (error); 2852 } 2853 2854 2855 /* 2856 * faccessat_args(int fd, char *path, int amode, int flags) 2857 * 2858 * Check access permissions. 
2859 */ 2860 int 2861 sys_faccessat(struct faccessat_args *uap) 2862 { 2863 struct nlookupdata nd; 2864 struct file *fp; 2865 int error; 2866 2867 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 2868 NLC_FOLLOW); 2869 if (error == 0) 2870 error = kern_access(&nd, uap->amode, uap->flags); 2871 nlookup_done_at(&nd, fp); 2872 return (error); 2873 } 2874 2875 int 2876 kern_stat(struct nlookupdata *nd, struct stat *st) 2877 { 2878 int error; 2879 struct vnode *vp; 2880 2881 nd->nl_flags |= NLC_SHAREDLOCK; 2882 if ((error = nlookup(nd)) != 0) 2883 return (error); 2884 again: 2885 if ((vp = nd->nl_nch.ncp->nc_vp) == NULL) 2886 return (ENOENT); 2887 2888 if ((error = vget(vp, LK_SHARED)) != 0) 2889 return (error); 2890 error = vn_stat(vp, st, nd->nl_cred); 2891 2892 /* 2893 * If the file handle is stale we have to re-resolve the 2894 * entry with the ncp held exclusively. This is a hack 2895 * at the moment. 2896 */ 2897 if (error == ESTALE) { 2898 vput(vp); 2899 cache_unlock(&nd->nl_nch); 2900 cache_lock(&nd->nl_nch); 2901 cache_setunresolved(&nd->nl_nch); 2902 error = cache_resolve(&nd->nl_nch, nd->nl_cred); 2903 if (error == 0) 2904 goto again; 2905 } else { 2906 vput(vp); 2907 } 2908 return (error); 2909 } 2910 2911 /* 2912 * stat_args(char *path, struct stat *ub) 2913 * 2914 * Get file status; this version follows links. 2915 */ 2916 int 2917 sys_stat(struct stat_args *uap) 2918 { 2919 struct nlookupdata nd; 2920 struct stat st; 2921 int error; 2922 2923 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 2924 if (error == 0) { 2925 error = kern_stat(&nd, &st); 2926 if (error == 0) 2927 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2928 } 2929 nlookup_done(&nd); 2930 return (error); 2931 } 2932 2933 /* 2934 * lstat_args(char *path, struct stat *ub) 2935 * 2936 * Get file status; this version does not follow links. 
2937 */ 2938 int 2939 sys_lstat(struct lstat_args *uap) 2940 { 2941 struct nlookupdata nd; 2942 struct stat st; 2943 int error; 2944 2945 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 2946 if (error == 0) { 2947 error = kern_stat(&nd, &st); 2948 if (error == 0) 2949 error = copyout(&st, uap->ub, sizeof(*uap->ub)); 2950 } 2951 nlookup_done(&nd); 2952 return (error); 2953 } 2954 2955 /* 2956 * fstatat_args(int fd, char *path, struct stat *sb, int flags) 2957 * 2958 * Get status of file pointed to by fd/path. 2959 */ 2960 int 2961 sys_fstatat(struct fstatat_args *uap) 2962 { 2963 struct nlookupdata nd; 2964 struct stat st; 2965 int error; 2966 int flags; 2967 struct file *fp; 2968 2969 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 2970 return (EINVAL); 2971 2972 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 2973 2974 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 2975 UIO_USERSPACE, flags); 2976 if (error == 0) { 2977 error = kern_stat(&nd, &st); 2978 if (error == 0) 2979 error = copyout(&st, uap->sb, sizeof(*uap->sb)); 2980 } 2981 nlookup_done_at(&nd, fp); 2982 return (error); 2983 } 2984 2985 static int 2986 kern_pathconf(char *path, int name, int flags, register_t *sysmsg_regp) 2987 { 2988 struct nlookupdata nd; 2989 struct vnode *vp; 2990 int error; 2991 2992 vp = NULL; 2993 error = nlookup_init(&nd, path, UIO_USERSPACE, flags); 2994 if (error == 0) 2995 error = nlookup(&nd); 2996 if (error == 0) 2997 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 2998 nlookup_done(&nd); 2999 if (error == 0) { 3000 error = VOP_PATHCONF(vp, name, sysmsg_regp); 3001 vput(vp); 3002 } 3003 return (error); 3004 } 3005 3006 /* 3007 * pathconf_Args(char *path, int name) 3008 * 3009 * Get configurable pathname variables. 
3010 */ 3011 int 3012 sys_pathconf(struct pathconf_args *uap) 3013 { 3014 return (kern_pathconf(uap->path, uap->name, NLC_FOLLOW, 3015 &uap->sysmsg_reg)); 3016 } 3017 3018 /* 3019 * lpathconf_Args(char *path, int name) 3020 * 3021 * Get configurable pathname variables, but don't follow symlinks. 3022 */ 3023 int 3024 sys_lpathconf(struct lpathconf_args *uap) 3025 { 3026 return (kern_pathconf(uap->path, uap->name, 0, &uap->sysmsg_reg)); 3027 } 3028 3029 /* 3030 * XXX: daver 3031 * kern_readlink isn't properly split yet. There is a copyin burried 3032 * in VOP_READLINK(). 3033 */ 3034 int 3035 kern_readlink(struct nlookupdata *nd, char *buf, int count, int *res) 3036 { 3037 struct thread *td = curthread; 3038 struct vnode *vp; 3039 struct iovec aiov; 3040 struct uio auio; 3041 int error; 3042 3043 nd->nl_flags |= NLC_SHAREDLOCK; 3044 if ((error = nlookup(nd)) != 0) 3045 return (error); 3046 error = cache_vget(&nd->nl_nch, nd->nl_cred, LK_SHARED, &vp); 3047 if (error) 3048 return (error); 3049 if (vp->v_type != VLNK) { 3050 error = EINVAL; 3051 } else { 3052 aiov.iov_base = buf; 3053 aiov.iov_len = count; 3054 auio.uio_iov = &aiov; 3055 auio.uio_iovcnt = 1; 3056 auio.uio_offset = 0; 3057 auio.uio_rw = UIO_READ; 3058 auio.uio_segflg = UIO_USERSPACE; 3059 auio.uio_td = td; 3060 auio.uio_resid = count; 3061 error = VOP_READLINK(vp, &auio, td->td_ucred); 3062 } 3063 vput(vp); 3064 *res = count - auio.uio_resid; 3065 return (error); 3066 } 3067 3068 /* 3069 * readlink_args(char *path, char *buf, int count) 3070 * 3071 * Return target name of a symbolic link. 
3072 */ 3073 int 3074 sys_readlink(struct readlink_args *uap) 3075 { 3076 struct nlookupdata nd; 3077 int error; 3078 3079 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3080 if (error == 0) { 3081 error = kern_readlink(&nd, uap->buf, uap->count, 3082 &uap->sysmsg_result); 3083 } 3084 nlookup_done(&nd); 3085 return (error); 3086 } 3087 3088 /* 3089 * readlinkat_args(int fd, char *path, char *buf, size_t bufsize) 3090 * 3091 * Return target name of a symbolic link. The path is relative to the 3092 * directory associated with fd. 3093 */ 3094 int 3095 sys_readlinkat(struct readlinkat_args *uap) 3096 { 3097 struct nlookupdata nd; 3098 struct file *fp; 3099 int error; 3100 3101 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 3102 if (error == 0) { 3103 error = kern_readlink(&nd, uap->buf, uap->bufsize, 3104 &uap->sysmsg_result); 3105 } 3106 nlookup_done_at(&nd, fp); 3107 return (error); 3108 } 3109 3110 static int 3111 setfflags(struct vnode *vp, int flags) 3112 { 3113 struct thread *td = curthread; 3114 int error; 3115 struct vattr vattr; 3116 3117 /* 3118 * Prevent non-root users from setting flags on devices. When 3119 * a device is reused, users can retain ownership of the device 3120 * if they are allowed to set flags and programs assume that 3121 * chown can't fail when done as root. 3122 */ 3123 if ((vp->v_type == VCHR || vp->v_type == VBLK) && 3124 ((error = priv_check_cred(td->td_ucred, PRIV_VFS_CHFLAGS_DEV, 0)) != 0)) 3125 return (error); 3126 3127 /* 3128 * note: vget is required for any operation that might mod the vnode 3129 * so VINACTIVE is properly cleared. 3130 */ 3131 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3132 VATTR_NULL(&vattr); 3133 vattr.va_flags = flags; 3134 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3135 vput(vp); 3136 } 3137 return (error); 3138 } 3139 3140 /* 3141 * chflags(char *path, int flags) 3142 * 3143 * Change flags of a file given a path name. 
3144 */ 3145 int 3146 sys_chflags(struct chflags_args *uap) 3147 { 3148 struct nlookupdata nd; 3149 struct vnode *vp; 3150 int error; 3151 3152 vp = NULL; 3153 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3154 if (error == 0) 3155 error = nlookup(&nd); 3156 if (error == 0) 3157 error = ncp_writechk(&nd.nl_nch); 3158 if (error == 0) 3159 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3160 nlookup_done(&nd); 3161 if (error == 0) { 3162 error = setfflags(vp, uap->flags); 3163 vrele(vp); 3164 } 3165 return (error); 3166 } 3167 3168 /* 3169 * lchflags(char *path, int flags) 3170 * 3171 * Change flags of a file given a path name, but don't follow symlinks. 3172 */ 3173 int 3174 sys_lchflags(struct lchflags_args *uap) 3175 { 3176 struct nlookupdata nd; 3177 struct vnode *vp; 3178 int error; 3179 3180 vp = NULL; 3181 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3182 if (error == 0) 3183 error = nlookup(&nd); 3184 if (error == 0) 3185 error = ncp_writechk(&nd.nl_nch); 3186 if (error == 0) 3187 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3188 nlookup_done(&nd); 3189 if (error == 0) { 3190 error = setfflags(vp, uap->flags); 3191 vrele(vp); 3192 } 3193 return (error); 3194 } 3195 3196 /* 3197 * fchflags_args(int fd, int flags) 3198 * 3199 * Change flags of a file given a file descriptor. 
3200 */ 3201 int 3202 sys_fchflags(struct fchflags_args *uap) 3203 { 3204 struct thread *td = curthread; 3205 struct file *fp; 3206 int error; 3207 3208 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3209 return (error); 3210 if (fp->f_nchandle.ncp) 3211 error = ncp_writechk(&fp->f_nchandle); 3212 if (error == 0) 3213 error = setfflags((struct vnode *) fp->f_data, uap->flags); 3214 fdrop(fp); 3215 return (error); 3216 } 3217 3218 /* 3219 * chflagsat_args(int fd, const char *path, int flags, int atflags) 3220 * change flags given a pathname relative to a filedescriptor 3221 */ 3222 int sys_chflagsat(struct chflagsat_args *uap) 3223 { 3224 struct nlookupdata nd; 3225 struct vnode *vp; 3226 struct file *fp; 3227 int error; 3228 int lookupflags; 3229 3230 if (uap->atflags & ~AT_SYMLINK_NOFOLLOW) 3231 return (EINVAL); 3232 3233 lookupflags = (uap->atflags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3234 3235 vp = NULL; 3236 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, lookupflags); 3237 if (error == 0) 3238 error = nlookup(&nd); 3239 if (error == 0) 3240 error = ncp_writechk(&nd.nl_nch); 3241 if (error == 0) 3242 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 3243 nlookup_done_at(&nd, fp); 3244 if (error == 0) { 3245 error = setfflags(vp, uap->flags); 3246 vrele(vp); 3247 } 3248 return (error); 3249 } 3250 3251 3252 static int 3253 setfmode(struct vnode *vp, int mode) 3254 { 3255 struct thread *td = curthread; 3256 int error; 3257 struct vattr vattr; 3258 3259 /* 3260 * note: vget is required for any operation that might mod the vnode 3261 * so VINACTIVE is properly cleared. 
3262 */ 3263 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3264 VATTR_NULL(&vattr); 3265 vattr.va_mode = mode & ALLPERMS; 3266 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3267 cache_inval_wxok(vp); 3268 vput(vp); 3269 } 3270 return error; 3271 } 3272 3273 int 3274 kern_chmod(struct nlookupdata *nd, int mode) 3275 { 3276 struct vnode *vp; 3277 int error; 3278 3279 if ((error = nlookup(nd)) != 0) 3280 return (error); 3281 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3282 return (error); 3283 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3284 error = setfmode(vp, mode); 3285 vrele(vp); 3286 return (error); 3287 } 3288 3289 /* 3290 * chmod_args(char *path, int mode) 3291 * 3292 * Change mode of a file given path name. 3293 */ 3294 int 3295 sys_chmod(struct chmod_args *uap) 3296 { 3297 struct nlookupdata nd; 3298 int error; 3299 3300 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3301 if (error == 0) 3302 error = kern_chmod(&nd, uap->mode); 3303 nlookup_done(&nd); 3304 return (error); 3305 } 3306 3307 /* 3308 * lchmod_args(char *path, int mode) 3309 * 3310 * Change mode of a file given path name (don't follow links.) 3311 */ 3312 int 3313 sys_lchmod(struct lchmod_args *uap) 3314 { 3315 struct nlookupdata nd; 3316 int error; 3317 3318 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3319 if (error == 0) 3320 error = kern_chmod(&nd, uap->mode); 3321 nlookup_done(&nd); 3322 return (error); 3323 } 3324 3325 /* 3326 * fchmod_args(int fd, int mode) 3327 * 3328 * Change mode of a file given a file descriptor. 
3329 */ 3330 int 3331 sys_fchmod(struct fchmod_args *uap) 3332 { 3333 struct thread *td = curthread; 3334 struct file *fp; 3335 int error; 3336 3337 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3338 return (error); 3339 if (fp->f_nchandle.ncp) 3340 error = ncp_writechk(&fp->f_nchandle); 3341 if (error == 0) 3342 error = setfmode((struct vnode *)fp->f_data, uap->mode); 3343 fdrop(fp); 3344 return (error); 3345 } 3346 3347 /* 3348 * fchmodat_args(char *path, int mode) 3349 * 3350 * Change mode of a file pointed to by fd/path. 3351 */ 3352 int 3353 sys_fchmodat(struct fchmodat_args *uap) 3354 { 3355 struct nlookupdata nd; 3356 struct file *fp; 3357 int error; 3358 int flags; 3359 3360 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3361 return (EINVAL); 3362 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3363 3364 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3365 UIO_USERSPACE, flags); 3366 if (error == 0) 3367 error = kern_chmod(&nd, uap->mode); 3368 nlookup_done_at(&nd, fp); 3369 return (error); 3370 } 3371 3372 static int 3373 setfown(struct mount *mp, struct vnode *vp, uid_t uid, gid_t gid) 3374 { 3375 struct thread *td = curthread; 3376 int error; 3377 struct vattr vattr; 3378 uid_t o_uid; 3379 gid_t o_gid; 3380 uint64_t size; 3381 3382 /* 3383 * note: vget is required for any operation that might mod the vnode 3384 * so VINACTIVE is properly cleared. 
3385 */ 3386 if ((error = vget(vp, LK_EXCLUSIVE)) == 0) { 3387 if ((error = VOP_GETATTR(vp, &vattr)) != 0) 3388 return error; 3389 o_uid = vattr.va_uid; 3390 o_gid = vattr.va_gid; 3391 size = vattr.va_size; 3392 3393 VATTR_NULL(&vattr); 3394 vattr.va_uid = uid; 3395 vattr.va_gid = gid; 3396 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3397 vput(vp); 3398 } 3399 3400 if (error == 0) { 3401 if (uid == -1) 3402 uid = o_uid; 3403 if (gid == -1) 3404 gid = o_gid; 3405 VFS_ACCOUNT(mp, o_uid, o_gid, -size); 3406 VFS_ACCOUNT(mp, uid, gid, size); 3407 } 3408 3409 return error; 3410 } 3411 3412 int 3413 kern_chown(struct nlookupdata *nd, int uid, int gid) 3414 { 3415 struct vnode *vp; 3416 int error; 3417 3418 if ((error = nlookup(nd)) != 0) 3419 return (error); 3420 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3421 return (error); 3422 if ((error = ncp_writechk(&nd->nl_nch)) == 0) 3423 error = setfown(nd->nl_nch.mount, vp, uid, gid); 3424 vrele(vp); 3425 return (error); 3426 } 3427 3428 /* 3429 * chown(char *path, int uid, int gid) 3430 * 3431 * Set ownership given a path name. 3432 */ 3433 int 3434 sys_chown(struct chown_args *uap) 3435 { 3436 struct nlookupdata nd; 3437 int error; 3438 3439 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3440 if (error == 0) 3441 error = kern_chown(&nd, uap->uid, uap->gid); 3442 nlookup_done(&nd); 3443 return (error); 3444 } 3445 3446 /* 3447 * lchown_args(char *path, int uid, int gid) 3448 * 3449 * Set ownership given a path name, do not cross symlinks. 3450 */ 3451 int 3452 sys_lchown(struct lchown_args *uap) 3453 { 3454 struct nlookupdata nd; 3455 int error; 3456 3457 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3458 if (error == 0) 3459 error = kern_chown(&nd, uap->uid, uap->gid); 3460 nlookup_done(&nd); 3461 return (error); 3462 } 3463 3464 /* 3465 * fchown_args(int fd, int uid, int gid) 3466 * 3467 * Set ownership given a file descriptor. 
3468 */ 3469 int 3470 sys_fchown(struct fchown_args *uap) 3471 { 3472 struct thread *td = curthread; 3473 struct proc *p = td->td_proc; 3474 struct file *fp; 3475 int error; 3476 3477 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3478 return (error); 3479 if (fp->f_nchandle.ncp) 3480 error = ncp_writechk(&fp->f_nchandle); 3481 if (error == 0) 3482 error = setfown(p->p_fd->fd_ncdir.mount, 3483 (struct vnode *)fp->f_data, uap->uid, uap->gid); 3484 fdrop(fp); 3485 return (error); 3486 } 3487 3488 /* 3489 * fchownat(int fd, char *path, int uid, int gid, int flags) 3490 * 3491 * Set ownership of file pointed to by fd/path. 3492 */ 3493 int 3494 sys_fchownat(struct fchownat_args *uap) 3495 { 3496 struct nlookupdata nd; 3497 struct file *fp; 3498 int error; 3499 int flags; 3500 3501 if (uap->flags & ~AT_SYMLINK_NOFOLLOW) 3502 return (EINVAL); 3503 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3504 3505 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3506 UIO_USERSPACE, flags); 3507 if (error == 0) 3508 error = kern_chown(&nd, uap->uid, uap->gid); 3509 nlookup_done_at(&nd, fp); 3510 return (error); 3511 } 3512 3513 3514 static int 3515 getutimes(struct timeval *tvp, struct timespec *tsp) 3516 { 3517 struct timeval tv[2]; 3518 int error; 3519 3520 if (tvp == NULL) { 3521 microtime(&tv[0]); 3522 TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); 3523 tsp[1] = tsp[0]; 3524 } else { 3525 if ((error = itimerfix(tvp)) != 0) 3526 return (error); 3527 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3528 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3529 } 3530 return 0; 3531 } 3532 3533 static int 3534 getutimens(const struct timespec *ts, struct timespec *newts, int *nullflag) 3535 { 3536 struct timespec tsnow; 3537 int error; 3538 3539 *nullflag = 0; 3540 nanotime(&tsnow); 3541 if (ts == NULL) { 3542 newts[0] = tsnow; 3543 newts[1] = tsnow; 3544 *nullflag = 1; 3545 return (0); 3546 } 3547 3548 newts[0] = ts[0]; 3549 newts[1] = ts[1]; 3550 if (newts[0].tv_nsec == UTIME_OMIT && 
newts[1].tv_nsec == UTIME_OMIT) 3551 return (0); 3552 if (newts[0].tv_nsec == UTIME_NOW && newts[1].tv_nsec == UTIME_NOW) 3553 *nullflag = 1; 3554 3555 if (newts[0].tv_nsec == UTIME_OMIT) 3556 newts[0].tv_sec = VNOVAL; 3557 else if (newts[0].tv_nsec == UTIME_NOW) 3558 newts[0] = tsnow; 3559 else if ((error = itimespecfix(&newts[0])) != 0) 3560 return (error); 3561 3562 if (newts[1].tv_nsec == UTIME_OMIT) 3563 newts[1].tv_sec = VNOVAL; 3564 else if (newts[1].tv_nsec == UTIME_NOW) 3565 newts[1] = tsnow; 3566 else if ((error = itimespecfix(&newts[1])) != 0) 3567 return (error); 3568 3569 return (0); 3570 } 3571 3572 static int 3573 setutimes(struct vnode *vp, struct vattr *vattr, 3574 const struct timespec *ts, int nullflag) 3575 { 3576 struct thread *td = curthread; 3577 int error; 3578 3579 VATTR_NULL(vattr); 3580 vattr->va_atime = ts[0]; 3581 vattr->va_mtime = ts[1]; 3582 if (nullflag) 3583 vattr->va_vaflags |= VA_UTIMES_NULL; 3584 error = VOP_SETATTR(vp, vattr, td->td_ucred); 3585 3586 return error; 3587 } 3588 3589 int 3590 kern_utimes(struct nlookupdata *nd, struct timeval *tptr) 3591 { 3592 struct timespec ts[2]; 3593 int error; 3594 3595 if (tptr) { 3596 if ((error = getutimes(tptr, ts)) != 0) 3597 return (error); 3598 } 3599 error = kern_utimensat(nd, tptr ? ts : NULL, 0); 3600 return (error); 3601 } 3602 3603 /* 3604 * utimes_args(char *path, struct timeval *tptr) 3605 * 3606 * Set the access and modification times of a file. 3607 */ 3608 int 3609 sys_utimes(struct utimes_args *uap) 3610 { 3611 struct timeval tv[2]; 3612 struct nlookupdata nd; 3613 int error; 3614 3615 if (uap->tptr) { 3616 error = copyin(uap->tptr, tv, sizeof(tv)); 3617 if (error) 3618 return (error); 3619 } 3620 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3621 if (error == 0) 3622 error = kern_utimes(&nd, uap->tptr ? 
tv : NULL); 3623 nlookup_done(&nd); 3624 return (error); 3625 } 3626 3627 /* 3628 * lutimes_args(char *path, struct timeval *tptr) 3629 * 3630 * Set the access and modification times of a file. 3631 */ 3632 int 3633 sys_lutimes(struct lutimes_args *uap) 3634 { 3635 struct timeval tv[2]; 3636 struct nlookupdata nd; 3637 int error; 3638 3639 if (uap->tptr) { 3640 error = copyin(uap->tptr, tv, sizeof(tv)); 3641 if (error) 3642 return (error); 3643 } 3644 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 3645 if (error == 0) 3646 error = kern_utimes(&nd, uap->tptr ? tv : NULL); 3647 nlookup_done(&nd); 3648 return (error); 3649 } 3650 3651 /* 3652 * Set utimes on a file descriptor. The creds used to open the 3653 * file are used to determine whether the operation is allowed 3654 * or not. 3655 */ 3656 int 3657 kern_futimens(int fd, struct timespec *ts) 3658 { 3659 struct thread *td = curthread; 3660 struct timespec newts[2]; 3661 struct file *fp; 3662 struct vnode *vp; 3663 struct vattr vattr; 3664 int nullflag; 3665 int error; 3666 3667 error = getutimens(ts, newts, &nullflag); 3668 if (error) 3669 return (error); 3670 if ((error = holdvnode(td, fd, &fp)) != 0) 3671 return (error); 3672 if (fp->f_nchandle.ncp) 3673 error = ncp_writechk(&fp->f_nchandle); 3674 if (error == 0) { 3675 vp = fp->f_data; 3676 error = vget(vp, LK_EXCLUSIVE); 3677 if (error == 0) { 3678 error = VOP_GETATTR(vp, &vattr); 3679 if (error == 0) { 3680 error = naccess_va(&vattr, NLC_OWN | NLC_WRITE, 3681 fp->f_cred); 3682 } 3683 if (error == 0) { 3684 error = setutimes(vp, &vattr, newts, nullflag); 3685 } 3686 vput(vp); 3687 } 3688 } 3689 fdrop(fp); 3690 return (error); 3691 } 3692 3693 /* 3694 * futimens_args(int fd, struct timespec *ts) 3695 * 3696 * Set the access and modification times of a file. 
3697 */ 3698 int 3699 sys_futimens(struct futimens_args *uap) 3700 { 3701 struct timespec ts[2]; 3702 int error; 3703 3704 if (uap->ts) { 3705 error = copyin(uap->ts, ts, sizeof(ts)); 3706 if (error) 3707 return (error); 3708 } 3709 error = kern_futimens(uap->fd, uap->ts ? ts : NULL); 3710 return (error); 3711 } 3712 3713 int 3714 kern_futimes(int fd, struct timeval *tptr) 3715 { 3716 struct timespec ts[2]; 3717 int error; 3718 3719 if (tptr) { 3720 if ((error = getutimes(tptr, ts)) != 0) 3721 return (error); 3722 } 3723 error = kern_futimens(fd, tptr ? ts : NULL); 3724 return (error); 3725 } 3726 3727 /* 3728 * futimes_args(int fd, struct timeval *tptr) 3729 * 3730 * Set the access and modification times of a file. 3731 */ 3732 int 3733 sys_futimes(struct futimes_args *uap) 3734 { 3735 struct timeval tv[2]; 3736 int error; 3737 3738 if (uap->tptr) { 3739 error = copyin(uap->tptr, tv, sizeof(tv)); 3740 if (error) 3741 return (error); 3742 } 3743 error = kern_futimes(uap->fd, uap->tptr ? 
tv : NULL); 3744 return (error); 3745 } 3746 3747 int 3748 kern_utimensat(struct nlookupdata *nd, const struct timespec *ts, int flags) 3749 { 3750 struct timespec newts[2]; 3751 struct vnode *vp; 3752 struct vattr vattr; 3753 int nullflag; 3754 int error; 3755 3756 if (flags & ~AT_SYMLINK_NOFOLLOW) 3757 return (EINVAL); 3758 3759 error = getutimens(ts, newts, &nullflag); 3760 if (error) 3761 return (error); 3762 3763 nd->nl_flags |= NLC_OWN | NLC_WRITE; 3764 if ((error = nlookup(nd)) != 0) 3765 return (error); 3766 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3767 return (error); 3768 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3769 return (error); 3770 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3771 error = vget(vp, LK_EXCLUSIVE); 3772 if (error == 0) { 3773 error = setutimes(vp, &vattr, newts, nullflag); 3774 vput(vp); 3775 } 3776 } 3777 vrele(vp); 3778 return (error); 3779 } 3780 3781 /* 3782 * utimensat_args(int fd, const char *path, const struct timespec *ts, int flags); 3783 * 3784 * Set file access and modification times of a file. 3785 */ 3786 int 3787 sys_utimensat(struct utimensat_args *uap) 3788 { 3789 struct timespec ts[2]; 3790 struct nlookupdata nd; 3791 struct file *fp; 3792 int error; 3793 int flags; 3794 3795 if (uap->ts) { 3796 error = copyin(uap->ts, ts, sizeof(ts)); 3797 if (error) 3798 return (error); 3799 } 3800 3801 flags = (uap->flags & AT_SYMLINK_NOFOLLOW) ? 0 : NLC_FOLLOW; 3802 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, 3803 UIO_USERSPACE, flags); 3804 if (error == 0) 3805 error = kern_utimensat(&nd, uap->ts ? 
ts : NULL, uap->flags); 3806 nlookup_done_at(&nd, fp); 3807 return (error); 3808 } 3809 3810 int 3811 kern_truncate(struct nlookupdata *nd, off_t length) 3812 { 3813 struct vnode *vp; 3814 struct vattr vattr; 3815 int error; 3816 uid_t uid = 0; 3817 gid_t gid = 0; 3818 uint64_t old_size = 0; 3819 3820 if (length < 0) 3821 return(EINVAL); 3822 nd->nl_flags |= NLC_WRITE | NLC_TRUNCATE; 3823 if ((error = nlookup(nd)) != 0) 3824 return (error); 3825 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 3826 return (error); 3827 if ((error = cache_vref(&nd->nl_nch, nd->nl_cred, &vp)) != 0) 3828 return (error); 3829 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY | LK_FAILRECLAIM); 3830 if (error) { 3831 vrele(vp); 3832 return (error); 3833 } 3834 if (vp->v_type == VDIR) { 3835 error = EISDIR; 3836 goto done; 3837 } 3838 if (vfs_quota_enabled) { 3839 error = VOP_GETATTR(vp, &vattr); 3840 KASSERT(error == 0, ("kern_truncate(): VOP_GETATTR didn't return 0")); 3841 uid = vattr.va_uid; 3842 gid = vattr.va_gid; 3843 old_size = vattr.va_size; 3844 } 3845 3846 if ((error = vn_writechk(vp, &nd->nl_nch)) == 0) { 3847 VATTR_NULL(&vattr); 3848 vattr.va_size = length; 3849 error = VOP_SETATTR(vp, &vattr, nd->nl_cred); 3850 VFS_ACCOUNT(nd->nl_nch.mount, uid, gid, length - old_size); 3851 } 3852 done: 3853 vput(vp); 3854 return (error); 3855 } 3856 3857 /* 3858 * truncate(char *path, int pad, off_t length) 3859 * 3860 * Truncate a file given its path name. 
3861 */ 3862 int 3863 sys_truncate(struct truncate_args *uap) 3864 { 3865 struct nlookupdata nd; 3866 int error; 3867 3868 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 3869 if (error == 0) 3870 error = kern_truncate(&nd, uap->length); 3871 nlookup_done(&nd); 3872 return error; 3873 } 3874 3875 int 3876 kern_ftruncate(int fd, off_t length) 3877 { 3878 struct thread *td = curthread; 3879 struct vattr vattr; 3880 struct vnode *vp; 3881 struct file *fp; 3882 int error; 3883 uid_t uid = 0; 3884 gid_t gid = 0; 3885 uint64_t old_size = 0; 3886 struct mount *mp; 3887 3888 if (length < 0) 3889 return(EINVAL); 3890 if ((error = holdvnode(td, fd, &fp)) != 0) 3891 return (error); 3892 if (fp->f_nchandle.ncp) { 3893 error = ncp_writechk(&fp->f_nchandle); 3894 if (error) 3895 goto done; 3896 } 3897 if ((fp->f_flag & FWRITE) == 0) { 3898 error = EINVAL; 3899 goto done; 3900 } 3901 if (fp->f_flag & FAPPENDONLY) { /* inode was set s/uapnd */ 3902 error = EINVAL; 3903 goto done; 3904 } 3905 vp = (struct vnode *)fp->f_data; 3906 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3907 if (vp->v_type == VDIR) { 3908 error = EISDIR; 3909 vn_unlock(vp); 3910 goto done; 3911 } 3912 3913 if (vfs_quota_enabled) { 3914 error = VOP_GETATTR(vp, &vattr); 3915 KASSERT(error == 0, ("kern_ftruncate(): VOP_GETATTR didn't return 0")); 3916 uid = vattr.va_uid; 3917 gid = vattr.va_gid; 3918 old_size = vattr.va_size; 3919 } 3920 3921 if ((error = vn_writechk(vp, NULL)) == 0) { 3922 VATTR_NULL(&vattr); 3923 vattr.va_size = length; 3924 error = VOP_SETATTR(vp, &vattr, fp->f_cred); 3925 mp = vq_vptomp(vp); 3926 VFS_ACCOUNT(mp, uid, gid, length - old_size); 3927 } 3928 vn_unlock(vp); 3929 done: 3930 fdrop(fp); 3931 return (error); 3932 } 3933 3934 /* 3935 * ftruncate_args(int fd, int pad, off_t length) 3936 * 3937 * Truncate a file given a file descriptor. 
3938 */ 3939 int 3940 sys_ftruncate(struct ftruncate_args *uap) 3941 { 3942 int error; 3943 3944 error = kern_ftruncate(uap->fd, uap->length); 3945 3946 return (error); 3947 } 3948 3949 /* 3950 * fsync(int fd) 3951 * 3952 * Sync an open file. 3953 */ 3954 int 3955 sys_fsync(struct fsync_args *uap) 3956 { 3957 struct thread *td = curthread; 3958 struct vnode *vp; 3959 struct file *fp; 3960 vm_object_t obj; 3961 int error; 3962 3963 if ((error = holdvnode(td, uap->fd, &fp)) != 0) 3964 return (error); 3965 vp = (struct vnode *)fp->f_data; 3966 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3967 if ((obj = vp->v_object) != NULL) { 3968 if (vp->v_mount == NULL || 3969 (vp->v_mount->mnt_kern_flag & MNTK_NOMSYNC) == 0) { 3970 vm_object_page_clean(obj, 0, 0, 0); 3971 } 3972 } 3973 error = VOP_FSYNC(vp, MNT_WAIT, VOP_FSYNC_SYSCALL); 3974 if (error == 0 && vp->v_mount) 3975 error = buf_fsync(vp); 3976 vn_unlock(vp); 3977 fdrop(fp); 3978 3979 return (error); 3980 } 3981 3982 int 3983 kern_rename(struct nlookupdata *fromnd, struct nlookupdata *tond) 3984 { 3985 struct nchandle fnchd; 3986 struct nchandle tnchd; 3987 struct namecache *ncp; 3988 struct vnode *fdvp; 3989 struct vnode *tdvp; 3990 struct mount *mp; 3991 int error; 3992 u_int fncp_gen; 3993 u_int tncp_gen; 3994 3995 bwillinode(1); 3996 fromnd->nl_flags |= NLC_REFDVP | NLC_RENAME_SRC; 3997 if ((error = nlookup(fromnd)) != 0) 3998 return (error); 3999 if ((fnchd.ncp = fromnd->nl_nch.ncp->nc_parent) == NULL) 4000 return (ENOENT); 4001 fnchd.mount = fromnd->nl_nch.mount; 4002 cache_hold(&fnchd); 4003 4004 /* 4005 * unlock the source nch so we can lookup the target nch without 4006 * deadlocking. The target may or may not exist so we do not check 4007 * for a target vp like kern_mkdir() and other creation functions do. 4008 * 4009 * The source and target directories are ref'd and rechecked after 4010 * everything is relocked to determine if the source or target file 4011 * has been renamed. 
4012 */ 4013 KKASSERT(fromnd->nl_flags & NLC_NCPISLOCKED); 4014 fromnd->nl_flags &= ~NLC_NCPISLOCKED; 4015 4016 fncp_gen = fromnd->nl_nch.ncp->nc_generation; 4017 4018 cache_unlock(&fromnd->nl_nch); 4019 4020 tond->nl_flags |= NLC_RENAME_DST | NLC_REFDVP; 4021 if ((error = nlookup(tond)) != 0) { 4022 cache_drop(&fnchd); 4023 return (error); 4024 } 4025 tncp_gen = tond->nl_nch.ncp->nc_generation; 4026 4027 if ((tnchd.ncp = tond->nl_nch.ncp->nc_parent) == NULL) { 4028 cache_drop(&fnchd); 4029 return (ENOENT); 4030 } 4031 tnchd.mount = tond->nl_nch.mount; 4032 cache_hold(&tnchd); 4033 4034 /* 4035 * If the source and target are the same there is nothing to do 4036 */ 4037 if (fromnd->nl_nch.ncp == tond->nl_nch.ncp) { 4038 cache_drop(&fnchd); 4039 cache_drop(&tnchd); 4040 return (0); 4041 } 4042 4043 /* 4044 * Mount points cannot be renamed or overwritten 4045 */ 4046 if ((fromnd->nl_nch.ncp->nc_flag | tond->nl_nch.ncp->nc_flag) & 4047 NCF_ISMOUNTPT 4048 ) { 4049 cache_drop(&fnchd); 4050 cache_drop(&tnchd); 4051 return (EINVAL); 4052 } 4053 4054 /* 4055 * Relock the source ncp. cache_relock() will deal with any 4056 * deadlocks against the already-locked tond and will also 4057 * make sure both are resolved. 4058 * 4059 * NOTE AFTER RELOCKING: The source or target ncp may have become 4060 * invalid while they were unlocked, nc_vp and nc_mount could 4061 * be NULL. 4062 */ 4063 cache_relock(&fromnd->nl_nch, fromnd->nl_cred, 4064 &tond->nl_nch, tond->nl_cred); 4065 fromnd->nl_flags |= NLC_NCPISLOCKED; 4066 4067 /* 4068 * If the namecache generation changed for either fromnd or tond, 4069 * we must retry. 
4070 */ 4071 if (fromnd->nl_nch.ncp->nc_generation != fncp_gen || 4072 tond->nl_nch.ncp->nc_generation != tncp_gen) { 4073 kprintf("kern_rename: retry due to gen on: " 4074 "\"%s\" -> \"%s\"\n", 4075 fromnd->nl_nch.ncp->nc_name, 4076 tond->nl_nch.ncp->nc_name); 4077 cache_drop(&fnchd); 4078 cache_drop(&tnchd); 4079 return (EAGAIN); 4080 } 4081 4082 /* 4083 * If either fromnd or tond are marked destroyed a ripout occured 4084 * out from under us and we must retry. 4085 */ 4086 if ((fromnd->nl_nch.ncp->nc_flag & (NCF_DESTROYED | NCF_UNRESOLVED)) || 4087 fromnd->nl_nch.ncp->nc_vp == NULL || 4088 (tond->nl_nch.ncp->nc_flag & NCF_DESTROYED)) { 4089 kprintf("kern_rename: retry due to ripout on: " 4090 "\"%s\" -> \"%s\"\n", 4091 fromnd->nl_nch.ncp->nc_name, 4092 tond->nl_nch.ncp->nc_name); 4093 cache_drop(&fnchd); 4094 cache_drop(&tnchd); 4095 return (EAGAIN); 4096 } 4097 4098 /* 4099 * Make sure the parent directories linkages are the same. 4100 * XXX shouldn't be needed any more w/ generation check above. 4101 */ 4102 if (fnchd.ncp != fromnd->nl_nch.ncp->nc_parent || 4103 tnchd.ncp != tond->nl_nch.ncp->nc_parent) { 4104 cache_drop(&fnchd); 4105 cache_drop(&tnchd); 4106 return (ENOENT); 4107 } 4108 4109 /* 4110 * Both the source and target must be within the same filesystem and 4111 * in the same filesystem as their parent directories within the 4112 * namecache topology. 4113 * 4114 * NOTE: fromnd's nc_mount or nc_vp could be NULL. 4115 */ 4116 mp = fnchd.mount; 4117 if (mp != tnchd.mount || mp != fromnd->nl_nch.mount || 4118 mp != tond->nl_nch.mount) { 4119 cache_drop(&fnchd); 4120 cache_drop(&tnchd); 4121 return (EXDEV); 4122 } 4123 4124 /* 4125 * Make sure the mount point is writable 4126 */ 4127 if ((error = ncp_writechk(&tond->nl_nch)) != 0) { 4128 cache_drop(&fnchd); 4129 cache_drop(&tnchd); 4130 return (error); 4131 } 4132 4133 /* 4134 * If the target exists and either the source or target is a directory, 4135 * then both must be directories. 
4136 * 4137 * Due to relocking of the source, fromnd->nl_nch.ncp->nc_vp might h 4138 * have become NULL. 4139 */ 4140 if (tond->nl_nch.ncp->nc_vp) { 4141 if (fromnd->nl_nch.ncp->nc_vp == NULL) { 4142 error = ENOENT; 4143 } else if (fromnd->nl_nch.ncp->nc_vp->v_type == VDIR) { 4144 if (tond->nl_nch.ncp->nc_vp->v_type != VDIR) 4145 error = ENOTDIR; 4146 } else if (tond->nl_nch.ncp->nc_vp->v_type == VDIR) { 4147 error = EISDIR; 4148 } 4149 } 4150 4151 /* 4152 * You cannot rename a source into itself or a subdirectory of itself. 4153 * We check this by travsersing the target directory upwards looking 4154 * for a match against the source. 4155 * 4156 * XXX MPSAFE 4157 */ 4158 if (error == 0) { 4159 for (ncp = tnchd.ncp; ncp; ncp = ncp->nc_parent) { 4160 if (fromnd->nl_nch.ncp == ncp) { 4161 error = EINVAL; 4162 break; 4163 } 4164 } 4165 } 4166 4167 cache_drop(&fnchd); 4168 cache_drop(&tnchd); 4169 4170 /* 4171 * Even though the namespaces are different, they may still represent 4172 * hardlinks to the same file. The filesystem might have a hard time 4173 * with this so we issue a NREMOVE of the source instead of a NRENAME 4174 * when we detect the situation. 4175 */ 4176 if (error == 0) { 4177 fdvp = fromnd->nl_dvp; 4178 tdvp = tond->nl_dvp; 4179 if (fdvp == NULL || tdvp == NULL) { 4180 error = EPERM; 4181 } else if (fromnd->nl_nch.ncp->nc_vp == tond->nl_nch.ncp->nc_vp) { 4182 error = VOP_NREMOVE(&fromnd->nl_nch, fdvp, 4183 fromnd->nl_cred); 4184 } else { 4185 error = VOP_NRENAME(&fromnd->nl_nch, &tond->nl_nch, 4186 fdvp, tdvp, tond->nl_cred); 4187 } 4188 } 4189 return (error); 4190 } 4191 4192 /* 4193 * rename_args(char *from, char *to) 4194 * 4195 * Rename files. Source and destination must either both be directories, 4196 * or both not be directories. If target is a directory, it must be empty. 
4197 */ 4198 int 4199 sys_rename(struct rename_args *uap) 4200 { 4201 struct nlookupdata fromnd, tond; 4202 int error; 4203 4204 do { 4205 error = nlookup_init(&fromnd, uap->from, UIO_USERSPACE, 0); 4206 if (error == 0) { 4207 error = nlookup_init(&tond, uap->to, UIO_USERSPACE, 0); 4208 if (error == 0) 4209 error = kern_rename(&fromnd, &tond); 4210 nlookup_done(&tond); 4211 } 4212 nlookup_done(&fromnd); 4213 } while (error == EAGAIN); 4214 return (error); 4215 } 4216 4217 /* 4218 * renameat_args(int oldfd, char *old, int newfd, char *new) 4219 * 4220 * Rename files using paths relative to the directories associated with 4221 * oldfd and newfd. Source and destination must either both be directories, 4222 * or both not be directories. If target is a directory, it must be empty. 4223 */ 4224 int 4225 sys_renameat(struct renameat_args *uap) 4226 { 4227 struct nlookupdata oldnd, newnd; 4228 struct file *oldfp, *newfp; 4229 int error; 4230 4231 do { 4232 error = nlookup_init_at(&oldnd, &oldfp, 4233 uap->oldfd, uap->old, 4234 UIO_USERSPACE, 0); 4235 if (error == 0) { 4236 error = nlookup_init_at(&newnd, &newfp, 4237 uap->newfd, uap->new, 4238 UIO_USERSPACE, 0); 4239 if (error == 0) 4240 error = kern_rename(&oldnd, &newnd); 4241 nlookup_done_at(&newnd, newfp); 4242 } 4243 nlookup_done_at(&oldnd, oldfp); 4244 } while (error == EAGAIN); 4245 return (error); 4246 } 4247 4248 int 4249 kern_mkdir(struct nlookupdata *nd, int mode) 4250 { 4251 struct thread *td = curthread; 4252 struct proc *p = td->td_proc; 4253 struct vnode *vp; 4254 struct vattr vattr; 4255 int error; 4256 4257 bwillinode(1); 4258 nd->nl_flags |= NLC_WILLBEDIR | NLC_CREATE | NLC_REFDVP; 4259 if ((error = nlookup(nd)) != 0) 4260 return (error); 4261 4262 if (nd->nl_nch.ncp->nc_vp) 4263 return (EEXIST); 4264 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4265 return (error); 4266 VATTR_NULL(&vattr); 4267 vattr.va_type = VDIR; 4268 vattr.va_mode = (mode & ACCESSPERMS) &~ p->p_fd->fd_cmask; 4269 4270 vp = NULL; 
4271 error = VOP_NMKDIR(&nd->nl_nch, nd->nl_dvp, &vp, td->td_ucred, &vattr); 4272 if (error == 0) 4273 vput(vp); 4274 return (error); 4275 } 4276 4277 /* 4278 * mkdir_args(char *path, int mode) 4279 * 4280 * Make a directory file. 4281 */ 4282 int 4283 sys_mkdir(struct mkdir_args *uap) 4284 { 4285 struct nlookupdata nd; 4286 int error; 4287 4288 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4289 if (error == 0) 4290 error = kern_mkdir(&nd, uap->mode); 4291 nlookup_done(&nd); 4292 return (error); 4293 } 4294 4295 /* 4296 * mkdirat_args(int fd, char *path, mode_t mode) 4297 * 4298 * Make a directory file. The path is relative to the directory associated 4299 * with fd. 4300 */ 4301 int 4302 sys_mkdirat(struct mkdirat_args *uap) 4303 { 4304 struct nlookupdata nd; 4305 struct file *fp; 4306 int error; 4307 4308 error = nlookup_init_at(&nd, &fp, uap->fd, uap->path, UIO_USERSPACE, 0); 4309 if (error == 0) 4310 error = kern_mkdir(&nd, uap->mode); 4311 nlookup_done_at(&nd, fp); 4312 return (error); 4313 } 4314 4315 int 4316 kern_rmdir(struct nlookupdata *nd) 4317 { 4318 int error; 4319 4320 bwillinode(1); 4321 nd->nl_flags |= NLC_DELETE | NLC_REFDVP; 4322 if ((error = nlookup(nd)) != 0) 4323 return (error); 4324 4325 /* 4326 * Do not allow directories representing mount points to be 4327 * deleted, even if empty. Check write perms on mount point 4328 * in case the vnode is aliased (aka nullfs). 4329 */ 4330 if (nd->nl_nch.ncp->nc_flag & (NCF_ISMOUNTPT)) 4331 return (EBUSY); 4332 if ((error = ncp_writechk(&nd->nl_nch)) != 0) 4333 return (error); 4334 error = VOP_NRMDIR(&nd->nl_nch, nd->nl_dvp, nd->nl_cred); 4335 return (error); 4336 } 4337 4338 /* 4339 * rmdir_args(char *path) 4340 * 4341 * Remove a directory file. 
4342 */ 4343 int 4344 sys_rmdir(struct rmdir_args *uap) 4345 { 4346 struct nlookupdata nd; 4347 int error; 4348 4349 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, 0); 4350 if (error == 0) 4351 error = kern_rmdir(&nd); 4352 nlookup_done(&nd); 4353 return (error); 4354 } 4355 4356 int 4357 kern_getdirentries(int fd, char *buf, u_int count, long *basep, int *res, 4358 enum uio_seg direction) 4359 { 4360 struct thread *td = curthread; 4361 struct vnode *vp; 4362 struct file *fp; 4363 struct uio auio; 4364 struct iovec aiov; 4365 off_t loff; 4366 int error, eofflag; 4367 4368 if ((error = holdvnode(td, fd, &fp)) != 0) 4369 return (error); 4370 if ((fp->f_flag & FREAD) == 0) { 4371 error = EBADF; 4372 goto done; 4373 } 4374 vp = (struct vnode *)fp->f_data; 4375 if (vp->v_type != VDIR) { 4376 error = EINVAL; 4377 goto done; 4378 } 4379 aiov.iov_base = buf; 4380 aiov.iov_len = count; 4381 auio.uio_iov = &aiov; 4382 auio.uio_iovcnt = 1; 4383 auio.uio_rw = UIO_READ; 4384 auio.uio_segflg = direction; 4385 auio.uio_td = td; 4386 auio.uio_resid = count; 4387 loff = auio.uio_offset = fp->f_offset; 4388 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); 4389 fp->f_offset = auio.uio_offset; 4390 if (error) 4391 goto done; 4392 4393 /* 4394 * WARNING! *basep may not be wide enough to accomodate the 4395 * seek offset. XXX should we hack this to return the upper 32 bits 4396 * for offsets greater then 4G? 4397 */ 4398 if (basep) { 4399 *basep = (long)loff; 4400 } 4401 *res = count - auio.uio_resid; 4402 done: 4403 fdrop(fp); 4404 return (error); 4405 } 4406 4407 /* 4408 * getdirentries_args(int fd, char *buf, u_int conut, long *basep) 4409 * 4410 * Read a block of directory entries in a file system independent format. 
4411 */ 4412 int 4413 sys_getdirentries(struct getdirentries_args *uap) 4414 { 4415 long base; 4416 int error; 4417 4418 error = kern_getdirentries(uap->fd, uap->buf, uap->count, &base, 4419 &uap->sysmsg_result, UIO_USERSPACE); 4420 4421 if (error == 0 && uap->basep) 4422 error = copyout(&base, uap->basep, sizeof(*uap->basep)); 4423 return (error); 4424 } 4425 4426 /* 4427 * getdents_args(int fd, char *buf, size_t count) 4428 */ 4429 int 4430 sys_getdents(struct getdents_args *uap) 4431 { 4432 int error; 4433 4434 error = kern_getdirentries(uap->fd, uap->buf, uap->count, NULL, 4435 &uap->sysmsg_result, UIO_USERSPACE); 4436 4437 return (error); 4438 } 4439 4440 /* 4441 * Set the mode mask for creation of filesystem nodes. 4442 * 4443 * umask(int newmask) 4444 */ 4445 int 4446 sys_umask(struct umask_args *uap) 4447 { 4448 struct thread *td = curthread; 4449 struct proc *p = td->td_proc; 4450 struct filedesc *fdp; 4451 4452 fdp = p->p_fd; 4453 uap->sysmsg_result = fdp->fd_cmask; 4454 fdp->fd_cmask = uap->newmask & ALLPERMS; 4455 return (0); 4456 } 4457 4458 /* 4459 * revoke(char *path) 4460 * 4461 * Void all references to file by ripping underlying filesystem 4462 * away from vnode. 
4463 */ 4464 int 4465 sys_revoke(struct revoke_args *uap) 4466 { 4467 struct nlookupdata nd; 4468 struct vattr vattr; 4469 struct vnode *vp; 4470 struct ucred *cred; 4471 int error; 4472 4473 vp = NULL; 4474 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4475 if (error == 0) 4476 error = nlookup(&nd); 4477 if (error == 0) 4478 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4479 cred = crhold(nd.nl_cred); 4480 nlookup_done(&nd); 4481 if (error == 0) { 4482 if (error == 0) 4483 error = VOP_GETATTR(vp, &vattr); 4484 if (error == 0 && cred->cr_uid != vattr.va_uid) 4485 error = priv_check_cred(cred, PRIV_VFS_REVOKE, 0); 4486 if (error == 0 && (vp->v_type == VCHR || vp->v_type == VBLK)) { 4487 if (vcount(vp) > 0) 4488 error = vrevoke(vp, cred); 4489 } else if (error == 0) { 4490 error = vrevoke(vp, cred); 4491 } 4492 vrele(vp); 4493 } 4494 if (cred) 4495 crfree(cred); 4496 return (error); 4497 } 4498 4499 /* 4500 * getfh_args(char *fname, fhandle_t *fhp) 4501 * 4502 * Get (NFS) file handle 4503 * 4504 * NOTE: We use the fsid of the covering mount, even if it is a nullfs 4505 * mount. This allows nullfs mounts to be explicitly exported. 4506 * 4507 * WARNING: nullfs mounts of HAMMER PFS ROOTs are safe. 4508 * 4509 * nullfs mounts of subdirectories are not safe. That is, it will 4510 * work, but you do not really have protection against access to 4511 * the related parent directories. 
4512 */ 4513 int 4514 sys_getfh(struct getfh_args *uap) 4515 { 4516 struct thread *td = curthread; 4517 struct nlookupdata nd; 4518 fhandle_t fh; 4519 struct vnode *vp; 4520 struct mount *mp; 4521 int error; 4522 4523 /* 4524 * Must be super user 4525 */ 4526 if ((error = priv_check(td, PRIV_ROOT)) != 0) 4527 return (error); 4528 4529 vp = NULL; 4530 error = nlookup_init(&nd, uap->fname, UIO_USERSPACE, NLC_FOLLOW); 4531 if (error == 0) 4532 error = nlookup(&nd); 4533 if (error == 0) 4534 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4535 mp = nd.nl_nch.mount; 4536 nlookup_done(&nd); 4537 if (error == 0) { 4538 bzero(&fh, sizeof(fh)); 4539 fh.fh_fsid = mp->mnt_stat.f_fsid; 4540 error = VFS_VPTOFH(vp, &fh.fh_fid); 4541 vput(vp); 4542 if (error == 0) 4543 error = copyout(&fh, uap->fhp, sizeof(fh)); 4544 } 4545 return (error); 4546 } 4547 4548 /* 4549 * fhopen_args(const struct fhandle *u_fhp, int flags) 4550 * 4551 * syscall for the rpc.lockd to use to translate a NFS file handle into 4552 * an open descriptor. 4553 * 4554 * warning: do not remove the priv_check() call or this becomes one giant 4555 * security hole. 4556 */ 4557 int 4558 sys_fhopen(struct fhopen_args *uap) 4559 { 4560 struct thread *td = curthread; 4561 struct filedesc *fdp = td->td_proc->p_fd; 4562 struct mount *mp; 4563 struct vnode *vp; 4564 struct fhandle fhp; 4565 struct vattr vat; 4566 struct vattr *vap = &vat; 4567 struct flock lf; 4568 int fmode, mode, error = 0, type; 4569 struct file *nfp; 4570 struct file *fp; 4571 int indx; 4572 4573 /* 4574 * Must be super user 4575 */ 4576 error = priv_check(td, PRIV_ROOT); 4577 if (error) 4578 return (error); 4579 4580 fmode = FFLAGS(uap->flags); 4581 4582 /* 4583 * Why not allow a non-read/write open for our lockd? 
4584 */ 4585 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4586 return (EINVAL); 4587 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4588 if (error) 4589 return(error); 4590 4591 /* 4592 * Find the mount point 4593 */ 4594 mp = vfs_getvfs(&fhp.fh_fsid); 4595 if (mp == NULL) { 4596 error = ESTALE; 4597 goto done2; 4598 } 4599 /* now give me my vnode, it gets returned to me locked */ 4600 error = VFS_FHTOVP(mp, NULL, &fhp.fh_fid, &vp); 4601 if (error) 4602 goto done; 4603 /* 4604 * from now on we have to make sure not 4605 * to forget about the vnode 4606 * any error that causes an abort must vput(vp) 4607 * just set error = err and 'goto bad;'. 4608 */ 4609 4610 /* 4611 * from vn_open 4612 */ 4613 if (vp->v_type == VLNK) { 4614 error = EMLINK; 4615 goto bad; 4616 } 4617 if (vp->v_type == VSOCK) { 4618 error = EOPNOTSUPP; 4619 goto bad; 4620 } 4621 mode = 0; 4622 if (fmode & (FWRITE | O_TRUNC)) { 4623 if (vp->v_type == VDIR) { 4624 error = EISDIR; 4625 goto bad; 4626 } 4627 error = vn_writechk(vp, NULL); 4628 if (error) 4629 goto bad; 4630 mode |= VWRITE; 4631 } 4632 if (fmode & FREAD) 4633 mode |= VREAD; 4634 if (mode) { 4635 error = VOP_ACCESS(vp, mode, td->td_ucred); 4636 if (error) 4637 goto bad; 4638 } 4639 if (fmode & O_TRUNC) { 4640 vn_unlock(vp); /* XXX */ 4641 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); /* XXX */ 4642 VATTR_NULL(vap); 4643 vap->va_size = 0; 4644 error = VOP_SETATTR(vp, vap, td->td_ucred); 4645 if (error) 4646 goto bad; 4647 } 4648 4649 /* 4650 * VOP_OPEN needs the file pointer so it can potentially override 4651 * it. 4652 * 4653 * WARNING! no f_nchandle will be associated when fhopen()ing a 4654 * directory. XXX 4655 */ 4656 if ((error = falloc(td->td_lwp, &nfp, &indx)) != 0) 4657 goto bad; 4658 fp = nfp; 4659 4660 error = VOP_OPEN(vp, fmode, td->td_ucred, fp); 4661 if (error) { 4662 /* 4663 * setting f_ops this way prevents VOP_CLOSE from being 4664 * called or fdrop() releasing the vp from v_data. 
Since 4665 * the VOP_OPEN failed we don't want to VOP_CLOSE. 4666 */ 4667 fp->f_ops = &badfileops; 4668 fp->f_data = NULL; 4669 goto bad_drop; 4670 } 4671 4672 /* 4673 * The fp is given its own reference, we still have our ref and lock. 4674 * 4675 * Assert that all regular files must be created with a VM object. 4676 */ 4677 if (vp->v_type == VREG && vp->v_object == NULL) { 4678 kprintf("fhopen: regular file did not " 4679 "have VM object: %p\n", 4680 vp); 4681 goto bad_drop; 4682 } 4683 4684 /* 4685 * The open was successful. Handle any locking requirements. 4686 */ 4687 if (fmode & (O_EXLOCK | O_SHLOCK)) { 4688 lf.l_whence = SEEK_SET; 4689 lf.l_start = 0; 4690 lf.l_len = 0; 4691 if (fmode & O_EXLOCK) 4692 lf.l_type = F_WRLCK; 4693 else 4694 lf.l_type = F_RDLCK; 4695 if (fmode & FNONBLOCK) 4696 type = 0; 4697 else 4698 type = F_WAIT; 4699 vn_unlock(vp); 4700 if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, 4701 &lf, type)) != 0) { 4702 /* 4703 * release our private reference. 4704 */ 4705 fsetfd(fdp, NULL, indx); 4706 fdrop(fp); 4707 vrele(vp); 4708 goto done; 4709 } 4710 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 4711 atomic_set_int(&fp->f_flag, FHASLOCK); /* race ok */ 4712 } 4713 4714 /* 4715 * Clean up. Associate the file pointer with the previously 4716 * reserved descriptor and return it. 
4717 */ 4718 vput(vp); 4719 if (uap->flags & O_CLOEXEC) 4720 fdp->fd_files[indx].fileflags |= UF_EXCLOSE; 4721 fsetfd(fdp, fp, indx); 4722 fdrop(fp); 4723 uap->sysmsg_result = indx; 4724 mount_drop(mp); 4725 4726 return (error); 4727 4728 bad_drop: 4729 fsetfd(fdp, NULL, indx); 4730 fdrop(fp); 4731 bad: 4732 vput(vp); 4733 done: 4734 mount_drop(mp); 4735 done2: 4736 return (error); 4737 } 4738 4739 /* 4740 * fhstat_args(struct fhandle *u_fhp, struct stat *sb) 4741 */ 4742 int 4743 sys_fhstat(struct fhstat_args *uap) 4744 { 4745 struct thread *td = curthread; 4746 struct stat sb; 4747 fhandle_t fh; 4748 struct mount *mp; 4749 struct vnode *vp; 4750 int error; 4751 4752 /* 4753 * Must be super user 4754 */ 4755 error = priv_check(td, PRIV_ROOT); 4756 if (error) 4757 return (error); 4758 4759 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4760 if (error) 4761 return (error); 4762 4763 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) 4764 error = ESTALE; 4765 if (error == 0) { 4766 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) == 0) { 4767 error = vn_stat(vp, &sb, td->td_ucred); 4768 vput(vp); 4769 } 4770 } 4771 if (error == 0) 4772 error = copyout(&sb, uap->sb, sizeof(sb)); 4773 if (mp) 4774 mount_drop(mp); 4775 4776 return (error); 4777 } 4778 4779 /* 4780 * fhstatfs_args(struct fhandle *u_fhp, struct statfs *buf) 4781 */ 4782 int 4783 sys_fhstatfs(struct fhstatfs_args *uap) 4784 { 4785 struct thread *td = curthread; 4786 struct proc *p = td->td_proc; 4787 struct statfs *sp; 4788 struct mount *mp; 4789 struct vnode *vp; 4790 struct statfs sb; 4791 char *fullpath, *freepath; 4792 fhandle_t fh; 4793 int error; 4794 4795 /* 4796 * Must be super user 4797 */ 4798 if ((error = priv_check(td, PRIV_ROOT))) 4799 return (error); 4800 4801 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4802 return (error); 4803 4804 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4805 error = ESTALE; 4806 goto done; 4807 } 4808 if (p != NULL && !chroot_visible_mnt(mp, p)) { 
4809 error = ESTALE; 4810 goto done; 4811 } 4812 4813 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp)) != 0) 4814 goto done; 4815 mp = vp->v_mount; 4816 sp = &mp->mnt_stat; 4817 vput(vp); 4818 if ((error = VFS_STATFS(mp, sp, td->td_ucred)) != 0) 4819 goto done; 4820 4821 error = mount_path(p, mp, &fullpath, &freepath); 4822 if (error) 4823 goto done; 4824 bzero(sp->f_mntonname, sizeof(sp->f_mntonname)); 4825 strlcpy(sp->f_mntonname, fullpath, sizeof(sp->f_mntonname)); 4826 kfree(freepath, M_TEMP); 4827 4828 sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; 4829 if (priv_check(td, PRIV_ROOT)) { 4830 bcopy(sp, &sb, sizeof(sb)); 4831 sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; 4832 sp = &sb; 4833 } 4834 error = copyout(sp, uap->buf, sizeof(*sp)); 4835 done: 4836 if (mp) 4837 mount_drop(mp); 4838 4839 return (error); 4840 } 4841 4842 /* 4843 * fhstatvfs_args(struct fhandle *u_fhp, struct statvfs *buf) 4844 */ 4845 int 4846 sys_fhstatvfs(struct fhstatvfs_args *uap) 4847 { 4848 struct thread *td = curthread; 4849 struct proc *p = td->td_proc; 4850 struct statvfs *sp; 4851 struct mount *mp; 4852 struct vnode *vp; 4853 fhandle_t fh; 4854 int error; 4855 4856 /* 4857 * Must be super user 4858 */ 4859 if ((error = priv_check(td, PRIV_ROOT))) 4860 return (error); 4861 4862 if ((error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t))) != 0) 4863 return (error); 4864 4865 if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) { 4866 error = ESTALE; 4867 goto done; 4868 } 4869 if (p != NULL && !chroot_visible_mnt(mp, p)) { 4870 error = ESTALE; 4871 goto done; 4872 } 4873 4874 if ((error = VFS_FHTOVP(mp, NULL, &fh.fh_fid, &vp))) 4875 goto done; 4876 mp = vp->v_mount; 4877 sp = &mp->mnt_vstat; 4878 vput(vp); 4879 if ((error = VFS_STATVFS(mp, sp, td->td_ucred)) != 0) 4880 goto done; 4881 4882 sp->f_flag = 0; 4883 if (mp->mnt_flag & MNT_RDONLY) 4884 sp->f_flag |= ST_RDONLY; 4885 if (mp->mnt_flag & MNT_NOSUID) 4886 sp->f_flag |= ST_NOSUID; 4887 error = copyout(sp, uap->buf, sizeof(*sp)); 4888 done: 4889 
if (mp) 4890 mount_drop(mp); 4891 return (error); 4892 } 4893 4894 4895 /* 4896 * Syscall to push extended attribute configuration information into the 4897 * VFS. Accepts a path, which it converts to a mountpoint, as well as 4898 * a command (int cmd), and attribute name and misc data. For now, the 4899 * attribute name is left in userspace for consumption by the VFS_op. 4900 * It will probably be changed to be copied into sysspace by the 4901 * syscall in the future, once issues with various consumers of the 4902 * attribute code have raised their hands. 4903 * 4904 * Currently this is used only by UFS Extended Attributes. 4905 */ 4906 int 4907 sys_extattrctl(struct extattrctl_args *uap) 4908 { 4909 struct nlookupdata nd; 4910 struct vnode *vp; 4911 char attrname[EXTATTR_MAXNAMELEN]; 4912 int error; 4913 size_t size; 4914 4915 attrname[0] = 0; 4916 vp = NULL; 4917 error = 0; 4918 4919 if (error == 0 && uap->filename) { 4920 error = nlookup_init(&nd, uap->filename, UIO_USERSPACE, 4921 NLC_FOLLOW); 4922 if (error == 0) 4923 error = nlookup(&nd); 4924 if (error == 0) 4925 error = cache_vref(&nd.nl_nch, nd.nl_cred, &vp); 4926 nlookup_done(&nd); 4927 } 4928 4929 if (error == 0 && uap->attrname) { 4930 error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, 4931 &size); 4932 } 4933 4934 if (error == 0) { 4935 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4936 if (error == 0) 4937 error = nlookup(&nd); 4938 if (error == 0) 4939 error = ncp_writechk(&nd.nl_nch); 4940 if (error == 0) { 4941 error = VFS_EXTATTRCTL(nd.nl_nch.mount, uap->cmd, vp, 4942 uap->attrnamespace, 4943 uap->attrname, nd.nl_cred); 4944 } 4945 nlookup_done(&nd); 4946 } 4947 4948 return (error); 4949 } 4950 4951 /* 4952 * Syscall to get a named extended attribute on a file or directory. 
4953 */ 4954 int 4955 sys_extattr_set_file(struct extattr_set_file_args *uap) 4956 { 4957 char attrname[EXTATTR_MAXNAMELEN]; 4958 struct nlookupdata nd; 4959 struct vnode *vp; 4960 struct uio auio; 4961 struct iovec aiov; 4962 int error; 4963 4964 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 4965 if (error) 4966 return (error); 4967 4968 vp = NULL; 4969 4970 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 4971 if (error == 0) 4972 error = nlookup(&nd); 4973 if (error == 0) 4974 error = ncp_writechk(&nd.nl_nch); 4975 if (error == 0) 4976 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 4977 if (error) { 4978 nlookup_done(&nd); 4979 return (error); 4980 } 4981 4982 bzero(&auio, sizeof(auio)); 4983 aiov.iov_base = uap->data; 4984 aiov.iov_len = uap->nbytes; 4985 auio.uio_iov = &aiov; 4986 auio.uio_iovcnt = 1; 4987 auio.uio_offset = 0; 4988 auio.uio_resid = uap->nbytes; 4989 auio.uio_rw = UIO_WRITE; 4990 auio.uio_td = curthread; 4991 4992 error = VOP_SETEXTATTR(vp, uap->attrnamespace, attrname, 4993 &auio, nd.nl_cred); 4994 4995 vput(vp); 4996 nlookup_done(&nd); 4997 return (error); 4998 } 4999 5000 /* 5001 * Syscall to get a named extended attribute on a file or directory. 
5002 */ 5003 int 5004 sys_extattr_get_file(struct extattr_get_file_args *uap) 5005 { 5006 char attrname[EXTATTR_MAXNAMELEN]; 5007 struct nlookupdata nd; 5008 struct uio auio; 5009 struct iovec aiov; 5010 struct vnode *vp; 5011 int error; 5012 5013 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5014 if (error) 5015 return (error); 5016 5017 vp = NULL; 5018 5019 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5020 if (error == 0) 5021 error = nlookup(&nd); 5022 if (error == 0) 5023 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_SHARED, &vp); 5024 if (error) { 5025 nlookup_done(&nd); 5026 return (error); 5027 } 5028 5029 bzero(&auio, sizeof(auio)); 5030 aiov.iov_base = uap->data; 5031 aiov.iov_len = uap->nbytes; 5032 auio.uio_iov = &aiov; 5033 auio.uio_iovcnt = 1; 5034 auio.uio_offset = 0; 5035 auio.uio_resid = uap->nbytes; 5036 auio.uio_rw = UIO_READ; 5037 auio.uio_td = curthread; 5038 5039 error = VOP_GETEXTATTR(vp, uap->attrnamespace, attrname, 5040 &auio, nd.nl_cred); 5041 uap->sysmsg_result = uap->nbytes - auio.uio_resid; 5042 5043 vput(vp); 5044 nlookup_done(&nd); 5045 return(error); 5046 } 5047 5048 /* 5049 * Syscall to delete a named extended attribute from a file or directory. 5050 * Accepts attribute name. The real work happens in VOP_SETEXTATTR(). 
5051 */ 5052 int 5053 sys_extattr_delete_file(struct extattr_delete_file_args *uap) 5054 { 5055 char attrname[EXTATTR_MAXNAMELEN]; 5056 struct nlookupdata nd; 5057 struct vnode *vp; 5058 int error; 5059 5060 error = copyin(uap->attrname, attrname, EXTATTR_MAXNAMELEN); 5061 if (error) 5062 return(error); 5063 5064 error = nlookup_init(&nd, uap->path, UIO_USERSPACE, NLC_FOLLOW); 5065 if (error == 0) 5066 error = nlookup(&nd); 5067 if (error == 0) 5068 error = ncp_writechk(&nd.nl_nch); 5069 if (error == 0) { 5070 error = cache_vget(&nd.nl_nch, nd.nl_cred, LK_EXCLUSIVE, &vp); 5071 if (error == 0) { 5072 error = VOP_SETEXTATTR(vp, uap->attrnamespace, 5073 attrname, NULL, nd.nl_cred); 5074 vput(vp); 5075 } 5076 } 5077 nlookup_done(&nd); 5078 return(error); 5079 } 5080 5081 /* 5082 * Determine if the mount is visible to the process. 5083 */ 5084 static int 5085 chroot_visible_mnt(struct mount *mp, struct proc *p) 5086 { 5087 struct nchandle nch; 5088 5089 /* 5090 * Traverse from the mount point upwards. If we hit the process 5091 * root then the mount point is visible to the process. 5092 */ 5093 nch = mp->mnt_ncmountpt; 5094 while (nch.ncp) { 5095 if (nch.mount == p->p_fd->fd_nrdir.mount && 5096 nch.ncp == p->p_fd->fd_nrdir.ncp) { 5097 return(1); 5098 } 5099 if (nch.ncp == nch.mount->mnt_ncmountpt.ncp) { 5100 nch = nch.mount->mnt_ncmounton; 5101 } else { 5102 nch.ncp = nch.ncp->nc_parent; 5103 } 5104 } 5105 5106 /* 5107 * If the mount point is not visible to the process, but the 5108 * process root is in a subdirectory of the mount, return 5109 * TRUE anyway. 5110 */ 5111 if (p->p_fd->fd_nrdir.mount == mp) 5112 return(1); 5113 5114 return(0); 5115 } 5116 5117