/*
 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * nlookup() is the 'new' namei interface. Rather than return directory and
 * leaf vnodes (in various lock states) the new interface instead deals in
 * namecache records.
Namecache records may represent both a positive or 38 * a negative hit. The namespace is locked via the namecache record instead 39 * of via the vnode, and only the leaf namecache record (representing the 40 * filename) needs to be locked. 41 * 42 * This greatly improves filesystem parallelism and is a huge simplification 43 * of the API verses the old vnode locking / namei scheme. 44 * 45 * Filesystems must actively control the caching aspects of the namecache, 46 * and since namecache pointers are used as handles they are non-optional 47 * even for filesystems which do not generally wish to cache things. It is 48 * intended that a separate cache coherency API will be constructed to handle 49 * these issues. 50 */ 51 52 #include "opt_ktrace.h" 53 54 #include <sys/param.h> 55 #include <sys/systm.h> 56 #include <sys/uio.h> 57 #include <sys/kernel.h> 58 #include <sys/vnode.h> 59 #include <sys/mount.h> 60 #include <sys/filedesc.h> 61 #include <sys/proc.h> 62 #include <sys/namei.h> 63 #include <sys/nlookup.h> 64 #include <sys/malloc.h> 65 #include <sys/stat.h> 66 #include <sys/objcache.h> 67 #include <sys/file.h> 68 #include <sys/kcollect.h> 69 70 #ifdef KTRACE 71 #include <sys/ktrace.h> 72 #endif 73 74 static int naccess(struct nchandle *nch, int vmode, struct ucred *cred, 75 int *stickyp); 76 77 /* 78 * unmount operations flag NLC_IGNBADDIR in order to allow the 79 * umount to successfully issue a nlookup() on the path in order 80 * to extract the mount point. Allow certain errors through. 81 */ 82 static __inline 83 int 84 keeperror(struct nlookupdata *nd, int error) 85 { 86 if (error) { 87 if ((nd->nl_flags & NLC_IGNBADDIR) == 0 || 88 (error != EIO && error != EBADRPC && error != ESTALE)) { 89 return 1; 90 } 91 } 92 return 0; 93 } 94 95 /* 96 * Initialize a nlookup() structure, early error return for copyin faults 97 * or a degenerate empty string (which is not allowed). 
98 * 99 * The first process proc0's credentials are used if the calling thread 100 * is not associated with a process context. 101 * 102 * MPSAFE 103 */ 104 int 105 nlookup_init(struct nlookupdata *nd, 106 const char *path, enum uio_seg seg, int flags) 107 { 108 size_t pathlen; 109 struct proc *p; 110 thread_t td; 111 int error; 112 113 td = curthread; 114 p = td->td_proc; 115 116 /* 117 * note: the pathlen set by copy*str() includes the terminating \0. 118 */ 119 bzero(nd, sizeof(struct nlookupdata)); 120 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 121 nd->nl_flags |= NLC_HASBUF; 122 if (seg == UIO_SYSSPACE) 123 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 124 else 125 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 126 127 /* 128 * Don't allow empty pathnames. 129 * POSIX.1 requirement: "" is not a vaild file name. 130 */ 131 if (error == 0 && pathlen <= 1) 132 error = ENOENT; 133 134 if (error == 0) { 135 if (p && p->p_fd) { 136 cache_copy_ncdir(p, &nd->nl_nch); 137 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 138 if (p->p_fd->fd_njdir.ncp) 139 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 140 nd->nl_cred = td->td_ucred; 141 nd->nl_flags |= NLC_BORROWCRED | NLC_NCDIR; 142 } else { 143 cache_copy(&rootnch, &nd->nl_nch); 144 cache_copy(&nd->nl_nch, &nd->nl_rootnch); 145 cache_copy(&nd->nl_nch, &nd->nl_jailnch); 146 nd->nl_cred = proc0.p_ucred; 147 nd->nl_flags |= NLC_BORROWCRED; 148 } 149 nd->nl_td = td; 150 nd->nl_flags |= flags; 151 } else { 152 nlookup_done(nd); 153 } 154 return(error); 155 } 156 157 158 /* 159 * nlookup_init() for "at" family of syscalls. 160 * 161 * Works similarly to nlookup_init() but if path is relative and fd is not 162 * AT_FDCWD, path is interpreted relative to the directory pointed to by fd. 163 * In this case, the file entry pointed to by fd is ref'ed and returned in 164 * *fpp. 
165 * 166 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 167 * and release the ref to the file entry. 168 */ 169 int 170 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 171 const char *path, enum uio_seg seg, int flags) 172 { 173 struct thread *td = curthread; 174 struct file* fp; 175 struct vnode *vp; 176 int error; 177 178 *fpp = NULL; 179 180 if ((error = nlookup_init(nd, path, seg, flags)) != 0) { 181 return (error); 182 } 183 184 if (nd->nl_path[0] != '/' && fd != AT_FDCWD) { 185 if ((error = holdvnode(td, fd, &fp)) != 0) 186 goto done; 187 vp = (struct vnode*)fp->f_data; 188 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 189 fdrop(fp); 190 fp = NULL; 191 error = ENOTDIR; 192 goto done; 193 } 194 if (nd->nl_flags & NLC_NCDIR) { 195 cache_drop_ncdir(&nd->nl_nch); 196 nd->nl_flags &= ~NLC_NCDIR; 197 } else { 198 cache_drop(&nd->nl_nch); 199 } 200 cache_copy(&fp->f_nchandle, &nd->nl_nch); 201 *fpp = fp; 202 } 203 204 205 done: 206 if (error) 207 nlookup_done(nd); 208 return (error); 209 210 } 211 212 /* 213 * This works similarly to nlookup_init() but does not assume a process 214 * context. rootnch is always chosen for the root directory and the cred 215 * and starting directory are supplied in arguments. 216 */ 217 int 218 nlookup_init_raw(struct nlookupdata *nd, 219 const char *path, enum uio_seg seg, int flags, 220 struct ucred *cred, struct nchandle *ncstart) 221 { 222 size_t pathlen; 223 thread_t td; 224 int error; 225 226 td = curthread; 227 228 bzero(nd, sizeof(struct nlookupdata)); 229 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 230 nd->nl_flags |= NLC_HASBUF; 231 if (seg == UIO_SYSSPACE) 232 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 233 else 234 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 235 236 /* 237 * Don't allow empty pathnames. 238 * POSIX.1 requirement: "" is not a vaild file name. 
239 */ 240 if (error == 0 && pathlen <= 1) 241 error = ENOENT; 242 243 if (error == 0) { 244 cache_copy(ncstart, &nd->nl_nch); 245 cache_copy(&rootnch, &nd->nl_rootnch); 246 cache_copy(&rootnch, &nd->nl_jailnch); 247 nd->nl_cred = crhold(cred); 248 nd->nl_td = td; 249 nd->nl_flags |= flags; 250 } else { 251 nlookup_done(nd); 252 } 253 return(error); 254 } 255 256 /* 257 * This works similarly to nlookup_init_raw() but does not rely 258 * on rootnch being initialized yet. 259 */ 260 int 261 nlookup_init_root(struct nlookupdata *nd, 262 const char *path, enum uio_seg seg, int flags, 263 struct ucred *cred, struct nchandle *ncstart, 264 struct nchandle *ncroot) 265 { 266 size_t pathlen; 267 thread_t td; 268 int error; 269 270 td = curthread; 271 272 bzero(nd, sizeof(struct nlookupdata)); 273 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 274 nd->nl_flags |= NLC_HASBUF; 275 if (seg == UIO_SYSSPACE) 276 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 277 else 278 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 279 280 /* 281 * Don't allow empty pathnames. 282 * POSIX.1 requirement: "" is not a vaild file name. 283 */ 284 if (error == 0 && pathlen <= 1) 285 error = ENOENT; 286 287 if (error == 0) { 288 cache_copy(ncstart, &nd->nl_nch); 289 cache_copy(ncroot, &nd->nl_rootnch); 290 cache_copy(ncroot, &nd->nl_jailnch); 291 nd->nl_cred = crhold(cred); 292 nd->nl_td = td; 293 nd->nl_flags |= flags; 294 } else { 295 nlookup_done(nd); 296 } 297 return(error); 298 } 299 300 #if 0 301 /* 302 * Set a different credential; this credential will be used by future 303 * operations performed on nd.nl_open_vp and nlookupdata structure. 
*/
void
nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred)
{
	KKASSERT(nd->nl_cred != NULL);

	if (nd->nl_cred != cred) {
		/* take our own hold on the new cred before letting go of the old */
		cred = crhold(cred);
		/* only a cred we hold ourselves (not a borrowed one) is freed */
		if ((nd->nl_flags & NLC_BORROWCRED) == 0)
			crfree(nd->nl_cred);
		nd->nl_flags &= ~NLC_BORROWCRED;
		nd->nl_cred = cred;
	}
}
#endif

/*
 * Cleanup a nlookupdata structure after we are through with it. This may
 * be called on any nlookupdata structure initialized with nlookup_init().
 * Calling nlookup_done() is mandatory in all cases except where nlookup_init()
 * returns an error, even if as a consumer you believe you have taken all
 * dynamic elements out of the nlookupdata structure.
 *
 * Every element is tested before it is released and is cleared afterwards,
 * and nl_flags ends up zero.
 */
void
nlookup_done(struct nlookupdata *nd)
{
	/*
	 * Current position: unlock it if we still hold the lock, then drop
	 * the reference using the variant matching how it was obtained.
	 */
	if (nd->nl_nch.ncp) {
		if (nd->nl_flags & NLC_NCPISLOCKED) {
			nd->nl_flags &= ~NLC_NCPISLOCKED;
			cache_unlock(&nd->nl_nch);
		}
		if (nd->nl_flags & NLC_NCDIR) {
			cache_drop_ncdir(&nd->nl_nch);
			nd->nl_flags &= ~NLC_NCDIR;
		} else {
			cache_drop(&nd->nl_nch);	/* NULL's out the nch */
		}
	}

	/* root and jail directory references */
	if (nd->nl_rootnch.ncp)
		cache_drop_and_cache(&nd->nl_rootnch);
	if (nd->nl_jailnch.ncp)
		cache_drop_and_cache(&nd->nl_jailnch);

	/* path buffer, returned to namei_oc only if NLC_HASBUF says we own one */
	if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) {
		objcache_put(namei_oc, nd->nl_path);
		nd->nl_path = NULL;
	}

	/* credentials: borrowed creds are forgotten, held creds are freed */
	if (nd->nl_cred) {
		if ((nd->nl_flags & NLC_BORROWCRED) == 0)
			crfree(nd->nl_cred);
		nd->nl_cred = NULL;
		nd->nl_flags &= ~NLC_BORROWCRED;
	}

	/* an open vnode left in nd is unlocked (if NLC_LOCKVP) and closed */
	if (nd->nl_open_vp) {
		if (nd->nl_flags & NLC_LOCKVP) {
			vn_unlock(nd->nl_open_vp);
			nd->nl_flags &= ~NLC_LOCKVP;
		}
		vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL);
		nd->nl_open_vp = NULL;
	}

	/* referenced directory vnode (set by nlookup() for NLC_REFDVP) */
	if (nd->nl_dvp) {
		vrele(nd->nl_dvp);
		nd->nl_dvp = NULL;
	}
	nd->nl_flags = 0;	/* clear remaining flags (just clear everything) */
}

/*
 * Works similarly to nlookup_done() when nd initialized with
nlookup_init_at(). 374 */ 375 void 376 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 377 { 378 nlookup_done(nd); 379 if (fp != NULL) 380 fdrop(fp); 381 } 382 383 void 384 nlookup_zero(struct nlookupdata *nd) 385 { 386 bzero(nd, sizeof(struct nlookupdata)); 387 } 388 389 /* 390 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 391 * if an error occured. 392 * 393 * Note that the returned ncp is not checked for permissions, though VEXEC 394 * is checked on the directory path leading up to the result. The caller 395 * must call naccess() to check the permissions of the returned leaf. 396 */ 397 struct nchandle 398 nlookup_simple(const char *str, enum uio_seg seg, 399 int niflags, int *error) 400 { 401 struct nlookupdata nd; 402 struct nchandle nch; 403 404 *error = nlookup_init(&nd, str, seg, niflags); 405 if (*error == 0) { 406 if ((*error = nlookup(&nd)) == 0) { 407 nch = nd.nl_nch; /* keep hold ref from structure */ 408 cache_zero(&nd.nl_nch); /* and NULL out */ 409 } else { 410 cache_zero(&nch); 411 } 412 nlookup_done(&nd); 413 } else { 414 cache_zero(&nch); 415 } 416 return(nch); 417 } 418 419 /* 420 * Returns non-zero if the path element is the last element 421 */ 422 static 423 int 424 islastelement(const char *ptr) 425 { 426 while (*ptr == '/') 427 ++ptr; 428 return (*ptr == 0); 429 } 430 431 /* 432 * Returns non-zero if we need to lock the namecache element 433 * exclusively. Unless otherwise requested by NLC_SHAREDLOCK, 434 * the last element of the namecache lookup will be locked 435 * exclusively. 436 * 437 * NOTE: Even if we return on-zero, an unresolved namecache record 438 * will always be locked exclusively. 439 */ 440 static __inline 441 int 442 wantsexcllock(struct nlookupdata *nd, const char *ptr) 443 { 444 if ((nd->nl_flags & NLC_SHAREDLOCK) == 0) 445 return(islastelement(ptr)); 446 return(0); 447 } 448 449 450 /* 451 * Do a generic nlookup. 
Note that the passed nd is not nlookup_done()'d
 * on return, even if an error occurs. If no error occurs or NLC_CREATE
 * is flagged and ENOENT is returned, then the returned nl_nch is always
 * referenced and locked exclusively.
 *
 * WARNING: For any general error other than ENOENT w/NLC_CREATE, the
 *	    resulting nl_nch may or may not be locked and if locked
 *	    might be locked either shared or exclusive.
 *
 * Intermediate directory elements, including the current directory, require
 * execute (search) permission. nlookup does not examine the access
 * permissions on the returned element.
 *
 * If NLC_CREATE is set the last directory must allow node creation,
 * and an error code of 0 will be returned for a non-existent
 * target (not ENOENT).
 *
 * If NLC_RENAME_DST is set the last directory must allow node deletion,
 * plus the sticky check is made, and an error code of 0 will be returned
 * for a non-existent target (not ENOENT).
 *
 * If NLC_DELETE is set the last directory must allow node deletion,
 * plus the sticky check is made.
 *
 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
 * of the returned entry. The vnode will be referenced, but not locked,
 * and will be released by nlookup_done() along with everything else.
 *
 * NOTE: As an optimization we attempt to obtain a shared namecache lock
 *	 on any intermediate elements. On success, the returned element
 *	 is ALWAYS locked exclusively.
 */
int
nlookup(struct nlookupdata *nd)
{
	globaldata_t gd = mycpu;
	struct nlcomponent nlc;
	struct nchandle nch;
	struct nchandle par;
	struct nchandle nctmp;
	struct mount *mp;
	struct vnode *hvp;		/* hold to prevent recyclement */
	int wasdotordotdot;		/* 0 = normal, 1 = ".", 2 = ".." */
	char *ptr;
	char *nptr;
	int error;
	int len;
	int dflags;
	int hit = 1;			/* cleared when an unresolved ncp is seen */
	int saveflag = nd->nl_flags & ~NLC_NCDIR; /* restored on retry */
	boolean_t doretry = FALSE;	/* set when a resolve returned ESTALE */
	boolean_t inretry = FALSE;	/* TRUE during the second pass */

nlookup_start:
#ifdef KTRACE
	if (KTRPOINT(nd->nl_td, KTR_NAMEI))
		ktrnamei(nd->nl_td->td_lwp, nd->nl_path);
#endif
	bzero(&nlc, sizeof(nlc));

	/*
	 * Setup for the loop. The current working namecache element is
	 * always at least referenced. We lock it as required, but always
	 * return a locked, resolved namecache entry.
	 */
	nd->nl_loopcnt = 0;
	if (nd->nl_dvp) {
		vrele(nd->nl_dvp);
		nd->nl_dvp = NULL;
	}
	ptr = nd->nl_path;

	/*
	 * Loop on the path components. At the top of the loop nd->nl_nch
	 * is ref'd and unlocked and represents our current position.
	 */
	for (;;) {
		/*
		 * Make sure nl_nch is locked so we can access the vnode, resolution
		 * state, etc.
		 */
		if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) {
			nd->nl_flags |= NLC_NCPISLOCKED;
			cache_lock_maybe_shared(&nd->nl_nch, wantsexcllock(nd, ptr));
		}

		/*
		 * Check if the root directory should replace the current
		 * directory. This is done at the start of a translation
		 * or after a symbolic link has been found. In other cases
		 * ptr will never be pointing at a '/'.
		 */
		if (*ptr == '/') {
			do {
				++ptr;
			} while (*ptr == '/');
			cache_unlock(&nd->nl_nch);
			cache_get_maybe_shared(&nd->nl_rootnch, &nch,
					       wantsexcllock(nd, ptr));
			if (nd->nl_flags & NLC_NCDIR) {
				cache_drop_ncdir(&nd->nl_nch);
				nd->nl_flags &= ~NLC_NCDIR;
			} else {
				cache_drop(&nd->nl_nch);
			}
			nd->nl_nch = nch;		/* remains locked */

			/*
			 * Fast-track termination. There is no parent directory of
			 * the root in the same mount from the point of view of
			 * the caller so return EACCES if NLC_REFDVP is specified,
			 * and EEXIST if NLC_CREATE is also specified.
			 * e.g. 'rmdir /' or 'mkdir /' are not allowed.
			 */
			if (*ptr == 0) {
				if (nd->nl_flags & NLC_REFDVP)
					error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES;
				else
					error = 0;
				break;
			}
			continue;
		}

		/*
		 * Pre-calculate next path component so we can check whether the
		 * current component directory is the last directory in the path
		 * or not.
		 */
		for (nptr = ptr; *nptr && *nptr != '/'; ++nptr)
			;

		/*
		 * Check directory search permissions (nd->nl_nch is locked & refd).
		 * This will load dflags to obtain directory-special permissions to
		 * be checked along with the last component.
		 *
		 * We only need to pass-in &dflags for the second-to-last component.
		 * Optimize by passing-in NULL for any prior components, which may
		 * allow the code to bypass the naccess() call.
		 */
		dflags = 0;
		if (*nptr == '/')
			error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, NULL);
		else
			error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags);
		if (error) {
			if (keeperror(nd, error))
				break;
			error = 0;
		}

		/*
		 * Extract the next (or last) path component. Path components are
		 * limited to 255 characters.
		 */
		nlc.nlc_nameptr = ptr;
		nlc.nlc_namelen = nptr - ptr;
		ptr = nptr;
		if (nlc.nlc_namelen >= 256) {
			error = ENAMETOOLONG;
			break;
		}

		/*
		 * Lookup the path component in the cache, creating an unresolved
		 * entry if necessary. We have to handle "." and ".." as special
		 * cases.
		 *
		 * When handling ".." we have to detect a traversal back through a
		 * mount point. If we are at the root, ".." just returns the root.
		 *
		 * When handling "." or ".." we also have to recalculate dflags
		 * since our dflags will be for some sub-directory instead of the
		 * parent dir.
		 *
		 * This subsection returns a locked, refd 'nch' unless it errors out,
		 * and an unlocked but still ref'd nd->nl_nch.
		 *
		 * The namecache topology is not allowed to be disconnected, so
		 * encountering a NULL parent will generate EINVAL. This typically
		 * occurs when a directory is removed out from under a process.
		 *
		 * WARNING! The unlocking of nd->nl_nch is sensitive code.
		 */
		KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);

		if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') {
			cache_unlock(&nd->nl_nch);
			nd->nl_flags &= ~NLC_NCPISLOCKED;
			cache_get_maybe_shared(&nd->nl_nch, &nch, wantsexcllock(nd, ptr));
			wasdotordotdot = 1;
		} else if (nlc.nlc_namelen == 2 &&
			   nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') {
			if (nd->nl_nch.mount == nd->nl_rootnch.mount &&
			    nd->nl_nch.ncp == nd->nl_rootnch.ncp
			) {
				/*
				 * ".." at the root returns the root
				 */
				cache_unlock(&nd->nl_nch);
				nd->nl_flags &= ~NLC_NCPISLOCKED;
				cache_get_maybe_shared(&nd->nl_nch, &nch,
						       wantsexcllock(nd, ptr));
			} else {
				/*
				 * Locate the parent ncp. If we are at the root of a
				 * filesystem mount we have to skip to the mounted-on
				 * point in the underlying filesystem.
				 *
				 * Expect the parent to always be good since the
				 * mountpoint doesn't go away. XXX hack. cache_get()
				 * requires the ncp to already have a ref as a safety.
				 *
				 * However, a process which has been broken out of a chroot
				 * will wind up with a NULL parent if it tries to '..' above
				 * the real root, deal with the case. Note that this does
				 * not protect us from a jail breakout, it just stops a panic
				 * if the jail-broken process tries to '..' past the real
				 * root.
				 */
				nctmp = nd->nl_nch;
				while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) {
					nctmp = nctmp.mount->mnt_ncmounton;
					if (nctmp.ncp == NULL)
						break;
				}
				if (nctmp.ncp == NULL) {
					if (curthread->td_proc) {
						kprintf("vfs_nlookup: '..' traverse broke "
							"jail: pid %d (%s)\n",
							curthread->td_proc->p_pid,
							curthread->td_comm);
					}
					nctmp = nd->nl_rootnch;
				} else {
					nctmp.ncp = nctmp.ncp->nc_parent;
				}
				cache_hold(&nctmp);
				cache_unlock(&nd->nl_nch);
				nd->nl_flags &= ~NLC_NCPISLOCKED;
				cache_get_maybe_shared(&nctmp, &nch, wantsexcllock(nd, ptr));
				cache_drop(&nctmp);		/* NOTE: zero's nctmp */
			}
			wasdotordotdot = 2;
		} else {
			/*
			 * Must unlock nl_nch when traversing down the path. However,
			 * the child ncp has not yet been found/created and the parent's
			 * child list might be empty. Thus releasing the lock can
			 * allow a race whereby the parent ncp's vnode is recycled.
			 * This case can occur especially when maxvnodes is set very low.
			 *
			 * We need the parent's ncp to remain resolved for all normal
			 * filesystem activities, so we vhold() the vp during the lookup
			 * to prevent recyclement due to vnlru / maxvnodes.
			 *
			 * If we race an unlink or rename the ncp might be marked
			 * DESTROYED after resolution, requiring a retry.
			 */
			if ((hvp = nd->nl_nch.ncp->nc_vp) != NULL)
				vhold(hvp);
			cache_unlock(&nd->nl_nch);
			nd->nl_flags &= ~NLC_NCPISLOCKED;
			error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc,
							   wantsexcllock(nd, ptr), &nch);
			if (error == EWOULDBLOCK) {
				/*
				 * The maybe-shared lookup declined; fall back to
				 * cache_nlookup() and resolve the entry here,
				 * looking it up again for as long as the resolve
				 * returns EAGAIN or the ncp is marked DESTROYED.
				 */
				nch = cache_nlookup(&nd->nl_nch, &nlc);
				if (nch.ncp->nc_flag & NCF_UNRESOLVED)
					hit = 0;
				for (;;) {
					error = cache_resolve(&nch, nd->nl_cred);
					if (error != EAGAIN &&
					    (nch.ncp->nc_flag & NCF_DESTROYED) == 0) {
						if (error == ESTALE) {
							if (!inretry)
								error = ENOENT;
							doretry = TRUE;
						}
						break;
					}
					kprintf("[diagnostic] nlookup: relookup %*.*s\n",
						nch.ncp->nc_nlen, nch.ncp->nc_nlen,
						nch.ncp->nc_name);
					cache_put(&nch);
					nch = cache_nlookup(&nd->nl_nch, &nlc);
				}
			}
			if (hvp)
				vdrop(hvp);
			wasdotordotdot = 0;
		}

		/*
		 * If the last component was "." or ".." our dflags no longer
		 * represents the parent directory and we have to explicitly
		 * look it up.
		 *
		 * Expect the parent to be good since nch is locked.
		 */
		if (wasdotordotdot && error == 0) {
			dflags = 0;
			if ((par.ncp = nch.ncp->nc_parent) != NULL) {
				par.mount = nch.mount;
				cache_hold(&par);
				cache_lock_maybe_shared(&par, wantsexcllock(nd, ptr));
				error = naccess(&par, 0, nd->nl_cred, &dflags);
				cache_put(&par);
				if (error) {
					if (!keeperror(nd, error))
						error = 0;
				}
			}
		}

		/*
		 * [end of subsection]
		 *
		 * nch is locked and referenced.
		 * nd->nl_nch is unlocked and referenced.
		 *
		 * nl_nch must be unlocked or we could chain lock to the root
		 * if a resolve gets stuck (e.g. in NFS).
		 */
		KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);

		/*
		 * Resolve the namespace if necessary. The ncp returned by
		 * cache_nlookup() is referenced and locked.
		 *
		 * XXX neither '.' nor '..' should return EAGAIN since they were
		 * previously resolved and thus cannot be newly created ncp's.
		 */
		if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
			hit = 0;
			error = cache_resolve(&nch, nd->nl_cred);
			if (error == ESTALE) {
				if (!inretry)
					error = ENOENT;
				doretry = TRUE;
			}
			KKASSERT(error != EAGAIN);
		} else {
			error = nch.ncp->nc_error;
		}

		/*
		 * Early completion. ENOENT is not an error if this is the last
		 * component and NLC_CREATE or NLC_RENAME (rename target) was
		 * requested. Note that ncp->nc_error is left as ENOENT in that
		 * case, which we check later on.
		 *
		 * Also handle invalid '.' or '..' components terminating a path
		 * for a create/rename/delete. The standard requires this and pax
		 * pretty stupidly depends on it.
		 */
		if (islastelement(ptr)) {
			if (error == ENOENT &&
			    (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST))
			) {
				if (nd->nl_flags & NLC_NFS_RDONLY) {
					error = EROFS;
				} else {
					error = naccess(&nch, nd->nl_flags | dflags,
							nd->nl_cred, NULL);
				}
			}
			if (error == 0 && wasdotordotdot &&
			    (nd->nl_flags & (NLC_CREATE | NLC_DELETE |
					     NLC_RENAME_SRC | NLC_RENAME_DST))) {
				/*
				 * POSIX junk
				 */
				if (nd->nl_flags & NLC_CREATE)
					error = EEXIST;
				else if (nd->nl_flags & NLC_DELETE)
					error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY;
				else
					error = EINVAL;
			}
		}

		/*
		 * Early completion on error.
		 */
		if (error) {
			cache_put(&nch);
			break;
		}

		/*
		 * If the element is a symlink and it is either not the last
		 * element or it is the last element and we are allowed to
		 * follow symlinks, resolve the symlink.
		 */
		if ((nch.ncp->nc_flag & NCF_ISSYMLINK) &&
		    (*ptr || (nd->nl_flags & NLC_FOLLOW))
		) {
			if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
				error = ELOOP;
				cache_put(&nch);
				break;
			}
			error = nreadsymlink(nd, &nch, &nlc);
			cache_put(&nch);
			if (error)
				break;

			/*
			 * Concatenate trailing path elements onto the returned symlink.
			 * Note that if the path component (ptr) is not exhausted, it
			 * will begin with a '/', so we do not have to add another one.
			 *
			 * The symlink may not be empty.
			 */
			len = strlen(ptr);
			if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) {
				error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT;
				objcache_put(namei_oc, nlc.nlc_nameptr);
				break;
			}
			bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1);
			/* the symlink buffer replaces the path buffer held by nd */
			if (nd->nl_flags & NLC_HASBUF)
				objcache_put(namei_oc, nd->nl_path);
			nd->nl_path = nlc.nlc_nameptr;
			nd->nl_flags |= NLC_HASBUF;
			ptr = nd->nl_path;

			/*
			 * Go back up to the top to resolve any initial '/'s in the
			 * symlink.
			 */
			continue;
		}

		/*
		 * If the element is a directory and we are crossing a mount point,
		 * Locate the mount.
		 */
		while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
		       (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 &&
		       (mp = cache_findmount(&nch)) != NULL
		) {
			struct vnode *tdp;
			int vfs_do_busy = 0;

			/*
			 * VFS must be busied before the namecache entry is locked,
			 * but we don't want to waste time calling vfs_busy() if the
			 * mount point is already resolved.
			 */
again:
			cache_put(&nch);
			if (vfs_do_busy) {
				while (vfs_busy(mp, 0)) {
					if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
						kprintf("nlookup: warning umount race avoided\n");
						cache_dropmount(mp);
						error = EBUSY;
						vfs_do_busy = 0;
						goto double_break;
					}
				}
			}
			cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch,
					       wantsexcllock(nd, ptr));

			if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
				/*
				 * Unresolved and the mount is not busied yet:
				 * go around again with the mount busied.
				 */
				if (vfs_do_busy == 0) {
					vfs_do_busy = 1;
					goto again;
				}
				error = VFS_ROOT(mp, &tdp);
				vfs_unbusy(mp);
				vfs_do_busy = 0;
				if (keeperror(nd, error)) {
					cache_dropmount(mp);
					break;
				}
				if (error == 0) {
					cache_setvp(&nch, tdp);
					vput(tdp);
				}
			}
			if (vfs_do_busy)
				vfs_unbusy(mp);
			cache_dropmount(mp);
		}

		if (keeperror(nd, error)) {
			cache_put(&nch);
			/*
			 * Jumped to from the umount race above, where nch has
			 * already been released by the cache_put() at 'again:'.
			 */
double_break:
			break;
		}

		/*
		 * Skip any slashes to get to the next element. If there
		 * are any slashes at all the current element must be a
		 * directory or, in the create case, intended to become a directory.
		 * If it isn't we break without incrementing ptr and fall through
		 * to the failure case below.
		 */
		while (*ptr == '/') {
			if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 &&
			    !(nd->nl_flags & NLC_WILLBEDIR)
			) {
				break;
			}
			++ptr;
		}

		/*
		 * Continuation case: additional elements and the current
		 * element is a directory.
		 */
		if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) {
			if (nd->nl_flags & NLC_NCDIR) {
				cache_drop_ncdir(&nd->nl_nch);
				nd->nl_flags &= ~NLC_NCDIR;
			} else {
				cache_drop(&nd->nl_nch);
			}
			cache_unlock(&nch);
			KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
			nd->nl_nch = nch;
			continue;
		}

		/*
		 * Failure case: additional elements and the current element
		 * is not a directory
		 */
		if (*ptr) {
			cache_put(&nch);
			error = ENOTDIR;
			break;
		}

		/*
		 * Successful lookup of last element.
		 *
		 * Check permissions if the target exists. If the target does not
		 * exist directory permissions were already tested in the early
		 * completion code above.
		 *
		 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY
		 * if the file is marked append-only, and NLC_STICKY if the directory
		 * containing the file is sticky.
		 */
		if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) {
			error = naccess(&nch, nd->nl_flags | dflags,
					nd->nl_cred, NULL);
			if (keeperror(nd, error)) {
				cache_put(&nch);
				break;
			}
		}

		/*
		 * Termination: no more elements.
		 *
		 * If NLC_REFDVP is set acquire a referenced parent dvp.
		 */
		if (nd->nl_flags & NLC_REFDVP) {
			cache_lock(&nd->nl_nch);
			error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp);
			cache_unlock(&nd->nl_nch);
			if (keeperror(nd, error)) {
				kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp);
				cache_put(&nch);
				break;
			}
		}
		/* hand the locked, referenced leaf over to nd */
		if (nd->nl_flags & NLC_NCDIR) {
			cache_drop_ncdir(&nd->nl_nch);
			nd->nl_flags &= ~NLC_NCDIR;
		} else {
			cache_drop(&nd->nl_nch);
		}
		nd->nl_nch = nch;
		nd->nl_flags |= NLC_NCPISLOCKED;
		error = 0;
		break;
	}

	/* per-cpu statistics: a miss is any lookup which saw an unresolved ncp */
	if (hit)
		++gd->gd_nchstats->ncs_longhits;
	else
		++gd->gd_nchstats->ncs_longmiss;

	if (nd->nl_flags & NLC_NCPISLOCKED)
		KKASSERT(cache_lockstatus(&nd->nl_nch) > 0);

	/*
	 * Retry the whole thing if doretry flag is set, but only once.
	 * autofs(5) may mount another filesystem under its root directory
	 * while resolving a path.
	 */
	if (doretry && !inretry) {
		inretry = TRUE;
		/* keep only the current NLC_NCDIR state, restore the rest */
		nd->nl_flags &= NLC_NCDIR;
		nd->nl_flags |= saveflag;
		goto nlookup_start;
	}

	/*
	 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit
	 * (ncp->nc_error will be ENOENT), but we will still return an error
	 * code of 0.
	 */
	return(error);
}

/*
 * Resolve a mount point's glue ncp. This ncp creates the illusion
 * of continuity in the namecache tree by connecting the ncp related to the
 * vnode under the mount to the ncp related to the mount's root vnode.
 *
 * If no error occurred a locked, ref'd ncp is stored in *nch.
1070 */ 1071 int 1072 nlookup_mp(struct mount *mp, struct nchandle *nch) 1073 { 1074 struct vnode *vp; 1075 int error; 1076 1077 error = 0; 1078 cache_get(&mp->mnt_ncmountpt, nch); 1079 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 1080 while (vfs_busy(mp, 0)) 1081 ; 1082 error = VFS_ROOT(mp, &vp); 1083 vfs_unbusy(mp); 1084 if (error) { 1085 cache_put(nch); 1086 } else { 1087 cache_setvp(nch, vp); 1088 vput(vp); 1089 } 1090 } 1091 return(error); 1092 } 1093 1094 /* 1095 * Read the contents of a symlink, allocate a path buffer out of the 1096 * namei_oc and initialize the supplied nlcomponent with the result. 1097 * 1098 * If an error occurs no buffer will be allocated or returned in the nlc. 1099 */ 1100 int 1101 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 1102 struct nlcomponent *nlc) 1103 { 1104 struct vnode *vp; 1105 struct iovec aiov; 1106 struct uio auio; 1107 int linklen; 1108 int error; 1109 char *cp; 1110 1111 nlc->nlc_nameptr = NULL; 1112 nlc->nlc_namelen = 0; 1113 if (nch->ncp->nc_vp == NULL) 1114 return(ENOENT); 1115 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 1116 return(error); 1117 cp = objcache_get(namei_oc, M_WAITOK); 1118 aiov.iov_base = cp; 1119 aiov.iov_len = MAXPATHLEN; 1120 auio.uio_iov = &aiov; 1121 auio.uio_iovcnt = 1; 1122 auio.uio_offset = 0; 1123 auio.uio_rw = UIO_READ; 1124 auio.uio_segflg = UIO_SYSSPACE; 1125 auio.uio_td = nd->nl_td; 1126 auio.uio_resid = MAXPATHLEN - 1; 1127 error = VOP_READLINK(vp, &auio, nd->nl_cred); 1128 if (error) 1129 goto fail; 1130 linklen = MAXPATHLEN - 1 - auio.uio_resid; 1131 if (varsym_enable) { 1132 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 1133 if (linklen < 0) { 1134 error = ENAMETOOLONG; 1135 goto fail; 1136 } 1137 } 1138 cp[linklen] = 0; 1139 nlc->nlc_nameptr = cp; 1140 nlc->nlc_namelen = linklen; 1141 vput(vp); 1142 return(0); 1143 fail: 1144 objcache_put(namei_oc, cp); 1145 vput(vp); 1146 return(error); 1147 } 1148 1149 /* 1150 * Check access [XXX cache 
vattr!] [XXX quota] 1151 * 1152 * Generally check the NLC_* access bits. All specified bits must pass 1153 * for this function to return 0. 1154 * 1155 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 1156 * access, otherwise it must exist. No error is returned in this case. 1157 * 1158 * The file must not exist if NLC_EXCL is specified. 1159 * 1160 * Directory permissions in general are tested for NLC_CREATE if the file 1161 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 1162 * whether the file exists or not. 1163 * 1164 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 1165 * the latter is only tested if the target exists. 1166 * 1167 * The passed ncp must be referenced and locked. If it is already resolved 1168 * it may be locked shared but otherwise should be locked exclusively. 1169 */ 1170 1171 #define S_WXOK_MASK (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 1172 1173 static int 1174 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) 1175 { 1176 struct vnode *vp; 1177 struct vattr va; 1178 struct namecache *ncp; 1179 int error; 1180 int cflags; 1181 1182 KKASSERT(cache_lockstatus(nch) > 0); 1183 1184 ncp = nch->ncp; 1185 if (ncp->nc_flag & NCF_UNRESOLVED) { 1186 cache_resolve(nch, cred); 1187 ncp = nch->ncp; 1188 } 1189 error = ncp->nc_error; 1190 1191 /* 1192 * Directory permissions checks. Silently ignore ENOENT if these 1193 * tests pass. It isn't an error. 1194 * 1195 * We can safely resolve ncp->nc_parent because ncp is currently 1196 * locked. 
1197 */ 1198 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 1199 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 1200 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 1201 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 1202 (nflags & NLC_RENAME_DST) 1203 ) { 1204 struct nchandle par; 1205 1206 if ((par.ncp = ncp->nc_parent) == NULL) { 1207 if (error != EAGAIN) 1208 error = EINVAL; 1209 } else if (error == 0 || error == ENOENT) { 1210 par.mount = nch->mount; 1211 cache_hold(&par); 1212 cache_lock_maybe_shared(&par, 0); 1213 error = naccess(&par, NLC_WRITE, cred, NULL); 1214 cache_put(&par); 1215 } 1216 } 1217 } 1218 1219 /* 1220 * NLC_EXCL check. Target file must not exist. 1221 */ 1222 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 1223 error = EEXIST; 1224 1225 /* 1226 * Try to short-cut the vnode operation for intermediate directory 1227 * components. This is a major SMP win because it avoids having 1228 * to execute a lot of code for intermediate directory components, 1229 * including shared refs and locks on intermediate directory vnodes. 1230 * 1231 * We can only do this if the caller does not need nflagsp. 1232 */ 1233 if (error == 0 && nflagsp == NULL && 1234 nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) { 1235 return 0; 1236 } 1237 1238 /* 1239 * Get the vnode attributes so we can do the rest of our checks. 1240 * 1241 * NOTE: We only call naccess_va() if the target exists. 1242 */ 1243 if (error == 0) { 1244 error = cache_vget(nch, cred, LK_SHARED, &vp); 1245 if (error == ENOENT) { 1246 /* 1247 * Silently zero-out ENOENT if creating or renaming 1248 * (rename target). It isn't an error. 1249 */ 1250 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1251 error = 0; 1252 } else if (error == 0) { 1253 /* 1254 * Get the vnode attributes and check for illegal O_TRUNC 1255 * requests and read-only mounts. 1256 * 1257 * NOTE: You can still open devices on read-only mounts for 1258 * writing. 
1259 * 1260 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1261 * check on the parent directory above. 1262 * 1263 * XXX cache the va in the namecache or in the vnode 1264 */ 1265 error = VOP_GETATTR(vp, &va); 1266 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1267 switch(va.va_type) { 1268 case VREG: 1269 case VDATABASE: 1270 case VCHR: 1271 case VBLK: 1272 case VFIFO: 1273 break; 1274 case VDIR: 1275 error = EISDIR; 1276 break; 1277 default: 1278 error = EINVAL; 1279 break; 1280 } 1281 } 1282 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1283 (vp->v_mount->mnt_flag & MNT_RDONLY) 1284 ) { 1285 switch(va.va_type) { 1286 case VDIR: 1287 case VLNK: 1288 case VREG: 1289 case VDATABASE: 1290 error = EROFS; 1291 break; 1292 default: 1293 break; 1294 } 1295 } 1296 vput(vp); 1297 1298 /* 1299 * Check permissions based on file attributes. The passed 1300 * flags (*nflagsp) are modified with feedback based on 1301 * special attributes and requirements. 1302 */ 1303 if (error == 0) { 1304 /* 1305 * Adjust the returned (*nflagsp) if non-NULL. 1306 */ 1307 if (nflagsp) { 1308 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 1309 *nflagsp |= NLC_STICKY; 1310 if (va.va_flags & APPEND) 1311 *nflagsp |= NLC_APPENDONLY; 1312 if (va.va_flags & IMMUTABLE) 1313 *nflagsp |= NLC_IMMUTABLE; 1314 } 1315 1316 /* 1317 * NCF_WXOK can be set for world-searchable directories. 1318 * 1319 * XXX When we implement capabilities this code would also 1320 * need a cap check, or only set the flag if there are no 1321 * capabilities. 1322 */ 1323 cflags = 0; 1324 if (va.va_type == VDIR && 1325 (va.va_mode & S_WXOK_MASK) == S_WXOK_MASK) { 1326 cflags |= NCF_WXOK; 1327 } 1328 1329 /* 1330 * Track swapcache management flags in the namecache. 1331 * 1332 * Calculate the flags based on the current vattr info 1333 * and recalculate the inherited flags from the parent 1334 * (the original cache linkage may have occurred without 1335 * getattrs and thus have stale flags). 
1336 */ 1337 if (va.va_flags & SF_NOCACHE) 1338 cflags |= NCF_SF_NOCACHE; 1339 if (va.va_flags & UF_CACHE) 1340 cflags |= NCF_UF_CACHE; 1341 if (ncp->nc_parent) { 1342 if (ncp->nc_parent->nc_flag & 1343 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1344 cflags |= NCF_SF_PNOCACHE; 1345 } 1346 if (ncp->nc_parent->nc_flag & 1347 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1348 cflags |= NCF_UF_PCACHE; 1349 } 1350 } 1351 1352 /* 1353 * We're not supposed to update nc_flag when holding a shared 1354 * lock, but we allow the case for certain flags. Note that 1355 * holding an exclusive lock allows updating nc_flag without 1356 * atomics. nc_flag is not allowe to be updated at all unless 1357 * a shared or exclusive lock is held. 1358 */ 1359 atomic_clear_short(&ncp->nc_flag, 1360 (NCF_SF_NOCACHE | NCF_UF_CACHE | 1361 NCF_SF_PNOCACHE | NCF_UF_PCACHE | 1362 NCF_WXOK) & ~cflags); 1363 atomic_set_short(&ncp->nc_flag, cflags); 1364 1365 /* 1366 * Process general access. 1367 */ 1368 error = naccess_va(&va, nflags, cred); 1369 } 1370 } 1371 } 1372 return(error); 1373 } 1374 1375 /* 1376 * Check the requested access against the given vattr using cred. 1377 */ 1378 int 1379 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 1380 { 1381 int i; 1382 int vmode; 1383 1384 /* 1385 * Test the immutable bit. Creations, deletions, renames (source 1386 * or destination) are not allowed. chown/chmod/other is also not 1387 * allowed but is handled by SETATTR. Hardlinks to the immutable 1388 * file are allowed. 1389 * 1390 * If the directory is set to immutable then creations, deletions, 1391 * renames (source or dest) and hardlinks to files within the directory 1392 * are not allowed, and regular files opened through the directory may 1393 * not be written to or truncated (unless a special device). 1394 * 1395 * NOTE! New hardlinks to immutable files work but new hardlinks to 1396 * files, immutable or not, sitting inside an immutable directory are 1397 * not allowed. 
As always if the file is hardlinked via some other 1398 * path additional hardlinks may be possible even if the file is marked 1399 * immutable. The sysop needs to create a closure by checking the hard 1400 * link count. Once closure is achieved you are good, and security 1401 * scripts should check link counts anyway. 1402 * 1403 * Writes and truncations are only allowed on special devices. 1404 */ 1405 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1406 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1407 return (EPERM); 1408 if (nflags & (NLC_CREATE | NLC_DELETE | 1409 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1410 return (EPERM); 1411 } 1412 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1413 switch(va->va_type) { 1414 case VDIR: 1415 return (EISDIR); 1416 case VLNK: 1417 case VREG: 1418 case VDATABASE: 1419 return (EPERM); 1420 default: 1421 break; 1422 } 1423 } 1424 } 1425 1426 /* 1427 * Test the no-unlink and append-only bits for opens, rename targets, 1428 * and deletions. These bits are not tested for creations or 1429 * rename sources. 1430 * 1431 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1432 * If you do not wish this you must also set NOUNLINK. 1433 * 1434 * If the governing directory is marked APPEND-only it implies 1435 * NOUNLINK for all entries in the directory. 1436 */ 1437 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1438 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1439 ) { 1440 return (EPERM); 1441 } 1442 1443 /* 1444 * A file marked append-only may not be deleted but can be renamed. 1445 */ 1446 if ((va->va_flags & APPEND) && 1447 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1448 ) { 1449 return (EPERM); 1450 } 1451 1452 /* 1453 * A file marked append-only which is opened for writing must also 1454 * be opened O_APPEND. 
1455 */ 1456 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1457 if (nflags & NLC_TRUNCATE) 1458 return (EPERM); 1459 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1460 if ((nflags & NLC_APPEND) == 0) 1461 return (EPERM); 1462 } 1463 } 1464 1465 /* 1466 * root gets universal access 1467 */ 1468 if (cred->cr_uid == 0) 1469 return(0); 1470 1471 /* 1472 * Check owner perms. 1473 * 1474 * If NLC_OWN is set the owner of the file is allowed no matter when 1475 * the owner-mode bits say (utimes). 1476 */ 1477 vmode = 0; 1478 if (nflags & NLC_READ) 1479 vmode |= S_IRUSR; 1480 if (nflags & NLC_WRITE) 1481 vmode |= S_IWUSR; 1482 if (nflags & NLC_EXEC) 1483 vmode |= S_IXUSR; 1484 1485 if (cred->cr_uid == va->va_uid) { 1486 if ((nflags & NLC_OWN) == 0) { 1487 if ((vmode & va->va_mode) != vmode) 1488 return(EACCES); 1489 } 1490 return(0); 1491 } 1492 1493 /* 1494 * If NLC_STICKY is set only the owner may delete or rename a file. 1495 * This bit is typically set on /tmp. 1496 * 1497 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1498 * the specific delete or rename case. For deletions and renames we 1499 * usually just care about directory permissions, not file permissions. 
1500 */ 1501 if ((nflags & NLC_STICKY) && 1502 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1503 return(EACCES); 1504 } 1505 1506 /* 1507 * Check group perms 1508 */ 1509 vmode >>= 3; 1510 for (i = 0; i < cred->cr_ngroups; ++i) { 1511 if (va->va_gid == cred->cr_groups[i]) { 1512 if ((vmode & va->va_mode) != vmode) 1513 return(EACCES); 1514 return(0); 1515 } 1516 } 1517 1518 /* 1519 * Check world perms 1520 */ 1521 vmode >>= 3; 1522 if ((vmode & va->va_mode) != vmode) 1523 return(EACCES); 1524 return(0); 1525 } 1526 1527 /* 1528 * Long-term (10-second interval) statistics collection 1529 */ 1530 static 1531 uint64_t 1532 collect_nlookup_callback(int n) 1533 { 1534 static uint64_t last_total; 1535 uint64_t save; 1536 uint64_t total; 1537 1538 total = 0; 1539 for (n = 0; n < ncpus; ++n) { 1540 globaldata_t gd = globaldata_find(n); 1541 struct nchstats *sp; 1542 1543 if ((sp = gd->gd_nchstats) != NULL) 1544 total += sp->ncs_longhits + sp->ncs_longmiss; 1545 } 1546 save = total; 1547 total = total - last_total; 1548 last_total = save; 1549 1550 return total; 1551 } 1552 1553 static 1554 void 1555 nlookup_collect_init(void *dummy __unused) 1556 { 1557 kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback, 1558 KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0)); 1559 } 1560 SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0); 1561