/*
 * Copyright (c) 2004-2022 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * nlookup() is the 'new' namei interface.  Rather than return directory and
 * leaf vnodes (in various lock states) the new interface instead deals in
 * namecache records.
Namecache records may represent either a positive or
 * a negative hit.  The namespace is locked via the namecache record instead
 * of via the vnode, and only the leaf namecache record (representing the
 * filename) needs to be locked.
 *
 * This greatly improves filesystem parallelism and is a huge simplification
 * of the API versus the old vnode locking / namei scheme.
 *
 * Filesystems must actively control the caching aspects of the namecache,
 * and since namecache pointers are used as handles they are non-optional
 * even for filesystems which do not generally wish to cache things.  It is
 * intended that a separate cache coherency API will be constructed to handle
 * these issues.
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/objcache.h>
#include <sys/file.h>
#include <sys/kcollect.h>
#include <sys/sysctl.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

/*
 * debug.nlookup_max_retries (read/write, default 4)
 *
 * Retry budget used when a namecache generation mismatch is detected;
 * nlookup() snapshots this value into a local at entry.
 */
__read_mostly static int nlookup_max_retries = 4;
SYSCTL_INT(_debug, OID_AUTO, nlookup_max_retries, CTLFLAG_RW,
	   &nlookup_max_retries, 0,
	   "retries on generation mismatch");

/*
 * debug.nlookup_debug (read/write, default 0)
 *
 * Debugging knob.  Bit 0 (value 1) turns on the "GEN CHANGE" kprintf()
 * diagnostics emitted by nlookup() when a generation change is seen.
 */
__read_mostly static int nlookup_debug;
SYSCTL_INT(_debug, OID_AUTO, nlookup_debug, CTLFLAG_RW,
	   &nlookup_debug, 0,
	   "Force retry test");

/*
 * Forward declaration of the file-local access check used throughout
 * nlookup(); the definition is further down in this file.
 */
static int naccess(struct nchandle *nch, u_int *genp, int vmode,
		   struct ucred *cred, int *stickyp, int nchislocked);

/*
 * unmount operations flag NLC_IGNBADDIR in order to allow the
 * umount to successfully issue a nlookup() on the path in order
 * to extract the mount point.  Allow certain errors through.
91 */ 92 static __inline 93 int 94 keeperror(struct nlookupdata *nd, int error) 95 { 96 if (error) { 97 if ((nd->nl_flags & NLC_IGNBADDIR) == 0 || 98 (error != EIO && error != EBADRPC && error != ESTALE)) { 99 return 1; 100 } 101 } 102 return 0; 103 } 104 105 /* 106 * Initialize a nlookup() structure, early error return for copyin faults 107 * or a degenerate empty string (which is not allowed). 108 * 109 * The first process proc0's credentials are used if the calling thread 110 * is not associated with a process context. 111 * 112 * MPSAFE 113 */ 114 int 115 nlookup_init(struct nlookupdata *nd, 116 const char *path, enum uio_seg seg, int flags) 117 { 118 size_t pathlen; 119 struct proc *p; 120 thread_t td; 121 int error; 122 123 td = curthread; 124 p = td->td_proc; 125 126 /* 127 * note: the pathlen set by copy*str() includes the terminating \0. 128 */ 129 bzero(nd, sizeof(struct nlookupdata)); 130 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 131 nd->nl_flags |= NLC_HASBUF; 132 if (seg == UIO_SYSSPACE) 133 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 134 else 135 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 136 137 /* 138 * Don't allow empty pathnames. 139 * POSIX.1 requirement: "" is not a vaild file name. 
140 */ 141 if (error == 0 && pathlen <= 1) 142 error = ENOENT; 143 144 if (error == 0) { 145 if (p && p->p_fd) { 146 if (nd->nl_path[0] == '/') { 147 if ((flags & NLC_NLNCH_NOINIT) == 0) { 148 nd->nl_basench = &p->p_fd->fd_nrdir; 149 cache_copy(nd->nl_basench, &nd->nl_nch); 150 } 151 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 152 if (p->p_fd->fd_njdir.ncp) 153 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 154 nd->nl_cred = td->td_ucred; 155 nd->nl_flags |= NLC_BORROWCRED; 156 } else { 157 if ((flags & NLC_NLNCH_NOINIT) == 0) { 158 nd->nl_basench = &p->p_fd->fd_ncdir; 159 cache_copy(nd->nl_basench, &nd->nl_nch); 160 } 161 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 162 if (p->p_fd->fd_njdir.ncp) 163 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 164 nd->nl_cred = td->td_ucred; 165 nd->nl_flags |= NLC_BORROWCRED; 166 } 167 } else { 168 if ((flags & NLC_NLNCH_NOINIT) == 0) { 169 nd->nl_basench = &rootnch; 170 cache_copy(nd->nl_basench, &nd->nl_nch); 171 } 172 cache_copy(&rootnch, &nd->nl_rootnch); 173 cache_copy(&rootnch, &nd->nl_jailnch); 174 nd->nl_cred = proc0.p_ucred; 175 nd->nl_flags |= NLC_BORROWCRED; 176 } 177 nd->nl_td = td; 178 nd->nl_flags |= flags & ~NLC_NLNCH_NOINIT; 179 } else { 180 nlookup_done(nd); 181 } 182 return(error); 183 } 184 185 186 /* 187 * nlookup_init() for "at" family of syscalls. 188 * 189 * Similar to nlookup_init() but if the path is relative and fd is not 190 * AT_FDCWD, the path will be interpreted relative to the directory pointed 191 * to by fd. In this case, the file entry pointed to by fd is ref'ed and 192 * returned in *fpp. 193 * 194 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 195 * and release the ref to the file entry. 
196 */ 197 int 198 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 199 const char *path, enum uio_seg seg, int flags) 200 { 201 struct thread *td = curthread; 202 struct file* fp; 203 struct vnode *vp; 204 int error; 205 206 *fpp = NULL; 207 208 /* 209 * Resolve the path, we might have to copy it in from userland, 210 * but don't initialize nl_basench, or nl_nch. 211 */ 212 error = nlookup_init(nd, path, seg, flags | NLC_NLNCH_NOINIT); 213 if (__predict_false(error)) 214 return (error); 215 216 /* 217 * Setup nl_basench (a pointer only not refd), and copy+ref 218 * to initialize nl_nch. Only applicable to relative paths. 219 * For absolute paths, or if (fd) is degenerate, just use the 220 * normal path. 221 */ 222 if (nd->nl_path[0] == '/') { 223 struct proc *p = curproc; 224 nd->nl_basench = &p->p_fd->fd_nrdir; 225 } else if (fd == AT_FDCWD) { 226 struct proc *p = curproc; 227 nd->nl_basench = &p->p_fd->fd_ncdir; 228 } else { 229 if ((error = holdvnode(td, fd, &fp)) != 0) 230 goto done; 231 vp = (struct vnode*)fp->f_data; 232 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 233 fdrop(fp); 234 fp = NULL; 235 error = ENOTDIR; 236 goto done; 237 } 238 nd->nl_basench = &fp->f_nchandle; 239 *fpp = fp; 240 } 241 cache_copy(nd->nl_basench, &nd->nl_nch); 242 done: 243 if (error) 244 nlookup_done(nd); 245 return (error); 246 } 247 248 /* 249 * This works similarly to nlookup_init() but does not assume a process 250 * context. rootnch is always chosen for the root directory and the cred 251 * and starting directory are supplied in arguments. 
252 */ 253 int 254 nlookup_init_raw(struct nlookupdata *nd, 255 const char *path, enum uio_seg seg, int flags, 256 struct ucred *cred, struct nchandle *ncstart) 257 { 258 size_t pathlen; 259 thread_t td; 260 int error; 261 262 td = curthread; 263 264 bzero(nd, sizeof(struct nlookupdata)); 265 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 266 nd->nl_flags |= NLC_HASBUF; 267 if (seg == UIO_SYSSPACE) 268 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 269 else 270 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 271 272 /* 273 * Don't allow empty pathnames. 274 * POSIX.1 requirement: "" is not a vaild file name. 275 */ 276 if (error == 0 && pathlen <= 1) 277 error = ENOENT; 278 279 if (error == 0) { 280 cache_copy(ncstart, &nd->nl_nch); 281 cache_copy(&rootnch, &nd->nl_rootnch); 282 cache_copy(&rootnch, &nd->nl_jailnch); 283 nd->nl_cred = crhold(cred); 284 nd->nl_td = td; 285 nd->nl_flags |= flags; 286 } else { 287 nlookup_done(nd); 288 } 289 return(error); 290 } 291 292 /* 293 * This works similarly to nlookup_init_raw() but does not rely 294 * on rootnch being initialized yet. 295 */ 296 int 297 nlookup_init_root(struct nlookupdata *nd, 298 const char *path, enum uio_seg seg, int flags, 299 struct ucred *cred, struct nchandle *ncstart, 300 struct nchandle *ncroot) 301 { 302 size_t pathlen; 303 thread_t td; 304 int error; 305 306 td = curthread; 307 308 bzero(nd, sizeof(struct nlookupdata)); 309 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 310 nd->nl_flags |= NLC_HASBUF; 311 if (seg == UIO_SYSSPACE) 312 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 313 else 314 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 315 316 /* 317 * Don't allow empty pathnames. 318 * POSIX.1 requirement: "" is not a vaild file name. 
319 */ 320 if (error == 0 && pathlen <= 1) 321 error = ENOENT; 322 323 if (error == 0) { 324 cache_copy(ncstart, &nd->nl_nch); 325 cache_copy(ncroot, &nd->nl_rootnch); 326 cache_copy(ncroot, &nd->nl_jailnch); 327 nd->nl_cred = crhold(cred); 328 nd->nl_td = td; 329 nd->nl_flags |= flags; 330 } else { 331 nlookup_done(nd); 332 } 333 return(error); 334 } 335 336 #if 0 337 /* 338 * Set a different credential; this credential will be used by future 339 * operations performed on nd.nl_open_vp and nlookupdata structure. 340 */ 341 void 342 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 343 { 344 KKASSERT(nd->nl_cred != NULL); 345 346 if (nd->nl_cred != cred) { 347 cred = crhold(cred); 348 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 349 crfree(nd->nl_cred); 350 nd->nl_flags &= ~NLC_BORROWCRED; 351 nd->nl_cred = cred; 352 } 353 } 354 #endif 355 356 /* 357 * Cleanup a nlookupdata structure after we are through with it. This may 358 * be called on any nlookupdata structure initialized with nlookup_init(). 359 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 360 * returns an error, even if as a consumer you believe you have taken all 361 * dynamic elements out of the nlookupdata structure. 
362 */ 363 void 364 nlookup_done(struct nlookupdata *nd) 365 { 366 if (nd->nl_nch.ncp) { 367 if (nd->nl_flags & NLC_NCPISLOCKED) 368 cache_unlock(&nd->nl_nch); 369 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 370 } 371 nd->nl_flags &= ~NLC_NCPISLOCKED; 372 if (nd->nl_rootnch.ncp) 373 cache_drop_and_cache(&nd->nl_rootnch, 0); 374 if (nd->nl_jailnch.ncp) 375 cache_drop_and_cache(&nd->nl_jailnch, 0); 376 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 377 objcache_put(namei_oc, nd->nl_path); 378 nd->nl_path = NULL; 379 } 380 if (nd->nl_cred) { 381 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 382 crfree(nd->nl_cred); 383 nd->nl_cred = NULL; 384 nd->nl_flags &= ~NLC_BORROWCRED; 385 } 386 if (nd->nl_open_vp) { 387 if (nd->nl_flags & NLC_LOCKVP) { 388 vn_unlock(nd->nl_open_vp); 389 nd->nl_flags &= ~NLC_LOCKVP; 390 } 391 vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL); 392 nd->nl_open_vp = NULL; 393 } 394 if (nd->nl_dvp) { 395 vrele(nd->nl_dvp); 396 nd->nl_dvp = NULL; 397 } 398 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 399 nd->nl_basench = NULL; 400 } 401 402 /* 403 * Works similarly to nlookup_done() when nd initialized with 404 * nlookup_init_at(). 405 */ 406 void 407 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 408 { 409 nlookup_done(nd); 410 if (fp != NULL) 411 fdrop(fp); 412 } 413 414 void 415 nlookup_zero(struct nlookupdata *nd) 416 { 417 bzero(nd, sizeof(struct nlookupdata)); 418 } 419 420 /* 421 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 422 * if an error occured. 423 * 424 * Note that the returned ncp is not checked for permissions, though VEXEC 425 * is checked on the directory path leading up to the result. The caller 426 * must call naccess() to check the permissions of the returned leaf. 
427 */ 428 struct nchandle 429 nlookup_simple(const char *str, enum uio_seg seg, 430 int niflags, int *error) 431 { 432 struct nlookupdata nd; 433 struct nchandle nch; 434 435 *error = nlookup_init(&nd, str, seg, niflags); 436 if (*error == 0) { 437 if ((*error = nlookup(&nd)) == 0) { 438 nch = nd.nl_nch; /* keep hold ref from structure */ 439 cache_zero(&nd.nl_nch); /* and NULL out */ 440 } else { 441 cache_zero(&nch); 442 } 443 nlookup_done(&nd); 444 } else { 445 cache_zero(&nch); 446 } 447 return(nch); 448 } 449 450 /* 451 * Returns non-zero if the path element is the last element 452 */ 453 static 454 int 455 islastelement(const char *ptr) 456 { 457 while (*ptr == '/') 458 ++ptr; 459 return (*ptr == 0); 460 } 461 462 /* 463 * Returns non-zero if we need to lock the namecache element 464 * exclusively. Unless otherwise requested by NLC_SHAREDLOCK, 465 * the last element of the namecache lookup will be locked 466 * exclusively. 467 * 468 * O_CREAT or O_TRUNC need the last element to be locked exlcusively. 469 * Intermediate elements are always locked shared. 470 * 471 * NOTE: Even if we return on-zero, an unresolved namecache record 472 * will always be locked exclusively. 473 */ 474 static __inline 475 int 476 wantsexcllock(struct nlookupdata *nd, int last_element) 477 { 478 if ((nd->nl_flags & NLC_SHAREDLOCK) == 0) 479 return(last_element); 480 return 0; 481 } 482 483 484 /* 485 * Do a generic nlookup. Note that the passed nd is not nlookup_done()'d 486 * on return, even if an error occurs. If no error occurs or NLC_CREATE 487 * is flagged and ENOENT is returned, then the returned nl_nch is always 488 * referenced and locked exclusively. 489 * 490 * WARNING: For any general error other than ENOENT w/NLC_CREATE, the 491 * the resulting nl_nch may or may not be locked and if locked 492 * might be locked either shared or exclusive. 493 * 494 * Intermediate directory elements, including the current directory, require 495 * execute (search) permission. 
nlookup does not examine the access
 * permissions on the returned element.
 *
 * If NLC_CREATE is set the last directory must allow node creation,
 * and an error code of 0 will be returned for a non-existent
 * target (not ENOENT).
 *
 * If NLC_RENAME_DST is set the last directory must allow node deletion,
 * plus the sticky check is made, and an error code of 0 will be returned
 * for a non-existent target (not ENOENT).
 *
 * If NLC_DELETE is set the last directory must allow node deletion,
 * plus the sticky check is made.
 *
 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
 * of the returned entry.  The vnode will be referenced but not locked.
 *
 * IF THE PATH REPRESENTS A MOUNT POINT CROSSING THEN NLC_REFDVP WILL SET
 * NL_DVP TO NULL AND RETURN NO ERROR (ERROR == 0), allowing the operation
 * to return up the stack.  The nch will only be referenced and not locked.
 * High level code must check this case and do the right thing since,
 * typically, it means things like 'mkdir' should fail with EEXIST.  For
 * example 'mkdir /var/cache' where /var/cache is a null-mount from
 * /build/var.cache, needs to return EEXIST rather than a mount-crossing
 * failure.
 *
 * NOTE: As an optimization we attempt to obtain a shared namecache lock
 *	 on any intermediate elements.  On success, the returned element
 *	 is ALWAYS locked exclusively.
 *
 * NOTE: If for any reason the nc_generation number of the ncp's being
 *	 evaluated changes, the lookup is retried.
527 */ 528 int 529 nlookup(struct nlookupdata *nd) 530 { 531 globaldata_t gd = mycpu; 532 struct nlcomponent nlc; 533 struct nchandle nch; 534 struct nchandle nctmp; 535 struct mount *mp; 536 int wasdotordotdot; 537 char *path_reset; 538 char *ptr; 539 char *nptr; 540 int error; 541 int len; 542 int dflags; 543 int hit = 1; 544 int saveflag = nd->nl_flags; 545 int max_retries = nlookup_max_retries; 546 u_int nl_gen; 547 u_int nch_gen; 548 int gen_changed; 549 boolean_t doretry = FALSE; 550 boolean_t inretry = FALSE; 551 552 path_reset = NULL; 553 554 nlookup_start: 555 556 #ifdef KTRACE 557 if (KTRPOINT(nd->nl_td, KTR_NAMEI)) 558 ktrnamei(nd->nl_td->td_lwp, nd->nl_path); 559 #endif 560 bzero(&nlc, sizeof(nlc)); 561 562 /* 563 * Setup for the loop. The current working namecache element is 564 * always at least referenced. We lock it as required, but always 565 * return a locked, resolved namecache entry. 566 */ 567 nd->nl_loopcnt = 0; 568 if (nd->nl_dvp) { 569 vrele(nd->nl_dvp); 570 nd->nl_dvp = NULL; 571 } 572 ptr = nd->nl_path; 573 574 nl_gen = nd->nl_nch.ncp ? nd->nl_nch.ncp->nc_generation : 0; 575 nl_gen &= ~3; 576 gen_changed = 0; 577 578 /* 579 * Loop on the path components. At the top of the loop nd->nl_nch 580 * is ref'd and unlocked and represents our current position. 581 */ 582 for (;;) { 583 int last_element; 584 585 ++nd->nl_elmno; 586 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 587 588 /* 589 * Check if the root directory should replace the current 590 * directory. This is done at the start of a translation 591 * or after a symbolic link has been found. In other cases 592 * ptr will never be pointing at a '/'. 
593 */ 594 if (*ptr == '/') { 595 do { 596 ++ptr; 597 } while (*ptr == '/'); 598 599 /* 600 * We might already be at the root as a pre-optimization 601 */ 602 if (nd->nl_nch.mount != nd->nl_rootnch.mount || 603 nd->nl_nch.ncp != nd->nl_rootnch.ncp) { 604 cache_drop_and_cache(&nd->nl_nch, 0); 605 cache_copy(&nd->nl_rootnch, &nd->nl_nch); 606 nl_gen = nd->nl_nch.ncp->nc_generation & ~3; 607 } 608 609 /* 610 * Fast-track termination. There is no parent directory of 611 * the root in the same mount from the point of view of 612 * the caller so return EACCES if NLC_REFDVP is specified, 613 * and EEXIST if NLC_CREATE is also specified. 614 * e.g. 'rmdir /' or 'mkdir /' are not allowed. 615 */ 616 if (*ptr == 0) { 617 if (nd->nl_flags & NLC_REFDVP) 618 error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES; 619 else 620 error = 0; 621 nd->nl_flags |= NLC_NCPISLOCKED; 622 cache_lock_maybe_shared(&nd->nl_nch, 623 wantsexcllock(nd, islastelement(ptr))); 624 break; 625 } 626 continue; 627 } 628 629 /* 630 * Pre-calculate next path component so we can check whether the 631 * current component directory is the last directory in the path 632 * or not. 633 */ 634 for (nptr = ptr; *nptr && *nptr != '/'; ++nptr) 635 ; 636 637 /* 638 * nd->nl_nch is referenced and not locked here. 639 * 640 * Check directory search permissions. This will load dflags to 641 * obtain directory-special permissions to be checked along with the 642 * last component. 643 * 644 * We only need to pass-in &dflags for the second-to-last component. 645 * Optimize by passing-in NULL for any prior components, which may 646 * allow the code to bypass the naccess() call. 647 * 648 * naccess() is optimized to avoid having to lock the nch or get 649 * the related vnode if cached perms are sufficient. 
650 */ 651 dflags = 0; 652 if (*nptr == '/' || (saveflag & NLC_MODIFYING_MASK) == 0) { 653 error = naccess(&nd->nl_nch, &nl_gen, NLC_EXEC, 654 nd->nl_cred, NULL, 0); 655 } else { 656 error = naccess(&nd->nl_nch, &nl_gen, NLC_EXEC, 657 nd->nl_cred, &dflags, 0); 658 } 659 if (error) { 660 if (keeperror(nd, error)) 661 break; 662 error = 0; 663 } 664 665 /* 666 * Extract the next (or last) path component. Path components are 667 * limited to 255 characters. 668 */ 669 nlc.nlc_nameptr = ptr; 670 nlc.nlc_namelen = nptr - ptr; 671 ptr = nptr; 672 if (nlc.nlc_namelen >= 256) { 673 error = ENAMETOOLONG; 674 break; 675 } 676 last_element = islastelement(nptr); 677 678 /* 679 * Lookup the path component in the cache, creating an unresolved 680 * entry if necessary. We have to handle "." and ".." as special 681 * cases. 682 * 683 * When handling ".." we have to detect a traversal back through a 684 * mount point. If we are at the root, ".." just returns the root. 685 * 686 * When handling "." or ".." we also have to recalculate dflags 687 * since our dflags will be for some sub-directory instead of the 688 * parent dir. 689 * 690 * This subsection returns a referenced and possibly locked 'nch'. 691 * The locking status is based on the last_element flag. 692 * 693 * The namecache topology is not allowed to be disconnected, so 694 * encountering a NULL parent will generate EINVAL. This typically 695 * occurs when a directory is removed out from under a process. 696 * 697 * WARNING! The unlocking of nd->nl_nch is sensitive code. 698 */ 699 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 700 701 if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') { 702 if (last_element) { 703 cache_get_maybe_shared(&nd->nl_nch, &nch, 704 wantsexcllock(nd, 1)); 705 } else { 706 cache_copy(&nd->nl_nch, &nch); 707 } 708 nch_gen = nch.ncp->nc_generation & ~3; 709 wasdotordotdot = 1; 710 } else if (nlc.nlc_namelen == 2 && 711 nlc.nlc_nameptr[0] == '.' 
&& nlc.nlc_nameptr[1] == '.') { 712 if (nd->nl_nch.mount == nd->nl_rootnch.mount && 713 nd->nl_nch.ncp == nd->nl_rootnch.ncp 714 ) { 715 /* 716 * ".." at the root returns the root 717 */ 718 if (last_element) { 719 cache_get_maybe_shared(&nd->nl_nch, &nch, 720 wantsexcllock(nd, 1)); 721 } else { 722 cache_copy(&nd->nl_nch, &nch); 723 } 724 } else { 725 /* 726 * Locate the parent ncp. If we are at the root of a 727 * filesystem mount we have to skip to the mounted-on 728 * point in the underlying filesystem. 729 * 730 * Expect the parent to always be good since the 731 * mountpoint doesn't go away. XXX hack. cache_get() 732 * requires the ncp to already have a ref as a safety. 733 * 734 * However, a process which has been broken out of a chroot 735 * will wind up with a NULL parent if it tries to '..' above 736 * the real root, deal with the case. Note that this does 737 * not protect us from a jail breakout, it just stops a panic 738 * if the jail-broken process tries to '..' past the real 739 * root. 740 */ 741 nctmp = nd->nl_nch; 742 while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) { 743 nctmp = nctmp.mount->mnt_ncmounton; 744 if (nctmp.ncp == NULL) 745 break; 746 } 747 if (nctmp.ncp == NULL) { 748 if (curthread->td_proc) { 749 kprintf("vfs_nlookup: '..' traverse broke " 750 "jail: pid %d (%s)\n", 751 curthread->td_proc->p_pid, 752 curthread->td_comm); 753 } 754 nctmp = nd->nl_rootnch; 755 } else { 756 nctmp.ncp = nctmp.ncp->nc_parent; 757 } 758 if (last_element) { 759 cache_get_maybe_shared(&nctmp, &nch, 760 wantsexcllock(nd, 1)); 761 } else { 762 cache_copy(&nctmp, &nch); 763 } 764 } 765 nch_gen = nch.ncp->nc_generation & ~3; 766 wasdotordotdot = 2; 767 } else { 768 /* 769 * Quickly lookup the component. If we can't find it, then 770 * slowly lookup and resolve the component. 
771 */ 772 if (last_element) { 773 error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc, 774 wantsexcllock(nd, 1), 775 &nch); 776 } else { 777 nch = cache_nlookup_nonlocked(&nd->nl_nch, &nlc); 778 if (nch.ncp == NULL) 779 error = EWOULDBLOCK; 780 } 781 782 /* 783 * At this point the only possible error is EWOULDBLOCK. 784 * 785 * If no error nch is set and referenced, and then also locked 786 * according to last_element. For EWOULDBLOCK nch is not set. 787 * For any other error nch is set and referenced, but not locked. 788 * 789 * On EWOULDBLOCK the ncp may be unresolved (if not locked it can 790 * become unresolved at any time, but we don't care at this time). 791 */ 792 if (error == EWOULDBLOCK) { 793 nch = cache_nlookup(&nd->nl_nch, &nlc); 794 if (nch.ncp->nc_flag & NCF_UNRESOLVED) 795 hit = 0; 796 for (;;) { 797 error = cache_resolve(&nch, &nch_gen, nd->nl_cred); 798 if (error != EAGAIN && 799 (nch.ncp->nc_flag & NCF_DESTROYED) == 0) { 800 if (error == ESTALE) { 801 if (!inretry) 802 error = ENOENT; 803 doretry = TRUE; 804 } 805 if (last_element == 0) 806 cache_unlock(&nch); 807 break; 808 } 809 kprintf("[diagnostic] nlookup: relookup %*.*s\n", 810 nch.ncp->nc_nlen, nch.ncp->nc_nlen, 811 nch.ncp->nc_name); 812 cache_put(&nch); 813 nch = cache_nlookup(&nd->nl_nch, &nlc); 814 } 815 } 816 nch_gen = nch.ncp->nc_generation & ~3; 817 wasdotordotdot = 0; 818 } 819 820 /* 821 * If the component is "." or ".." our dflags no longer represents 822 * the parent directory and we have to explicitly look it up. 823 * 824 * Expect the parent to be good since nch is locked. 825 * 826 * nch will continue to be valid even if an error occurs after this 827 * point. 
828 */ 829 if (wasdotordotdot && error == 0) { 830 struct nchandle par; 831 832 dflags = 0; 833 if (last_element == 0) 834 cache_lock_maybe_shared(&nch, wantsexcllock(nd, 0)); 835 836 if ((par.ncp = nch.ncp->nc_parent) != NULL) { 837 u_int dummy_gen = 0; 838 839 par.mount = nch.mount; 840 cache_hold(&par); 841 error = naccess(&par, &dummy_gen, 0, nd->nl_cred, &dflags, 0); 842 cache_drop_and_cache(&par, nd->nl_elmno - 1); 843 if (error) { 844 if (!keeperror(nd, error)) 845 error = 0; 846 if (error == EINVAL) { 847 kprintf("nlookup (%s): trailing . or .. retry on %s\n", 848 curthread->td_comm, nd->nl_path); 849 doretry = TRUE; 850 } 851 } 852 } 853 854 if (last_element == 0) 855 cache_unlock(&nch); 856 } 857 858 /* 859 * [end of subsection] 860 * 861 * nch is referenced and locked according to (last_element). 862 * nd->nl_nch is unlocked and referenced. 863 * nl_gen and nch_gen are both set. 864 */ 865 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 866 867 /* 868 * Resolve the namespace if necessary. The ncp returned by 869 * cache_nlookup() is referenced, and also locked according 870 * to last_element. 871 * 872 * XXX neither '.' nor '..' should return EAGAIN since they were 873 * previously resolved and thus cannot be newly created ncp's. 874 */ 875 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 876 if (last_element == 0) 877 cache_lock(&nch); 878 hit = 0; 879 error = cache_resolve(&nch, &nch_gen, nd->nl_cred); 880 if (error == ESTALE) { 881 if (!inretry) 882 error = ENOENT; 883 doretry = TRUE; 884 } 885 if (last_element == 0) 886 cache_unlock(&nch); 887 KKASSERT(error != EAGAIN); 888 } else { 889 error = nch.ncp->nc_error; 890 } 891 892 /* 893 * Early completion. ENOENT is not an error if this is the last 894 * component and NLC_CREATE or NLC_RENAME (rename target) was 895 * requested. Note that ncp->nc_error is left as ENOENT in that 896 * case, which we check later on. 897 * 898 * Also handle invalid '.' or '..' 
components terminating a path 899 * for a create/rename/delete. The standard requires this and pax 900 * pretty stupidly depends on it. 901 */ 902 if (last_element) { 903 if (error == ENOENT && 904 (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST))) 905 { 906 if (nd->nl_flags & NLC_NFS_RDONLY) { 907 error = EROFS; 908 } else { 909 error = naccess(&nch, &nch_gen, nd->nl_flags | dflags, 910 nd->nl_cred, NULL, last_element); 911 } 912 } 913 if (error == 0 && wasdotordotdot && 914 (nd->nl_flags & (NLC_CREATE | NLC_DELETE | 915 NLC_RENAME_SRC | NLC_RENAME_DST))) 916 { 917 /* 918 * POSIX junk 919 */ 920 if (nd->nl_flags & NLC_CREATE) 921 error = EEXIST; 922 else if (nd->nl_flags & NLC_DELETE) 923 error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY; 924 else 925 error = EINVAL; 926 } 927 } 928 929 /* 930 * Early completion on error. 931 */ 932 if (error) { 933 if (last_element) 934 cache_unlock(&nch); 935 cache_drop_and_cache(&nch, nd->nl_elmno); 936 break; 937 } 938 939 /* 940 * If the element is a symlink and it is either not the last 941 * element or it is the last element and we are allowed to 942 * follow symlinks, resolve the symlink. 943 */ 944 if ((nch.ncp->nc_flag & NCF_ISSYMLINK) && 945 (*ptr || (nd->nl_flags & NLC_FOLLOW)) 946 ) { 947 if (nd->nl_loopcnt++ >= MAXSYMLINKS) { 948 error = ELOOP; 949 if (last_element) 950 cache_unlock(&nch); 951 cache_drop_and_cache(&nch, nd->nl_elmno); 952 break; 953 } 954 955 /* 956 * Check for a generation change. 957 * 958 * NOTE: On generation changes we must at a minimum cycle 959 * the lock. Here we get or have the lock so we are 960 * ok. 
961 */ 962 if (last_element == 0) 963 cache_lock_maybe_shared(&nch, 1); 964 965 if ((nch.ncp->nc_generation - nch_gen) & ~1) { 966 if (nlookup_debug & 1) { 967 kprintf("nlookup: symlink: GEN CHANGE %d\n", 968 (nch.ncp->nc_generation - nch_gen)); 969 } 970 gen_changed = 1; 971 } 972 973 error = nreadsymlink(nd, &nch, &nlc); 974 cache_put(&nch); 975 if (error) 976 break; 977 978 /* 979 * Concatenate trailing path elements onto the returned symlink. 980 * Note that if the path component (ptr) is not exhausted, it 981 * will being with a '/', so we do not have to add another one. 982 * 983 * The symlink may not be empty. 984 */ 985 len = strlen(ptr); 986 if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) { 987 error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT; 988 objcache_put(namei_oc, nlc.nlc_nameptr); 989 break; 990 } 991 bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1); 992 993 if (path_reset) { 994 if (nd->nl_flags & NLC_HASBUF) 995 objcache_put(namei_oc, nd->nl_path); 996 } else { 997 path_reset = nd->nl_path; 998 } 999 nd->nl_path = nlc.nlc_nameptr; 1000 nd->nl_flags |= NLC_HASBUF; 1001 ptr = nd->nl_path; 1002 /* nl_gen has not changed */ 1003 1004 /* 1005 * Go back up to the top to resolve any initial '/'s in the 1006 * symlink. 1007 */ 1008 continue; 1009 } 1010 1011 /* 1012 * If the element is a directory and we are crossing a mount point, 1013 * Locate the mount. 1014 */ 1015 while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1016 (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 && 1017 (mp = cache_findmount(&nch)) != NULL 1018 ) { 1019 struct vnode *tdp; 1020 int vfs_do_busy = 0; 1021 1022 /* 1023 * VFS must be busied before the namecache entry is locked, 1024 * but we don't want to waste time calling vfs_busy() if the 1025 * mount point is already resolved. 1026 */ 1027 again: 1028 /* 1029 * Check for a generation change. 1030 * 1031 * NOTE: On generation changes we must at a minimum cycle 1032 * the lock. 
So get and release the lock if we 1033 * do not have it. 1034 */ 1035 if ((nch.ncp->nc_generation - nch_gen) & ~1) { 1036 if (last_element == 0) { 1037 cache_lock_maybe_shared(&nch, 1); 1038 cache_unlock(&nch); 1039 } 1040 if (nlookup_debug & 1) { 1041 kprintf("nlookup: mountpt: GEN CHANGE %d\n", 1042 (nch.ncp->nc_generation - nch_gen)); 1043 } 1044 gen_changed = 1; 1045 } 1046 if (last_element) 1047 cache_unlock(&nch); 1048 cache_drop_and_cache(&nch, nd->nl_elmno); 1049 1050 if (vfs_do_busy) { 1051 while (vfs_busy(mp, 0)) { 1052 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 1053 kprintf("nlookup: warning umount race avoided\n"); 1054 cache_dropmount(mp); 1055 error = EBUSY; 1056 vfs_do_busy = 0; 1057 goto double_break; 1058 } 1059 } 1060 } 1061 1062 /* 1063 * We don't need to lock the nch unless the entry is unresolved 1064 * or this is the last element. 1065 */ 1066 if (last_element) 1067 cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch, 1068 wantsexcllock(nd, 1)); 1069 else 1070 cache_copy(&mp->mnt_ncmountpt, &nch); 1071 nch_gen = nch.ncp->nc_generation & ~3; 1072 1073 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 1074 if (last_element == 0) 1075 cache_lock(&nch); 1076 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 1077 if (vfs_do_busy == 0) { 1078 vfs_do_busy = 1; 1079 if (last_element == 0) 1080 cache_unlock(&nch); 1081 goto again; 1082 } 1083 error = VFS_ROOT(mp, &tdp); 1084 vfs_unbusy(mp); 1085 vfs_do_busy = 0; 1086 if (keeperror(nd, error)) { 1087 cache_dropmount(mp); 1088 if (last_element == 0) 1089 cache_unlock(&nch); 1090 break; 1091 } 1092 if (error == 0) { 1093 cache_setvp(&nch, tdp); 1094 nch_gen = nch.ncp->nc_generation & ~3; 1095 vput(tdp); 1096 } 1097 } 1098 if (last_element == 0) 1099 cache_unlock(&nch); 1100 } 1101 if (vfs_do_busy) 1102 vfs_unbusy(mp); 1103 cache_dropmount(mp); 1104 } 1105 1106 /* 1107 * Break out on error 1108 */ 1109 if (keeperror(nd, error)) { 1110 if (last_element) 1111 cache_unlock(&nch); 1112 cache_drop_and_cache(&nch, nd->nl_elmno); 1113 
double_break: 1114 break; 1115 } 1116 1117 /* 1118 * Skip any slashes to get to the next element. If there 1119 * are any slashes at all the current element must be a 1120 * directory or, in the create case, intended to become a directory. 1121 * If it isn't we break without incrementing ptr and fall through 1122 * to the failure case below. 1123 */ 1124 while (*ptr == '/') { 1125 if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 && 1126 !(nd->nl_flags & NLC_WILLBEDIR) 1127 ) { 1128 break; 1129 } 1130 ++ptr; 1131 } 1132 1133 /* 1134 * Continuation case: additional elements and the current 1135 * element is a directory. 1136 */ 1137 if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) { 1138 /* 1139 * Check for a generation change. 1140 * 1141 * NOTE: On generation changes we must at a minimum cycle 1142 * the lock. So get and release the lock if we 1143 * do not have it. 1144 */ 1145 if ((nch.ncp->nc_generation - nch_gen) & ~1) { 1146 if (last_element == 0) { 1147 cache_lock_maybe_shared(&nch, 1); 1148 cache_unlock(&nch); 1149 } 1150 if (nlookup_debug & 1) { 1151 kprintf("nlookup: next: GEN CHANGE %d\n", 1152 (nch.ncp->nc_generation - nch_gen)); 1153 } 1154 gen_changed = 1; 1155 } 1156 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 1157 if (last_element) 1158 cache_unlock(&nch); 1159 /*nchislocked = 0; not needed */ 1160 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 1161 nd->nl_nch = nch; 1162 nl_gen = nch_gen; 1163 continue; 1164 } 1165 1166 /* 1167 * Check for a generation change. 1168 * 1169 * NOTE: On generation changes we must at a minimum cycle 1170 * the lock. So get and release the lock if we 1171 * do not have it. 
1172 */ 1173 if ((nch.ncp->nc_generation - nch_gen) & ~1) { 1174 if (nlookup_debug & 1) { 1175 if (last_element == 0) { 1176 cache_lock_maybe_shared(&nch, 1); 1177 cache_unlock(&nch); 1178 } 1179 kprintf("nlookup: final: GEN CHANGE %d\n", 1180 (nch.ncp->nc_generation - nch_gen)); 1181 gen_changed = 1; 1182 } 1183 } 1184 1185 /* 1186 * Failure case: additional elements and the current element 1187 * is not a directory 1188 */ 1189 if (*ptr) { 1190 if (last_element) 1191 cache_unlock(&nch); 1192 cache_drop_and_cache(&nch, nd->nl_elmno); 1193 error = ENOTDIR; 1194 break; 1195 } 1196 1197 /* 1198 * Successful lookup of last element. 1199 * 1200 * Check permissions if the target exists. If the target does not 1201 * exist directory permissions were already tested in the early 1202 * completion code above. 1203 * 1204 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY 1205 * if the file is marked append-only, and NLC_STICKY if the directory 1206 * containing the file is sticky. 1207 */ 1208 KKASSERT(last_element); 1209 1210 if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) { 1211 error = naccess(&nch, &nch_gen, nd->nl_flags | dflags, 1212 nd->nl_cred, NULL, 1); 1213 if (keeperror(nd, error)) { 1214 cache_put(&nch); 1215 break; 1216 } 1217 } 1218 1219 /* 1220 * Termination: no more elements. 1221 * 1222 * Check to see if the immediate parent has been destroyed. This race 1223 * can occur because the element lookup must temporarily unlock 1224 * the parent. If so, do a retry. 1225 */ 1226 if (nch.ncp->nc_parent && 1227 (nch.ncp->nc_parent->nc_flag & NCF_DESTROYED)) { 1228 doretry = TRUE; 1229 } 1230 1231 /* 1232 * Termination: no more elements. 1233 * 1234 * If NLC_REFDVP is set acquire a referenced parent dvp. Typically 1235 * used for mkdir/mknod/ncreate/nremove/unlink/rename. 1236 * 1237 * If a mount-point transition occurs due to ncp being a mount point, 1238 * or a null-mount, nl_dvp will be set to NULL and an error code of 1239 * 0 will be returned. 
A NULL nc_parent is not necessarily the only 1240 * indication of a mount-point as null-mounts will also tend to have 1241 * a non-null nc_parent. 1242 * 1243 * nch is locked, standard lock order for the namecache is 1244 * child-to-parent so we can safely lock its parent. We can 1245 * just use cache_dvpref(). 1246 */ 1247 if ((nd->nl_flags & NLC_REFDVP) && 1248 (doretry == FALSE || inretry == TRUE)) { 1249 if (nch.ncp->nc_parent) { 1250 error = cache_resolve_dvp(&nch, nd->nl_cred, 1251 &nd->nl_dvp); 1252 if (error) { 1253 if (nlookup_debug & 1) { 1254 kprintf("Parent directory lost during " 1255 "nlookup: %s/%s (%08x/%08x)\n", 1256 nch.ncp->nc_parent->nc_name, 1257 nch.ncp->nc_name, 1258 nch.ncp->nc_parent->nc_flag, 1259 nch.ncp->nc_flag); 1260 } 1261 cache_put(&nch); 1262 error = EINVAL; 1263 break; 1264 } 1265 1266 /* 1267 * Mount-point, nl_dvp should remain NULL, error 0, 1268 * caller won't be able to use the results so leave 1269 * the ncp referenced but unlocked. 1270 */ 1271 if (nd->nl_dvp == NULL) { 1272 cache_put(&nch); 1273 break; 1274 } 1275 1276 /* 1277 * Good directory, fall through to drop-and-cache 1278 * below 1279 */ 1280 /* */ 1281 } else { 1282 /* 1283 * Mount-point, nl_dvp should remain NULL, error 0, 1284 * caller won't be able to use the results so leave 1285 * the ncp referenced but unlocked. 1286 */ 1287 error = 0; 1288 cache_put(&nch); 1289 break; 1290 } 1291 } 1292 1293 /* 1294 * ncp left with lock+ref on break, set NLC_NCPISLOCKED flag 1295 */ 1296 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 1297 nd->nl_nch = nch; 1298 nd->nl_flags |= NLC_NCPISLOCKED; 1299 nl_gen = nch_gen; 1300 error = 0; 1301 break; 1302 } 1303 1304 /* 1305 * Force a retry (up to max_retries) if nl_gen is incorrect 1306 * 1307 * NOTE: On generation changes we must at a minimum cycle 1308 * the lock. In this case we have one so we are ok. 
1309 */ 1310 if (nd->nl_nch.ncp && (nd->nl_nch.ncp->nc_generation - nl_gen) & ~1) { 1311 if (nlookup_debug & 1) { 1312 kprintf("nlookup: DONE error %d: GEN CHANGE ON \"%s\" " 1313 "%d (retries %d)\n", 1314 error, 1315 nd->nl_nch.ncp->nc_name, 1316 (nd->nl_nch.ncp->nc_generation - nl_gen), 1317 max_retries); 1318 } 1319 gen_changed = 1; 1320 } 1321 if (gen_changed) { 1322 if (max_retries) { 1323 --max_retries; 1324 doretry = TRUE; 1325 inretry = FALSE; 1326 } else { 1327 error = EINVAL; 1328 } 1329 } 1330 1331 /* 1332 * We are done / or possibly retry 1333 */ 1334 if (hit) 1335 ++gd->gd_nchstats->ncs_longhits; 1336 else 1337 ++gd->gd_nchstats->ncs_longmiss; 1338 1339 if (nd->nl_flags & NLC_NCPISLOCKED) 1340 KKASSERT(cache_lockstatus(&nd->nl_nch) > 0); 1341 1342 /* 1343 * Reset nd->nl_path if necessary (due to softlinks). We want to return 1344 * nl_path to its original state before retrying or returning. 1345 */ 1346 if (path_reset) { 1347 if (nd->nl_flags & NLC_HASBUF) { 1348 objcache_put(namei_oc, nd->nl_path); 1349 nd->nl_flags &= ~NLC_HASBUF; 1350 } 1351 nd->nl_path = path_reset; 1352 nd->nl_flags |= saveflag & NLC_HASBUF; 1353 path_reset = NULL; 1354 } 1355 1356 /* 1357 * Retry the whole thing if doretry flag is set, but only once. 1358 * 1359 * autofs(5) may mount another filesystem under its root directory 1360 * while resolving a path. 
1361 * 1362 * NFS might return ESTALE 1363 */ 1364 if (doretry && !inretry) { 1365 if (nlookup_debug & 2) 1366 kprintf("nlookup: errno %d retry %s\n", error, nd->nl_path); 1367 inretry = TRUE; 1368 1369 /* 1370 * Clean up nd->nl_nch and reset to base directory 1371 */ 1372 if (nd->nl_flags & NLC_NCPISLOCKED) { 1373 cache_unlock(&nd->nl_nch); 1374 nd->nl_flags &= ~NLC_NCPISLOCKED; 1375 } 1376 cache_drop(&nd->nl_nch); 1377 cache_copy(nd->nl_basench, &nd->nl_nch); 1378 1379 nd->nl_elmno = 0; 1380 nd->nl_flags |= saveflag; 1381 1382 goto nlookup_start; 1383 } 1384 1385 /* 1386 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit 1387 * (ncp->nc_error will be ENOENT), but we will still return an error 1388 * code of 0. 1389 */ 1390 return(error); 1391 } 1392 1393 /* 1394 * Resolve a mount point's glue ncp. This ncp connects creates the illusion 1395 * of continuity in the namecache tree by connecting the ncp related to the 1396 * vnode under the mount to the ncp related to the mount's root vnode. 1397 * 1398 * If no error occured a locked, ref'd ncp is stored in *ncpp. 1399 */ 1400 int 1401 nlookup_mp(struct mount *mp, struct nchandle *nch) 1402 { 1403 struct vnode *vp; 1404 int error; 1405 1406 error = 0; 1407 cache_get(&mp->mnt_ncmountpt, nch); 1408 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 1409 while (vfs_busy(mp, 0)) 1410 ; 1411 error = VFS_ROOT(mp, &vp); 1412 vfs_unbusy(mp); 1413 if (error) { 1414 cache_put(nch); 1415 } else { 1416 cache_setvp(nch, vp); 1417 vput(vp); 1418 } 1419 } 1420 return(error); 1421 } 1422 1423 /* 1424 * Read the contents of a symlink, allocate a path buffer out of the 1425 * namei_oc and initialize the supplied nlcomponent with the result. 1426 * 1427 * If an error occurs no buffer will be allocated or returned in the nlc. 
1428 */ 1429 int 1430 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 1431 struct nlcomponent *nlc) 1432 { 1433 struct vnode *vp; 1434 struct iovec aiov; 1435 struct uio auio; 1436 int linklen; 1437 int error; 1438 char *cp; 1439 1440 nlc->nlc_nameptr = NULL; 1441 nlc->nlc_namelen = 0; 1442 if (nch->ncp->nc_vp == NULL) 1443 return(ENOENT); 1444 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 1445 return(error); 1446 cp = objcache_get(namei_oc, M_WAITOK); 1447 aiov.iov_base = cp; 1448 aiov.iov_len = MAXPATHLEN; 1449 auio.uio_iov = &aiov; 1450 auio.uio_iovcnt = 1; 1451 auio.uio_offset = 0; 1452 auio.uio_rw = UIO_READ; 1453 auio.uio_segflg = UIO_SYSSPACE; 1454 auio.uio_td = nd->nl_td; 1455 auio.uio_resid = MAXPATHLEN - 1; 1456 error = VOP_READLINK(vp, &auio, nd->nl_cred); 1457 if (error) 1458 goto fail; 1459 linklen = MAXPATHLEN - 1 - auio.uio_resid; 1460 if (varsym_enable) { 1461 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 1462 if (linklen < 0) { 1463 error = ENAMETOOLONG; 1464 goto fail; 1465 } 1466 } 1467 cp[linklen] = 0; 1468 nlc->nlc_nameptr = cp; 1469 nlc->nlc_namelen = linklen; 1470 vput(vp); 1471 return(0); 1472 fail: 1473 objcache_put(namei_oc, cp); 1474 vput(vp); 1475 return(error); 1476 } 1477 1478 /* 1479 * Check access [XXX cache vattr!] [XXX quota] 1480 * 1481 * Generally check the NLC_* access bits. All specified bits must pass 1482 * for this function to return 0. 1483 * 1484 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 1485 * access, otherwise it must exist. No error is returned in this case. 1486 * 1487 * The file must not exist if NLC_EXCL is specified. 1488 * 1489 * Directory permissions in general are tested for NLC_CREATE if the file 1490 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 1491 * whether the file exists or not. 
1492 * 1493 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 1494 * the latter is only tested if the target exists. 1495 * 1496 * The passed ncp must be referenced and locked. If it is already resolved 1497 * it may be locked shared but otherwise should be locked exclusively. 1498 */ 1499 1500 #define S_WXOK_MASK (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 1501 #define S_XOK_MASK (S_IXUSR|S_IXGRP|S_IXOTH) 1502 1503 static int 1504 naccess(struct nchandle *nch, u_int *genp, int nflags, 1505 struct ucred *cred, int *nflagsp, int nchislocked) 1506 { 1507 struct vnode *vp; 1508 struct vattr_lite lva; 1509 struct namecache *ncp; 1510 int error; 1511 int cflags; 1512 1513 ncp = nch->ncp; 1514 1515 again: 1516 /* 1517 * We need a resolved entry. If the entry is not resolved we need 1518 * to lock and resolve it. If it is already resolved, our ref should 1519 * prevent normal evictions (as long as we tested the lock race above). 1520 * 1521 * If the ncp was locked by the caller and left unresolved, it must 1522 * have been locked exclusively. 1523 */ 1524 if (ncp->nc_flag & NCF_UNRESOLVED) { 1525 if (nchislocked == 0) { 1526 cache_lock(nch); 1527 nchislocked = 2; 1528 } 1529 cache_resolve(nch, genp, cred); 1530 ncp = nch->ncp; 1531 } 1532 error = ncp->nc_error; 1533 1534 /* 1535 * Only unresolved entries should return this error (though maybe 1536 * in-filesystem sockets can too). XXX check filetype for VSOCK. 
1537 */ 1538 if (error == ENOTCONN) { 1539 if (nchislocked == 0) { 1540 if (nlookup_debug & 4) { 1541 kprintf("ncp %p %08x %d %s: Warning, unexpected state, " 1542 "forcing lock\n", 1543 ncp, ncp->nc_flag, ncp->nc_error, ncp->nc_name); 1544 print_backtrace(-1); 1545 } 1546 cache_lock(nch); 1547 nchislocked = 2; 1548 goto again; 1549 } 1550 if (nlookup_debug & 4) { 1551 kprintf("ncp %p %08x %d %s: Warning, unexpected state\n", 1552 ncp, ncp->nc_flag, ncp->nc_error, ncp->nc_name); 1553 print_backtrace(-1); 1554 } 1555 } 1556 1557 /* 1558 * Directory permissions checks. Silently ignore ENOENT if these 1559 * tests pass. It isn't an error. 1560 * 1561 * We can safely resolve ncp->nc_parent because ncp is currently 1562 * locked. 1563 */ 1564 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 1565 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 1566 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 1567 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 1568 (nflags & NLC_RENAME_DST) 1569 ) { 1570 struct nchandle par; 1571 1572 if (nchislocked == 0) { 1573 cache_lock_maybe_shared(nch, 0); 1574 nchislocked = 2; 1575 goto again; 1576 } 1577 if ((par.ncp = ncp->nc_parent) == NULL) { 1578 if (error != EAGAIN) 1579 error = EINVAL; 1580 } else if (error == 0 || error == ENOENT) { 1581 u_int dummy_gen = 0; 1582 1583 par.mount = nch->mount; 1584 cache_hold(&par); 1585 cache_lock_maybe_shared(&par, 0); 1586 error = naccess(&par, &dummy_gen, NLC_WRITE, cred, NULL, 1); 1587 cache_put(&par); 1588 } 1589 } 1590 } 1591 1592 /* 1593 * NLC_EXCL check. Target file must not exist. 1594 */ 1595 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 1596 error = EEXIST; 1597 1598 /* 1599 * Try to short-cut the vnode operation for intermediate directory 1600 * components. 
This is a major SMP win because it avoids having 1601 * to execute a lot of code for intermediate directory components, 1602 * including shared refs and locks on intermediate directory vnodes. 1603 * 1604 * We can only do this if the caller does not need nflagsp. 1605 */ 1606 if (error == 0 && nflagsp == NULL && 1607 nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) { 1608 if (nchislocked == 2) 1609 cache_unlock(nch); 1610 return 0; 1611 } 1612 1613 /* 1614 * Get the vnode attributes so we can do the rest of our checks. 1615 * 1616 * NOTE: We only call naccess_lva() if the target exists. 1617 */ 1618 if (error == 0) { 1619 if (nchislocked == 0) { 1620 cache_lock_maybe_shared(nch, 0); 1621 nchislocked = 2; 1622 } 1623 #if 0 1624 error = cache_vget(nch, cred, LK_SHARED, &vp); 1625 #else 1626 error = cache_vref(nch, cred, &vp); 1627 #endif 1628 if (error == ENOENT) { 1629 /* 1630 * Silently zero-out ENOENT if creating or renaming 1631 * (rename target). It isn't an error. 1632 */ 1633 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1634 error = 0; 1635 } else if (error == 0) { 1636 /* 1637 * Get the vnode attributes and check for illegal O_TRUNC 1638 * requests and read-only mounts. 1639 * 1640 * NOTE: You can still open devices on read-only mounts for 1641 * writing. 1642 * 1643 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1644 * check on the parent directory above. 
1645 * 1646 * XXX cache the va in the namecache or in the vnode 1647 */ 1648 error = VOP_GETATTR_LITE(vp, &lva); 1649 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1650 switch(lva.va_type) { 1651 case VREG: 1652 case VDATABASE: 1653 case VCHR: 1654 case VBLK: 1655 case VFIFO: 1656 break; 1657 case VDIR: 1658 error = EISDIR; 1659 break; 1660 default: 1661 error = EINVAL; 1662 break; 1663 } 1664 } 1665 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1666 (vp->v_mount->mnt_flag & MNT_RDONLY) 1667 ) { 1668 switch(lva.va_type) { 1669 case VDIR: 1670 case VLNK: 1671 case VREG: 1672 case VDATABASE: 1673 error = EROFS; 1674 break; 1675 default: 1676 break; 1677 } 1678 } 1679 #if 0 1680 vput(vp); 1681 #else 1682 vrele(vp); 1683 #endif 1684 1685 /* 1686 * Check permissions based on file attributes. The passed 1687 * flags (*nflagsp) are modified with feedback based on 1688 * special attributes and requirements. 1689 */ 1690 if (error == 0) { 1691 /* 1692 * Adjust the returned (*nflagsp) if non-NULL. 1693 */ 1694 if (nflagsp) { 1695 if ((lva.va_mode & VSVTX) && lva.va_uid != cred->cr_uid) 1696 *nflagsp |= NLC_STICKY; 1697 if (lva.va_flags & APPEND) 1698 *nflagsp |= NLC_APPENDONLY; 1699 if (lva.va_flags & IMMUTABLE) 1700 *nflagsp |= NLC_IMMUTABLE; 1701 } 1702 1703 /* 1704 * NCF_WXOK can be set for world-searchable directories. 1705 * 1706 * XXX When we implement capabilities this code would also 1707 * need a cap check, or only set the flag if there are no 1708 * capabilities. 1709 */ 1710 cflags = 0; 1711 if (lva.va_type == VDIR && 1712 (lva.va_mode & S_WXOK_MASK) == S_WXOK_MASK) { 1713 cflags |= NCF_WXOK; 1714 } 1715 if ((lva.va_mode & S_XOK_MASK) == 0) 1716 cflags |= NCF_NOTX; 1717 1718 /* 1719 * Track swapcache management flags in the namecache. 
1720 * 1721 * Calculate the flags based on the current vattr_lite info 1722 * and recalculate the inherited flags from the parent 1723 * (the original cache linkage may have occurred without 1724 * getattrs and thus have stale flags). 1725 */ 1726 if (lva.va_flags & SF_NOCACHE) 1727 cflags |= NCF_SF_NOCACHE; 1728 if (lva.va_flags & UF_CACHE) 1729 cflags |= NCF_UF_CACHE; 1730 if (ncp->nc_parent) { 1731 if (ncp->nc_parent->nc_flag & 1732 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1733 cflags |= NCF_SF_PNOCACHE; 1734 } 1735 if (ncp->nc_parent->nc_flag & 1736 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1737 cflags |= NCF_UF_PCACHE; 1738 } 1739 } 1740 1741 /* 1742 * We're not supposed to update nc_flag when holding a shared 1743 * lock, but we allow the case for certain flags. Note that 1744 * holding an exclusive lock allows updating nc_flag without 1745 * atomics. nc_flag is not allowe to be updated at all unless 1746 * a shared or exclusive lock is held. 1747 */ 1748 atomic_clear_short(&ncp->nc_flag, 1749 (NCF_SF_NOCACHE | NCF_UF_CACHE | 1750 NCF_SF_PNOCACHE | NCF_UF_PCACHE | 1751 NCF_WXOK | NCF_NOTX) & ~cflags); 1752 atomic_set_short(&ncp->nc_flag, cflags); 1753 1754 /* 1755 * Process general access. 1756 */ 1757 error = naccess_lva(&lva, nflags, cred); 1758 } 1759 } 1760 } 1761 if (nchislocked == 2) 1762 cache_unlock(nch); 1763 return(error); 1764 } 1765 1766 /* 1767 * Check the requested access against the given vattr using cred. 1768 */ 1769 int 1770 naccess_lva(struct vattr_lite *lvap, int nflags, struct ucred *cred) 1771 { 1772 int i; 1773 int vmode; 1774 1775 /* 1776 * Test the immutable bit. Creations, deletions, renames (source 1777 * or destination) are not allowed. chown/chmod/other is also not 1778 * allowed but is handled by SETATTR. Hardlinks to the immutable 1779 * file are allowed. 
1780 * 1781 * If the directory is set to immutable then creations, deletions, 1782 * renames (source or dest) and hardlinks to files within the directory 1783 * are not allowed, and regular files opened through the directory may 1784 * not be written to or truncated (unless a special device). 1785 * 1786 * NOTE! New hardlinks to immutable files work but new hardlinks to 1787 * files, immutable or not, sitting inside an immutable directory are 1788 * not allowed. As always if the file is hardlinked via some other 1789 * path additional hardlinks may be possible even if the file is marked 1790 * immutable. The sysop needs to create a closure by checking the hard 1791 * link count. Once closure is achieved you are good, and security 1792 * scripts should check link counts anyway. 1793 * 1794 * Writes and truncations are only allowed on special devices. 1795 */ 1796 if ((lvap->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1797 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1798 return (EPERM); 1799 if (nflags & (NLC_CREATE | NLC_DELETE | 1800 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1801 return (EPERM); 1802 } 1803 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1804 switch(lvap->va_type) { 1805 case VDIR: 1806 return (EISDIR); 1807 case VLNK: 1808 case VREG: 1809 case VDATABASE: 1810 return (EPERM); 1811 default: 1812 break; 1813 } 1814 } 1815 } 1816 1817 /* 1818 * Test the no-unlink and append-only bits for opens, rename targets, 1819 * and deletions. These bits are not tested for creations or 1820 * rename sources. 1821 * 1822 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1823 * If you do not wish this you must also set NOUNLINK. 1824 * 1825 * If the governing directory is marked APPEND-only it implies 1826 * NOUNLINK for all entries in the directory. 
1827 */ 1828 if (((lvap->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1829 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1830 ) { 1831 return (EPERM); 1832 } 1833 1834 /* 1835 * A file marked append-only may not be deleted but can be renamed. 1836 */ 1837 if ((lvap->va_flags & APPEND) && 1838 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1839 ) { 1840 return (EPERM); 1841 } 1842 1843 /* 1844 * A file marked append-only which is opened for writing must also 1845 * be opened O_APPEND. 1846 */ 1847 if ((lvap->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1848 if (nflags & NLC_TRUNCATE) 1849 return (EPERM); 1850 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1851 if ((nflags & NLC_APPEND) == 0) 1852 return (EPERM); 1853 } 1854 } 1855 1856 /* 1857 * root gets universal access 1858 */ 1859 if (cred->cr_uid == 0) 1860 return(0); 1861 1862 /* 1863 * Check owner perms. 1864 * 1865 * If NLC_OWN is set the owner of the file is allowed no matter when 1866 * the owner-mode bits say (utimes). 1867 */ 1868 vmode = 0; 1869 if (nflags & NLC_READ) 1870 vmode |= S_IRUSR; 1871 if (nflags & NLC_WRITE) 1872 vmode |= S_IWUSR; 1873 if (nflags & NLC_EXEC) 1874 vmode |= S_IXUSR; 1875 1876 if (cred->cr_uid == lvap->va_uid) { 1877 if ((nflags & NLC_OWN) == 0) { 1878 if ((vmode & lvap->va_mode) != vmode) 1879 return(EACCES); 1880 } 1881 return(0); 1882 } 1883 1884 /* 1885 * If NLC_STICKY is set only the owner may delete or rename a file. 1886 * This bit is typically set on /tmp. 1887 * 1888 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1889 * the specific delete or rename case. For deletions and renames we 1890 * usually just care about directory permissions, not file permissions. 
1891 */ 1892 if ((nflags & NLC_STICKY) && 1893 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1894 return(EACCES); 1895 } 1896 1897 /* 1898 * Check group perms 1899 */ 1900 vmode >>= 3; 1901 for (i = 0; i < cred->cr_ngroups; ++i) { 1902 if (lvap->va_gid == cred->cr_groups[i]) { 1903 if ((vmode & lvap->va_mode) != vmode) 1904 return(EACCES); 1905 return(0); 1906 } 1907 } 1908 1909 /* 1910 * Check world perms 1911 */ 1912 vmode >>= 3; 1913 if ((vmode & lvap->va_mode) != vmode) 1914 return(EACCES); 1915 return(0); 1916 } 1917 1918 /* 1919 * Long-term (10-second interval) statistics collection 1920 */ 1921 static 1922 uint64_t 1923 collect_nlookup_callback(int n) 1924 { 1925 static uint64_t last_total; 1926 uint64_t save; 1927 uint64_t total; 1928 1929 total = 0; 1930 for (n = 0; n < ncpus; ++n) { 1931 globaldata_t gd = globaldata_find(n); 1932 struct nchstats *sp; 1933 1934 if ((sp = gd->gd_nchstats) != NULL) 1935 total += sp->ncs_longhits + sp->ncs_longmiss; 1936 } 1937 save = total; 1938 total = total - last_total; 1939 last_total = save; 1940 1941 return total; 1942 } 1943 1944 static 1945 void 1946 nlookup_collect_init(void *dummy __unused) 1947 { 1948 kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback, 1949 KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0)); 1950 } 1951 SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0); 1952