1 /* 2 * Copyright (c) 2004-2022 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * nlookup() is the 'new' namei interface. Rather then return directory and 36 * leaf vnodes (in various lock states) the new interface instead deals in 37 * namecache records. 
Namecache records may represent both a positive or
 * a negative hit.  The namespace is locked via the namecache record instead
 * of via the vnode, and only the leaf namecache record (representing the
 * filename) needs to be locked.
 *
 * This greatly improves filesystem parallelism and is a huge simplification
 * of the API versus the old vnode locking / namei scheme.
 *
 * Filesystems must actively control the caching aspects of the namecache,
 * and since namecache pointers are used as handles they are non-optional
 * even for filesystems which do not generally wish to cache things.  It is
 * intended that a separate cache coherency API will be constructed to handle
 * these issues.
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/objcache.h>
#include <sys/file.h>
#include <sys/kcollect.h>
#include <sys/sysctl.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

/*
 * debug.nlookup_max_retries (read/write sysctl, default 4).  nlookup()
 * copies this into its local max_retries when it is entered.
 */
__read_mostly static int nlookup_max_retries = 4;
SYSCTL_INT(_debug, OID_AUTO, nlookup_max_retries, CTLFLAG_RW,
		&nlookup_max_retries, 0,
		"retries on generation mismatch");

/*
 * debug.nlookup_debug (read/write sysctl, default 0).  In the code visible
 * here bit 0 enables the "GEN CHANGE" kprintf() diagnostics in nlookup().
 */
__read_mostly static int nlookup_debug;
SYSCTL_INT(_debug, OID_AUTO, nlookup_debug, CTLFLAG_RW,
		&nlookup_debug, 0,
		"Force retry test");

/*
 * Forward declaration: naccess() is used by nlookup() before its
 * definition appears in this file.
 */
static int naccess(struct nlookupdata *nd, struct nchandle *nch,
		u_int *genp, int vmode,
		struct ucred *cred, int *stickyp, int nchislocked);

/*
 * unmount operations flag NLC_IGNBADDIR in order to allow the
 * umount to successfully issue a nlookup() on the path in order
 * to extract the mount point.  Allow certain errors through.
92 */ 93 static __inline 94 int 95 keeperror(struct nlookupdata *nd, int error) 96 { 97 if (error) { 98 if ((nd->nl_flags & NLC_IGNBADDIR) == 0 || 99 (error != EIO && error != EBADRPC && error != ESTALE)) { 100 return 1; 101 } 102 } 103 return 0; 104 } 105 106 /* 107 * Initialize a nlookup() structure, early error return for copyin faults 108 * or a degenerate empty string (which is not allowed). 109 * 110 * The first process proc0's credentials are used if the calling thread 111 * is not associated with a process context. 112 * 113 * MPSAFE 114 */ 115 int 116 nlookup_init(struct nlookupdata *nd, 117 const char *path, enum uio_seg seg, int flags) 118 { 119 size_t pathlen; 120 struct proc *p; 121 thread_t td; 122 int error; 123 124 td = curthread; 125 p = td->td_proc; 126 127 /* 128 * note: the pathlen set by copy*str() includes the terminating \0. 129 */ 130 bzero(nd, sizeof(struct nlookupdata)); 131 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 132 nd->nl_flags |= NLC_HASBUF; 133 if (seg == UIO_SYSSPACE) 134 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 135 else 136 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 137 138 /* 139 * Don't allow empty pathnames. 140 * POSIX.1 requirement: "" is not a vaild file name. 
141 */ 142 if (error == 0 && pathlen <= 1) 143 error = ENOENT; 144 145 if (error == 0) { 146 if (p && p->p_fd) { 147 if (nd->nl_path[0] == '/') { 148 if ((flags & NLC_NLNCH_NOINIT) == 0) { 149 nd->nl_basench = &p->p_fd->fd_nrdir; 150 cache_copy(nd->nl_basench, &nd->nl_nch); 151 } 152 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 153 if (p->p_fd->fd_njdir.ncp) 154 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 155 nd->nl_cred = td->td_ucred; 156 nd->nl_flags |= NLC_BORROWCRED; 157 } else { 158 if ((flags & NLC_NLNCH_NOINIT) == 0) { 159 nd->nl_basench = &p->p_fd->fd_ncdir; 160 cache_copy(nd->nl_basench, &nd->nl_nch); 161 } 162 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 163 if (p->p_fd->fd_njdir.ncp) 164 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 165 nd->nl_cred = td->td_ucred; 166 nd->nl_flags |= NLC_BORROWCRED; 167 } 168 } else { 169 if ((flags & NLC_NLNCH_NOINIT) == 0) { 170 nd->nl_basench = &rootnch; 171 cache_copy(nd->nl_basench, &nd->nl_nch); 172 } 173 cache_copy(&rootnch, &nd->nl_rootnch); 174 cache_copy(&rootnch, &nd->nl_jailnch); 175 nd->nl_cred = proc0.p_ucred; 176 nd->nl_flags |= NLC_BORROWCRED; 177 } 178 nd->nl_td = td; 179 nd->nl_flags |= flags & ~NLC_NLNCH_NOINIT; 180 } else { 181 nlookup_done(nd); 182 } 183 return(error); 184 } 185 186 187 /* 188 * nlookup_init() for "at" family of syscalls. 189 * 190 * Similar to nlookup_init() but if the path is relative and fd is not 191 * AT_FDCWD, the path will be interpreted relative to the directory pointed 192 * to by fd. In this case, the file entry pointed to by fd is ref'ed and 193 * returned in *fpp. 194 * 195 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 196 * and release the ref to the file entry. 
197 */ 198 int 199 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 200 const char *path, enum uio_seg seg, int flags) 201 { 202 struct thread *td = curthread; 203 struct file* fp; 204 struct vnode *vp; 205 int error; 206 207 *fpp = NULL; 208 209 /* 210 * Resolve the path, we might have to copy it in from userland, 211 * but don't initialize nl_basench, or nl_nch. 212 */ 213 error = nlookup_init(nd, path, seg, flags | NLC_NLNCH_NOINIT); 214 if (__predict_false(error)) 215 return (error); 216 217 /* 218 * Setup nl_basench (a pointer only not refd), and copy+ref 219 * to initialize nl_nch. Only applicable to relative paths. 220 * For absolute paths, or if (fd) is degenerate, just use the 221 * normal path. 222 */ 223 if (nd->nl_path[0] == '/') { 224 struct proc *p = curproc; 225 nd->nl_basench = &p->p_fd->fd_nrdir; 226 } else if (fd == AT_FDCWD) { 227 struct proc *p = curproc; 228 nd->nl_basench = &p->p_fd->fd_ncdir; 229 } else { 230 if ((error = holdvnode(td, fd, &fp)) != 0) 231 goto done; 232 vp = (struct vnode*)fp->f_data; 233 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 234 fdrop(fp); 235 fp = NULL; 236 error = ENOTDIR; 237 goto done; 238 } 239 nd->nl_basench = &fp->f_nchandle; 240 *fpp = fp; 241 } 242 cache_copy(nd->nl_basench, &nd->nl_nch); 243 done: 244 if (error) 245 nlookup_done(nd); 246 return (error); 247 } 248 249 /* 250 * This works similarly to nlookup_init() but does not assume a process 251 * context. rootnch is always chosen for the root directory and the cred 252 * and starting directory are supplied in arguments. 
253 */ 254 int 255 nlookup_init_raw(struct nlookupdata *nd, 256 const char *path, enum uio_seg seg, int flags, 257 struct ucred *cred, struct nchandle *ncstart) 258 { 259 size_t pathlen; 260 thread_t td; 261 int error; 262 263 td = curthread; 264 265 bzero(nd, sizeof(struct nlookupdata)); 266 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 267 nd->nl_flags |= NLC_HASBUF; 268 if (seg == UIO_SYSSPACE) 269 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 270 else 271 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 272 273 /* 274 * Don't allow empty pathnames. 275 * POSIX.1 requirement: "" is not a vaild file name. 276 */ 277 if (error == 0 && pathlen <= 1) 278 error = ENOENT; 279 280 if (error == 0) { 281 cache_copy(ncstart, &nd->nl_nch); 282 cache_copy(&rootnch, &nd->nl_rootnch); 283 cache_copy(&rootnch, &nd->nl_jailnch); 284 nd->nl_cred = crhold(cred); 285 nd->nl_td = td; 286 nd->nl_flags |= flags; 287 } else { 288 nlookup_done(nd); 289 } 290 return(error); 291 } 292 293 /* 294 * This works similarly to nlookup_init_raw() but does not rely 295 * on rootnch being initialized yet. 296 */ 297 int 298 nlookup_init_root(struct nlookupdata *nd, 299 const char *path, enum uio_seg seg, int flags, 300 struct ucred *cred, struct nchandle *ncstart, 301 struct nchandle *ncroot) 302 { 303 size_t pathlen; 304 thread_t td; 305 int error; 306 307 td = curthread; 308 309 bzero(nd, sizeof(struct nlookupdata)); 310 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 311 nd->nl_flags |= NLC_HASBUF; 312 if (seg == UIO_SYSSPACE) 313 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 314 else 315 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 316 317 /* 318 * Don't allow empty pathnames. 319 * POSIX.1 requirement: "" is not a vaild file name. 
320 */ 321 if (error == 0 && pathlen <= 1) 322 error = ENOENT; 323 324 if (error == 0) { 325 cache_copy(ncstart, &nd->nl_nch); 326 cache_copy(ncroot, &nd->nl_rootnch); 327 cache_copy(ncroot, &nd->nl_jailnch); 328 nd->nl_cred = crhold(cred); 329 nd->nl_td = td; 330 nd->nl_flags |= flags; 331 } else { 332 nlookup_done(nd); 333 } 334 return(error); 335 } 336 337 #if 0 338 /* 339 * Set a different credential; this credential will be used by future 340 * operations performed on nd.nl_open_vp and nlookupdata structure. 341 */ 342 void 343 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 344 { 345 KKASSERT(nd->nl_cred != NULL); 346 347 if (nd->nl_cred != cred) { 348 cred = crhold(cred); 349 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 350 crfree(nd->nl_cred); 351 nd->nl_flags &= ~NLC_BORROWCRED; 352 nd->nl_cred = cred; 353 } 354 } 355 #endif 356 357 /* 358 * Cleanup a nlookupdata structure after we are through with it. This may 359 * be called on any nlookupdata structure initialized with nlookup_init(). 360 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 361 * returns an error, even if as a consumer you believe you have taken all 362 * dynamic elements out of the nlookupdata structure. 
363 */ 364 void 365 nlookup_done(struct nlookupdata *nd) 366 { 367 if (nd->nl_nch.ncp) { 368 if (nd->nl_flags & NLC_NCPISLOCKED) 369 cache_unlock(&nd->nl_nch); 370 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 371 } 372 nd->nl_flags &= ~NLC_NCPISLOCKED; 373 if (nd->nl_rootnch.ncp) 374 cache_drop_and_cache(&nd->nl_rootnch, 0); 375 if (nd->nl_jailnch.ncp) 376 cache_drop_and_cache(&nd->nl_jailnch, 0); 377 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 378 objcache_put(namei_oc, nd->nl_path); 379 nd->nl_path = NULL; 380 } 381 if (nd->nl_cred) { 382 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 383 crfree(nd->nl_cred); 384 nd->nl_cred = NULL; 385 nd->nl_flags &= ~NLC_BORROWCRED; 386 } 387 if (nd->nl_open_vp) { 388 if (nd->nl_flags & NLC_LOCKVP) { 389 vn_unlock(nd->nl_open_vp); 390 nd->nl_flags &= ~NLC_LOCKVP; 391 } 392 vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL); 393 nd->nl_open_vp = NULL; 394 } 395 if (nd->nl_dvp) { 396 vrele(nd->nl_dvp); 397 nd->nl_dvp = NULL; 398 } 399 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 400 nd->nl_basench = NULL; 401 } 402 403 /* 404 * Works similarly to nlookup_done() when nd initialized with 405 * nlookup_init_at(). 406 */ 407 void 408 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 409 { 410 nlookup_done(nd); 411 if (fp != NULL) 412 fdrop(fp); 413 } 414 415 void 416 nlookup_zero(struct nlookupdata *nd) 417 { 418 bzero(nd, sizeof(struct nlookupdata)); 419 } 420 421 /* 422 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 423 * if an error occured. 424 * 425 * Note that the returned ncp is not checked for permissions, though VEXEC 426 * is checked on the directory path leading up to the result. The caller 427 * must call naccess() to check the permissions of the returned leaf. 
428 */ 429 struct nchandle 430 nlookup_simple(const char *str, enum uio_seg seg, 431 int niflags, int *error) 432 { 433 struct nlookupdata nd; 434 struct nchandle nch; 435 436 *error = nlookup_init(&nd, str, seg, niflags); 437 if (*error == 0) { 438 if ((*error = nlookup(&nd)) == 0) { 439 nch = nd.nl_nch; /* keep hold ref from structure */ 440 cache_zero(&nd.nl_nch); /* and NULL out */ 441 } else { 442 cache_zero(&nch); 443 } 444 nlookup_done(&nd); 445 } else { 446 cache_zero(&nch); 447 } 448 return(nch); 449 } 450 451 /* 452 * Returns non-zero if the path element is the last element 453 */ 454 static 455 int 456 islastelement(const char *ptr) 457 { 458 while (*ptr == '/') 459 ++ptr; 460 return (*ptr == 0); 461 } 462 463 /* 464 * Returns non-zero if we need to lock the namecache element 465 * exclusively. Unless otherwise requested by NLC_SHAREDLOCK, 466 * the last element of the namecache lookup will be locked 467 * exclusively. 468 * 469 * O_CREAT or O_TRUNC need the last element to be locked exlcusively. 470 * Intermediate elements are always locked shared. 471 * 472 * NOTE: Even if we return on-zero, an unresolved namecache record 473 * will always be locked exclusively. 474 */ 475 static __inline 476 int 477 wantsexcllock(struct nlookupdata *nd, int last_element) 478 { 479 if ((nd->nl_flags & NLC_SHAREDLOCK) == 0) 480 return(last_element); 481 return 0; 482 } 483 484 485 /* 486 * Do a generic nlookup. Note that the passed nd is not nlookup_done()'d 487 * on return, even if an error occurs. If no error occurs or NLC_CREATE 488 * is flagged and ENOENT is returned, then the returned nl_nch is always 489 * referenced and locked exclusively. 490 * 491 * WARNING: For any general error other than ENOENT w/NLC_CREATE, the 492 * the resulting nl_nch may or may not be locked and if locked 493 * might be locked either shared or exclusive. 494 * 495 * Intermediate directory elements, including the current directory, require 496 * execute (search) permission. 
 * nlookup does not examine the access
 * permissions on the returned element.
 *
 * If NLC_CREATE is set the last directory must allow node creation,
 * and an error code of 0 will be returned for a nonexistent
 * target (not ENOENT).
 *
 * If NLC_RENAME_DST is set the last directory must allow node deletion,
 * plus the sticky check is made, and an error code of 0 will be returned
 * for a nonexistent target (not ENOENT).
 *
 * If NLC_DELETE is set the last directory must allow node deletion,
 * plus the sticky check is made.
 *
 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
 * of the returned entry.  The vnode will be referenced but not locked.
 *
 * IF THE PATH REPRESENTS A MOUNT POINT CROSSING THEN NLC_REFDVP WILL SET
 * NL_DVP TO NULL AND RETURN NO ERROR (ERROR == 0), allowing the operation
 * to return up the stack.  The nch will only be referenced and not locked.
 * High level code must check this case and do the right thing since,
 * typically, it means things like 'mkdir' should fail with EEXIST.  For
 * example 'mkdir /var/cache' where /var/cache is a null-mount from
 * /build/var.cache, needs to return EEXIST rather than a mount-crossing
 * failure.
 *
 * NOTE: As an optimization we attempt to obtain a shared namecache lock
 *	 on any intermediate elements.  On success, the returned element
 *	 is ALWAYS locked exclusively.
 *
 * NOTE: If for any reason the nc_generation number of the ncp's being
 *	 evaluated changes, the lookup is retried.
528 */ 529 int 530 nlookup(struct nlookupdata *nd) 531 { 532 globaldata_t gd = mycpu; 533 struct nlcomponent nlc; 534 struct nchandle nch; 535 struct nchandle nctmp; 536 struct mount *mp; 537 int wasdotordotdot; 538 char *path_reset; 539 char *ptr; 540 char *nptr; 541 int error; 542 int len; 543 int dflags; 544 int hit = 1; 545 int saveflag = nd->nl_flags; 546 int max_retries = nlookup_max_retries; 547 u_int nl_gen; 548 u_int nch_gen; 549 int gen_changed; 550 boolean_t doretry = FALSE; 551 boolean_t inretry = FALSE; 552 553 path_reset = NULL; 554 555 nlookup_start: 556 557 #ifdef KTRACE 558 if (KTRPOINT(nd->nl_td, KTR_NAMEI)) 559 ktrnamei(nd->nl_td->td_lwp, nd->nl_path); 560 #endif 561 bzero(&nlc, sizeof(nlc)); 562 563 /* 564 * Setup for the loop. The current working namecache element is 565 * always at least referenced. We lock it as required, but always 566 * return a locked, resolved namecache entry. 567 */ 568 nd->nl_loopcnt = 0; 569 nd->nl_dir_error = 0; 570 if (nd->nl_dvp) { 571 vrele(nd->nl_dvp); 572 nd->nl_dvp = NULL; 573 } 574 ptr = nd->nl_path; 575 576 nl_gen = nd->nl_nch.ncp ? nd->nl_nch.ncp->nc_generation : 0; 577 nl_gen &= ~3; 578 gen_changed = 0; 579 580 /* 581 * Loop on the path components. At the top of the loop nd->nl_nch 582 * is ref'd and unlocked and represents our current position. 583 */ 584 for (;;) { 585 int last_element; 586 587 ++nd->nl_elmno; 588 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 589 590 /* 591 * Check if the root directory should replace the current 592 * directory. This is done at the start of a translation 593 * or after a symbolic link has been found. In other cases 594 * ptr will never be pointing at a '/'. 
595 */ 596 if (*ptr == '/') { 597 do { 598 ++ptr; 599 } while (*ptr == '/'); 600 601 /* 602 * We might already be at the root as a pre-optimization 603 */ 604 if (nd->nl_nch.mount != nd->nl_rootnch.mount || 605 nd->nl_nch.ncp != nd->nl_rootnch.ncp) { 606 cache_drop_and_cache(&nd->nl_nch, 0); 607 cache_copy(&nd->nl_rootnch, &nd->nl_nch); 608 nl_gen = nd->nl_nch.ncp->nc_generation & ~3; 609 } 610 611 /* 612 * Fast-track termination. There is no parent directory of 613 * the root in the same mount from the point of view of 614 * the caller so return EACCES if NLC_REFDVP is specified, 615 * and EEXIST if NLC_CREATE is also specified. 616 * e.g. 'rmdir /' or 'mkdir /' are not allowed. 617 */ 618 if (*ptr == 0) { 619 if (nd->nl_flags & NLC_REFDVP) 620 error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES; 621 else 622 error = 0; 623 nd->nl_flags |= NLC_NCPISLOCKED; 624 cache_lock_maybe_shared(&nd->nl_nch, 625 wantsexcllock(nd, islastelement(ptr))); 626 break; 627 } 628 continue; 629 } 630 631 /* 632 * Pre-calculate next path component so we can check whether the 633 * current component directory is the last directory in the path 634 * or not. 635 */ 636 for (nptr = ptr; *nptr && *nptr != '/'; ++nptr) 637 ; 638 639 /* 640 * nd->nl_nch is referenced and not locked here. 641 * 642 * Check directory search permissions. This will load dflags to 643 * obtain directory-special permissions to be checked along with the 644 * last component. 645 * 646 * We only need to pass-in &dflags for the second-to-last component. 647 * Optimize by passing-in NULL for any prior components, which may 648 * allow the code to bypass the naccess() call. 649 * 650 * naccess() is optimized to avoid having to lock the nch or get 651 * the related vnode if cached perms are sufficient. 
652 */ 653 dflags = 0; 654 if (*nptr == '/' || (saveflag & NLC_MODIFYING_MASK) == 0) { 655 error = naccess(nd, &nd->nl_nch, &nl_gen, NLC_EXEC, 656 nd->nl_cred, NULL, 0); 657 } else { 658 error = naccess(nd, &nd->nl_nch, &nl_gen, NLC_EXEC, 659 nd->nl_cred, &dflags, 0); 660 } 661 if (error) { 662 if (keeperror(nd, error)) 663 break; 664 error = 0; 665 } 666 667 /* 668 * Extract the next (or last) path component. Path components are 669 * limited to 255 characters. 670 */ 671 nlc.nlc_nameptr = ptr; 672 nlc.nlc_namelen = nptr - ptr; 673 ptr = nptr; 674 if (nlc.nlc_namelen >= 256) { 675 error = ENAMETOOLONG; 676 break; 677 } 678 last_element = islastelement(nptr); 679 680 /* 681 * Lookup the path component in the cache, creating an unresolved 682 * entry if necessary. We have to handle "." and ".." as special 683 * cases. 684 * 685 * When handling ".." we have to detect a traversal back through a 686 * mount point. If we are at the root, ".." just returns the root. 687 * 688 * When handling "." or ".." we also have to recalculate dflags 689 * since our dflags will be for some sub-directory instead of the 690 * parent dir. 691 * 692 * This subsection returns a referenced and possibly locked 'nch'. 693 * The locking status is based on the last_element flag. 694 * 695 * The namecache topology is not allowed to be disconnected, so 696 * encountering a NULL parent will generate EINVAL. This typically 697 * occurs when a directory is removed out from under a process. 698 * 699 * WARNING! The unlocking of nd->nl_nch is sensitive code. 700 */ 701 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 702 703 if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') { 704 if (last_element) { 705 cache_get_maybe_shared(&nd->nl_nch, &nch, 706 wantsexcllock(nd, 1)); 707 } else { 708 cache_copy(&nd->nl_nch, &nch); 709 } 710 nch_gen = nch.ncp->nc_generation & ~3; 711 wasdotordotdot = 1; 712 } else if (nlc.nlc_namelen == 2 && 713 nlc.nlc_nameptr[0] == '.' 
&& nlc.nlc_nameptr[1] == '.') { 714 if (nd->nl_nch.mount == nd->nl_rootnch.mount && 715 nd->nl_nch.ncp == nd->nl_rootnch.ncp 716 ) { 717 /* 718 * ".." at the root returns the root 719 */ 720 if (last_element) { 721 cache_get_maybe_shared(&nd->nl_nch, &nch, 722 wantsexcllock(nd, 1)); 723 } else { 724 cache_copy(&nd->nl_nch, &nch); 725 } 726 } else { 727 /* 728 * Locate the parent ncp. If we are at the root of a 729 * filesystem mount we have to skip to the mounted-on 730 * point in the underlying filesystem. 731 * 732 * Expect the parent to always be good since the 733 * mountpoint doesn't go away. XXX hack. cache_get() 734 * requires the ncp to already have a ref as a safety. 735 * 736 * However, a process which has been broken out of a chroot 737 * will wind up with a NULL parent if it tries to '..' above 738 * the real root, deal with the case. Note that this does 739 * not protect us from a jail breakout, it just stops a panic 740 * if the jail-broken process tries to '..' past the real 741 * root. 742 */ 743 nctmp = nd->nl_nch; 744 while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) { 745 nctmp = nctmp.mount->mnt_ncmounton; 746 if (nctmp.ncp == NULL) 747 break; 748 } 749 if (nctmp.ncp == NULL) { 750 if (curthread->td_proc) { 751 kprintf("vfs_nlookup: '..' traverse broke " 752 "jail: pid %d (%s)\n", 753 curthread->td_proc->p_pid, 754 curthread->td_comm); 755 } 756 nctmp = nd->nl_rootnch; 757 } else { 758 nctmp.ncp = nctmp.ncp->nc_parent; 759 } 760 if (last_element) { 761 cache_get_maybe_shared(&nctmp, &nch, 762 wantsexcllock(nd, 1)); 763 } else { 764 cache_copy(&nctmp, &nch); 765 } 766 } 767 nch_gen = nch.ncp->nc_generation & ~3; 768 wasdotordotdot = 2; 769 } else { 770 /* 771 * Quickly lookup the component. If we can't find it, then 772 * slowly lookup and resolve the component. 
773 */ 774 if (last_element) { 775 error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc, 776 wantsexcllock(nd, 1), 777 &nch); 778 } else { 779 nch = cache_nlookup_nonlocked(&nd->nl_nch, &nlc); 780 if (nch.ncp == NULL) 781 error = EWOULDBLOCK; 782 } 783 784 /* 785 * At this point the only possible error is EWOULDBLOCK. 786 * 787 * If no error nch is set and referenced, and then also locked 788 * according to last_element. For EWOULDBLOCK nch is not set. 789 * For any other error nch is set and referenced, but not locked. 790 * 791 * On EWOULDBLOCK the ncp may be unresolved (if not locked it can 792 * become unresolved at any time, but we don't care at this time). 793 */ 794 if (error == EWOULDBLOCK) { 795 nch = cache_nlookup(&nd->nl_nch, &nlc); 796 if (nch.ncp->nc_flag & NCF_UNRESOLVED) 797 hit = 0; 798 for (;;) { 799 error = cache_resolve(&nch, &nch_gen, nd->nl_cred); 800 if (error != EAGAIN && 801 (nch.ncp->nc_flag & NCF_DESTROYED) == 0) { 802 if (error == ESTALE) { 803 if (!inretry) 804 error = ENOENT; 805 doretry = TRUE; 806 } 807 if (last_element == 0) 808 cache_unlock(&nch); 809 break; 810 } 811 kprintf("[diagnostic] nlookup: relookup %*.*s\n", 812 nch.ncp->nc_nlen, nch.ncp->nc_nlen, 813 nch.ncp->nc_name); 814 cache_put(&nch); 815 nch = cache_nlookup(&nd->nl_nch, &nlc); 816 } 817 } 818 nch_gen = nch.ncp->nc_generation & ~3; 819 wasdotordotdot = 0; 820 } 821 822 /* 823 * If the component is "." or ".." our dflags no longer represents 824 * the parent directory and we have to explicitly look it up. 825 * 826 * Expect the parent to be good since nch is locked. 827 * 828 * nch will continue to be valid even if an error occurs after this 829 * point. 
830 */ 831 if (wasdotordotdot && error == 0) { 832 struct nchandle par; 833 834 dflags = 0; 835 if (last_element == 0) 836 cache_lock_maybe_shared(&nch, wantsexcllock(nd, 0)); 837 838 if ((par.ncp = nch.ncp->nc_parent) != NULL) { 839 u_int dummy_gen = 0; 840 841 par.mount = nch.mount; 842 cache_hold(&par); 843 error = naccess(nd, &par, &dummy_gen, 0, nd->nl_cred, &dflags, 0); 844 cache_drop_and_cache(&par, nd->nl_elmno - 1); 845 if (error) { 846 if (!keeperror(nd, error)) 847 error = 0; 848 if (error == EINVAL) { 849 kprintf("nlookup (%s): trailing . or .. retry on %s\n", 850 curthread->td_comm, nd->nl_path); 851 doretry = TRUE; 852 } 853 } 854 } 855 856 if (last_element == 0) 857 cache_unlock(&nch); 858 } 859 860 /* 861 * [end of subsection] 862 * 863 * nch is referenced and locked according to (last_element). 864 * nd->nl_nch is unlocked and referenced. 865 * nl_gen and nch_gen are both set. 866 */ 867 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 868 869 /* 870 * Resolve the namespace if necessary. The ncp returned by 871 * cache_nlookup() is referenced, and also locked according 872 * to last_element. 873 * 874 * XXX neither '.' nor '..' should return EAGAIN since they were 875 * previously resolved and thus cannot be newly created ncp's. 876 */ 877 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 878 if (last_element == 0) 879 cache_lock(&nch); 880 hit = 0; 881 error = cache_resolve(&nch, &nch_gen, nd->nl_cred); 882 if (error == ESTALE) { 883 if (!inretry) 884 error = ENOENT; 885 doretry = TRUE; 886 } 887 if (last_element == 0) 888 cache_unlock(&nch); 889 KKASSERT(error != EAGAIN); 890 } else { 891 error = nch.ncp->nc_error; 892 } 893 894 /* 895 * Early completion. ENOENT is not an error if this is the last 896 * component and NLC_CREATE or NLC_RENAME (rename target) was 897 * requested. Note that ncp->nc_error is left as ENOENT in that 898 * case, which we check later on. 899 * 900 * Also handle invalid '.' or '..' 
components terminating a path 901 * for a create/rename/delete. The standard requires this and pax 902 * pretty stupidly depends on it. 903 */ 904 if (last_element) { 905 if (error == ENOENT && 906 (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST))) 907 { 908 if (nd->nl_flags & NLC_NFS_RDONLY) { 909 error = EROFS; 910 } else { 911 error = naccess(nd, &nch, &nch_gen, 912 nd->nl_flags | dflags, 913 nd->nl_cred, NULL, last_element); 914 } 915 } 916 if (error == 0 && wasdotordotdot && 917 (nd->nl_flags & (NLC_CREATE | NLC_DELETE | 918 NLC_RENAME_SRC | NLC_RENAME_DST))) 919 { 920 /* 921 * POSIX junk 922 */ 923 if (nd->nl_flags & NLC_CREATE) 924 error = EEXIST; 925 else if (nd->nl_flags & NLC_DELETE) 926 error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY; 927 else 928 error = EINVAL; 929 } 930 } 931 932 /* 933 * Early completion on error. 934 */ 935 if (error) { 936 if (last_element) 937 cache_unlock(&nch); 938 cache_drop_and_cache(&nch, nd->nl_elmno); 939 break; 940 } 941 942 /* 943 * If the element is a symlink and it is either not the last 944 * element or it is the last element and we are allowed to 945 * follow symlinks, resolve the symlink. 946 */ 947 if ((nch.ncp->nc_flag & NCF_ISSYMLINK) && 948 (*ptr || (nd->nl_flags & NLC_FOLLOW)) 949 ) { 950 if (nd->nl_loopcnt++ >= MAXSYMLINKS) { 951 error = ELOOP; 952 if (last_element) 953 cache_unlock(&nch); 954 cache_drop_and_cache(&nch, nd->nl_elmno); 955 break; 956 } 957 958 /* 959 * Check for a generation change. 960 * 961 * NOTE: On generation changes we must at a minimum cycle 962 * the lock. Here we get or have the lock so we are 963 * ok. 
964 */ 965 if (last_element == 0) 966 cache_lock_maybe_shared(&nch, 1); 967 968 if ((nch.ncp->nc_generation - nch_gen) & ~1) { 969 if (nlookup_debug & 1) { 970 kprintf("nlookup: symlink: GEN CHANGE %d\n", 971 (nch.ncp->nc_generation - nch_gen)); 972 } 973 gen_changed = 1; 974 } 975 976 error = nreadsymlink(nd, &nch, &nlc); 977 cache_put(&nch); 978 if (error) 979 break; 980 981 /* 982 * Concatenate trailing path elements onto the returned symlink. 983 * Note that if the path component (ptr) is not exhausted, it 984 * will being with a '/', so we do not have to add another one. 985 * 986 * The symlink may not be empty. 987 */ 988 len = strlen(ptr); 989 if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) { 990 error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT; 991 objcache_put(namei_oc, nlc.nlc_nameptr); 992 break; 993 } 994 bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1); 995 996 if (path_reset) { 997 if (nd->nl_flags & NLC_HASBUF) 998 objcache_put(namei_oc, nd->nl_path); 999 } else { 1000 path_reset = nd->nl_path; 1001 } 1002 nd->nl_path = nlc.nlc_nameptr; 1003 nd->nl_flags |= NLC_HASBUF; 1004 ptr = nd->nl_path; 1005 /* nl_gen has not changed */ 1006 1007 /* 1008 * Go back up to the top to resolve any initial '/'s in the 1009 * symlink. 1010 */ 1011 continue; 1012 } 1013 1014 /* 1015 * If the element is a directory and we are crossing a mount point, 1016 * Locate the mount. 1017 */ 1018 while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 1019 (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 && 1020 (mp = cache_findmount(&nch)) != NULL 1021 ) { 1022 struct vnode *tdp; 1023 int vfs_do_busy = 0; 1024 1025 /* 1026 * VFS must be busied before the namecache entry is locked, 1027 * but we don't want to waste time calling vfs_busy() if the 1028 * mount point is already resolved. 1029 */ 1030 again: 1031 /* 1032 * Check for a generation change. 1033 * 1034 * NOTE: On generation changes we must at a minimum cycle 1035 * the lock. 
So get and release the lock if we 1036 * do not have it. 1037 */ 1038 if ((nch.ncp->nc_generation - nch_gen) & ~1) { 1039 if (last_element == 0) { 1040 cache_lock_maybe_shared(&nch, 1); 1041 cache_unlock(&nch); 1042 } 1043 if (nlookup_debug & 1) { 1044 kprintf("nlookup: mountpt: GEN CHANGE %d\n", 1045 (nch.ncp->nc_generation - nch_gen)); 1046 } 1047 gen_changed = 1; 1048 } 1049 if (last_element) 1050 cache_unlock(&nch); 1051 cache_drop_and_cache(&nch, nd->nl_elmno); 1052 1053 if (vfs_do_busy) { 1054 while (vfs_busy(mp, 0)) { 1055 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 1056 kprintf("nlookup: warning umount race avoided\n"); 1057 cache_dropmount(mp); 1058 error = EBUSY; 1059 vfs_do_busy = 0; 1060 goto double_break; 1061 } 1062 } 1063 } 1064 1065 /* 1066 * We don't need to lock the nch unless the entry is unresolved 1067 * or this is the last element. 1068 */ 1069 if (last_element) 1070 cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch, 1071 wantsexcllock(nd, 1)); 1072 else 1073 cache_copy(&mp->mnt_ncmountpt, &nch); 1074 nch_gen = nch.ncp->nc_generation & ~3; 1075 1076 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 1077 if (last_element == 0) 1078 cache_lock(&nch); 1079 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 1080 if (vfs_do_busy == 0) { 1081 vfs_do_busy = 1; 1082 if (last_element == 0) 1083 cache_unlock(&nch); 1084 goto again; 1085 } 1086 error = VFS_ROOT(mp, &tdp); 1087 vfs_unbusy(mp); 1088 vfs_do_busy = 0; 1089 if (keeperror(nd, error)) { 1090 cache_dropmount(mp); 1091 if (last_element == 0) 1092 cache_unlock(&nch); 1093 break; 1094 } 1095 if (error == 0) { 1096 cache_setvp(&nch, tdp); 1097 nch_gen = nch.ncp->nc_generation & ~3; 1098 vput(tdp); 1099 } 1100 } 1101 if (last_element == 0) 1102 cache_unlock(&nch); 1103 } 1104 if (vfs_do_busy) 1105 vfs_unbusy(mp); 1106 cache_dropmount(mp); 1107 } 1108 1109 /* 1110 * Break out on error 1111 */ 1112 if (keeperror(nd, error)) { 1113 if (last_element) 1114 cache_unlock(&nch); 1115 cache_drop_and_cache(&nch, nd->nl_elmno); 1116 
double_break: 1117 break; 1118 } 1119 1120 /* 1121 * Skip any slashes to get to the next element. If there 1122 * are any slashes at all the current element must be a 1123 * directory or, in the create case, intended to become a directory. 1124 * If it isn't we break without incrementing ptr and fall through 1125 * to the failure case below. 1126 */ 1127 while (*ptr == '/') { 1128 if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 && 1129 !(nd->nl_flags & NLC_WILLBEDIR) 1130 ) { 1131 break; 1132 } 1133 ++ptr; 1134 } 1135 1136 /* 1137 * Continuation case: additional elements and the current 1138 * element is a directory. 1139 */ 1140 if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) { 1141 /* 1142 * Check for a generation change. 1143 * 1144 * NOTE: On generation changes we must at a minimum cycle 1145 * the lock. So get and release the lock if we 1146 * do not have it. 1147 */ 1148 if ((nch.ncp->nc_generation - nch_gen) & ~1) { 1149 if (last_element == 0) { 1150 cache_lock_maybe_shared(&nch, 1); 1151 cache_unlock(&nch); 1152 } 1153 if (nlookup_debug & 1) { 1154 kprintf("nlookup: next: GEN CHANGE %d\n", 1155 (nch.ncp->nc_generation - nch_gen)); 1156 } 1157 gen_changed = 1; 1158 } 1159 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 1160 if (last_element) 1161 cache_unlock(&nch); 1162 /*nchislocked = 0; not needed */ 1163 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 1164 nd->nl_nch = nch; 1165 nl_gen = nch_gen; 1166 continue; 1167 } 1168 1169 /* 1170 * Check for a generation change. 1171 * 1172 * NOTE: On generation changes we must at a minimum cycle 1173 * the lock. So get and release the lock if we 1174 * do not have it. 
1175 */ 1176 if ((nch.ncp->nc_generation - nch_gen) & ~1) { 1177 if (nlookup_debug & 1) { 1178 if (last_element == 0) { 1179 cache_lock_maybe_shared(&nch, 1); 1180 cache_unlock(&nch); 1181 } 1182 kprintf("nlookup: final: GEN CHANGE %d\n", 1183 (nch.ncp->nc_generation - nch_gen)); 1184 gen_changed = 1; 1185 } 1186 } 1187 1188 /* 1189 * Failure case: additional elements and the current element 1190 * is not a directory 1191 */ 1192 if (*ptr) { 1193 if (last_element) 1194 cache_unlock(&nch); 1195 cache_drop_and_cache(&nch, nd->nl_elmno); 1196 error = ENOTDIR; 1197 break; 1198 } 1199 1200 /* 1201 * Successful lookup of last element. 1202 * 1203 * Check permissions if the target exists. If the target does not 1204 * exist directory permissions were already tested in the early 1205 * completion code above. 1206 * 1207 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY 1208 * if the file is marked append-only, and NLC_STICKY if the directory 1209 * containing the file is sticky. 1210 */ 1211 KKASSERT(last_element); 1212 1213 if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) { 1214 error = naccess(nd, &nch, &nch_gen, nd->nl_flags | dflags, 1215 nd->nl_cred, NULL, 1); 1216 if (keeperror(nd, error)) { 1217 cache_put(&nch); 1218 break; 1219 } 1220 } 1221 1222 /* 1223 * Termination: no more elements. 1224 * 1225 * Check to see if the immediate parent has been destroyed. This race 1226 * can occur because the element lookup must temporarily unlock 1227 * the parent. If so, do a retry. 1228 */ 1229 if (nch.ncp->nc_parent && 1230 (nch.ncp->nc_parent->nc_flag & NCF_DESTROYED)) { 1231 doretry = TRUE; 1232 } 1233 1234 /* 1235 * Termination: no more elements. 1236 * 1237 * If NLC_REFDVP is set acquire a referenced parent dvp. Typically 1238 * used for mkdir/mknod/ncreate/nremove/unlink/rename. 1239 * 1240 * If a mount-point transition occurs due to ncp being a mount point, 1241 * or a null-mount, nl_dvp will be set to NULL and an error code of 1242 * 0 will be returned. 
A NULL nc_parent is not necessarily the only 1243 * indication of a mount-point as null-mounts will also tend to have 1244 * a non-null nc_parent. 1245 * 1246 * nch is locked, standard lock order for the namecache is 1247 * child-to-parent so we can safely lock its parent. We can 1248 * just use cache_dvpref(). 1249 */ 1250 if ((nd->nl_flags & NLC_REFDVP) && 1251 (doretry == FALSE || inretry == TRUE)) { 1252 if (nch.ncp->nc_parent) { 1253 error = cache_resolve_dvp(&nch, nd->nl_cred, 1254 &nd->nl_dvp); 1255 if (error) { 1256 if (nlookup_debug & 1) { 1257 kprintf("Parent directory lost during " 1258 "nlookup: %s/%s (%08x/%08x)\n", 1259 nch.ncp->nc_parent->nc_name, 1260 nch.ncp->nc_name, 1261 nch.ncp->nc_parent->nc_flag, 1262 nch.ncp->nc_flag); 1263 } 1264 cache_put(&nch); 1265 error = EINVAL; 1266 break; 1267 } 1268 1269 /* 1270 * Mount-point, nl_dvp should remain NULL, error 0, 1271 * caller won't be able to use the results so leave 1272 * the ncp referenced but unlocked. 1273 */ 1274 if (nd->nl_dvp == NULL) { 1275 cache_put(&nch); 1276 break; 1277 } 1278 1279 /* 1280 * Good directory, fall through to drop-and-cache 1281 * below 1282 */ 1283 /* */ 1284 } else { 1285 /* 1286 * Mount-point, nl_dvp should remain NULL, error 0, 1287 * caller won't be able to use the results so leave 1288 * the ncp referenced but unlocked. 1289 */ 1290 error = 0; 1291 cache_put(&nch); 1292 break; 1293 } 1294 } 1295 1296 /* 1297 * ncp left with lock+ref on break, set NLC_NCPISLOCKED flag 1298 */ 1299 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 1300 nd->nl_nch = nch; 1301 nd->nl_flags |= NLC_NCPISLOCKED; 1302 nl_gen = nch_gen; 1303 error = 0; 1304 break; 1305 } 1306 1307 /* 1308 * Force a retry (up to max_retries) if nl_gen is incorrect 1309 * 1310 * NOTE: On generation changes we must at a minimum cycle 1311 * the lock. In this case we have one so we are ok. 
1312 */ 1313 if (nd->nl_nch.ncp && (nd->nl_nch.ncp->nc_generation - nl_gen) & ~1) { 1314 if (nlookup_debug & 1) { 1315 kprintf("nlookup: DONE error %d: GEN CHANGE ON \"%s\" " 1316 "%d (retries %d)\n", 1317 error, 1318 nd->nl_nch.ncp->nc_name, 1319 (nd->nl_nch.ncp->nc_generation - nl_gen), 1320 max_retries); 1321 } 1322 gen_changed = 1; 1323 } 1324 if (gen_changed) { 1325 if (max_retries) { 1326 --max_retries; 1327 doretry = TRUE; 1328 inretry = FALSE; 1329 } else { 1330 error = EINVAL; 1331 } 1332 } 1333 1334 /* 1335 * We are done / or possibly retry 1336 */ 1337 if (hit) 1338 ++gd->gd_nchstats->ncs_longhits; 1339 else 1340 ++gd->gd_nchstats->ncs_longmiss; 1341 1342 if (nd->nl_flags & NLC_NCPISLOCKED) 1343 KKASSERT(cache_lockstatus(&nd->nl_nch) > 0); 1344 1345 /* 1346 * Reset nd->nl_path if necessary (due to softlinks). We want to return 1347 * nl_path to its original state before retrying or returning. 1348 */ 1349 if (path_reset) { 1350 if (nd->nl_flags & NLC_HASBUF) { 1351 objcache_put(namei_oc, nd->nl_path); 1352 nd->nl_flags &= ~NLC_HASBUF; 1353 } 1354 nd->nl_path = path_reset; 1355 nd->nl_flags |= saveflag & NLC_HASBUF; 1356 path_reset = NULL; 1357 } 1358 1359 /* 1360 * Retry the whole thing if doretry flag is set, but only once. 1361 * 1362 * autofs(5) may mount another filesystem under its root directory 1363 * while resolving a path. 
1364 * 1365 * NFS might return ESTALE 1366 */ 1367 if (doretry && !inretry) { 1368 if (nlookup_debug & 2) 1369 kprintf("nlookup: errno %d retry %s\n", error, nd->nl_path); 1370 inretry = TRUE; 1371 1372 /* 1373 * Clean up nd->nl_nch and reset to base directory 1374 */ 1375 if (nd->nl_flags & NLC_NCPISLOCKED) { 1376 cache_unlock(&nd->nl_nch); 1377 nd->nl_flags &= ~NLC_NCPISLOCKED; 1378 } 1379 cache_drop(&nd->nl_nch); 1380 cache_copy(nd->nl_basench, &nd->nl_nch); 1381 1382 nd->nl_elmno = 0; 1383 nd->nl_flags |= saveflag; 1384 1385 goto nlookup_start; 1386 } 1387 1388 /* 1389 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit 1390 * (ncp->nc_error will be ENOENT), but we will still return an error 1391 * code of 0. 1392 */ 1393 return(error); 1394 } 1395 1396 /* 1397 * Resolve a mount point's glue ncp. This ncp connects creates the illusion 1398 * of continuity in the namecache tree by connecting the ncp related to the 1399 * vnode under the mount to the ncp related to the mount's root vnode. 1400 * 1401 * If no error occured a locked, ref'd ncp is stored in *ncpp. 1402 */ 1403 int 1404 nlookup_mp(struct mount *mp, struct nchandle *nch) 1405 { 1406 struct vnode *vp; 1407 int error; 1408 1409 error = 0; 1410 cache_get(&mp->mnt_ncmountpt, nch); 1411 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 1412 while (vfs_busy(mp, 0)) 1413 ; 1414 error = VFS_ROOT(mp, &vp); 1415 vfs_unbusy(mp); 1416 if (error) { 1417 cache_put(nch); 1418 } else { 1419 cache_setvp(nch, vp); 1420 vput(vp); 1421 } 1422 } 1423 return(error); 1424 } 1425 1426 /* 1427 * Read the contents of a symlink, allocate a path buffer out of the 1428 * namei_oc and initialize the supplied nlcomponent with the result. 1429 * 1430 * If an error occurs no buffer will be allocated or returned in the nlc. 
1431 */ 1432 int 1433 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 1434 struct nlcomponent *nlc) 1435 { 1436 struct vnode *vp; 1437 struct iovec aiov; 1438 struct uio auio; 1439 int linklen; 1440 int error; 1441 char *cp; 1442 1443 nlc->nlc_nameptr = NULL; 1444 nlc->nlc_namelen = 0; 1445 if (nch->ncp->nc_vp == NULL) 1446 return(ENOENT); 1447 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 1448 return(error); 1449 cp = objcache_get(namei_oc, M_WAITOK); 1450 aiov.iov_base = cp; 1451 aiov.iov_len = MAXPATHLEN; 1452 auio.uio_iov = &aiov; 1453 auio.uio_iovcnt = 1; 1454 auio.uio_offset = 0; 1455 auio.uio_rw = UIO_READ; 1456 auio.uio_segflg = UIO_SYSSPACE; 1457 auio.uio_td = nd->nl_td; 1458 auio.uio_resid = MAXPATHLEN - 1; 1459 error = VOP_READLINK(vp, &auio, nd->nl_cred); 1460 if (error) 1461 goto fail; 1462 linklen = MAXPATHLEN - 1 - auio.uio_resid; 1463 if (varsym_enable) { 1464 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 1465 if (linklen < 0) { 1466 error = ENAMETOOLONG; 1467 goto fail; 1468 } 1469 } 1470 cp[linklen] = 0; 1471 nlc->nlc_nameptr = cp; 1472 nlc->nlc_namelen = linklen; 1473 vput(vp); 1474 return(0); 1475 fail: 1476 objcache_put(namei_oc, cp); 1477 vput(vp); 1478 return(error); 1479 } 1480 1481 /* 1482 * Check access [XXX cache vattr!] [XXX quota] 1483 * 1484 * Generally check the NLC_* access bits. All specified bits must pass 1485 * for this function to return 0. 1486 * 1487 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 1488 * access, otherwise it must exist. No error is returned in this case. 1489 * 1490 * The file must not exist if NLC_EXCL is specified. 1491 * 1492 * Directory permissions in general are tested for NLC_CREATE if the file 1493 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 1494 * whether the file exists or not. 
1495 * 1496 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 1497 * the latter is only tested if the target exists. 1498 * 1499 * The passed ncp must be referenced and locked. If it is already resolved 1500 * it may be locked shared but otherwise should be locked exclusively. 1501 */ 1502 1503 #define S_WXOK_MASK (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 1504 #define S_XOK_MASK (S_IXUSR|S_IXGRP|S_IXOTH) 1505 1506 static int 1507 naccess(struct nlookupdata *nd, struct nchandle *nch, u_int *genp, int nflags, 1508 struct ucred *cred, int *nflagsp, int nchislocked) 1509 { 1510 struct vnode *vp; 1511 struct vattr_lite lva; 1512 struct namecache *ncp; 1513 int error; 1514 int cflags; 1515 1516 ncp = nch->ncp; 1517 1518 again: 1519 /* 1520 * We need a resolved entry. If the entry is not resolved we need 1521 * to lock and resolve it. If it is already resolved, our ref should 1522 * prevent normal evictions (as long as we tested the lock race above). 1523 * 1524 * If the ncp was locked by the caller and left unresolved, it must 1525 * have been locked exclusively. 1526 */ 1527 if (ncp->nc_flag & NCF_UNRESOLVED) { 1528 if (nchislocked == 0) { 1529 cache_lock(nch); 1530 nchislocked = 2; 1531 } 1532 cache_resolve(nch, genp, cred); 1533 ncp = nch->ncp; 1534 } 1535 error = ncp->nc_error; 1536 1537 /* 1538 * Only unresolved entries should return this error (though maybe 1539 * in-filesystem sockets can too). XXX check filetype for VSOCK. 
1540 */ 1541 if (error == ENOTCONN) { 1542 if (nchislocked == 0) { 1543 if (nlookup_debug & 4) { 1544 kprintf("ncp %p %08x %d %s: Warning, unexpected state, " 1545 "forcing lock\n", 1546 ncp, ncp->nc_flag, ncp->nc_error, ncp->nc_name); 1547 print_backtrace(-1); 1548 } 1549 cache_lock(nch); 1550 nchislocked = 2; 1551 goto again; 1552 } 1553 if (nlookup_debug & 4) { 1554 kprintf("ncp %p %08x %d %s: Warning, unexpected state\n", 1555 ncp, ncp->nc_flag, ncp->nc_error, ncp->nc_name); 1556 print_backtrace(-1); 1557 } 1558 } 1559 1560 /* 1561 * Directory permissions checks. Silently ignore ENOENT if these 1562 * tests pass. It isn't an error. 1563 * 1564 * We can safely resolve ncp->nc_parent because ncp is currently 1565 * locked. 1566 * 1567 * We set nl_dir_error if an error occurs checking directory perms 1568 * for an intermediate directory. 1569 */ 1570 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 1571 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 1572 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 1573 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 1574 (nflags & NLC_RENAME_DST) 1575 ) { 1576 struct nchandle par; 1577 1578 if (nchislocked == 0) { 1579 cache_lock_maybe_shared(nch, 0); 1580 nchislocked = 2; 1581 goto again; 1582 } 1583 if ((par.ncp = ncp->nc_parent) == NULL) { 1584 if (error != EAGAIN) { 1585 error = EINVAL; 1586 ++nd->nl_dir_error; 1587 } 1588 } else if (error == 0 || error == ENOENT) { 1589 u_int dummy_gen = 0; 1590 1591 par.mount = nch->mount; 1592 cache_hold(&par); 1593 cache_lock_maybe_shared(&par, 0); 1594 error = naccess(nd, &par, &dummy_gen, NLC_WRITE, cred, NULL, 1); 1595 cache_put(&par); 1596 if (error) 1597 ++nd->nl_dir_error; 1598 } 1599 } 1600 } 1601 1602 /* 1603 * NLC_EXCL check. Target file must not exist. 
1604 */ 1605 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 1606 error = EEXIST; 1607 1608 /* 1609 * Try to short-cut the vnode operation for intermediate directory 1610 * components. This is a major SMP win because it avoids having 1611 * to execute a lot of code for intermediate directory components, 1612 * including shared refs and locks on intermediate directory vnodes. 1613 * 1614 * We can only do this if the caller does not need nflagsp. 1615 */ 1616 if (error == 0 && nflagsp == NULL && 1617 nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) { 1618 if (nchislocked == 2) 1619 cache_unlock(nch); 1620 return 0; 1621 } 1622 1623 /* 1624 * Get the vnode attributes so we can do the rest of our checks. 1625 * 1626 * NOTE: We only call naccess_lva() if the target exists. 1627 */ 1628 if (error == 0) { 1629 if (nchislocked == 0) { 1630 cache_lock_maybe_shared(nch, 0); 1631 nchislocked = 2; 1632 } 1633 #if 0 1634 error = cache_vget(nch, cred, LK_SHARED, &vp); 1635 #else 1636 error = cache_vref(nch, cred, &vp); 1637 #endif 1638 if (error == ENOENT) { 1639 /* 1640 * Silently zero-out ENOENT if creating or renaming 1641 * (rename target). It isn't an error. 1642 */ 1643 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1644 error = 0; 1645 } else if (error == 0) { 1646 /* 1647 * Get the vnode attributes and check for illegal O_TRUNC 1648 * requests and read-only mounts. 1649 * 1650 * NOTE: You can still open devices on read-only mounts for 1651 * writing. 1652 * 1653 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1654 * check on the parent directory above. 
1655 * 1656 * XXX cache the va in the namecache or in the vnode 1657 */ 1658 error = VOP_GETATTR_LITE(vp, &lva); 1659 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1660 switch(lva.va_type) { 1661 case VREG: 1662 case VDATABASE: 1663 case VCHR: 1664 case VBLK: 1665 case VFIFO: 1666 break; 1667 case VDIR: 1668 error = EISDIR; 1669 break; 1670 default: 1671 error = EINVAL; 1672 break; 1673 } 1674 } 1675 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1676 (vp->v_mount->mnt_flag & MNT_RDONLY) 1677 ) { 1678 switch(lva.va_type) { 1679 case VDIR: 1680 case VLNK: 1681 case VREG: 1682 case VDATABASE: 1683 error = EROFS; 1684 break; 1685 default: 1686 break; 1687 } 1688 } 1689 #if 0 1690 vput(vp); 1691 #else 1692 vrele(vp); 1693 #endif 1694 1695 /* 1696 * Check permissions based on file attributes. The passed 1697 * flags (*nflagsp) are modified with feedback based on 1698 * special attributes and requirements. 1699 */ 1700 if (error == 0) { 1701 /* 1702 * Adjust the returned (*nflagsp) if non-NULL. 1703 */ 1704 if (nflagsp) { 1705 if ((lva.va_mode & VSVTX) && lva.va_uid != cred->cr_uid) 1706 *nflagsp |= NLC_STICKY; 1707 if (lva.va_flags & APPEND) 1708 *nflagsp |= NLC_APPENDONLY; 1709 if (lva.va_flags & IMMUTABLE) 1710 *nflagsp |= NLC_IMMUTABLE; 1711 } 1712 1713 /* 1714 * NCF_WXOK can be set for world-searchable directories. 1715 * 1716 * XXX When we implement capabilities this code would also 1717 * need a cap check, or only set the flag if there are no 1718 * capabilities. 1719 */ 1720 cflags = 0; 1721 if (lva.va_type == VDIR && 1722 (lva.va_mode & S_WXOK_MASK) == S_WXOK_MASK) { 1723 cflags |= NCF_WXOK; 1724 } 1725 if ((lva.va_mode & S_XOK_MASK) == 0) 1726 cflags |= NCF_NOTX; 1727 1728 /* 1729 * Track swapcache management flags in the namecache. 
1730 * 1731 * Calculate the flags based on the current vattr_lite info 1732 * and recalculate the inherited flags from the parent 1733 * (the original cache linkage may have occurred without 1734 * getattrs and thus have stale flags). 1735 */ 1736 if (lva.va_flags & SF_NOCACHE) 1737 cflags |= NCF_SF_NOCACHE; 1738 if (lva.va_flags & UF_CACHE) 1739 cflags |= NCF_UF_CACHE; 1740 if (ncp->nc_parent) { 1741 if (ncp->nc_parent->nc_flag & 1742 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1743 cflags |= NCF_SF_PNOCACHE; 1744 } 1745 if (ncp->nc_parent->nc_flag & 1746 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1747 cflags |= NCF_UF_PCACHE; 1748 } 1749 } 1750 1751 /* 1752 * We're not supposed to update nc_flag when holding a shared 1753 * lock, but we allow the case for certain flags. Note that 1754 * holding an exclusive lock allows updating nc_flag without 1755 * atomics. nc_flag is not allowe to be updated at all unless 1756 * a shared or exclusive lock is held. 1757 */ 1758 atomic_clear_short(&ncp->nc_flag, 1759 (NCF_SF_NOCACHE | NCF_UF_CACHE | 1760 NCF_SF_PNOCACHE | NCF_UF_PCACHE | 1761 NCF_WXOK | NCF_NOTX) & ~cflags); 1762 atomic_set_short(&ncp->nc_flag, cflags); 1763 1764 /* 1765 * Process general access. 1766 */ 1767 error = naccess_lva(&lva, nflags, cred); 1768 } 1769 } 1770 } 1771 if (nchislocked == 2) 1772 cache_unlock(nch); 1773 return(error); 1774 } 1775 1776 /* 1777 * Check the requested access against the given vattr using cred. 1778 */ 1779 int 1780 naccess_lva(struct vattr_lite *lvap, int nflags, struct ucred *cred) 1781 { 1782 int i; 1783 int vmode; 1784 1785 /* 1786 * Test the immutable bit. Creations, deletions, renames (source 1787 * or destination) are not allowed. chown/chmod/other is also not 1788 * allowed but is handled by SETATTR. Hardlinks to the immutable 1789 * file are allowed. 
1790 * 1791 * If the directory is set to immutable then creations, deletions, 1792 * renames (source or dest) and hardlinks to files within the directory 1793 * are not allowed, and regular files opened through the directory may 1794 * not be written to or truncated (unless a special device). 1795 * 1796 * NOTE! New hardlinks to immutable files work but new hardlinks to 1797 * files, immutable or not, sitting inside an immutable directory are 1798 * not allowed. As always if the file is hardlinked via some other 1799 * path additional hardlinks may be possible even if the file is marked 1800 * immutable. The sysop needs to create a closure by checking the hard 1801 * link count. Once closure is achieved you are good, and security 1802 * scripts should check link counts anyway. 1803 * 1804 * Writes and truncations are only allowed on special devices. 1805 */ 1806 if ((lvap->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1807 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1808 return (EPERM); 1809 if (nflags & (NLC_CREATE | NLC_DELETE | 1810 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1811 return (EPERM); 1812 } 1813 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1814 switch(lvap->va_type) { 1815 case VDIR: 1816 return (EISDIR); 1817 case VLNK: 1818 case VREG: 1819 case VDATABASE: 1820 return (EPERM); 1821 default: 1822 break; 1823 } 1824 } 1825 } 1826 1827 /* 1828 * Test the no-unlink and append-only bits for opens, rename targets, 1829 * and deletions. These bits are not tested for creations or 1830 * rename sources. 1831 * 1832 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1833 * If you do not wish this you must also set NOUNLINK. 1834 * 1835 * If the governing directory is marked APPEND-only it implies 1836 * NOUNLINK for all entries in the directory. 
1837 */ 1838 if (((lvap->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1839 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1840 ) { 1841 return (EPERM); 1842 } 1843 1844 /* 1845 * A file marked append-only may not be deleted but can be renamed. 1846 */ 1847 if ((lvap->va_flags & APPEND) && 1848 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1849 ) { 1850 return (EPERM); 1851 } 1852 1853 /* 1854 * A file marked append-only which is opened for writing must also 1855 * be opened O_APPEND. 1856 */ 1857 if ((lvap->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1858 if (nflags & NLC_TRUNCATE) 1859 return (EPERM); 1860 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1861 if ((nflags & NLC_APPEND) == 0) 1862 return (EPERM); 1863 } 1864 } 1865 1866 /* 1867 * root gets universal access 1868 */ 1869 if (cred->cr_uid == 0) 1870 return(0); 1871 1872 /* 1873 * Check owner perms. 1874 * 1875 * If NLC_OWN is set the owner of the file is allowed no matter when 1876 * the owner-mode bits say (utimes). 1877 */ 1878 vmode = 0; 1879 if (nflags & NLC_READ) 1880 vmode |= S_IRUSR; 1881 if (nflags & NLC_WRITE) 1882 vmode |= S_IWUSR; 1883 if (nflags & NLC_EXEC) 1884 vmode |= S_IXUSR; 1885 1886 if (cred->cr_uid == lvap->va_uid) { 1887 if ((nflags & NLC_OWN) == 0) { 1888 if ((vmode & lvap->va_mode) != vmode) 1889 return(EACCES); 1890 } 1891 return(0); 1892 } 1893 1894 /* 1895 * If NLC_STICKY is set only the owner may delete or rename a file. 1896 * This bit is typically set on /tmp. 1897 * 1898 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1899 * the specific delete or rename case. For deletions and renames we 1900 * usually just care about directory permissions, not file permissions. 
1901 */ 1902 if ((nflags & NLC_STICKY) && 1903 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1904 return(EACCES); 1905 } 1906 1907 /* 1908 * Check group perms 1909 */ 1910 vmode >>= 3; 1911 for (i = 0; i < cred->cr_ngroups; ++i) { 1912 if (lvap->va_gid == cred->cr_groups[i]) { 1913 if ((vmode & lvap->va_mode) != vmode) 1914 return(EACCES); 1915 return(0); 1916 } 1917 } 1918 1919 /* 1920 * Check world perms 1921 */ 1922 vmode >>= 3; 1923 if ((vmode & lvap->va_mode) != vmode) 1924 return(EACCES); 1925 return(0); 1926 } 1927 1928 /* 1929 * Long-term (10-second interval) statistics collection 1930 */ 1931 static 1932 uint64_t 1933 collect_nlookup_callback(int n) 1934 { 1935 static uint64_t last_total; 1936 uint64_t save; 1937 uint64_t total; 1938 1939 total = 0; 1940 for (n = 0; n < ncpus; ++n) { 1941 globaldata_t gd = globaldata_find(n); 1942 struct nchstats *sp; 1943 1944 if ((sp = gd->gd_nchstats) != NULL) 1945 total += sp->ncs_longhits + sp->ncs_longmiss; 1946 } 1947 save = total; 1948 total = total - last_total; 1949 last_total = save; 1950 1951 return total; 1952 } 1953 1954 static 1955 void 1956 nlookup_collect_init(void *dummy __unused) 1957 { 1958 kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback, 1959 KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0)); 1960 } 1961 SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0); 1962