/*
 * Copyright (c) 2004-2020 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * nlookup() is the 'new' namei interface.  Rather than return directory and
 * leaf vnodes (in various lock states) the new interface instead deals in
 * namecache records.
 * Namecache records may represent either a positive or
 * a negative hit.  The namespace is locked via the namecache record instead
 * of via the vnode, and only the leaf namecache record (representing the
 * filename) needs to be locked.
 *
 * This greatly improves filesystem parallelism and is a huge simplification
 * of the API versus the old vnode locking / namei scheme.
 *
 * Filesystems must actively control the caching aspects of the namecache,
 * and since namecache pointers are used as handles they are non-optional
 * even for filesystems which do not generally wish to cache things.  It is
 * intended that a separate cache coherency API will be constructed to handle
 * these issues.
 */

#include "opt_ktrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/kernel.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/nlookup.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/objcache.h>
#include <sys/file.h>
#include <sys/kcollect.h>
#include <sys/sysctl.h>

#ifdef KTRACE
#include <sys/ktrace.h>
#endif

/*
 * debug.nlookup_debug: while positive, each nlookup() call decrements the
 * counter and forces its retry pass, exercising the retry code.
 */
__read_mostly static int nlookup_debug;
SYSCTL_INT(_debug, OID_AUTO, nlookup_debug, CTLFLAG_RW, &nlookup_debug, 0,
    "Force retry test");

/*
 * Forward declaration of the permission checker that nlookup() calls for
 * directory search checks and for the final element.
 */
static int naccess(struct nchandle *nch, int vmode, struct ucred *cred,
                int *stickyp, int nchislocked);

/*
 * unmount operations flag NLC_IGNBADDIR in order to allow the
 * umount to successfully issue a nlookup() on the path in order
 * to extract the mount point.  Allow certain errors through.
86 */ 87 static __inline 88 int 89 keeperror(struct nlookupdata *nd, int error) 90 { 91 if (error) { 92 if ((nd->nl_flags & NLC_IGNBADDIR) == 0 || 93 (error != EIO && error != EBADRPC && error != ESTALE)) { 94 return 1; 95 } 96 } 97 return 0; 98 } 99 100 /* 101 * Initialize a nlookup() structure, early error return for copyin faults 102 * or a degenerate empty string (which is not allowed). 103 * 104 * The first process proc0's credentials are used if the calling thread 105 * is not associated with a process context. 106 * 107 * MPSAFE 108 */ 109 int 110 nlookup_init(struct nlookupdata *nd, 111 const char *path, enum uio_seg seg, int flags) 112 { 113 size_t pathlen; 114 struct proc *p; 115 thread_t td; 116 int error; 117 118 td = curthread; 119 p = td->td_proc; 120 121 /* 122 * note: the pathlen set by copy*str() includes the terminating \0. 123 */ 124 bzero(nd, sizeof(struct nlookupdata)); 125 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 126 nd->nl_flags |= NLC_HASBUF; 127 if (seg == UIO_SYSSPACE) 128 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 129 else 130 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 131 132 /* 133 * Don't allow empty pathnames. 134 * POSIX.1 requirement: "" is not a vaild file name. 
135 */ 136 if (error == 0 && pathlen <= 1) 137 error = ENOENT; 138 139 if (error == 0) { 140 if (p && p->p_fd) { 141 if (nd->nl_path[0] == '/') { 142 if ((flags & NLC_NLNCH_NOINIT) == 0) { 143 nd->nl_basench = &p->p_fd->fd_nrdir; 144 cache_copy(nd->nl_basench, &nd->nl_nch); 145 } 146 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 147 if (p->p_fd->fd_njdir.ncp) 148 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 149 nd->nl_cred = td->td_ucred; 150 nd->nl_flags |= NLC_BORROWCRED; 151 } else { 152 if ((flags & NLC_NLNCH_NOINIT) == 0) { 153 nd->nl_basench = &p->p_fd->fd_ncdir; 154 cache_copy(nd->nl_basench, &nd->nl_nch); 155 } 156 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 157 if (p->p_fd->fd_njdir.ncp) 158 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 159 nd->nl_cred = td->td_ucred; 160 nd->nl_flags |= NLC_BORROWCRED; 161 } 162 } else { 163 if ((flags & NLC_NLNCH_NOINIT) == 0) { 164 nd->nl_basench = &rootnch; 165 cache_copy(nd->nl_basench, &nd->nl_nch); 166 } 167 cache_copy(&rootnch, &nd->nl_rootnch); 168 cache_copy(&rootnch, &nd->nl_jailnch); 169 nd->nl_cred = proc0.p_ucred; 170 nd->nl_flags |= NLC_BORROWCRED; 171 } 172 nd->nl_td = td; 173 nd->nl_flags |= flags & ~NLC_NLNCH_NOINIT; 174 } else { 175 nlookup_done(nd); 176 } 177 return(error); 178 } 179 180 181 /* 182 * nlookup_init() for "at" family of syscalls. 183 * 184 * Similar to nlookup_init() but if the path is relative and fd is not 185 * AT_FDCWD, the path will be interpreted relative to the directory pointed 186 * to by fd. In this case, the file entry pointed to by fd is ref'ed and 187 * returned in *fpp. 188 * 189 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 190 * and release the ref to the file entry. 
191 */ 192 int 193 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 194 const char *path, enum uio_seg seg, int flags) 195 { 196 struct thread *td = curthread; 197 struct file* fp; 198 struct vnode *vp; 199 int error; 200 201 *fpp = NULL; 202 203 /* 204 * Resolve the path, we might have to copy it in from userland, 205 * but don't initialize nl_basench, or nl_nch. 206 */ 207 error = nlookup_init(nd, path, seg, flags | NLC_NLNCH_NOINIT); 208 if (__predict_false(error)) 209 return (error); 210 211 /* 212 * Setup nl_basench (a pointer only not refd), and copy+ref 213 * to initialize nl_nch. Only applicable to relative paths. 214 * For absolute paths, or if (fd) is degenerate, just use the 215 * normal path. 216 */ 217 if (nd->nl_path[0] == '/') { 218 struct proc *p = curproc; 219 nd->nl_basench = &p->p_fd->fd_nrdir; 220 } else if (fd == AT_FDCWD) { 221 struct proc *p = curproc; 222 nd->nl_basench = &p->p_fd->fd_ncdir; 223 } else { 224 if ((error = holdvnode(td, fd, &fp)) != 0) 225 goto done; 226 vp = (struct vnode*)fp->f_data; 227 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 228 fdrop(fp); 229 fp = NULL; 230 error = ENOTDIR; 231 goto done; 232 } 233 nd->nl_basench = &fp->f_nchandle; 234 *fpp = fp; 235 } 236 cache_copy(nd->nl_basench, &nd->nl_nch); 237 done: 238 if (error) 239 nlookup_done(nd); 240 return (error); 241 } 242 243 /* 244 * This works similarly to nlookup_init() but does not assume a process 245 * context. rootnch is always chosen for the root directory and the cred 246 * and starting directory are supplied in arguments. 
247 */ 248 int 249 nlookup_init_raw(struct nlookupdata *nd, 250 const char *path, enum uio_seg seg, int flags, 251 struct ucred *cred, struct nchandle *ncstart) 252 { 253 size_t pathlen; 254 thread_t td; 255 int error; 256 257 td = curthread; 258 259 bzero(nd, sizeof(struct nlookupdata)); 260 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 261 nd->nl_flags |= NLC_HASBUF; 262 if (seg == UIO_SYSSPACE) 263 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 264 else 265 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 266 267 /* 268 * Don't allow empty pathnames. 269 * POSIX.1 requirement: "" is not a vaild file name. 270 */ 271 if (error == 0 && pathlen <= 1) 272 error = ENOENT; 273 274 if (error == 0) { 275 cache_copy(ncstart, &nd->nl_nch); 276 cache_copy(&rootnch, &nd->nl_rootnch); 277 cache_copy(&rootnch, &nd->nl_jailnch); 278 nd->nl_cred = crhold(cred); 279 nd->nl_td = td; 280 nd->nl_flags |= flags; 281 } else { 282 nlookup_done(nd); 283 } 284 return(error); 285 } 286 287 /* 288 * This works similarly to nlookup_init_raw() but does not rely 289 * on rootnch being initialized yet. 290 */ 291 int 292 nlookup_init_root(struct nlookupdata *nd, 293 const char *path, enum uio_seg seg, int flags, 294 struct ucred *cred, struct nchandle *ncstart, 295 struct nchandle *ncroot) 296 { 297 size_t pathlen; 298 thread_t td; 299 int error; 300 301 td = curthread; 302 303 bzero(nd, sizeof(struct nlookupdata)); 304 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 305 nd->nl_flags |= NLC_HASBUF; 306 if (seg == UIO_SYSSPACE) 307 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 308 else 309 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 310 311 /* 312 * Don't allow empty pathnames. 313 * POSIX.1 requirement: "" is not a vaild file name. 
314 */ 315 if (error == 0 && pathlen <= 1) 316 error = ENOENT; 317 318 if (error == 0) { 319 cache_copy(ncstart, &nd->nl_nch); 320 cache_copy(ncroot, &nd->nl_rootnch); 321 cache_copy(ncroot, &nd->nl_jailnch); 322 nd->nl_cred = crhold(cred); 323 nd->nl_td = td; 324 nd->nl_flags |= flags; 325 } else { 326 nlookup_done(nd); 327 } 328 return(error); 329 } 330 331 #if 0 332 /* 333 * Set a different credential; this credential will be used by future 334 * operations performed on nd.nl_open_vp and nlookupdata structure. 335 */ 336 void 337 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 338 { 339 KKASSERT(nd->nl_cred != NULL); 340 341 if (nd->nl_cred != cred) { 342 cred = crhold(cred); 343 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 344 crfree(nd->nl_cred); 345 nd->nl_flags &= ~NLC_BORROWCRED; 346 nd->nl_cred = cred; 347 } 348 } 349 #endif 350 351 /* 352 * Cleanup a nlookupdata structure after we are through with it. This may 353 * be called on any nlookupdata structure initialized with nlookup_init(). 354 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 355 * returns an error, even if as a consumer you believe you have taken all 356 * dynamic elements out of the nlookupdata structure. 
357 */ 358 void 359 nlookup_done(struct nlookupdata *nd) 360 { 361 if (nd->nl_nch.ncp) { 362 if (nd->nl_flags & NLC_NCPISLOCKED) 363 cache_unlock(&nd->nl_nch); 364 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 365 } 366 nd->nl_flags &= ~NLC_NCPISLOCKED; 367 if (nd->nl_rootnch.ncp) 368 cache_drop_and_cache(&nd->nl_rootnch, 0); 369 if (nd->nl_jailnch.ncp) 370 cache_drop_and_cache(&nd->nl_jailnch, 0); 371 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 372 objcache_put(namei_oc, nd->nl_path); 373 nd->nl_path = NULL; 374 } 375 if (nd->nl_cred) { 376 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 377 crfree(nd->nl_cred); 378 nd->nl_cred = NULL; 379 nd->nl_flags &= ~NLC_BORROWCRED; 380 } 381 if (nd->nl_open_vp) { 382 if (nd->nl_flags & NLC_LOCKVP) { 383 vn_unlock(nd->nl_open_vp); 384 nd->nl_flags &= ~NLC_LOCKVP; 385 } 386 vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL); 387 nd->nl_open_vp = NULL; 388 } 389 if (nd->nl_dvp) { 390 vrele(nd->nl_dvp); 391 nd->nl_dvp = NULL; 392 } 393 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 394 nd->nl_basench = NULL; 395 } 396 397 /* 398 * Works similarly to nlookup_done() when nd initialized with 399 * nlookup_init_at(). 400 */ 401 void 402 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 403 { 404 nlookup_done(nd); 405 if (fp != NULL) 406 fdrop(fp); 407 } 408 409 void 410 nlookup_zero(struct nlookupdata *nd) 411 { 412 bzero(nd, sizeof(struct nlookupdata)); 413 } 414 415 /* 416 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 417 * if an error occured. 418 * 419 * Note that the returned ncp is not checked for permissions, though VEXEC 420 * is checked on the directory path leading up to the result. The caller 421 * must call naccess() to check the permissions of the returned leaf. 
422 */ 423 struct nchandle 424 nlookup_simple(const char *str, enum uio_seg seg, 425 int niflags, int *error) 426 { 427 struct nlookupdata nd; 428 struct nchandle nch; 429 430 *error = nlookup_init(&nd, str, seg, niflags); 431 if (*error == 0) { 432 if ((*error = nlookup(&nd)) == 0) { 433 nch = nd.nl_nch; /* keep hold ref from structure */ 434 cache_zero(&nd.nl_nch); /* and NULL out */ 435 } else { 436 cache_zero(&nch); 437 } 438 nlookup_done(&nd); 439 } else { 440 cache_zero(&nch); 441 } 442 return(nch); 443 } 444 445 /* 446 * Returns non-zero if the path element is the last element 447 */ 448 static 449 int 450 islastelement(const char *ptr) 451 { 452 while (*ptr == '/') 453 ++ptr; 454 return (*ptr == 0); 455 } 456 457 /* 458 * Returns non-zero if we need to lock the namecache element 459 * exclusively. Unless otherwise requested by NLC_SHAREDLOCK, 460 * the last element of the namecache lookup will be locked 461 * exclusively. 462 * 463 * O_CREAT or O_TRUNC need the last element to be locked exlcusively. 464 * Intermediate elements are always locked shared. 465 * 466 * NOTE: Even if we return on-zero, an unresolved namecache record 467 * will always be locked exclusively. 468 */ 469 static __inline 470 int 471 wantsexcllock(struct nlookupdata *nd, int last_element) 472 { 473 if ((nd->nl_flags & NLC_SHAREDLOCK) == 0) 474 return(last_element); 475 return 0; 476 } 477 478 479 /* 480 * Do a generic nlookup. Note that the passed nd is not nlookup_done()'d 481 * on return, even if an error occurs. If no error occurs or NLC_CREATE 482 * is flagged and ENOENT is returned, then the returned nl_nch is always 483 * referenced and locked exclusively. 484 * 485 * WARNING: For any general error other than ENOENT w/NLC_CREATE, the 486 * the resulting nl_nch may or may not be locked and if locked 487 * might be locked either shared or exclusive. 488 * 489 * Intermediate directory elements, including the current directory, require 490 * execute (search) permission. 
nlookup does not examine the access 491 * permissions on the returned element. 492 * 493 * If NLC_CREATE is set the last directory must allow node creation, 494 * and an error code of 0 will be returned for a non-existant 495 * target (not ENOENT). 496 * 497 * If NLC_RENAME_DST is set the last directory mut allow node deletion, 498 * plus the sticky check is made, and an error code of 0 will be returned 499 * for a non-existant target (not ENOENT). 500 * 501 * If NLC_DELETE is set the last directory mut allow node deletion, 502 * plus the sticky check is made. 503 * 504 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode 505 * of the returned entry. The vnode will be referenced, but not locked, 506 * and will be released by nlookup_done() along with everything else. 507 * 508 * NOTE: As an optimization we attempt to obtain a shared namecache lock 509 * on any intermediate elements. On success, the returned element 510 * is ALWAYS locked exclusively. 511 */ 512 int 513 nlookup(struct nlookupdata *nd) 514 { 515 globaldata_t gd = mycpu; 516 struct nlcomponent nlc; 517 struct nchandle nch; 518 struct nchandle nctmp; 519 struct mount *mp; 520 int wasdotordotdot; 521 char *path_reset; 522 char *ptr; 523 char *nptr; 524 int error; 525 int len; 526 int dflags; 527 int hit = 1; 528 int saveflag = nd->nl_flags; 529 boolean_t doretry = FALSE; 530 boolean_t inretry = FALSE; 531 532 if (nlookup_debug > 0) { 533 --nlookup_debug; 534 doretry = 1; 535 } 536 path_reset = NULL; 537 538 nlookup_start: 539 540 #ifdef KTRACE 541 if (KTRPOINT(nd->nl_td, KTR_NAMEI)) 542 ktrnamei(nd->nl_td->td_lwp, nd->nl_path); 543 #endif 544 bzero(&nlc, sizeof(nlc)); 545 546 /* 547 * Setup for the loop. The current working namecache element is 548 * always at least referenced. We lock it as required, but always 549 * return a locked, resolved namecache entry. 
550 */ 551 nd->nl_loopcnt = 0; 552 if (nd->nl_dvp) { 553 vrele(nd->nl_dvp); 554 nd->nl_dvp = NULL; 555 } 556 ptr = nd->nl_path; 557 558 /* 559 * Loop on the path components. At the top of the loop nd->nl_nch 560 * is ref'd and unlocked and represents our current position. 561 */ 562 for (;;) { 563 int last_element; 564 565 ++nd->nl_elmno; 566 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 567 568 /* 569 * Check if the root directory should replace the current 570 * directory. This is done at the start of a translation 571 * or after a symbolic link has been found. In other cases 572 * ptr will never be pointing at a '/'. 573 */ 574 if (*ptr == '/') { 575 do { 576 ++ptr; 577 } while (*ptr == '/'); 578 579 /* 580 * We might already be at the root as a pre-optimization 581 */ 582 if (nd->nl_nch.mount != nd->nl_rootnch.mount || 583 nd->nl_nch.ncp != nd->nl_rootnch.ncp) { 584 cache_drop_and_cache(&nd->nl_nch, 0); 585 cache_copy(&nd->nl_rootnch, &nd->nl_nch); 586 } 587 588 /* 589 * Fast-track termination. There is no parent directory of 590 * the root in the same mount from the point of view of 591 * the caller so return EACCES if NLC_REFDVP is specified, 592 * and EEXIST if NLC_CREATE is also specified. 593 * e.g. 'rmdir /' or 'mkdir /' are not allowed. 594 */ 595 if (*ptr == 0) { 596 if (nd->nl_flags & NLC_REFDVP) 597 error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES; 598 else 599 error = 0; 600 nd->nl_flags |= NLC_NCPISLOCKED; 601 cache_lock_maybe_shared(&nd->nl_nch, 602 wantsexcllock(nd, islastelement(ptr))); 603 break; 604 } 605 continue; 606 } 607 608 /* 609 * Pre-calculate next path component so we can check whether the 610 * current component directory is the last directory in the path 611 * or not. 612 */ 613 for (nptr = ptr; *nptr && *nptr != '/'; ++nptr) 614 ; 615 616 /* 617 * nd->nl_nch is referenced and not locked here. 618 * 619 * Check directory search permissions. 
This will load dflags to 620 * obtain directory-special permissions to be checked along with the 621 * last component. 622 * 623 * We only need to pass-in &dflags for the second-to-last component. 624 * Optimize by passing-in NULL for any prior components, which may 625 * allow the code to bypass the naccess() call. 626 * 627 * naccess() is optimized to avoid having to lock the nch or get 628 * the related vnode if cached perms are sufficient. 629 */ 630 dflags = 0; 631 if (*nptr == '/' || (saveflag & NLC_MODIFYING_MASK) == 0) 632 error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, NULL, 0); 633 else 634 error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags, 0); 635 if (error) { 636 if (keeperror(nd, error)) 637 break; 638 error = 0; 639 } 640 641 /* 642 * Extract the next (or last) path component. Path components are 643 * limited to 255 characters. 644 */ 645 nlc.nlc_nameptr = ptr; 646 nlc.nlc_namelen = nptr - ptr; 647 ptr = nptr; 648 if (nlc.nlc_namelen >= 256) { 649 error = ENAMETOOLONG; 650 break; 651 } 652 last_element = islastelement(nptr); 653 654 /* 655 * Lookup the path component in the cache, creating an unresolved 656 * entry if necessary. We have to handle "." and ".." as special 657 * cases. 658 * 659 * When handling ".." we have to detect a traversal back through a 660 * mount point. If we are at the root, ".." just returns the root. 661 * 662 * When handling "." or ".." we also have to recalculate dflags 663 * since our dflags will be for some sub-directory instead of the 664 * parent dir. 665 * 666 * This subsection returns a referenced and possibly locked 'nch'. 667 * The locking status is based on the last_element flag. 668 * 669 * The namecache topology is not allowed to be disconnected, so 670 * encountering a NULL parent will generate EINVAL. This typically 671 * occurs when a directory is removed out from under a process. 672 * 673 * WARNING! The unlocking of nd->nl_nch is sensitive code. 
674 */ 675 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 676 677 if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') { 678 if (last_element) { 679 cache_get_maybe_shared(&nd->nl_nch, &nch, 680 wantsexcllock(nd, 1)); 681 } else { 682 cache_copy(&nd->nl_nch, &nch); 683 } 684 wasdotordotdot = 1; 685 } else if (nlc.nlc_namelen == 2 && 686 nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') { 687 if (nd->nl_nch.mount == nd->nl_rootnch.mount && 688 nd->nl_nch.ncp == nd->nl_rootnch.ncp 689 ) { 690 /* 691 * ".." at the root returns the root 692 */ 693 if (last_element) { 694 cache_get_maybe_shared(&nd->nl_nch, &nch, 695 wantsexcllock(nd, 1)); 696 } else { 697 cache_copy(&nd->nl_nch, &nch); 698 } 699 } else { 700 /* 701 * Locate the parent ncp. If we are at the root of a 702 * filesystem mount we have to skip to the mounted-on 703 * point in the underlying filesystem. 704 * 705 * Expect the parent to always be good since the 706 * mountpoint doesn't go away. XXX hack. cache_get() 707 * requires the ncp to already have a ref as a safety. 708 * 709 * However, a process which has been broken out of a chroot 710 * will wind up with a NULL parent if it tries to '..' above 711 * the real root, deal with the case. Note that this does 712 * not protect us from a jail breakout, it just stops a panic 713 * if the jail-broken process tries to '..' past the real 714 * root. 715 */ 716 nctmp = nd->nl_nch; 717 while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) { 718 nctmp = nctmp.mount->mnt_ncmounton; 719 if (nctmp.ncp == NULL) 720 break; 721 } 722 if (nctmp.ncp == NULL) { 723 if (curthread->td_proc) { 724 kprintf("vfs_nlookup: '..' 
traverse broke " 725 "jail: pid %d (%s)\n", 726 curthread->td_proc->p_pid, 727 curthread->td_comm); 728 } 729 nctmp = nd->nl_rootnch; 730 } else { 731 nctmp.ncp = nctmp.ncp->nc_parent; 732 } 733 if (last_element) { 734 cache_get_maybe_shared(&nctmp, &nch, 735 wantsexcllock(nd, 1)); 736 } else { 737 cache_copy(&nctmp, &nch); 738 } 739 } 740 wasdotordotdot = 2; 741 } else { 742 /* 743 * Quickly lookup the component. If we can't find it, then 744 * slowly lookup and resolve the component. 745 */ 746 if (last_element) { 747 error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc, 748 wantsexcllock(nd, 1), 749 &nch); 750 } else { 751 nch = cache_nlookup_nonlocked(&nd->nl_nch, &nlc); 752 if (nch.ncp == NULL) 753 error = EWOULDBLOCK; 754 } 755 756 /* 757 * At this point the only possible error is EWOULDBLOCK. 758 * 759 * If no error nch is set and referenced, and then also locked 760 * according to last_element. For EWOULDBLOCK nch is not set. 761 * For any other error nch is set and referenced, but not locked. 762 * 763 * On EWOULDBLOCK the ncp may be unresolved (if not locked it can 764 * become unresolved at any time, but we don't care at this time). 765 */ 766 if (error == EWOULDBLOCK) { 767 nch = cache_nlookup(&nd->nl_nch, &nlc); 768 if (nch.ncp->nc_flag & NCF_UNRESOLVED) 769 hit = 0; 770 for (;;) { 771 error = cache_resolve(&nch, nd->nl_cred); 772 if (error != EAGAIN && 773 (nch.ncp->nc_flag & NCF_DESTROYED) == 0) { 774 if (error == ESTALE) { 775 if (!inretry) 776 error = ENOENT; 777 doretry = TRUE; 778 } 779 if (last_element == 0) 780 cache_unlock(&nch); 781 break; 782 } 783 kprintf("[diagnostic] nlookup: relookup %*.*s\n", 784 nch.ncp->nc_nlen, nch.ncp->nc_nlen, 785 nch.ncp->nc_name); 786 cache_put(&nch); 787 nch = cache_nlookup(&nd->nl_nch, &nlc); 788 } 789 } 790 wasdotordotdot = 0; 791 } 792 793 /* 794 * If the component is "." or ".." our dflags no longer represents 795 * the parent directory and we have to explicitly look it up. 
796 * 797 * Expect the parent to be good since nch is locked. 798 * 799 * nch will continue to be valid even if an error occurs after this 800 * point. 801 */ 802 if (wasdotordotdot && error == 0) { 803 struct nchandle par; 804 805 dflags = 0; 806 if (last_element == 0) 807 cache_lock_maybe_shared(&nch, wantsexcllock(nd, 0)); 808 809 if ((par.ncp = nch.ncp->nc_parent) != NULL) { 810 par.mount = nch.mount; 811 cache_hold(&par); 812 error = naccess(&par, 0, nd->nl_cred, &dflags, 0); 813 cache_drop_and_cache(&par, nd->nl_elmno - 1); 814 if (error) { 815 if (!keeperror(nd, error)) 816 error = 0; 817 if (error == EINVAL) { 818 kprintf("nlookup (%s): trailing . or .. retry on %s\n", 819 curthread->td_comm, nd->nl_path); 820 doretry = TRUE; 821 } 822 } 823 } 824 825 if (last_element == 0) 826 cache_unlock(&nch); 827 } 828 829 /* 830 * [end of subsection] 831 * 832 * nch is referenced and locked according to (last_element). 833 * nd->nl_nch is unlocked and referenced. 834 */ 835 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 836 837 /* 838 * Resolve the namespace if necessary. The ncp returned by 839 * cache_nlookup() is referenced, and also locked according 840 * to last_element. 841 * 842 * XXX neither '.' nor '..' should return EAGAIN since they were 843 * previously resolved and thus cannot be newly created ncp's. 844 */ 845 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 846 if (last_element == 0) 847 cache_lock(&nch); 848 hit = 0; 849 error = cache_resolve(&nch, nd->nl_cred); 850 if (error == ESTALE) { 851 if (!inretry) 852 error = ENOENT; 853 doretry = TRUE; 854 } 855 if (last_element == 0) 856 cache_unlock(&nch); 857 KKASSERT(error != EAGAIN); 858 } else { 859 error = nch.ncp->nc_error; 860 } 861 862 /* 863 * Early completion. ENOENT is not an error if this is the last 864 * component and NLC_CREATE or NLC_RENAME (rename target) was 865 * requested. Note that ncp->nc_error is left as ENOENT in that 866 * case, which we check later on. 867 * 868 * Also handle invalid '.' 
or '..' components terminating a path 869 * for a create/rename/delete. The standard requires this and pax 870 * pretty stupidly depends on it. 871 */ 872 if (last_element) { 873 if (error == ENOENT && 874 (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST)) 875 ) { 876 if (nd->nl_flags & NLC_NFS_RDONLY) { 877 error = EROFS; 878 } else { 879 error = naccess(&nch, nd->nl_flags | dflags, 880 nd->nl_cred, NULL, last_element); 881 } 882 } 883 if (error == 0 && wasdotordotdot && 884 (nd->nl_flags & (NLC_CREATE | NLC_DELETE | 885 NLC_RENAME_SRC | NLC_RENAME_DST))) { 886 /* 887 * POSIX junk 888 */ 889 if (nd->nl_flags & NLC_CREATE) 890 error = EEXIST; 891 else if (nd->nl_flags & NLC_DELETE) 892 error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY; 893 else 894 error = EINVAL; 895 } 896 } 897 898 /* 899 * Early completion on error. 900 */ 901 if (error) { 902 if (last_element) 903 cache_unlock(&nch); 904 cache_drop_and_cache(&nch, nd->nl_elmno); 905 break; 906 } 907 908 /* 909 * If the element is a symlink and it is either not the last 910 * element or it is the last element and we are allowed to 911 * follow symlinks, resolve the symlink. 912 */ 913 if ((nch.ncp->nc_flag & NCF_ISSYMLINK) && 914 (*ptr || (nd->nl_flags & NLC_FOLLOW)) 915 ) { 916 if (nd->nl_loopcnt++ >= MAXSYMLINKS) { 917 error = ELOOP; 918 if (last_element) 919 cache_unlock(&nch); 920 cache_drop_and_cache(&nch, nd->nl_elmno); 921 break; 922 } 923 if (last_element == 0) 924 cache_lock_maybe_shared(&nch, 1); 925 926 error = nreadsymlink(nd, &nch, &nlc); 927 cache_put(&nch); 928 if (error) 929 break; 930 931 /* 932 * Concatenate trailing path elements onto the returned symlink. 933 * Note that if the path component (ptr) is not exhausted, it 934 * will being with a '/', so we do not have to add another one. 935 * 936 * The symlink may not be empty. 937 */ 938 len = strlen(ptr); 939 if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) { 940 error = nlc.nlc_namelen ? 
ENAMETOOLONG : ENOENT; 941 objcache_put(namei_oc, nlc.nlc_nameptr); 942 break; 943 } 944 bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1); 945 946 if (path_reset) { 947 if (nd->nl_flags & NLC_HASBUF) 948 objcache_put(namei_oc, nd->nl_path); 949 } else { 950 path_reset = nd->nl_path; 951 } 952 nd->nl_path = nlc.nlc_nameptr; 953 nd->nl_flags |= NLC_HASBUF; 954 ptr = nd->nl_path; 955 956 /* 957 * Go back up to the top to resolve any initial '/'s in the 958 * symlink. 959 */ 960 continue; 961 } 962 963 /* 964 * If the element is a directory and we are crossing a mount point, 965 * Locate the mount. 966 */ 967 while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 968 (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 && 969 (mp = cache_findmount(&nch)) != NULL 970 ) { 971 struct vnode *tdp; 972 int vfs_do_busy = 0; 973 974 /* 975 * VFS must be busied before the namecache entry is locked, 976 * but we don't want to waste time calling vfs_busy() if the 977 * mount point is already resolved. 978 */ 979 again: 980 if (last_element) 981 cache_unlock(&nch); 982 cache_drop_and_cache(&nch, nd->nl_elmno); 983 984 if (vfs_do_busy) { 985 while (vfs_busy(mp, 0)) { 986 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 987 kprintf("nlookup: warning umount race avoided\n"); 988 cache_dropmount(mp); 989 error = EBUSY; 990 vfs_do_busy = 0; 991 goto double_break; 992 } 993 } 994 } 995 996 /* 997 * We don't need to lock the nch unless the entry is unresolved 998 * or this is the last element. 
999 */ 1000 if (last_element) 1001 cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch, 1002 wantsexcllock(nd, 1)); 1003 else 1004 cache_copy(&mp->mnt_ncmountpt, &nch); 1005 1006 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 1007 if (last_element == 0) 1008 cache_lock(&nch); 1009 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 1010 if (vfs_do_busy == 0) { 1011 vfs_do_busy = 1; 1012 if (last_element == 0) 1013 cache_unlock(&nch); 1014 goto again; 1015 } 1016 error = VFS_ROOT(mp, &tdp); 1017 vfs_unbusy(mp); 1018 vfs_do_busy = 0; 1019 if (keeperror(nd, error)) { 1020 cache_dropmount(mp); 1021 if (last_element == 0) 1022 cache_unlock(&nch); 1023 break; 1024 } 1025 if (error == 0) { 1026 cache_setvp(&nch, tdp); 1027 vput(tdp); 1028 } 1029 } 1030 if (last_element == 0) 1031 cache_unlock(&nch); 1032 } 1033 if (vfs_do_busy) 1034 vfs_unbusy(mp); 1035 cache_dropmount(mp); 1036 } 1037 1038 /* 1039 * Break out on error 1040 */ 1041 if (keeperror(nd, error)) { 1042 if (last_element) 1043 cache_unlock(&nch); 1044 cache_drop_and_cache(&nch, nd->nl_elmno); 1045 double_break: 1046 break; 1047 } 1048 1049 /* 1050 * Skip any slashes to get to the next element. If there 1051 * are any slashes at all the current element must be a 1052 * directory or, in the create case, intended to become a directory. 1053 * If it isn't we break without incrementing ptr and fall through 1054 * to the failure case below. 1055 */ 1056 while (*ptr == '/') { 1057 if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 && 1058 !(nd->nl_flags & NLC_WILLBEDIR) 1059 ) { 1060 break; 1061 } 1062 ++ptr; 1063 } 1064 1065 /* 1066 * Continuation case: additional elements and the current 1067 * element is a directory. 
1068 */ 1069 if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) { 1070 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 1071 if (last_element) 1072 cache_unlock(&nch); 1073 /*nchislocked = 0; not needed */ 1074 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 1075 nd->nl_nch = nch; 1076 continue; 1077 } 1078 1079 /* 1080 * Failure case: additional elements and the current element 1081 * is not a directory 1082 */ 1083 if (*ptr) { 1084 if (last_element) 1085 cache_unlock(&nch); 1086 cache_drop_and_cache(&nch, nd->nl_elmno); 1087 error = ENOTDIR; 1088 break; 1089 } 1090 1091 /* 1092 * Successful lookup of last element. 1093 * 1094 * Check permissions if the target exists. If the target does not 1095 * exist directory permissions were already tested in the early 1096 * completion code above. 1097 * 1098 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY 1099 * if the file is marked append-only, and NLC_STICKY if the directory 1100 * containing the file is sticky. 1101 */ 1102 KKASSERT(last_element); 1103 1104 if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) { 1105 error = naccess(&nch, nd->nl_flags | dflags, 1106 nd->nl_cred, NULL, 1); 1107 if (keeperror(nd, error)) { 1108 cache_put(&nch); 1109 break; 1110 } 1111 } 1112 1113 /* 1114 * Termination: no more elements. 1115 * 1116 * If NLC_REFDVP is set acquire a referenced parent dvp. Typically 1117 * used for mkdir/mknod/ncreate/nremove/unlink/rename. 1118 * 1119 * NOTE: nd->nl_nch does not necessarily represent the parent 1120 * directory, e.g. due to a mount point transition. 1121 * 1122 * nch is locked, standard lock order for the namecache is 1123 * child-to-parent so we can safely lock its parent. We can 1124 * just use cache_dvpref(). 1125 * 1126 * If nc_parent is NULL this is probably a mount point and there 1127 * is no legal parent directory. 
However, we do not want to fail 1128 * the nlookup() because a higher level may wish to return a better 1129 * error code, such as mkdir("/mntpt") would want to return EEXIST 1130 */ 1131 if (nd->nl_flags & NLC_REFDVP) { 1132 if (nch.ncp->nc_parent) { 1133 nd->nl_dvp = cache_dvpref(nch.ncp); 1134 if (nd->nl_dvp == NULL) { 1135 error = EINVAL; 1136 if (keeperror(nd, error)) { 1137 kprintf("NLC_REFDVP: Cannot ref dvp " 1138 "of %s\n", 1139 nch.ncp->nc_name); 1140 cache_put(&nch); 1141 break; 1142 } 1143 } 1144 } else { 1145 error = 0; 1146 cache_put(&nch); 1147 break; 1148 } 1149 } 1150 cache_drop_and_cache(&nd->nl_nch, nd->nl_elmno); 1151 nd->nl_nch = nch; 1152 nd->nl_flags |= NLC_NCPISLOCKED; 1153 error = 0; 1154 break; 1155 } 1156 1157 /* 1158 * We are done / or possibly retry 1159 */ 1160 1161 if (hit) 1162 ++gd->gd_nchstats->ncs_longhits; 1163 else 1164 ++gd->gd_nchstats->ncs_longmiss; 1165 1166 if (nd->nl_flags & NLC_NCPISLOCKED) 1167 KKASSERT(cache_lockstatus(&nd->nl_nch) > 0); 1168 1169 /* 1170 * Reset nd->nl_path if necessary (due to softlinks). We want to return 1171 * nl_path to its original state before retrying or returning. 1172 */ 1173 if (path_reset) { 1174 if (nd->nl_flags & NLC_HASBUF) { 1175 objcache_put(namei_oc, nd->nl_path); 1176 nd->nl_flags &= ~NLC_HASBUF; 1177 } 1178 nd->nl_path = path_reset; 1179 nd->nl_flags |= saveflag & NLC_HASBUF; 1180 path_reset = NULL; 1181 } 1182 1183 /* 1184 * Retry the whole thing if doretry flag is set, but only once. 1185 * 1186 * autofs(5) may mount another filesystem under its root directory 1187 * while resolving a path. 
1188 * 1189 * NFS might return ESTALE 1190 */ 1191 if (doretry && !inretry) { 1192 kprintf("nlookup: errno %d retry %s\n", error, nd->nl_path); 1193 inretry = TRUE; 1194 1195 /* 1196 * Clean up nd->nl_nch and reset to base directory 1197 */ 1198 if (nd->nl_flags & NLC_NCPISLOCKED) { 1199 cache_unlock(&nd->nl_nch); 1200 nd->nl_flags &= ~NLC_NCPISLOCKED; 1201 } 1202 cache_drop(&nd->nl_nch); 1203 cache_copy(nd->nl_basench, &nd->nl_nch); 1204 1205 nd->nl_elmno = 0; 1206 nd->nl_flags |= saveflag; 1207 1208 goto nlookup_start; 1209 } 1210 1211 /* 1212 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit 1213 * (ncp->nc_error will be ENOENT), but we will still return an error 1214 * code of 0. 1215 */ 1216 return(error); 1217 } 1218 1219 /* 1220 * Resolve a mount point's glue ncp. This ncp connects creates the illusion 1221 * of continuity in the namecache tree by connecting the ncp related to the 1222 * vnode under the mount to the ncp related to the mount's root vnode. 1223 * 1224 * If no error occured a locked, ref'd ncp is stored in *ncpp. 1225 */ 1226 int 1227 nlookup_mp(struct mount *mp, struct nchandle *nch) 1228 { 1229 struct vnode *vp; 1230 int error; 1231 1232 error = 0; 1233 cache_get(&mp->mnt_ncmountpt, nch); 1234 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 1235 while (vfs_busy(mp, 0)) 1236 ; 1237 error = VFS_ROOT(mp, &vp); 1238 vfs_unbusy(mp); 1239 if (error) { 1240 cache_put(nch); 1241 } else { 1242 cache_setvp(nch, vp); 1243 vput(vp); 1244 } 1245 } 1246 return(error); 1247 } 1248 1249 /* 1250 * Read the contents of a symlink, allocate a path buffer out of the 1251 * namei_oc and initialize the supplied nlcomponent with the result. 1252 * 1253 * If an error occurs no buffer will be allocated or returned in the nlc. 
1254 */ 1255 int 1256 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 1257 struct nlcomponent *nlc) 1258 { 1259 struct vnode *vp; 1260 struct iovec aiov; 1261 struct uio auio; 1262 int linklen; 1263 int error; 1264 char *cp; 1265 1266 nlc->nlc_nameptr = NULL; 1267 nlc->nlc_namelen = 0; 1268 if (nch->ncp->nc_vp == NULL) 1269 return(ENOENT); 1270 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 1271 return(error); 1272 cp = objcache_get(namei_oc, M_WAITOK); 1273 aiov.iov_base = cp; 1274 aiov.iov_len = MAXPATHLEN; 1275 auio.uio_iov = &aiov; 1276 auio.uio_iovcnt = 1; 1277 auio.uio_offset = 0; 1278 auio.uio_rw = UIO_READ; 1279 auio.uio_segflg = UIO_SYSSPACE; 1280 auio.uio_td = nd->nl_td; 1281 auio.uio_resid = MAXPATHLEN - 1; 1282 error = VOP_READLINK(vp, &auio, nd->nl_cred); 1283 if (error) 1284 goto fail; 1285 linklen = MAXPATHLEN - 1 - auio.uio_resid; 1286 if (varsym_enable) { 1287 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 1288 if (linklen < 0) { 1289 error = ENAMETOOLONG; 1290 goto fail; 1291 } 1292 } 1293 cp[linklen] = 0; 1294 nlc->nlc_nameptr = cp; 1295 nlc->nlc_namelen = linklen; 1296 vput(vp); 1297 return(0); 1298 fail: 1299 objcache_put(namei_oc, cp); 1300 vput(vp); 1301 return(error); 1302 } 1303 1304 /* 1305 * Check access [XXX cache vattr!] [XXX quota] 1306 * 1307 * Generally check the NLC_* access bits. All specified bits must pass 1308 * for this function to return 0. 1309 * 1310 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 1311 * access, otherwise it must exist. No error is returned in this case. 1312 * 1313 * The file must not exist if NLC_EXCL is specified. 1314 * 1315 * Directory permissions in general are tested for NLC_CREATE if the file 1316 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 1317 * whether the file exists or not. 
1318 * 1319 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 1320 * the latter is only tested if the target exists. 1321 * 1322 * The passed ncp must be referenced and locked. If it is already resolved 1323 * it may be locked shared but otherwise should be locked exclusively. 1324 */ 1325 1326 #define S_WXOK_MASK (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 1327 #define S_XOK_MASK (S_IXUSR|S_IXGRP|S_IXOTH) 1328 1329 static int 1330 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp, 1331 int nchislocked) 1332 { 1333 struct vnode *vp; 1334 struct vattr va; 1335 struct namecache *ncp; 1336 int error; 1337 int cflags; 1338 1339 KKASSERT(nchislocked == 0 || cache_lockstatus(nch) > 0); 1340 1341 ncp = nch->ncp; 1342 again: 1343 if (ncp->nc_flag & NCF_UNRESOLVED) { 1344 if (nchislocked == 0) { 1345 cache_lock(nch); 1346 nchislocked = 2; 1347 } 1348 cache_resolve(nch, cred); 1349 ncp = nch->ncp; 1350 } 1351 error = ncp->nc_error; 1352 1353 /* 1354 * Directory permissions checks. Silently ignore ENOENT if these 1355 * tests pass. It isn't an error. 1356 * 1357 * We can safely resolve ncp->nc_parent because ncp is currently 1358 * locked. 
1359 */ 1360 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 1361 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 1362 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 1363 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 1364 (nflags & NLC_RENAME_DST) 1365 ) { 1366 struct nchandle par; 1367 1368 if (nchislocked == 0) { 1369 cache_lock_maybe_shared(nch, 0); 1370 nchislocked = 2; 1371 if (ncp->nc_flag & NCF_UNRESOLVED) 1372 goto again; 1373 } 1374 if ((par.ncp = ncp->nc_parent) == NULL) { 1375 if (error != EAGAIN) 1376 error = EINVAL; 1377 } else if (error == 0 || error == ENOENT) { 1378 par.mount = nch->mount; 1379 cache_hold(&par); 1380 cache_lock_maybe_shared(&par, 0); 1381 error = naccess(&par, NLC_WRITE, cred, NULL, 1); 1382 cache_put(&par); 1383 } 1384 } 1385 } 1386 1387 /* 1388 * NLC_EXCL check. Target file must not exist. 1389 */ 1390 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 1391 error = EEXIST; 1392 1393 /* 1394 * Try to short-cut the vnode operation for intermediate directory 1395 * components. This is a major SMP win because it avoids having 1396 * to execute a lot of code for intermediate directory components, 1397 * including shared refs and locks on intermediate directory vnodes. 1398 * 1399 * We can only do this if the caller does not need nflagsp. 1400 */ 1401 if (error == 0 && nflagsp == NULL && 1402 nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) { 1403 if (nchislocked == 2) 1404 cache_unlock(nch); 1405 return 0; 1406 } 1407 1408 /* 1409 * Get the vnode attributes so we can do the rest of our checks. 1410 * 1411 * NOTE: We only call naccess_va() if the target exists. 
1412 */ 1413 if (error == 0) { 1414 if (nchislocked == 0) { 1415 cache_lock_maybe_shared(nch, 0); 1416 nchislocked = 2; 1417 } 1418 #if 0 1419 error = cache_vget(nch, cred, LK_SHARED, &vp); 1420 #else 1421 error = cache_vref(nch, cred, &vp); 1422 #endif 1423 if (error == ENOENT) { 1424 /* 1425 * Silently zero-out ENOENT if creating or renaming 1426 * (rename target). It isn't an error. 1427 */ 1428 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1429 error = 0; 1430 } else if (error == 0) { 1431 /* 1432 * Get the vnode attributes and check for illegal O_TRUNC 1433 * requests and read-only mounts. 1434 * 1435 * NOTE: You can still open devices on read-only mounts for 1436 * writing. 1437 * 1438 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1439 * check on the parent directory above. 1440 * 1441 * XXX cache the va in the namecache or in the vnode 1442 */ 1443 error = VOP_GETATTR_QUICK(vp, &va); 1444 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1445 switch(va.va_type) { 1446 case VREG: 1447 case VDATABASE: 1448 case VCHR: 1449 case VBLK: 1450 case VFIFO: 1451 break; 1452 case VDIR: 1453 error = EISDIR; 1454 break; 1455 default: 1456 error = EINVAL; 1457 break; 1458 } 1459 } 1460 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1461 (vp->v_mount->mnt_flag & MNT_RDONLY) 1462 ) { 1463 switch(va.va_type) { 1464 case VDIR: 1465 case VLNK: 1466 case VREG: 1467 case VDATABASE: 1468 error = EROFS; 1469 break; 1470 default: 1471 break; 1472 } 1473 } 1474 #if 0 1475 vput(vp); 1476 #else 1477 vrele(vp); 1478 #endif 1479 1480 /* 1481 * Check permissions based on file attributes. The passed 1482 * flags (*nflagsp) are modified with feedback based on 1483 * special attributes and requirements. 1484 */ 1485 if (error == 0) { 1486 /* 1487 * Adjust the returned (*nflagsp) if non-NULL. 
1488 */ 1489 if (nflagsp) { 1490 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 1491 *nflagsp |= NLC_STICKY; 1492 if (va.va_flags & APPEND) 1493 *nflagsp |= NLC_APPENDONLY; 1494 if (va.va_flags & IMMUTABLE) 1495 *nflagsp |= NLC_IMMUTABLE; 1496 } 1497 1498 /* 1499 * NCF_WXOK can be set for world-searchable directories. 1500 * 1501 * XXX When we implement capabilities this code would also 1502 * need a cap check, or only set the flag if there are no 1503 * capabilities. 1504 */ 1505 cflags = 0; 1506 if (va.va_type == VDIR && 1507 (va.va_mode & S_WXOK_MASK) == S_WXOK_MASK) { 1508 cflags |= NCF_WXOK; 1509 } 1510 if ((va.va_mode & S_XOK_MASK) == 0) 1511 cflags |= NCF_NOTX; 1512 1513 /* 1514 * Track swapcache management flags in the namecache. 1515 * 1516 * Calculate the flags based on the current vattr info 1517 * and recalculate the inherited flags from the parent 1518 * (the original cache linkage may have occurred without 1519 * getattrs and thus have stale flags). 1520 */ 1521 if (va.va_flags & SF_NOCACHE) 1522 cflags |= NCF_SF_NOCACHE; 1523 if (va.va_flags & UF_CACHE) 1524 cflags |= NCF_UF_CACHE; 1525 if (ncp->nc_parent) { 1526 if (ncp->nc_parent->nc_flag & 1527 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1528 cflags |= NCF_SF_PNOCACHE; 1529 } 1530 if (ncp->nc_parent->nc_flag & 1531 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1532 cflags |= NCF_UF_PCACHE; 1533 } 1534 } 1535 1536 /* 1537 * We're not supposed to update nc_flag when holding a shared 1538 * lock, but we allow the case for certain flags. Note that 1539 * holding an exclusive lock allows updating nc_flag without 1540 * atomics. nc_flag is not allowe to be updated at all unless 1541 * a shared or exclusive lock is held. 1542 */ 1543 atomic_clear_short(&ncp->nc_flag, 1544 (NCF_SF_NOCACHE | NCF_UF_CACHE | 1545 NCF_SF_PNOCACHE | NCF_UF_PCACHE | 1546 NCF_WXOK | NCF_NOTX) & ~cflags); 1547 atomic_set_short(&ncp->nc_flag, cflags); 1548 1549 /* 1550 * Process general access. 
1551 */ 1552 error = naccess_va(&va, nflags, cred); 1553 } 1554 } 1555 } 1556 if (nchislocked == 2) 1557 cache_unlock(nch); 1558 return(error); 1559 } 1560 1561 /* 1562 * Check the requested access against the given vattr using cred. 1563 */ 1564 int 1565 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 1566 { 1567 int i; 1568 int vmode; 1569 1570 /* 1571 * Test the immutable bit. Creations, deletions, renames (source 1572 * or destination) are not allowed. chown/chmod/other is also not 1573 * allowed but is handled by SETATTR. Hardlinks to the immutable 1574 * file are allowed. 1575 * 1576 * If the directory is set to immutable then creations, deletions, 1577 * renames (source or dest) and hardlinks to files within the directory 1578 * are not allowed, and regular files opened through the directory may 1579 * not be written to or truncated (unless a special device). 1580 * 1581 * NOTE! New hardlinks to immutable files work but new hardlinks to 1582 * files, immutable or not, sitting inside an immutable directory are 1583 * not allowed. As always if the file is hardlinked via some other 1584 * path additional hardlinks may be possible even if the file is marked 1585 * immutable. The sysop needs to create a closure by checking the hard 1586 * link count. Once closure is achieved you are good, and security 1587 * scripts should check link counts anyway. 1588 * 1589 * Writes and truncations are only allowed on special devices. 
1590 */ 1591 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1592 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1593 return (EPERM); 1594 if (nflags & (NLC_CREATE | NLC_DELETE | 1595 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1596 return (EPERM); 1597 } 1598 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1599 switch(va->va_type) { 1600 case VDIR: 1601 return (EISDIR); 1602 case VLNK: 1603 case VREG: 1604 case VDATABASE: 1605 return (EPERM); 1606 default: 1607 break; 1608 } 1609 } 1610 } 1611 1612 /* 1613 * Test the no-unlink and append-only bits for opens, rename targets, 1614 * and deletions. These bits are not tested for creations or 1615 * rename sources. 1616 * 1617 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1618 * If you do not wish this you must also set NOUNLINK. 1619 * 1620 * If the governing directory is marked APPEND-only it implies 1621 * NOUNLINK for all entries in the directory. 1622 */ 1623 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1624 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1625 ) { 1626 return (EPERM); 1627 } 1628 1629 /* 1630 * A file marked append-only may not be deleted but can be renamed. 1631 */ 1632 if ((va->va_flags & APPEND) && 1633 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1634 ) { 1635 return (EPERM); 1636 } 1637 1638 /* 1639 * A file marked append-only which is opened for writing must also 1640 * be opened O_APPEND. 1641 */ 1642 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1643 if (nflags & NLC_TRUNCATE) 1644 return (EPERM); 1645 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1646 if ((nflags & NLC_APPEND) == 0) 1647 return (EPERM); 1648 } 1649 } 1650 1651 /* 1652 * root gets universal access 1653 */ 1654 if (cred->cr_uid == 0) 1655 return(0); 1656 1657 /* 1658 * Check owner perms. 1659 * 1660 * If NLC_OWN is set the owner of the file is allowed no matter when 1661 * the owner-mode bits say (utimes). 
1662 */ 1663 vmode = 0; 1664 if (nflags & NLC_READ) 1665 vmode |= S_IRUSR; 1666 if (nflags & NLC_WRITE) 1667 vmode |= S_IWUSR; 1668 if (nflags & NLC_EXEC) 1669 vmode |= S_IXUSR; 1670 1671 if (cred->cr_uid == va->va_uid) { 1672 if ((nflags & NLC_OWN) == 0) { 1673 if ((vmode & va->va_mode) != vmode) 1674 return(EACCES); 1675 } 1676 return(0); 1677 } 1678 1679 /* 1680 * If NLC_STICKY is set only the owner may delete or rename a file. 1681 * This bit is typically set on /tmp. 1682 * 1683 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1684 * the specific delete or rename case. For deletions and renames we 1685 * usually just care about directory permissions, not file permissions. 1686 */ 1687 if ((nflags & NLC_STICKY) && 1688 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1689 return(EACCES); 1690 } 1691 1692 /* 1693 * Check group perms 1694 */ 1695 vmode >>= 3; 1696 for (i = 0; i < cred->cr_ngroups; ++i) { 1697 if (va->va_gid == cred->cr_groups[i]) { 1698 if ((vmode & va->va_mode) != vmode) 1699 return(EACCES); 1700 return(0); 1701 } 1702 } 1703 1704 /* 1705 * Check world perms 1706 */ 1707 vmode >>= 3; 1708 if ((vmode & va->va_mode) != vmode) 1709 return(EACCES); 1710 return(0); 1711 } 1712 1713 /* 1714 * Long-term (10-second interval) statistics collection 1715 */ 1716 static 1717 uint64_t 1718 collect_nlookup_callback(int n) 1719 { 1720 static uint64_t last_total; 1721 uint64_t save; 1722 uint64_t total; 1723 1724 total = 0; 1725 for (n = 0; n < ncpus; ++n) { 1726 globaldata_t gd = globaldata_find(n); 1727 struct nchstats *sp; 1728 1729 if ((sp = gd->gd_nchstats) != NULL) 1730 total += sp->ncs_longhits + sp->ncs_longmiss; 1731 } 1732 save = total; 1733 total = total - last_total; 1734 last_total = save; 1735 1736 return total; 1737 } 1738 1739 static 1740 void 1741 nlookup_collect_init(void *dummy __unused) 1742 { 1743 kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback, 1744 
KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0)); 1745 } 1746 SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0); 1747