1 /* 2 * Copyright (c) 2004-2020 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * nlookup() is the 'new' namei interface. Rather then return directory and 36 * leaf vnodes (in various lock states) the new interface instead deals in 37 * namecache records. 
Namecache records may represent both a positive or 38 * a negative hit. The namespace is locked via the namecache record instead 39 * of via the vnode, and only the leaf namecache record (representing the 40 * filename) needs to be locked. 41 * 42 * This greatly improves filesystem parallelism and is a huge simplification 43 * of the API verses the old vnode locking / namei scheme. 44 * 45 * Filesystems must actively control the caching aspects of the namecache, 46 * and since namecache pointers are used as handles they are non-optional 47 * even for filesystems which do not generally wish to cache things. It is 48 * intended that a separate cache coherency API will be constructed to handle 49 * these issues. 50 */ 51 52 #include "opt_ktrace.h" 53 54 #include <sys/param.h> 55 #include <sys/systm.h> 56 #include <sys/uio.h> 57 #include <sys/kernel.h> 58 #include <sys/vnode.h> 59 #include <sys/mount.h> 60 #include <sys/filedesc.h> 61 #include <sys/proc.h> 62 #include <sys/namei.h> 63 #include <sys/nlookup.h> 64 #include <sys/malloc.h> 65 #include <sys/stat.h> 66 #include <sys/objcache.h> 67 #include <sys/file.h> 68 #include <sys/kcollect.h> 69 70 #ifdef KTRACE 71 #include <sys/ktrace.h> 72 #endif 73 74 static int naccess(struct nchandle *nch, int vmode, struct ucred *cred, 75 int *stickyp); 76 77 /* 78 * unmount operations flag NLC_IGNBADDIR in order to allow the 79 * umount to successfully issue a nlookup() on the path in order 80 * to extract the mount point. Allow certain errors through. 81 */ 82 static __inline 83 int 84 keeperror(struct nlookupdata *nd, int error) 85 { 86 if (error) { 87 if ((nd->nl_flags & NLC_IGNBADDIR) == 0 || 88 (error != EIO && error != EBADRPC && error != ESTALE)) { 89 return 1; 90 } 91 } 92 return 0; 93 } 94 95 /* 96 * Initialize a nlookup() structure, early error return for copyin faults 97 * or a degenerate empty string (which is not allowed). 
98 * 99 * The first process proc0's credentials are used if the calling thread 100 * is not associated with a process context. 101 * 102 * MPSAFE 103 */ 104 int 105 nlookup_init(struct nlookupdata *nd, 106 const char *path, enum uio_seg seg, int flags) 107 { 108 size_t pathlen; 109 struct proc *p; 110 thread_t td; 111 int error; 112 113 td = curthread; 114 p = td->td_proc; 115 116 /* 117 * note: the pathlen set by copy*str() includes the terminating \0. 118 */ 119 bzero(nd, sizeof(struct nlookupdata)); 120 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 121 nd->nl_flags |= NLC_HASBUF; 122 if (seg == UIO_SYSSPACE) 123 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 124 else 125 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 126 127 /* 128 * Don't allow empty pathnames. 129 * POSIX.1 requirement: "" is not a vaild file name. 130 */ 131 if (error == 0 && pathlen <= 1) 132 error = ENOENT; 133 134 if (error == 0) { 135 if (p && p->p_fd) { 136 cache_copy_ncdir(p, &nd->nl_nch); 137 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 138 if (p->p_fd->fd_njdir.ncp) 139 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 140 nd->nl_cred = td->td_ucred; 141 nd->nl_flags |= NLC_BORROWCRED | NLC_NCDIR; 142 } else { 143 cache_copy(&rootnch, &nd->nl_nch); 144 cache_copy(&nd->nl_nch, &nd->nl_rootnch); 145 cache_copy(&nd->nl_nch, &nd->nl_jailnch); 146 nd->nl_cred = proc0.p_ucred; 147 nd->nl_flags |= NLC_BORROWCRED; 148 } 149 nd->nl_td = td; 150 nd->nl_flags |= flags; 151 } else { 152 nlookup_done(nd); 153 } 154 return(error); 155 } 156 157 158 /* 159 * nlookup_init() for "at" family of syscalls. 160 * 161 * Works similarly to nlookup_init() but if path is relative and fd is not 162 * AT_FDCWD, path is interpreted relative to the directory pointed to by fd. 163 * In this case, the file entry pointed to by fd is ref'ed and returned in 164 * *fpp. 
165 * 166 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 167 * and release the ref to the file entry. 168 */ 169 int 170 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 171 const char *path, enum uio_seg seg, int flags) 172 { 173 struct thread *td = curthread; 174 struct file* fp; 175 struct vnode *vp; 176 int error; 177 178 *fpp = NULL; 179 180 if ((error = nlookup_init(nd, path, seg, flags)) != 0) { 181 return (error); 182 } 183 184 if (nd->nl_path[0] != '/' && fd != AT_FDCWD) { 185 if ((error = holdvnode(td, fd, &fp)) != 0) 186 goto done; 187 vp = (struct vnode*)fp->f_data; 188 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 189 fdrop(fp); 190 fp = NULL; 191 error = ENOTDIR; 192 goto done; 193 } 194 if (nd->nl_flags & NLC_NCDIR) { 195 cache_drop_ncdir(&nd->nl_nch); 196 nd->nl_flags &= ~NLC_NCDIR; 197 } else { 198 cache_drop(&nd->nl_nch); 199 } 200 cache_copy(&fp->f_nchandle, &nd->nl_nch); 201 *fpp = fp; 202 } 203 204 205 done: 206 if (error) 207 nlookup_done(nd); 208 return (error); 209 210 } 211 212 /* 213 * This works similarly to nlookup_init() but does not assume a process 214 * context. rootnch is always chosen for the root directory and the cred 215 * and starting directory are supplied in arguments. 216 */ 217 int 218 nlookup_init_raw(struct nlookupdata *nd, 219 const char *path, enum uio_seg seg, int flags, 220 struct ucred *cred, struct nchandle *ncstart) 221 { 222 size_t pathlen; 223 thread_t td; 224 int error; 225 226 td = curthread; 227 228 bzero(nd, sizeof(struct nlookupdata)); 229 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 230 nd->nl_flags |= NLC_HASBUF; 231 if (seg == UIO_SYSSPACE) 232 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 233 else 234 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 235 236 /* 237 * Don't allow empty pathnames. 238 * POSIX.1 requirement: "" is not a vaild file name. 
239 */ 240 if (error == 0 && pathlen <= 1) 241 error = ENOENT; 242 243 if (error == 0) { 244 cache_copy(ncstart, &nd->nl_nch); 245 cache_copy(&rootnch, &nd->nl_rootnch); 246 cache_copy(&rootnch, &nd->nl_jailnch); 247 nd->nl_cred = crhold(cred); 248 nd->nl_td = td; 249 nd->nl_flags |= flags; 250 } else { 251 nlookup_done(nd); 252 } 253 return(error); 254 } 255 256 /* 257 * This works similarly to nlookup_init_raw() but does not rely 258 * on rootnch being initialized yet. 259 */ 260 int 261 nlookup_init_root(struct nlookupdata *nd, 262 const char *path, enum uio_seg seg, int flags, 263 struct ucred *cred, struct nchandle *ncstart, 264 struct nchandle *ncroot) 265 { 266 size_t pathlen; 267 thread_t td; 268 int error; 269 270 td = curthread; 271 272 bzero(nd, sizeof(struct nlookupdata)); 273 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 274 nd->nl_flags |= NLC_HASBUF; 275 if (seg == UIO_SYSSPACE) 276 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 277 else 278 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 279 280 /* 281 * Don't allow empty pathnames. 282 * POSIX.1 requirement: "" is not a vaild file name. 283 */ 284 if (error == 0 && pathlen <= 1) 285 error = ENOENT; 286 287 if (error == 0) { 288 cache_copy(ncstart, &nd->nl_nch); 289 cache_copy(ncroot, &nd->nl_rootnch); 290 cache_copy(ncroot, &nd->nl_jailnch); 291 nd->nl_cred = crhold(cred); 292 nd->nl_td = td; 293 nd->nl_flags |= flags; 294 } else { 295 nlookup_done(nd); 296 } 297 return(error); 298 } 299 300 #if 0 301 /* 302 * Set a different credential; this credential will be used by future 303 * operations performed on nd.nl_open_vp and nlookupdata structure. 
304 */ 305 void 306 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 307 { 308 KKASSERT(nd->nl_cred != NULL); 309 310 if (nd->nl_cred != cred) { 311 cred = crhold(cred); 312 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 313 crfree(nd->nl_cred); 314 nd->nl_flags &= ~NLC_BORROWCRED; 315 nd->nl_cred = cred; 316 } 317 } 318 #endif 319 320 /* 321 * Cleanup a nlookupdata structure after we are through with it. This may 322 * be called on any nlookupdata structure initialized with nlookup_init(). 323 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 324 * returns an error, even if as a consumer you believe you have taken all 325 * dynamic elements out of the nlookupdata structure. 326 */ 327 void 328 nlookup_done(struct nlookupdata *nd) 329 { 330 if (nd->nl_nch.ncp) { 331 if (nd->nl_flags & NLC_NCPISLOCKED) { 332 nd->nl_flags &= ~NLC_NCPISLOCKED; 333 cache_unlock(&nd->nl_nch); 334 } 335 if (nd->nl_flags & NLC_NCDIR) { 336 cache_drop_ncdir(&nd->nl_nch); 337 nd->nl_flags &= ~NLC_NCDIR; 338 } else { 339 cache_drop(&nd->nl_nch); /* NULL's out the nch */ 340 } 341 } 342 if (nd->nl_rootnch.ncp) 343 cache_drop_and_cache(&nd->nl_rootnch); 344 if (nd->nl_jailnch.ncp) 345 cache_drop_and_cache(&nd->nl_jailnch); 346 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 347 objcache_put(namei_oc, nd->nl_path); 348 nd->nl_path = NULL; 349 } 350 if (nd->nl_cred) { 351 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 352 crfree(nd->nl_cred); 353 nd->nl_cred = NULL; 354 nd->nl_flags &= ~NLC_BORROWCRED; 355 } 356 if (nd->nl_open_vp) { 357 if (nd->nl_flags & NLC_LOCKVP) { 358 vn_unlock(nd->nl_open_vp); 359 nd->nl_flags &= ~NLC_LOCKVP; 360 } 361 vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL); 362 nd->nl_open_vp = NULL; 363 } 364 if (nd->nl_dvp) { 365 vrele(nd->nl_dvp); 366 nd->nl_dvp = NULL; 367 } 368 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 369 } 370 371 /* 372 * Works similarly to nlookup_done() when nd initialized with 373 * 
nlookup_init_at(). 374 */ 375 void 376 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 377 { 378 nlookup_done(nd); 379 if (fp != NULL) 380 fdrop(fp); 381 } 382 383 void 384 nlookup_zero(struct nlookupdata *nd) 385 { 386 bzero(nd, sizeof(struct nlookupdata)); 387 } 388 389 /* 390 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 391 * if an error occured. 392 * 393 * Note that the returned ncp is not checked for permissions, though VEXEC 394 * is checked on the directory path leading up to the result. The caller 395 * must call naccess() to check the permissions of the returned leaf. 396 */ 397 struct nchandle 398 nlookup_simple(const char *str, enum uio_seg seg, 399 int niflags, int *error) 400 { 401 struct nlookupdata nd; 402 struct nchandle nch; 403 404 *error = nlookup_init(&nd, str, seg, niflags); 405 if (*error == 0) { 406 if ((*error = nlookup(&nd)) == 0) { 407 nch = nd.nl_nch; /* keep hold ref from structure */ 408 cache_zero(&nd.nl_nch); /* and NULL out */ 409 } else { 410 cache_zero(&nch); 411 } 412 nlookup_done(&nd); 413 } else { 414 cache_zero(&nch); 415 } 416 return(nch); 417 } 418 419 /* 420 * Returns non-zero if the path element is the last element 421 */ 422 static 423 int 424 islastelement(const char *ptr) 425 { 426 while (*ptr == '/') 427 ++ptr; 428 return (*ptr == 0); 429 } 430 431 /* 432 * Returns non-zero if we need to lock the namecache element 433 * exclusively. Unless otherwise requested by NLC_SHAREDLOCK, 434 * the last element of the namecache lookup will be locked 435 * exclusively. 436 * 437 * O_CREAT or O_TRUNC need the last element to be locked exlcusively. 438 * Intermediate elements are always locked shared. 439 * 440 * NOTE: Even if we return on-zero, an unresolved namecache record 441 * will always be locked exclusively. 
442 */ 443 static __inline 444 int 445 wantsexcllock(struct nlookupdata *nd, const char *ptr) 446 { 447 if ((nd->nl_flags & NLC_SHAREDLOCK) == 0) 448 return(islastelement(ptr)); 449 return 0; 450 } 451 452 453 /* 454 * Do a generic nlookup. Note that the passed nd is not nlookup_done()'d 455 * on return, even if an error occurs. If no error occurs or NLC_CREATE 456 * is flagged and ENOENT is returned, then the returned nl_nch is always 457 * referenced and locked exclusively. 458 * 459 * WARNING: For any general error other than ENOENT w/NLC_CREATE, the 460 * the resulting nl_nch may or may not be locked and if locked 461 * might be locked either shared or exclusive. 462 * 463 * Intermediate directory elements, including the current directory, require 464 * execute (search) permission. nlookup does not examine the access 465 * permissions on the returned element. 466 * 467 * If NLC_CREATE is set the last directory must allow node creation, 468 * and an error code of 0 will be returned for a non-existant 469 * target (not ENOENT). 470 * 471 * If NLC_RENAME_DST is set the last directory mut allow node deletion, 472 * plus the sticky check is made, and an error code of 0 will be returned 473 * for a non-existant target (not ENOENT). 474 * 475 * If NLC_DELETE is set the last directory mut allow node deletion, 476 * plus the sticky check is made. 477 * 478 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode 479 * of the returned entry. The vnode will be referenced, but not locked, 480 * and will be released by nlookup_done() along with everything else. 481 * 482 * NOTE: As an optimization we attempt to obtain a shared namecache lock 483 * on any intermediate elements. On success, the returned element 484 * is ALWAYS locked exclusively. 
 */
int
nlookup(struct nlookupdata *nd)
{
    globaldata_t gd = mycpu;
    struct nlcomponent nlc;
    struct nchandle nch;
    struct nchandle par;
    struct nchandle nctmp;
    struct mount *mp;
    struct vnode *hvp;          /* hold to prevent recyclement */
    int wasdotordotdot;         /* 0 = normal name, 1 = ".", 2 = ".." */
    char *ptr;                  /* current position in nd->nl_path */
    char *nptr;                 /* end of the component starting at ptr */
    int error;
    int len;
    int dflags;
    int hit = 1;                /* cleared when an unresolved ncp is seen */
    /* flags at entry minus NLC_NCDIR; restored if the lookup is retried */
    int saveflag = nd->nl_flags & ~NLC_NCDIR;
    boolean_t doretry = FALSE;  /* set when cache_resolve() returns ESTALE */
    boolean_t inretry = FALSE;  /* TRUE during the second (retry) pass */

nlookup_start:
#ifdef KTRACE
    if (KTRPOINT(nd->nl_td, KTR_NAMEI))
        ktrnamei(nd->nl_td->td_lwp, nd->nl_path);
#endif
    bzero(&nlc, sizeof(nlc));

    /*
     * Setup for the loop.  The current working namecache element is
     * always at least referenced.  We lock it as required, but always
     * return a locked, resolved namecache entry.
     */
    nd->nl_loopcnt = 0;
    if (nd->nl_dvp) {
        vrele(nd->nl_dvp);
        nd->nl_dvp = NULL;
    }
    ptr = nd->nl_path;

    /*
     * Loop on the path components.  At the top of the loop nd->nl_nch
     * is ref'd and unlocked and represents our current position.
     */
    for (;;) {
        /*
         * Make sure nl_nch is locked so we can access the vnode, resolution
         * state, etc.
         */
        if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) {
            nd->nl_flags |= NLC_NCPISLOCKED;
            cache_lock_maybe_shared(&nd->nl_nch, wantsexcllock(nd, ptr));
        }

        /*
         * Check if the root directory should replace the current
         * directory.  This is done at the start of a translation
         * or after a symbolic link has been found.  In other cases
         * ptr will never be pointing at a '/'.
         */
        if (*ptr == '/') {
            do {
                ++ptr;
            } while (*ptr == '/');
            cache_unlock(&nd->nl_nch);
            cache_get_maybe_shared(&nd->nl_rootnch, &nch,
                                   wantsexcllock(nd, ptr));
            if (nd->nl_flags & NLC_NCDIR) {
                cache_drop_ncdir(&nd->nl_nch);
                nd->nl_flags &= ~NLC_NCDIR;
            } else {
                cache_drop(&nd->nl_nch);
            }
            nd->nl_nch = nch;           /* remains locked */

            /*
             * Fast-track termination.  There is no parent directory of
             * the root in the same mount from the point of view of
             * the caller so return EACCES if NLC_REFDVP is specified,
             * and EEXIST if NLC_CREATE is also specified.
             * e.g. 'rmdir /' or 'mkdir /' are not allowed.
             */
            if (*ptr == 0) {
                if (nd->nl_flags & NLC_REFDVP)
                    error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES;
                else
                    error = 0;
                break;
            }
            continue;
        }

        /*
         * Pre-calculate next path component so we can check whether the
         * current component directory is the last directory in the path
         * or not.
         */
        for (nptr = ptr; *nptr && *nptr != '/'; ++nptr)
            ;

        /*
         * Check directory search permissions (nd->nl_nch is locked & refd).
         * This will load dflags to obtain directory-special permissions to
         * be checked along with the last component.
         *
         * We only need to pass-in &dflags for the second-to-last component.
         * Optimize by passing-in NULL for any prior components, which may
         * allow the code to bypass the naccess() call.
         */
        dflags = 0;
        if (*nptr == '/' || (saveflag & NLC_MODIFYING_MASK) == 0)
            error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, NULL);
        else
            error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags);
        if (error) {
            if (keeperror(nd, error))
                break;
            error = 0;
        }

        /*
         * Extract the next (or last) path component.  Path components are
         * limited to 255 characters.
         */
        nlc.nlc_nameptr = ptr;
        nlc.nlc_namelen = nptr - ptr;
        ptr = nptr;
        if (nlc.nlc_namelen >= 256) {
            error = ENAMETOOLONG;
            break;
        }

        /*
         * Lookup the path component in the cache, creating an unresolved
         * entry if necessary.  We have to handle "." and ".." as special
         * cases.
         *
         * When handling ".." we have to detect a traversal back through a
         * mount point.  If we are at the root, ".." just returns the root.
         *
         * When handling "." or ".." we also have to recalculate dflags
         * since our dflags will be for some sub-directory instead of the
         * parent dir.
         *
         * This subsection returns a locked, refd 'nch' unless it errors out,
         * and an unlocked but still ref'd nd->nl_nch.
         *
         * The namecache topology is not allowed to be disconnected, so
         * encountering a NULL parent will generate EINVAL.  This typically
         * occurs when a directory is removed out from under a process.
         *
         * WARNING!  The unlocking of nd->nl_nch is sensitive code.
         */
        KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);

        if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') {
            cache_unlock(&nd->nl_nch);
            nd->nl_flags &= ~NLC_NCPISLOCKED;
            cache_get_maybe_shared(&nd->nl_nch, &nch, wantsexcllock(nd, ptr));
            wasdotordotdot = 1;
        } else if (nlc.nlc_namelen == 2 &&
                   nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') {
            if (nd->nl_nch.mount == nd->nl_rootnch.mount &&
                nd->nl_nch.ncp == nd->nl_rootnch.ncp
            ) {
                /*
                 * ".." at the root returns the root
                 */
                cache_unlock(&nd->nl_nch);
                nd->nl_flags &= ~NLC_NCPISLOCKED;
                cache_get_maybe_shared(&nd->nl_nch, &nch,
                                       wantsexcllock(nd, ptr));
            } else {
                /*
                 * Locate the parent ncp.  If we are at the root of a
                 * filesystem mount we have to skip to the mounted-on
                 * point in the underlying filesystem.
                 *
                 * Expect the parent to always be good since the
                 * mountpoint doesn't go away.  XXX hack.  cache_get()
                 * requires the ncp to already have a ref as a safety.
                 *
                 * However, a process which has been broken out of a chroot
                 * will wind up with a NULL parent if it tries to '..' above
                 * the real root, deal with the case.  Note that this does
                 * not protect us from a jail breakout, it just stops a panic
                 * if the jail-broken process tries to '..' past the real
                 * root.
                 */
                nctmp = nd->nl_nch;
                while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) {
                    nctmp = nctmp.mount->mnt_ncmounton;
                    if (nctmp.ncp == NULL)
                        break;
                }
                if (nctmp.ncp == NULL) {
                    if (curthread->td_proc) {
                        kprintf("vfs_nlookup: '..' traverse broke "
                                "jail: pid %d (%s)\n",
                                curthread->td_proc->p_pid,
                                curthread->td_comm);
                    }
                    nctmp = nd->nl_rootnch;
                } else {
                    nctmp.ncp = nctmp.ncp->nc_parent;
                }
                cache_hold(&nctmp);
                cache_unlock(&nd->nl_nch);
                nd->nl_flags &= ~NLC_NCPISLOCKED;
                cache_get_maybe_shared(&nctmp, &nch, wantsexcllock(nd, ptr));
                cache_drop(&nctmp);             /* NOTE: zero's nctmp */
            }
            wasdotordotdot = 2;
        } else {
            /*
             * Must unlock nl_nch when traversing down the path.  However,
             * the child ncp has not yet been found/created and the parent's
             * child list might be empty.  Thus releasing the lock can
             * allow a race whereby the parent ncp's vnode is recycled.
             * This case can occur especially when maxvnodes is set very low.
             *
             * We need the parent's ncp to remain resolved for all normal
             * filesystem activities, so we vhold() the vp during the lookup
             * to prevent recyclement due to vnlru / maxvnodes.
             *
             * If we race an unlink or rename the ncp might be marked
             * DESTROYED after resolution, requiring a retry.
             */
            if ((hvp = nd->nl_nch.ncp->nc_vp) != NULL)
                vhold(hvp);
            cache_unlock(&nd->nl_nch);
            nd->nl_flags &= ~NLC_NCPISLOCKED;
            error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc,
                                               wantsexcllock(nd, ptr), &nch);
            if (error == EWOULDBLOCK) {
                /*
                 * The maybe-shared lookup could not complete; fall back
                 * to cache_nlookup() and resolve the entry here, looping
                 * while the resolve returns EAGAIN or the ncp has been
                 * marked DESTROYED.
                 */
                nch = cache_nlookup(&nd->nl_nch, &nlc);
                if (nch.ncp->nc_flag & NCF_UNRESOLVED)
                    hit = 0;
                for (;;) {
                    error = cache_resolve(&nch, nd->nl_cred);
                    if (error != EAGAIN &&
                        (nch.ncp->nc_flag & NCF_DESTROYED) == 0) {
                        if (error == ESTALE) {
                            /* first pass: report ENOENT, schedule a retry */
                            if (!inretry)
                                error = ENOENT;
                            doretry = TRUE;
                        }
                        break;
                    }
                    kprintf("[diagnostic] nlookup: relookup %*.*s\n",
                            nch.ncp->nc_nlen, nch.ncp->nc_nlen,
                            nch.ncp->nc_name);
                    cache_put(&nch);
                    nch = cache_nlookup(&nd->nl_nch, &nlc);
                }
            }
            if (hvp)
                vdrop(hvp);
            wasdotordotdot = 0;
        }

        /*
         * If the last component was "." or ".." our dflags no longer
         * represents the parent directory and we have to explicitly
         * look it up.
         *
         * Expect the parent to be good since nch is locked.
         */
        if (wasdotordotdot && error == 0) {
            dflags = 0;
            if ((par.ncp = nch.ncp->nc_parent) != NULL) {
                par.mount = nch.mount;
                cache_hold(&par);
                cache_lock_maybe_shared(&par, wantsexcllock(nd, ptr));
                error = naccess(&par, 0, nd->nl_cred, &dflags);
                cache_put(&par);
                if (error) {
                    if (!keeperror(nd, error))
                        error = 0;
                }
            }
        }

        /*
         * [end of subsection]
         *
         * nch is locked and referenced.
         * nd->nl_nch is unlocked and referenced.
         *
         * nl_nch must be unlocked or we could chain lock to the root
         * if a resolve gets stuck (e.g. in NFS).
         */
        KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);

        /*
         * Resolve the namespace if necessary.  The ncp returned by
         * cache_nlookup() is referenced and locked.
         *
         * XXX neither '.' nor '..' should return EAGAIN since they were
         * previously resolved and thus cannot be newly created ncp's.
         */
        if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
            hit = 0;
            error = cache_resolve(&nch, nd->nl_cred);
            if (error == ESTALE) {
                if (!inretry)
                    error = ENOENT;
                doretry = TRUE;
            }
            KKASSERT(error != EAGAIN);
        } else {
            error = nch.ncp->nc_error;
        }

        /*
         * Early completion.  ENOENT is not an error if this is the last
         * component and NLC_CREATE or NLC_RENAME (rename target) was
         * requested.  Note that ncp->nc_error is left as ENOENT in that
         * case, which we check later on.
         *
         * Also handle invalid '.' or '..' components terminating a path
         * for a create/rename/delete.  The standard requires this and pax
         * pretty stupidly depends on it.
         */
        if (islastelement(ptr)) {
            if (error == ENOENT &&
                (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST))
            ) {
                if (nd->nl_flags & NLC_NFS_RDONLY) {
                    error = EROFS;
                } else {
                    error = naccess(&nch, nd->nl_flags | dflags,
                                    nd->nl_cred, NULL);
                }
            }
            if (error == 0 && wasdotordotdot &&
                (nd->nl_flags & (NLC_CREATE | NLC_DELETE |
                                 NLC_RENAME_SRC | NLC_RENAME_DST))) {
                /*
                 * POSIX junk
                 */
                if (nd->nl_flags & NLC_CREATE)
                    error = EEXIST;
                else if (nd->nl_flags & NLC_DELETE)
                    error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY;
                else
                    error = EINVAL;
            }
        }

        /*
         * Early completion on error.
         */
        if (error) {
            cache_put(&nch);
            break;
        }

        /*
         * If the element is a symlink and it is either not the last
         * element or it is the last element and we are allowed to
         * follow symlinks, resolve the symlink.
         */
        if ((nch.ncp->nc_flag & NCF_ISSYMLINK) &&
            (*ptr || (nd->nl_flags & NLC_FOLLOW))
        ) {
            if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
                error = ELOOP;
                cache_put(&nch);
                break;
            }
            error = nreadsymlink(nd, &nch, &nlc);
            cache_put(&nch);
            if (error)
                break;

            /*
             * Concatenate trailing path elements onto the returned symlink.
             * Note that if the path component (ptr) is not exhausted, it
             * will begin with a '/', so we do not have to add another one.
             *
             * The symlink may not be empty.
             */
            len = strlen(ptr);
            if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) {
                error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT;
                objcache_put(namei_oc, nlc.nlc_nameptr);
                break;
            }
            bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1);
            /* the symlink buffer now replaces the old path buffer */
            if (nd->nl_flags & NLC_HASBUF)
                objcache_put(namei_oc, nd->nl_path);
            nd->nl_path = nlc.nlc_nameptr;
            nd->nl_flags |= NLC_HASBUF;
            ptr = nd->nl_path;

            /*
             * Go back up to the top to resolve any initial '/'s in the
             * symlink.
             */
            continue;
        }

        /*
         * If the element is a directory and we are crossing a mount point,
         * locate the mount.
         */
        while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
            (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 &&
            (mp = cache_findmount(&nch)) != NULL
        ) {
            struct vnode *tdp;
            int vfs_do_busy = 0;

            /*
             * VFS must be busied before the namecache entry is locked,
             * but we don't want to waste time calling vfs_busy() if the
             * mount point is already resolved.
             */
again:
            cache_put(&nch);
            if (vfs_do_busy) {
                while (vfs_busy(mp, 0)) {
                    if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
                        kprintf("nlookup: warning umount race avoided\n");
                        cache_dropmount(mp);
                        error = EBUSY;
                        vfs_do_busy = 0;
                        goto double_break;
                    }
                }
            }
            cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch,
                                   wantsexcllock(nd, ptr));

            if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
                /* unresolved: redo the sequence with the mount busied */
                if (vfs_do_busy == 0) {
                    vfs_do_busy = 1;
                    goto again;
                }
                error = VFS_ROOT(mp, &tdp);
                vfs_unbusy(mp);
                vfs_do_busy = 0;
                if (keeperror(nd, error)) {
                    cache_dropmount(mp);
                    break;
                }
                if (error == 0) {
                    cache_setvp(&nch, tdp);
                    vput(tdp);
                }
            }
            if (vfs_do_busy)
                vfs_unbusy(mp);
            cache_dropmount(mp);
        }

        if (keeperror(nd, error)) {
            cache_put(&nch);
double_break:
            break;
        }

        /*
         * Skip any slashes to get to the next element.  If there
         * are any slashes at all the current element must be a
         * directory or, in the create case, intended to become a directory.
         * If it isn't we break without incrementing ptr and fall through
         * to the failure case below.
         */
        while (*ptr == '/') {
            if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 &&
                !(nd->nl_flags & NLC_WILLBEDIR)
            ) {
                break;
            }
            ++ptr;
        }

        /*
         * Continuation case: additional elements and the current
         * element is a directory.
         */
        if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) {
            if (nd->nl_flags & NLC_NCDIR) {
                cache_drop_ncdir(&nd->nl_nch);
                nd->nl_flags &= ~NLC_NCDIR;
            } else {
                cache_drop(&nd->nl_nch);
            }
            cache_unlock(&nch);
            KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
            nd->nl_nch = nch;
            continue;
        }

        /*
         * Failure case: additional elements and the current element
         * is not a directory
         */
        if (*ptr) {
            cache_put(&nch);
            error = ENOTDIR;
            break;
        }

        /*
         * Successful lookup of last element.
         *
         * Check permissions if the target exists.  If the target does not
         * exist directory permissions were already tested in the early
         * completion code above.
         *
         * nd->nl_flags will be adjusted on return with NLC_APPENDONLY
         * if the file is marked append-only, and NLC_STICKY if the directory
         * containing the file is sticky.
         */
        if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) {
            error = naccess(&nch, nd->nl_flags | dflags,
                            nd->nl_cred, NULL);
            if (keeperror(nd, error)) {
                cache_put(&nch);
                break;
            }
        }

        /*
         * Termination: no more elements.
         *
         * If NLC_REFDVP is set acquire a referenced parent dvp.
         */
        if (nd->nl_flags & NLC_REFDVP) {
            cache_lock(&nd->nl_nch);
            error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp);
            cache_unlock(&nd->nl_nch);
            if (keeperror(nd, error)) {
                kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp);
                cache_put(&nch);
                break;
            }
        }
        /* hand the locked, ref'd leaf over to nd->nl_nch */
        if (nd->nl_flags & NLC_NCDIR) {
            cache_drop_ncdir(&nd->nl_nch);
            nd->nl_flags &= ~NLC_NCDIR;
        } else {
            cache_drop(&nd->nl_nch);
        }
        nd->nl_nch = nch;
        nd->nl_flags |= NLC_NCPISLOCKED;
        error = 0;
        break;
    }

    /* per-cpu lookup statistics */
    if (hit)
        ++gd->gd_nchstats->ncs_longhits;
    else
        ++gd->gd_nchstats->ncs_longmiss;

    if (nd->nl_flags & NLC_NCPISLOCKED)
        KKASSERT(cache_lockstatus(&nd->nl_nch) > 0);

    /*
     * Retry the whole thing if doretry flag is set, but only once.
     * autofs(5) may mount another filesystem under its root directory
     * while resolving a path.
     *
     * The flags are reset to what the caller passed in (saveflag), keeping
     * only the current state of NLC_NCDIR.
     *
     * NOTE(review): this also drops NLC_NCPISLOCKED unless it was set at
     * entry -- confirm nd->nl_nch cannot still be locked when we get here.
     */
    if (doretry && !inretry) {
        inretry = TRUE;
        nd->nl_flags &= NLC_NCDIR;
        nd->nl_flags |= saveflag;
        goto nlookup_start;
    }

    /*
     * NOTE: If NLC_CREATE was set the ncp may represent a negative hit
     * (ncp->nc_error will be ENOENT), but we will still return an error
     * code of 0.
     */
    return(error);
}

/*
 * Resolve a mount point's glue ncp.  This ncp creates the illusion
 * of continuity in the namecache tree by connecting the ncp related to the
 * vnode under the mount to the ncp related to the mount's root vnode.
 *
 * If no error occurred a locked, ref'd ncp is stored in *nch.
1073 */ 1074 int 1075 nlookup_mp(struct mount *mp, struct nchandle *nch) 1076 { 1077 struct vnode *vp; 1078 int error; 1079 1080 error = 0; 1081 cache_get(&mp->mnt_ncmountpt, nch); 1082 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 1083 while (vfs_busy(mp, 0)) 1084 ; 1085 error = VFS_ROOT(mp, &vp); 1086 vfs_unbusy(mp); 1087 if (error) { 1088 cache_put(nch); 1089 } else { 1090 cache_setvp(nch, vp); 1091 vput(vp); 1092 } 1093 } 1094 return(error); 1095 } 1096 1097 /* 1098 * Read the contents of a symlink, allocate a path buffer out of the 1099 * namei_oc and initialize the supplied nlcomponent with the result. 1100 * 1101 * If an error occurs no buffer will be allocated or returned in the nlc. 1102 */ 1103 int 1104 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 1105 struct nlcomponent *nlc) 1106 { 1107 struct vnode *vp; 1108 struct iovec aiov; 1109 struct uio auio; 1110 int linklen; 1111 int error; 1112 char *cp; 1113 1114 nlc->nlc_nameptr = NULL; 1115 nlc->nlc_namelen = 0; 1116 if (nch->ncp->nc_vp == NULL) 1117 return(ENOENT); 1118 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 1119 return(error); 1120 cp = objcache_get(namei_oc, M_WAITOK); 1121 aiov.iov_base = cp; 1122 aiov.iov_len = MAXPATHLEN; 1123 auio.uio_iov = &aiov; 1124 auio.uio_iovcnt = 1; 1125 auio.uio_offset = 0; 1126 auio.uio_rw = UIO_READ; 1127 auio.uio_segflg = UIO_SYSSPACE; 1128 auio.uio_td = nd->nl_td; 1129 auio.uio_resid = MAXPATHLEN - 1; 1130 error = VOP_READLINK(vp, &auio, nd->nl_cred); 1131 if (error) 1132 goto fail; 1133 linklen = MAXPATHLEN - 1 - auio.uio_resid; 1134 if (varsym_enable) { 1135 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 1136 if (linklen < 0) { 1137 error = ENAMETOOLONG; 1138 goto fail; 1139 } 1140 } 1141 cp[linklen] = 0; 1142 nlc->nlc_nameptr = cp; 1143 nlc->nlc_namelen = linklen; 1144 vput(vp); 1145 return(0); 1146 fail: 1147 objcache_put(namei_oc, cp); 1148 vput(vp); 1149 return(error); 1150 } 1151 1152 /* 1153 * Check access [XXX cache 
vattr!] [XXX quota] 1154 * 1155 * Generally check the NLC_* access bits. All specified bits must pass 1156 * for this function to return 0. 1157 * 1158 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 1159 * access, otherwise it must exist. No error is returned in this case. 1160 * 1161 * The file must not exist if NLC_EXCL is specified. 1162 * 1163 * Directory permissions in general are tested for NLC_CREATE if the file 1164 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 1165 * whether the file exists or not. 1166 * 1167 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 1168 * the latter is only tested if the target exists. 1169 * 1170 * The passed ncp must be referenced and locked. If it is already resolved 1171 * it may be locked shared but otherwise should be locked exclusively. 1172 */ 1173 1174 #define S_WXOK_MASK (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 1175 #define S_XOK_MASK (S_IXUSR|S_IXGRP|S_IXOTH) 1176 1177 static int 1178 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) 1179 { 1180 struct vnode *vp; 1181 struct vattr va; 1182 struct namecache *ncp; 1183 int error; 1184 int cflags; 1185 1186 KKASSERT(cache_lockstatus(nch) > 0); 1187 1188 ncp = nch->ncp; 1189 if (ncp->nc_flag & NCF_UNRESOLVED) { 1190 cache_resolve(nch, cred); 1191 ncp = nch->ncp; 1192 } 1193 error = ncp->nc_error; 1194 1195 /* 1196 * Directory permissions checks. Silently ignore ENOENT if these 1197 * tests pass. It isn't an error. 1198 * 1199 * We can safely resolve ncp->nc_parent because ncp is currently 1200 * locked. 
1201 */ 1202 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 1203 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 1204 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 1205 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 1206 (nflags & NLC_RENAME_DST) 1207 ) { 1208 struct nchandle par; 1209 1210 if ((par.ncp = ncp->nc_parent) == NULL) { 1211 if (error != EAGAIN) 1212 error = EINVAL; 1213 } else if (error == 0 || error == ENOENT) { 1214 par.mount = nch->mount; 1215 cache_hold(&par); 1216 cache_lock_maybe_shared(&par, 0); 1217 error = naccess(&par, NLC_WRITE, cred, NULL); 1218 cache_put(&par); 1219 } 1220 } 1221 } 1222 1223 /* 1224 * NLC_EXCL check. Target file must not exist. 1225 */ 1226 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 1227 error = EEXIST; 1228 1229 /* 1230 * Try to short-cut the vnode operation for intermediate directory 1231 * components. This is a major SMP win because it avoids having 1232 * to execute a lot of code for intermediate directory components, 1233 * including shared refs and locks on intermediate directory vnodes. 1234 * 1235 * We can only do this if the caller does not need nflagsp. 1236 */ 1237 if (error == 0 && nflagsp == NULL && 1238 nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) { 1239 return 0; 1240 } 1241 1242 /* 1243 * Get the vnode attributes so we can do the rest of our checks. 1244 * 1245 * NOTE: We only call naccess_va() if the target exists. 1246 */ 1247 if (error == 0) { 1248 error = cache_vget(nch, cred, LK_SHARED, &vp); 1249 if (error == ENOENT) { 1250 /* 1251 * Silently zero-out ENOENT if creating or renaming 1252 * (rename target). It isn't an error. 1253 */ 1254 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1255 error = 0; 1256 } else if (error == 0) { 1257 /* 1258 * Get the vnode attributes and check for illegal O_TRUNC 1259 * requests and read-only mounts. 1260 * 1261 * NOTE: You can still open devices on read-only mounts for 1262 * writing. 
1263 * 1264 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1265 * check on the parent directory above. 1266 * 1267 * XXX cache the va in the namecache or in the vnode 1268 */ 1269 error = VOP_GETATTR(vp, &va); 1270 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1271 switch(va.va_type) { 1272 case VREG: 1273 case VDATABASE: 1274 case VCHR: 1275 case VBLK: 1276 case VFIFO: 1277 break; 1278 case VDIR: 1279 error = EISDIR; 1280 break; 1281 default: 1282 error = EINVAL; 1283 break; 1284 } 1285 } 1286 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1287 (vp->v_mount->mnt_flag & MNT_RDONLY) 1288 ) { 1289 switch(va.va_type) { 1290 case VDIR: 1291 case VLNK: 1292 case VREG: 1293 case VDATABASE: 1294 error = EROFS; 1295 break; 1296 default: 1297 break; 1298 } 1299 } 1300 vput(vp); 1301 1302 /* 1303 * Check permissions based on file attributes. The passed 1304 * flags (*nflagsp) are modified with feedback based on 1305 * special attributes and requirements. 1306 */ 1307 if (error == 0) { 1308 /* 1309 * Adjust the returned (*nflagsp) if non-NULL. 1310 */ 1311 if (nflagsp) { 1312 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 1313 *nflagsp |= NLC_STICKY; 1314 if (va.va_flags & APPEND) 1315 *nflagsp |= NLC_APPENDONLY; 1316 if (va.va_flags & IMMUTABLE) 1317 *nflagsp |= NLC_IMMUTABLE; 1318 } 1319 1320 /* 1321 * NCF_WXOK can be set for world-searchable directories. 1322 * 1323 * XXX When we implement capabilities this code would also 1324 * need a cap check, or only set the flag if there are no 1325 * capabilities. 1326 */ 1327 cflags = 0; 1328 if (va.va_type == VDIR && 1329 (va.va_mode & S_WXOK_MASK) == S_WXOK_MASK) { 1330 cflags |= NCF_WXOK; 1331 } 1332 if ((va.va_mode & S_XOK_MASK) == 0) 1333 cflags |= NCF_NOTX; 1334 1335 /* 1336 * Track swapcache management flags in the namecache. 
1337 * 1338 * Calculate the flags based on the current vattr info 1339 * and recalculate the inherited flags from the parent 1340 * (the original cache linkage may have occurred without 1341 * getattrs and thus have stale flags). 1342 */ 1343 if (va.va_flags & SF_NOCACHE) 1344 cflags |= NCF_SF_NOCACHE; 1345 if (va.va_flags & UF_CACHE) 1346 cflags |= NCF_UF_CACHE; 1347 if (ncp->nc_parent) { 1348 if (ncp->nc_parent->nc_flag & 1349 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1350 cflags |= NCF_SF_PNOCACHE; 1351 } 1352 if (ncp->nc_parent->nc_flag & 1353 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1354 cflags |= NCF_UF_PCACHE; 1355 } 1356 } 1357 1358 /* 1359 * We're not supposed to update nc_flag when holding a shared 1360 * lock, but we allow the case for certain flags. Note that 1361 * holding an exclusive lock allows updating nc_flag without 1362 * atomics. nc_flag is not allowe to be updated at all unless 1363 * a shared or exclusive lock is held. 1364 */ 1365 atomic_clear_short(&ncp->nc_flag, 1366 (NCF_SF_NOCACHE | NCF_UF_CACHE | 1367 NCF_SF_PNOCACHE | NCF_UF_PCACHE | 1368 NCF_WXOK | NCF_NOTX) & ~cflags); 1369 atomic_set_short(&ncp->nc_flag, cflags); 1370 1371 /* 1372 * Process general access. 1373 */ 1374 error = naccess_va(&va, nflags, cred); 1375 } 1376 } 1377 } 1378 return(error); 1379 } 1380 1381 /* 1382 * Check the requested access against the given vattr using cred. 1383 */ 1384 int 1385 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 1386 { 1387 int i; 1388 int vmode; 1389 1390 /* 1391 * Test the immutable bit. Creations, deletions, renames (source 1392 * or destination) are not allowed. chown/chmod/other is also not 1393 * allowed but is handled by SETATTR. Hardlinks to the immutable 1394 * file are allowed. 
1395 * 1396 * If the directory is set to immutable then creations, deletions, 1397 * renames (source or dest) and hardlinks to files within the directory 1398 * are not allowed, and regular files opened through the directory may 1399 * not be written to or truncated (unless a special device). 1400 * 1401 * NOTE! New hardlinks to immutable files work but new hardlinks to 1402 * files, immutable or not, sitting inside an immutable directory are 1403 * not allowed. As always if the file is hardlinked via some other 1404 * path additional hardlinks may be possible even if the file is marked 1405 * immutable. The sysop needs to create a closure by checking the hard 1406 * link count. Once closure is achieved you are good, and security 1407 * scripts should check link counts anyway. 1408 * 1409 * Writes and truncations are only allowed on special devices. 1410 */ 1411 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1412 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1413 return (EPERM); 1414 if (nflags & (NLC_CREATE | NLC_DELETE | 1415 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1416 return (EPERM); 1417 } 1418 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1419 switch(va->va_type) { 1420 case VDIR: 1421 return (EISDIR); 1422 case VLNK: 1423 case VREG: 1424 case VDATABASE: 1425 return (EPERM); 1426 default: 1427 break; 1428 } 1429 } 1430 } 1431 1432 /* 1433 * Test the no-unlink and append-only bits for opens, rename targets, 1434 * and deletions. These bits are not tested for creations or 1435 * rename sources. 1436 * 1437 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1438 * If you do not wish this you must also set NOUNLINK. 1439 * 1440 * If the governing directory is marked APPEND-only it implies 1441 * NOUNLINK for all entries in the directory. 
1442 */ 1443 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1444 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1445 ) { 1446 return (EPERM); 1447 } 1448 1449 /* 1450 * A file marked append-only may not be deleted but can be renamed. 1451 */ 1452 if ((va->va_flags & APPEND) && 1453 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1454 ) { 1455 return (EPERM); 1456 } 1457 1458 /* 1459 * A file marked append-only which is opened for writing must also 1460 * be opened O_APPEND. 1461 */ 1462 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1463 if (nflags & NLC_TRUNCATE) 1464 return (EPERM); 1465 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1466 if ((nflags & NLC_APPEND) == 0) 1467 return (EPERM); 1468 } 1469 } 1470 1471 /* 1472 * root gets universal access 1473 */ 1474 if (cred->cr_uid == 0) 1475 return(0); 1476 1477 /* 1478 * Check owner perms. 1479 * 1480 * If NLC_OWN is set the owner of the file is allowed no matter when 1481 * the owner-mode bits say (utimes). 1482 */ 1483 vmode = 0; 1484 if (nflags & NLC_READ) 1485 vmode |= S_IRUSR; 1486 if (nflags & NLC_WRITE) 1487 vmode |= S_IWUSR; 1488 if (nflags & NLC_EXEC) 1489 vmode |= S_IXUSR; 1490 1491 if (cred->cr_uid == va->va_uid) { 1492 if ((nflags & NLC_OWN) == 0) { 1493 if ((vmode & va->va_mode) != vmode) 1494 return(EACCES); 1495 } 1496 return(0); 1497 } 1498 1499 /* 1500 * If NLC_STICKY is set only the owner may delete or rename a file. 1501 * This bit is typically set on /tmp. 1502 * 1503 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1504 * the specific delete or rename case. For deletions and renames we 1505 * usually just care about directory permissions, not file permissions. 
1506 */ 1507 if ((nflags & NLC_STICKY) && 1508 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1509 return(EACCES); 1510 } 1511 1512 /* 1513 * Check group perms 1514 */ 1515 vmode >>= 3; 1516 for (i = 0; i < cred->cr_ngroups; ++i) { 1517 if (va->va_gid == cred->cr_groups[i]) { 1518 if ((vmode & va->va_mode) != vmode) 1519 return(EACCES); 1520 return(0); 1521 } 1522 } 1523 1524 /* 1525 * Check world perms 1526 */ 1527 vmode >>= 3; 1528 if ((vmode & va->va_mode) != vmode) 1529 return(EACCES); 1530 return(0); 1531 } 1532 1533 /* 1534 * Long-term (10-second interval) statistics collection 1535 */ 1536 static 1537 uint64_t 1538 collect_nlookup_callback(int n) 1539 { 1540 static uint64_t last_total; 1541 uint64_t save; 1542 uint64_t total; 1543 1544 total = 0; 1545 for (n = 0; n < ncpus; ++n) { 1546 globaldata_t gd = globaldata_find(n); 1547 struct nchstats *sp; 1548 1549 if ((sp = gd->gd_nchstats) != NULL) 1550 total += sp->ncs_longhits + sp->ncs_longmiss; 1551 } 1552 save = total; 1553 total = total - last_total; 1554 last_total = save; 1555 1556 return total; 1557 } 1558 1559 static 1560 void 1561 nlookup_collect_init(void *dummy __unused) 1562 { 1563 kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback, 1564 KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0)); 1565 } 1566 SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0); 1567