1 /* 2 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * nlookup() is the 'new' namei interface. Rather then return directory and 36 * leaf vnodes (in various lock states) the new interface instead deals in 37 * namecache records. 
Namecache records may represent both a positive or 38 * a negative hit. The namespace is locked via the namecache record instead 39 * of via the vnode, and only the leaf namecache record (representing the 40 * filename) needs to be locked. 41 * 42 * This greatly improves filesystem parallelism and is a huge simplification 43 * of the API verses the old vnode locking / namei scheme. 44 * 45 * Filesystems must actively control the caching aspects of the namecache, 46 * and since namecache pointers are used as handles they are non-optional 47 * even for filesystems which do not generally wish to cache things. It is 48 * intended that a separate cache coherency API will be constructed to handle 49 * these issues. 50 */ 51 52 #include "opt_ktrace.h" 53 54 #include <sys/param.h> 55 #include <sys/systm.h> 56 #include <sys/kernel.h> 57 #include <sys/vnode.h> 58 #include <sys/mount.h> 59 #include <sys/filedesc.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/malloc.h> 64 #include <sys/stat.h> 65 #include <sys/objcache.h> 66 #include <sys/file.h> 67 #include <sys/kcollect.h> 68 69 #ifdef KTRACE 70 #include <sys/ktrace.h> 71 #endif 72 73 static int naccess(struct nchandle *nch, int vmode, struct ucred *cred, 74 int *stickyp); 75 76 /* 77 * unmount operations flag NLC_IGNBADDIR in order to allow the 78 * umount to successfully issue a nlookup() on the path in order 79 * to extract the mount point. Allow certain errors through. 80 */ 81 static __inline 82 int 83 keeperror(struct nlookupdata *nd, int error) 84 { 85 if (error) { 86 if ((nd->nl_flags & NLC_IGNBADDIR) == 0 || 87 (error != EIO && error != EBADRPC && error != ESTALE)) { 88 return 1; 89 } 90 } 91 return 0; 92 } 93 94 /* 95 * Initialize a nlookup() structure, early error return for copyin faults 96 * or a degenerate empty string (which is not allowed). 
97 * 98 * The first process proc0's credentials are used if the calling thread 99 * is not associated with a process context. 100 * 101 * MPSAFE 102 */ 103 int 104 nlookup_init(struct nlookupdata *nd, 105 const char *path, enum uio_seg seg, int flags) 106 { 107 size_t pathlen; 108 struct proc *p; 109 thread_t td; 110 int error; 111 112 td = curthread; 113 p = td->td_proc; 114 115 /* 116 * note: the pathlen set by copy*str() includes the terminating \0. 117 */ 118 bzero(nd, sizeof(struct nlookupdata)); 119 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 120 nd->nl_flags |= NLC_HASBUF; 121 if (seg == UIO_SYSSPACE) 122 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 123 else 124 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 125 126 /* 127 * Don't allow empty pathnames. 128 * POSIX.1 requirement: "" is not a vaild file name. 129 */ 130 if (error == 0 && pathlen <= 1) 131 error = ENOENT; 132 133 if (error == 0) { 134 if (p && p->p_fd) { 135 cache_copy_ncdir(p, &nd->nl_nch); 136 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 137 if (p->p_fd->fd_njdir.ncp) 138 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 139 nd->nl_cred = td->td_ucred; 140 nd->nl_flags |= NLC_BORROWCRED | NLC_NCDIR; 141 } else { 142 cache_copy(&rootnch, &nd->nl_nch); 143 cache_copy(&nd->nl_nch, &nd->nl_rootnch); 144 cache_copy(&nd->nl_nch, &nd->nl_jailnch); 145 nd->nl_cred = proc0.p_ucred; 146 nd->nl_flags |= NLC_BORROWCRED; 147 } 148 nd->nl_td = td; 149 nd->nl_flags |= flags; 150 } else { 151 nlookup_done(nd); 152 } 153 return(error); 154 } 155 156 157 /* 158 * nlookup_init() for "at" family of syscalls. 159 * 160 * Works similarly to nlookup_init() but if path is relative and fd is not 161 * AT_FDCWD, path is interpreted relative to the directory pointed to by fd. 162 * In this case, the file entry pointed to by fd is ref'ed and returned in 163 * *fpp. 
164 * 165 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 166 * and release the ref to the file entry. 167 */ 168 int 169 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 170 const char *path, enum uio_seg seg, int flags) 171 { 172 struct thread *td = curthread; 173 struct file* fp; 174 struct vnode *vp; 175 int error; 176 177 *fpp = NULL; 178 179 if ((error = nlookup_init(nd, path, seg, flags)) != 0) { 180 return (error); 181 } 182 183 if (nd->nl_path[0] != '/' && fd != AT_FDCWD) { 184 if ((error = holdvnode(td, fd, &fp)) != 0) 185 goto done; 186 vp = (struct vnode*)fp->f_data; 187 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 188 fdrop(fp); 189 fp = NULL; 190 error = ENOTDIR; 191 goto done; 192 } 193 if (nd->nl_flags & NLC_NCDIR) { 194 cache_drop_ncdir(&nd->nl_nch); 195 nd->nl_flags &= ~NLC_NCDIR; 196 } else { 197 cache_drop(&nd->nl_nch); 198 } 199 cache_copy(&fp->f_nchandle, &nd->nl_nch); 200 *fpp = fp; 201 } 202 203 204 done: 205 if (error) 206 nlookup_done(nd); 207 return (error); 208 209 } 210 211 /* 212 * This works similarly to nlookup_init() but does not assume a process 213 * context. rootnch is always chosen for the root directory and the cred 214 * and starting directory are supplied in arguments. 215 */ 216 int 217 nlookup_init_raw(struct nlookupdata *nd, 218 const char *path, enum uio_seg seg, int flags, 219 struct ucred *cred, struct nchandle *ncstart) 220 { 221 size_t pathlen; 222 thread_t td; 223 int error; 224 225 td = curthread; 226 227 bzero(nd, sizeof(struct nlookupdata)); 228 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 229 nd->nl_flags |= NLC_HASBUF; 230 if (seg == UIO_SYSSPACE) 231 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 232 else 233 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 234 235 /* 236 * Don't allow empty pathnames. 237 * POSIX.1 requirement: "" is not a vaild file name. 
238 */ 239 if (error == 0 && pathlen <= 1) 240 error = ENOENT; 241 242 if (error == 0) { 243 cache_copy(ncstart, &nd->nl_nch); 244 cache_copy(&rootnch, &nd->nl_rootnch); 245 cache_copy(&rootnch, &nd->nl_jailnch); 246 nd->nl_cred = crhold(cred); 247 nd->nl_td = td; 248 nd->nl_flags |= flags; 249 } else { 250 nlookup_done(nd); 251 } 252 return(error); 253 } 254 255 /* 256 * This works similarly to nlookup_init_raw() but does not rely 257 * on rootnch being initialized yet. 258 */ 259 int 260 nlookup_init_root(struct nlookupdata *nd, 261 const char *path, enum uio_seg seg, int flags, 262 struct ucred *cred, struct nchandle *ncstart, 263 struct nchandle *ncroot) 264 { 265 size_t pathlen; 266 thread_t td; 267 int error; 268 269 td = curthread; 270 271 bzero(nd, sizeof(struct nlookupdata)); 272 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 273 nd->nl_flags |= NLC_HASBUF; 274 if (seg == UIO_SYSSPACE) 275 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 276 else 277 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 278 279 /* 280 * Don't allow empty pathnames. 281 * POSIX.1 requirement: "" is not a vaild file name. 282 */ 283 if (error == 0 && pathlen <= 1) 284 error = ENOENT; 285 286 if (error == 0) { 287 cache_copy(ncstart, &nd->nl_nch); 288 cache_copy(ncroot, &nd->nl_rootnch); 289 cache_copy(ncroot, &nd->nl_jailnch); 290 nd->nl_cred = crhold(cred); 291 nd->nl_td = td; 292 nd->nl_flags |= flags; 293 } else { 294 nlookup_done(nd); 295 } 296 return(error); 297 } 298 299 #if 0 300 /* 301 * Set a different credential; this credential will be used by future 302 * operations performed on nd.nl_open_vp and nlookupdata structure. 
303 */ 304 void 305 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 306 { 307 KKASSERT(nd->nl_cred != NULL); 308 309 if (nd->nl_cred != cred) { 310 cred = crhold(cred); 311 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 312 crfree(nd->nl_cred); 313 nd->nl_flags &= ~NLC_BORROWCRED; 314 nd->nl_cred = cred; 315 } 316 } 317 #endif 318 319 /* 320 * Cleanup a nlookupdata structure after we are through with it. This may 321 * be called on any nlookupdata structure initialized with nlookup_init(). 322 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 323 * returns an error, even if as a consumer you believe you have taken all 324 * dynamic elements out of the nlookupdata structure. 325 */ 326 void 327 nlookup_done(struct nlookupdata *nd) 328 { 329 if (nd->nl_nch.ncp) { 330 if (nd->nl_flags & NLC_NCPISLOCKED) { 331 nd->nl_flags &= ~NLC_NCPISLOCKED; 332 cache_unlock(&nd->nl_nch); 333 } 334 if (nd->nl_flags & NLC_NCDIR) { 335 cache_drop_ncdir(&nd->nl_nch); 336 nd->nl_flags &= ~NLC_NCDIR; 337 } else { 338 cache_drop(&nd->nl_nch); /* NULL's out the nch */ 339 } 340 } 341 if (nd->nl_rootnch.ncp) 342 cache_drop_and_cache(&nd->nl_rootnch); 343 if (nd->nl_jailnch.ncp) 344 cache_drop_and_cache(&nd->nl_jailnch); 345 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 346 objcache_put(namei_oc, nd->nl_path); 347 nd->nl_path = NULL; 348 } 349 if (nd->nl_cred) { 350 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 351 crfree(nd->nl_cred); 352 nd->nl_cred = NULL; 353 nd->nl_flags &= ~NLC_BORROWCRED; 354 } 355 if (nd->nl_open_vp) { 356 if (nd->nl_flags & NLC_LOCKVP) { 357 vn_unlock(nd->nl_open_vp); 358 nd->nl_flags &= ~NLC_LOCKVP; 359 } 360 vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL); 361 nd->nl_open_vp = NULL; 362 } 363 if (nd->nl_dvp) { 364 vrele(nd->nl_dvp); 365 nd->nl_dvp = NULL; 366 } 367 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 368 } 369 370 /* 371 * Works similarly to nlookup_done() when nd initialized with 372 * 
nlookup_init_at(). 373 */ 374 void 375 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 376 { 377 nlookup_done(nd); 378 if (fp != NULL) 379 fdrop(fp); 380 } 381 382 void 383 nlookup_zero(struct nlookupdata *nd) 384 { 385 bzero(nd, sizeof(struct nlookupdata)); 386 } 387 388 /* 389 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 390 * if an error occured. 391 * 392 * Note that the returned ncp is not checked for permissions, though VEXEC 393 * is checked on the directory path leading up to the result. The caller 394 * must call naccess() to check the permissions of the returned leaf. 395 */ 396 struct nchandle 397 nlookup_simple(const char *str, enum uio_seg seg, 398 int niflags, int *error) 399 { 400 struct nlookupdata nd; 401 struct nchandle nch; 402 403 *error = nlookup_init(&nd, str, seg, niflags); 404 if (*error == 0) { 405 if ((*error = nlookup(&nd)) == 0) { 406 nch = nd.nl_nch; /* keep hold ref from structure */ 407 cache_zero(&nd.nl_nch); /* and NULL out */ 408 } else { 409 cache_zero(&nch); 410 } 411 nlookup_done(&nd); 412 } else { 413 cache_zero(&nch); 414 } 415 return(nch); 416 } 417 418 /* 419 * Returns non-zero if the path element is the last element 420 */ 421 static 422 int 423 islastelement(const char *ptr) 424 { 425 while (*ptr == '/') 426 ++ptr; 427 return (*ptr == 0); 428 } 429 430 /* 431 * Returns non-zero if we need to lock the namecache element 432 * exclusively. Unless otherwise requested by NLC_SHAREDLOCK, 433 * the last element of the namecache lookup will be locked 434 * exclusively. 435 * 436 * NOTE: Even if we return on-zero, an unresolved namecache record 437 * will always be locked exclusively. 438 */ 439 static __inline 440 int 441 wantsexcllock(struct nlookupdata *nd, const char *ptr) 442 { 443 if ((nd->nl_flags & NLC_SHAREDLOCK) == 0) 444 return(islastelement(ptr)); 445 return(0); 446 } 447 448 449 /* 450 * Do a generic nlookup. 
Note that the passed nd is not nlookup_done()'d
 * on return, even if an error occurs.  If no error occurs or NLC_CREATE
 * is flagged and ENOENT is returned, then the returned nl_nch is always
 * referenced and locked exclusively.
 *
 * WARNING: For any general error other than ENOENT w/NLC_CREATE,
 *	    the resulting nl_nch may or may not be locked and if locked
 *	    might be locked either shared or exclusive.
 *
 * Intermediate directory elements, including the current directory, require
 * execute (search) permission.  nlookup does not examine the access
 * permissions on the returned element.
 *
 * If NLC_CREATE is set the last directory must allow node creation,
 * and an error code of 0 will be returned for a non-existent
 * target (not ENOENT).
 *
 * If NLC_RENAME_DST is set the last directory must allow node deletion,
 * plus the sticky check is made, and an error code of 0 will be returned
 * for a non-existent target (not ENOENT).
 *
 * If NLC_DELETE is set the last directory must allow node deletion,
 * plus the sticky check is made.
 *
 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
 * of the returned entry.  The vnode will be referenced, but not locked,
 * and will be released by nlookup_done() along with everything else.
 *
 * NOTE: As an optimization we attempt to obtain a shared namecache lock
 *	 on any intermediate elements.  On success, the returned element
 *	 is ALWAYS locked exclusively.
 *
 * NOTE(review): wantsexcllock() returns 0 whenever NLC_SHAREDLOCK is set,
 *	 so with that flag the final element is requested shared rather
 *	 than exclusive -- confirm how that squares with the statement
 *	 above.
 */
int
nlookup(struct nlookupdata *nd)
{
    globaldata_t gd = mycpu;
    struct nlcomponent nlc;
    struct nchandle nch;
    struct nchandle par;
    struct nchandle nctmp;
    struct mount *mp;
    struct vnode *hvp;		/* hold to prevent recyclement */
    int wasdotordotdot;		/* 0 = normal, 1 = ".", 2 = ".." */
    char *ptr;
    char *nptr;
    int error;
    int len;
    int dflags;
    /* cleared when an unresolved ncp is seen; picks the stats counter */
    int hit = 1;
    /* flags at entry (minus NLC_NCDIR), restored before a retry */
    int saveflag = nd->nl_flags & ~NLC_NCDIR;
    /* doretry is set on ESTALE from cache_resolve(); inretry limits to one */
    boolean_t doretry = FALSE;
    boolean_t inretry = FALSE;

nlookup_start:
#ifdef KTRACE
    if (KTRPOINT(nd->nl_td, KTR_NAMEI))
	ktrnamei(nd->nl_td->td_lwp, nd->nl_path);
#endif
    bzero(&nlc, sizeof(nlc));

    /*
     * Setup for the loop.  The current working namecache element is
     * always at least referenced.  We lock it as required, but always
     * return a locked, resolved namecache entry.
     */
    nd->nl_loopcnt = 0;
    if (nd->nl_dvp) {
	vrele(nd->nl_dvp);
	nd->nl_dvp = NULL;
    }
    ptr = nd->nl_path;

    /*
     * Loop on the path components.  At the top of the loop nd->nl_nch
     * is ref'd and unlocked and represents our current position.
     */
    for (;;) {
	/*
	 * Make sure nl_nch is locked so we can access the vnode, resolution
	 * state, etc.
	 */
	if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) {
	    nd->nl_flags |= NLC_NCPISLOCKED;
	    cache_lock_maybe_shared(&nd->nl_nch, wantsexcllock(nd, ptr));
	}

	/*
	 * Check if the root directory should replace the current
	 * directory.  This is done at the start of a translation
	 * or after a symbolic link has been found.  In other cases
	 * ptr will never be pointing at a '/'.
	 */
	if (*ptr == '/') {
	    do {
		++ptr;
	    } while (*ptr == '/');
	    cache_unlock(&nd->nl_nch);
	    cache_get_maybe_shared(&nd->nl_rootnch, &nch,
				   wantsexcllock(nd, ptr));
	    if (nd->nl_flags & NLC_NCDIR) {
		cache_drop_ncdir(&nd->nl_nch);
		nd->nl_flags &= ~NLC_NCDIR;
	    } else {
		cache_drop(&nd->nl_nch);
	    }
	    nd->nl_nch = nch;		/* remains locked */

	    /*
	     * Fast-track termination.  There is no parent directory of
	     * the root in the same mount from the point of view of
	     * the caller so return EACCES if NLC_REFDVP is specified,
	     * and EEXIST if NLC_CREATE is also specified.
	     * e.g. 'rmdir /' or 'mkdir /' are not allowed.
	     */
	    if (*ptr == 0) {
		if (nd->nl_flags & NLC_REFDVP)
		    error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES;
		else
		    error = 0;
		break;
	    }
	    continue;
	}

	/*
	 * Pre-calculate next path component so we can check whether the
	 * current component directory is the last directory in the path
	 * or not.
	 */
	for (nptr = ptr; *nptr && *nptr != '/'; ++nptr)
	    ;

	/*
	 * Check directory search permissions (nd->nl_nch is locked & refd).
	 * This will load dflags to obtain directory-special permissions to
	 * be checked along with the last component.
	 *
	 * We only need to pass-in &dflags for the second-to-last component.
	 * Optimize by passing-in NULL for any prior components, which may
	 * allow the code to bypass the naccess() call.
	 */
	dflags = 0;
	if (*nptr == '/')
	    error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, NULL);
	else
	    error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags);
	if (error) {
	    if (keeperror(nd, error))
		break;
	    error = 0;
	}

	/*
	 * Extract the next (or last) path component.  Path components are
	 * limited to 255 characters.
	 */
	nlc.nlc_nameptr = ptr;
	nlc.nlc_namelen = nptr - ptr;
	ptr = nptr;
	if (nlc.nlc_namelen >= 256) {
	    error = ENAMETOOLONG;
	    break;
	}

	/*
	 * Lookup the path component in the cache, creating an unresolved
	 * entry if necessary.  We have to handle "." and ".." as special
	 * cases.
	 *
	 * When handling ".." we have to detect a traversal back through a
	 * mount point.   If we are at the root, ".." just returns the root.
	 *
	 * When handling "." or ".." we also have to recalculate dflags
	 * since our dflags will be for some sub-directory instead of the
	 * parent dir.
	 *
	 * This subsection returns a locked, refd 'nch' unless it errors out,
	 * and an unlocked but still ref'd nd->nl_nch.
	 *
	 * The namecache topology is not allowed to be disconnected, so
	 * encountering a NULL parent will generate EINVAL.  This typically
	 * occurs when a directory is removed out from under a process.
	 *
	 * WARNING!  The unlocking of nd->nl_nch is sensitive code.
	 */
	KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);

	if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') {
	    cache_unlock(&nd->nl_nch);
	    nd->nl_flags &= ~NLC_NCPISLOCKED;
	    cache_get_maybe_shared(&nd->nl_nch, &nch, wantsexcllock(nd, ptr));
	    wasdotordotdot = 1;
	} else if (nlc.nlc_namelen == 2 &&
		   nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') {
	    if (nd->nl_nch.mount == nd->nl_rootnch.mount &&
		nd->nl_nch.ncp == nd->nl_rootnch.ncp
	    ) {
		/*
		 * ".." at the root returns the root
		 */
		cache_unlock(&nd->nl_nch);
		nd->nl_flags &= ~NLC_NCPISLOCKED;
		cache_get_maybe_shared(&nd->nl_nch, &nch,
				       wantsexcllock(nd, ptr));
	    } else {
		/*
		 * Locate the parent ncp.  If we are at the root of a
		 * filesystem mount we have to skip to the mounted-on
		 * point in the underlying filesystem.
		 *
		 * Expect the parent to always be good since the
		 * mountpoint doesn't go away.  XXX hack.  cache_get()
		 * requires the ncp to already have a ref as a safety.
		 *
		 * However, a process which has been broken out of a chroot
		 * will wind up with a NULL parent if it tries to '..' above
		 * the real root, deal with the case.  Note that this does
		 * not protect us from a jail breakout, it just stops a panic
		 * if the jail-broken process tries to '..' past the real
		 * root.
		 */
		nctmp = nd->nl_nch;
		while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) {
		    nctmp = nctmp.mount->mnt_ncmounton;
		    if (nctmp.ncp == NULL)
			break;
		}
		if (nctmp.ncp == NULL) {
		    if (curthread->td_proc) {
			kprintf("vfs_nlookup: '..' traverse broke "
				"jail: pid %d (%s)\n",
				curthread->td_proc->p_pid,
				curthread->td_comm);
		    }
		    nctmp = nd->nl_rootnch;
		} else {
		    nctmp.ncp = nctmp.ncp->nc_parent;
		}
		cache_hold(&nctmp);
		cache_unlock(&nd->nl_nch);
		nd->nl_flags &= ~NLC_NCPISLOCKED;
		cache_get_maybe_shared(&nctmp, &nch, wantsexcllock(nd, ptr));
		cache_drop(&nctmp);		/* NOTE: zero's nctmp */
	    }
	    wasdotordotdot = 2;
	} else {
	    /*
	     * Must unlock nl_nch when traversing down the path.  However,
	     * the child ncp has not yet been found/created and the parent's
	     * child list might be empty.  Thus releasing the lock can
	     * allow a race whereby the parent ncp's vnode is recycled.
	     * This case can occur especially when maxvnodes is set very low.
	     *
	     * We need the parent's ncp to remain resolved for all normal
	     * filesystem activities, so we vhold() the vp during the lookup
	     * to prevent recyclement due to vnlru / maxvnodes.
	     *
	     * If we race an unlink or rename the ncp might be marked
	     * DESTROYED after resolution, requiring a retry.
	     */
	    if ((hvp = nd->nl_nch.ncp->nc_vp) != NULL)
		vhold(hvp);
	    cache_unlock(&nd->nl_nch);
	    nd->nl_flags &= ~NLC_NCPISLOCKED;
	    error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc,
					       wantsexcllock(nd, ptr), &nch);
	    if (error == EWOULDBLOCK) {
		/*
		 * The maybe-shared lookup declined; fall back to the
		 * plain cache_nlookup() and resolve here, re-looking the
		 * component up for as long as the resolve returns EAGAIN
		 * or the ncp is flagged NCF_DESTROYED.
		 */
		nch = cache_nlookup(&nd->nl_nch, &nlc);
		if (nch.ncp->nc_flag & NCF_UNRESOLVED)
		    hit = 0;
		for (;;) {
		    error = cache_resolve(&nch, nd->nl_cred);
		    if (error != EAGAIN &&
			(nch.ncp->nc_flag & NCF_DESTROYED) == 0) {
			if (error == ESTALE) {
			    if (!inretry)
				error = ENOENT;
			    doretry = TRUE;
			}
			break;
		    }
		    kprintf("[diagnostic] nlookup: relookup %*.*s\n",
			    nch.ncp->nc_nlen, nch.ncp->nc_nlen,
			    nch.ncp->nc_name);
		    cache_put(&nch);
		    nch = cache_nlookup(&nd->nl_nch, &nlc);
		}
	    }
	    if (hvp)
		vdrop(hvp);
	    wasdotordotdot = 0;
	}

	/*
	 * If the last component was "." or ".." our dflags no longer
	 * represents the parent directory and we have to explicitly
	 * look it up.
	 *
	 * Expect the parent to be good since nch is locked.
	 */
	if (wasdotordotdot && error == 0) {
	    dflags = 0;
	    if ((par.ncp = nch.ncp->nc_parent) != NULL) {
		par.mount = nch.mount;
		cache_hold(&par);
		cache_lock_maybe_shared(&par, wantsexcllock(nd, ptr));
		error = naccess(&par, 0, nd->nl_cred, &dflags);
		cache_put(&par);
		if (error) {
		    if (!keeperror(nd, error))
			error = 0;
		}
	    }
	}

	/*
	 * [end of subsection]
	 *
	 * nch is locked and referenced.
	 * nd->nl_nch is unlocked and referenced.
	 *
	 * nl_nch must be unlocked or we could chain lock to the root
	 * if a resolve gets stuck (e.g. in NFS).
	 */
	KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);

	/*
	 * Resolve the namespace if necessary.  The ncp returned by
	 * cache_nlookup() is referenced and locked.
	 *
	 * XXX neither '.' nor '..' should return EAGAIN since they were
	 * previously resolved and thus cannot be newly created ncp's.
	 */
	if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
	    hit = 0;
	    error = cache_resolve(&nch, nd->nl_cred);
	    if (error == ESTALE) {
		if (!inretry)
		    error = ENOENT;
		doretry = TRUE;
	    }
	    KKASSERT(error != EAGAIN);
	} else {
	    error = nch.ncp->nc_error;
	}

	/*
	 * Early completion.  ENOENT is not an error if this is the last
	 * component and NLC_CREATE or NLC_RENAME (rename target) was
	 * requested.  Note that ncp->nc_error is left as ENOENT in that
	 * case, which we check later on.
	 *
	 * Also handle invalid '.' or '..' components terminating a path
	 * for a create/rename/delete.  The standard requires this and pax
	 * pretty stupidly depends on it.
	 */
	if (islastelement(ptr)) {
	    if (error == ENOENT &&
		(nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST))
	    ) {
		if (nd->nl_flags & NLC_NFS_RDONLY) {
		    error = EROFS;
		} else {
		    error = naccess(&nch, nd->nl_flags | dflags,
				    nd->nl_cred, NULL);
		}
	    }
	    if (error == 0 && wasdotordotdot &&
		(nd->nl_flags & (NLC_CREATE | NLC_DELETE |
				 NLC_RENAME_SRC | NLC_RENAME_DST))) {
		/*
		 * POSIX junk
		 */
		if (nd->nl_flags & NLC_CREATE)
		    error = EEXIST;
		else if (nd->nl_flags & NLC_DELETE)
		    error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY;
		else
		    error = EINVAL;
	    }
	}

	/*
	 * Early completion on error.
	 */
	if (error) {
	    cache_put(&nch);
	    break;
	}

	/*
	 * If the element is a symlink and it is either not the last
	 * element or it is the last element and we are allowed to
	 * follow symlinks, resolve the symlink.
	 */
	if ((nch.ncp->nc_flag & NCF_ISSYMLINK) &&
	    (*ptr || (nd->nl_flags & NLC_FOLLOW))
	) {
	    if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
		error = ELOOP;
		cache_put(&nch);
		break;
	    }
	    error = nreadsymlink(nd, &nch, &nlc);
	    cache_put(&nch);
	    if (error)
		break;

	    /*
	     * Concatenate trailing path elements onto the returned symlink.
	     * Note that if the path component (ptr) is not exhausted, it
	     * will begin with a '/', so we do not have to add another one.
	     *
	     * The symlink may not be empty.
	     */
	    len = strlen(ptr);
	    if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) {
		error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT;
		objcache_put(namei_oc, nlc.nlc_nameptr);
		break;
	    }
	    bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1);
	    /* the symlink buffer replaces the old path buffer */
	    if (nd->nl_flags & NLC_HASBUF)
		objcache_put(namei_oc, nd->nl_path);
	    nd->nl_path = nlc.nlc_nameptr;
	    nd->nl_flags |= NLC_HASBUF;
	    ptr = nd->nl_path;

	    /*
	     * Go back up to the top to resolve any initial '/'s in the
	     * symlink.
	     */
	    continue;
	}

	/*
	 * If the element is a directory and we are crossing a mount point,
	 * Locate the mount.
	 */
	while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
	    (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 &&
	    (mp = cache_findmount(&nch)) != NULL
	) {
	    struct vnode *tdp;
	    int vfs_do_busy = 0;

	    /*
	     * VFS must be busied before the namecache entry is locked,
	     * but we don't want to waste time calling vfs_busy() if the
	     * mount point is already resolved.
	     */
again:
	    cache_put(&nch);
	    if (vfs_do_busy) {
		while (vfs_busy(mp, 0)) {
		    if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
			kprintf("nlookup: warning umount race avoided\n");
			cache_dropmount(mp);
			error = EBUSY;
			vfs_do_busy = 0;
			/* nch was already released at 'again' above */
			goto double_break;
		    }
		}
	    }
	    cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch,
				   wantsexcllock(nd, ptr));

	    if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
		/* first pass did not busy the mount; redo it busied */
		if (vfs_do_busy == 0) {
		    vfs_do_busy = 1;
		    goto again;
		}
		error = VFS_ROOT(mp, &tdp);
		vfs_unbusy(mp);
		vfs_do_busy = 0;
		if (keeperror(nd, error)) {
		    cache_dropmount(mp);
		    break;
		}
		if (error == 0) {
		    cache_setvp(&nch, tdp);
		    vput(tdp);
		}
	    }
	    if (vfs_do_busy)
		vfs_unbusy(mp);
	    cache_dropmount(mp);
	}

	if (keeperror(nd, error)) {
	    cache_put(&nch);
double_break:
	    break;
	}

	/*
	 * Skip any slashes to get to the next element.  If there
	 * are any slashes at all the current element must be a
	 * directory or, in the create case, intended to become a directory.
	 * If it isn't we break without incrementing ptr and fall through
	 * to the failure case below.
	 */
	while (*ptr == '/') {
	    if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 &&
		!(nd->nl_flags & NLC_WILLBEDIR)
	    ) {
		break;
	    }
	    ++ptr;
	}

	/*
	 * Continuation case: additional elements and the current
	 * element is a directory.
	 */
	if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) {
	    if (nd->nl_flags & NLC_NCDIR) {
		cache_drop_ncdir(&nd->nl_nch);
		nd->nl_flags &= ~NLC_NCDIR;
	    } else {
		cache_drop(&nd->nl_nch);
	    }
	    cache_unlock(&nch);
	    KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
	    nd->nl_nch = nch;
	    continue;
	}

	/*
	 * Failure case: additional elements and the current element
	 * is not a directory
	 */
	if (*ptr) {
	    cache_put(&nch);
	    error = ENOTDIR;
	    break;
	}

	/*
	 * Successful lookup of last element.
	 *
	 * Check permissions if the target exists.  If the target does not
	 * exist directory permissions were already tested in the early
	 * completion code above.
	 *
	 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY
	 * if the file is marked append-only, and NLC_STICKY if the directory
	 * containing the file is sticky.
	 */
	if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) {
	    error = naccess(&nch, nd->nl_flags | dflags,
			    nd->nl_cred, NULL);
	    if (keeperror(nd, error)) {
		cache_put(&nch);
		break;
	    }
	}

	/*
	 * Termination: no more elements.
	 *
	 * If NLC_REFDVP is set acquire a referenced parent dvp.
	 */
	if (nd->nl_flags & NLC_REFDVP) {
	    cache_lock(&nd->nl_nch);
	    error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp);
	    cache_unlock(&nd->nl_nch);
	    if (keeperror(nd, error)) {
		kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp);
		cache_put(&nch);
		break;
	    }
	}
	/* the locked leaf replaces the directory as the current position */
	if (nd->nl_flags & NLC_NCDIR) {
	    cache_drop_ncdir(&nd->nl_nch);
	    nd->nl_flags &= ~NLC_NCDIR;
	} else {
	    cache_drop(&nd->nl_nch);
	}
	nd->nl_nch = nch;
	nd->nl_flags |= NLC_NCPISLOCKED;
	error = 0;
	break;
    }

    /* per-cpu statistics: a miss if any unresolved ncp was encountered */
    if (hit)
	++gd->gd_nchstats->ncs_longhits;
    else
	++gd->gd_nchstats->ncs_longmiss;

    if (nd->nl_flags & NLC_NCPISLOCKED)
	KKASSERT(cache_lockstatus(&nd->nl_nch) > 0);

    /*
     * Retry the whole thing if doretry flag is set, but only once.
     * autofs(5) may mount another filesystem under its root directory
     * while resolving a path.
     *
     * The flags are reset to their entry value, except that the current
     * NLC_NCDIR state is preserved.
     *
     * NOTE(review): this also clears NLC_NCPISLOCKED without unlocking,
     * and nd->nl_nch is whatever the loop left behind rather than the
     * original starting directory -- confirm both are safe for every
     * path that reaches here with doretry set.
     */
    if (doretry && !inretry) {
	inretry = TRUE;
	nd->nl_flags &= NLC_NCDIR;
	nd->nl_flags |= saveflag;
	goto nlookup_start;
    }

    /*
     * NOTE: If NLC_CREATE was set the ncp may represent a negative hit
     * (ncp->nc_error will be ENOENT), but we will still return an error
     * code of 0.
     */
    return(error);
}

/*
 * Resolve a mount point's glue ncp.  This ncp creates the illusion
 * of continuity in the namecache tree by connecting the ncp related to the
 * vnode under the mount to the ncp related to the mount's root vnode.
 *
 * If no error occurred a locked, ref'd ncp is stored in *nch.
1069 */ 1070 int 1071 nlookup_mp(struct mount *mp, struct nchandle *nch) 1072 { 1073 struct vnode *vp; 1074 int error; 1075 1076 error = 0; 1077 cache_get(&mp->mnt_ncmountpt, nch); 1078 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 1079 while (vfs_busy(mp, 0)) 1080 ; 1081 error = VFS_ROOT(mp, &vp); 1082 vfs_unbusy(mp); 1083 if (error) { 1084 cache_put(nch); 1085 } else { 1086 cache_setvp(nch, vp); 1087 vput(vp); 1088 } 1089 } 1090 return(error); 1091 } 1092 1093 /* 1094 * Read the contents of a symlink, allocate a path buffer out of the 1095 * namei_oc and initialize the supplied nlcomponent with the result. 1096 * 1097 * If an error occurs no buffer will be allocated or returned in the nlc. 1098 */ 1099 int 1100 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 1101 struct nlcomponent *nlc) 1102 { 1103 struct vnode *vp; 1104 struct iovec aiov; 1105 struct uio auio; 1106 int linklen; 1107 int error; 1108 char *cp; 1109 1110 nlc->nlc_nameptr = NULL; 1111 nlc->nlc_namelen = 0; 1112 if (nch->ncp->nc_vp == NULL) 1113 return(ENOENT); 1114 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 1115 return(error); 1116 cp = objcache_get(namei_oc, M_WAITOK); 1117 aiov.iov_base = cp; 1118 aiov.iov_len = MAXPATHLEN; 1119 auio.uio_iov = &aiov; 1120 auio.uio_iovcnt = 1; 1121 auio.uio_offset = 0; 1122 auio.uio_rw = UIO_READ; 1123 auio.uio_segflg = UIO_SYSSPACE; 1124 auio.uio_td = nd->nl_td; 1125 auio.uio_resid = MAXPATHLEN - 1; 1126 error = VOP_READLINK(vp, &auio, nd->nl_cred); 1127 if (error) 1128 goto fail; 1129 linklen = MAXPATHLEN - 1 - auio.uio_resid; 1130 if (varsym_enable) { 1131 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 1132 if (linklen < 0) { 1133 error = ENAMETOOLONG; 1134 goto fail; 1135 } 1136 } 1137 cp[linklen] = 0; 1138 nlc->nlc_nameptr = cp; 1139 nlc->nlc_namelen = linklen; 1140 vput(vp); 1141 return(0); 1142 fail: 1143 objcache_put(namei_oc, cp); 1144 vput(vp); 1145 return(error); 1146 } 1147 1148 /* 1149 * Check access [XXX cache 
vattr!] [XXX quota] 1150 * 1151 * Generally check the NLC_* access bits. All specified bits must pass 1152 * for this function to return 0. 1153 * 1154 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 1155 * access, otherwise it must exist. No error is returned in this case. 1156 * 1157 * The file must not exist if NLC_EXCL is specified. 1158 * 1159 * Directory permissions in general are tested for NLC_CREATE if the file 1160 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 1161 * whether the file exists or not. 1162 * 1163 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 1164 * the latter is only tested if the target exists. 1165 * 1166 * The passed ncp must be referenced and locked. If it is already resolved 1167 * it may be locked shared but otherwise should be locked exclusively. 1168 */ 1169 1170 #define S_WXOK_MASK (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 1171 1172 static int 1173 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) 1174 { 1175 struct vnode *vp; 1176 struct vattr va; 1177 struct namecache *ncp; 1178 int error; 1179 int cflags; 1180 1181 KKASSERT(cache_lockstatus(nch) > 0); 1182 1183 ncp = nch->ncp; 1184 if (ncp->nc_flag & NCF_UNRESOLVED) { 1185 cache_resolve(nch, cred); 1186 ncp = nch->ncp; 1187 } 1188 error = ncp->nc_error; 1189 1190 /* 1191 * Directory permissions checks. Silently ignore ENOENT if these 1192 * tests pass. It isn't an error. 1193 * 1194 * We can safely resolve ncp->nc_parent because ncp is currently 1195 * locked. 
1196 */ 1197 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 1198 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 1199 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 1200 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 1201 (nflags & NLC_RENAME_DST) 1202 ) { 1203 struct nchandle par; 1204 1205 if ((par.ncp = ncp->nc_parent) == NULL) { 1206 if (error != EAGAIN) 1207 error = EINVAL; 1208 } else if (error == 0 || error == ENOENT) { 1209 par.mount = nch->mount; 1210 cache_hold(&par); 1211 cache_lock_maybe_shared(&par, 0); 1212 error = naccess(&par, NLC_WRITE, cred, NULL); 1213 cache_put(&par); 1214 } 1215 } 1216 } 1217 1218 /* 1219 * NLC_EXCL check. Target file must not exist. 1220 */ 1221 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 1222 error = EEXIST; 1223 1224 /* 1225 * Try to short-cut the vnode operation for intermediate directory 1226 * components. This is a major SMP win because it avoids having 1227 * to execute a lot of code for intermediate directory components, 1228 * including shared refs and locks on intermediate directory vnodes. 1229 * 1230 * We can only do this if the caller does not need nflagsp. 1231 */ 1232 if (error == 0 && nflagsp == NULL && 1233 nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) { 1234 return 0; 1235 } 1236 1237 /* 1238 * Get the vnode attributes so we can do the rest of our checks. 1239 * 1240 * NOTE: We only call naccess_va() if the target exists. 1241 */ 1242 if (error == 0) { 1243 error = cache_vget(nch, cred, LK_SHARED, &vp); 1244 if (error == ENOENT) { 1245 /* 1246 * Silently zero-out ENOENT if creating or renaming 1247 * (rename target). It isn't an error. 1248 */ 1249 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1250 error = 0; 1251 } else if (error == 0) { 1252 /* 1253 * Get the vnode attributes and check for illegal O_TRUNC 1254 * requests and read-only mounts. 1255 * 1256 * NOTE: You can still open devices on read-only mounts for 1257 * writing. 
1258 * 1259 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1260 * check on the parent directory above. 1261 * 1262 * XXX cache the va in the namecache or in the vnode 1263 */ 1264 error = VOP_GETATTR(vp, &va); 1265 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1266 switch(va.va_type) { 1267 case VREG: 1268 case VDATABASE: 1269 case VCHR: 1270 case VBLK: 1271 case VFIFO: 1272 break; 1273 case VDIR: 1274 error = EISDIR; 1275 break; 1276 default: 1277 error = EINVAL; 1278 break; 1279 } 1280 } 1281 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1282 (vp->v_mount->mnt_flag & MNT_RDONLY) 1283 ) { 1284 switch(va.va_type) { 1285 case VDIR: 1286 case VLNK: 1287 case VREG: 1288 case VDATABASE: 1289 error = EROFS; 1290 break; 1291 default: 1292 break; 1293 } 1294 } 1295 vput(vp); 1296 1297 /* 1298 * Check permissions based on file attributes. The passed 1299 * flags (*nflagsp) are modified with feedback based on 1300 * special attributes and requirements. 1301 */ 1302 if (error == 0) { 1303 /* 1304 * Adjust the returned (*nflagsp) if non-NULL. 1305 */ 1306 if (nflagsp) { 1307 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 1308 *nflagsp |= NLC_STICKY; 1309 if (va.va_flags & APPEND) 1310 *nflagsp |= NLC_APPENDONLY; 1311 if (va.va_flags & IMMUTABLE) 1312 *nflagsp |= NLC_IMMUTABLE; 1313 } 1314 1315 /* 1316 * NCF_WXOK can be set for world-searchable directories. 1317 * 1318 * XXX When we implement capabilities this code would also 1319 * need a cap check, or only set the flag if there are no 1320 * capabilities. 1321 */ 1322 cflags = 0; 1323 if (va.va_type == VDIR && 1324 (va.va_mode & S_WXOK_MASK) == S_WXOK_MASK) { 1325 cflags |= NCF_WXOK; 1326 } 1327 1328 /* 1329 * Track swapcache management flags in the namecache. 1330 * 1331 * Calculate the flags based on the current vattr info 1332 * and recalculate the inherited flags from the parent 1333 * (the original cache linkage may have occurred without 1334 * getattrs and thus have stale flags). 
1335 */ 1336 if (va.va_flags & SF_NOCACHE) 1337 cflags |= NCF_SF_NOCACHE; 1338 if (va.va_flags & UF_CACHE) 1339 cflags |= NCF_UF_CACHE; 1340 if (ncp->nc_parent) { 1341 if (ncp->nc_parent->nc_flag & 1342 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1343 cflags |= NCF_SF_PNOCACHE; 1344 } 1345 if (ncp->nc_parent->nc_flag & 1346 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1347 cflags |= NCF_UF_PCACHE; 1348 } 1349 } 1350 1351 /* 1352 * We're not supposed to update nc_flag when holding a shared 1353 * lock, but we allow the case for certain flags. Note that 1354 * holding an exclusive lock allows updating nc_flag without 1355 * atomics. nc_flag is not allowe to be updated at all unless 1356 * a shared or exclusive lock is held. 1357 */ 1358 atomic_clear_short(&ncp->nc_flag, 1359 (NCF_SF_NOCACHE | NCF_UF_CACHE | 1360 NCF_SF_PNOCACHE | NCF_UF_PCACHE | 1361 NCF_WXOK) & ~cflags); 1362 atomic_set_short(&ncp->nc_flag, cflags); 1363 1364 /* 1365 * Process general access. 1366 */ 1367 error = naccess_va(&va, nflags, cred); 1368 } 1369 } 1370 } 1371 return(error); 1372 } 1373 1374 /* 1375 * Check the requested access against the given vattr using cred. 1376 */ 1377 int 1378 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 1379 { 1380 int i; 1381 int vmode; 1382 1383 /* 1384 * Test the immutable bit. Creations, deletions, renames (source 1385 * or destination) are not allowed. chown/chmod/other is also not 1386 * allowed but is handled by SETATTR. Hardlinks to the immutable 1387 * file are allowed. 1388 * 1389 * If the directory is set to immutable then creations, deletions, 1390 * renames (source or dest) and hardlinks to files within the directory 1391 * are not allowed, and regular files opened through the directory may 1392 * not be written to or truncated (unless a special device). 1393 * 1394 * NOTE! New hardlinks to immutable files work but new hardlinks to 1395 * files, immutable or not, sitting inside an immutable directory are 1396 * not allowed. 
As always if the file is hardlinked via some other 1397 * path additional hardlinks may be possible even if the file is marked 1398 * immutable. The sysop needs to create a closure by checking the hard 1399 * link count. Once closure is achieved you are good, and security 1400 * scripts should check link counts anyway. 1401 * 1402 * Writes and truncations are only allowed on special devices. 1403 */ 1404 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1405 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1406 return (EPERM); 1407 if (nflags & (NLC_CREATE | NLC_DELETE | 1408 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1409 return (EPERM); 1410 } 1411 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1412 switch(va->va_type) { 1413 case VDIR: 1414 return (EISDIR); 1415 case VLNK: 1416 case VREG: 1417 case VDATABASE: 1418 return (EPERM); 1419 default: 1420 break; 1421 } 1422 } 1423 } 1424 1425 /* 1426 * Test the no-unlink and append-only bits for opens, rename targets, 1427 * and deletions. These bits are not tested for creations or 1428 * rename sources. 1429 * 1430 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1431 * If you do not wish this you must also set NOUNLINK. 1432 * 1433 * If the governing directory is marked APPEND-only it implies 1434 * NOUNLINK for all entries in the directory. 1435 */ 1436 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1437 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1438 ) { 1439 return (EPERM); 1440 } 1441 1442 /* 1443 * A file marked append-only may not be deleted but can be renamed. 1444 */ 1445 if ((va->va_flags & APPEND) && 1446 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1447 ) { 1448 return (EPERM); 1449 } 1450 1451 /* 1452 * A file marked append-only which is opened for writing must also 1453 * be opened O_APPEND. 
1454 */ 1455 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1456 if (nflags & NLC_TRUNCATE) 1457 return (EPERM); 1458 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1459 if ((nflags & NLC_APPEND) == 0) 1460 return (EPERM); 1461 } 1462 } 1463 1464 /* 1465 * root gets universal access 1466 */ 1467 if (cred->cr_uid == 0) 1468 return(0); 1469 1470 /* 1471 * Check owner perms. 1472 * 1473 * If NLC_OWN is set the owner of the file is allowed no matter when 1474 * the owner-mode bits say (utimes). 1475 */ 1476 vmode = 0; 1477 if (nflags & NLC_READ) 1478 vmode |= S_IRUSR; 1479 if (nflags & NLC_WRITE) 1480 vmode |= S_IWUSR; 1481 if (nflags & NLC_EXEC) 1482 vmode |= S_IXUSR; 1483 1484 if (cred->cr_uid == va->va_uid) { 1485 if ((nflags & NLC_OWN) == 0) { 1486 if ((vmode & va->va_mode) != vmode) 1487 return(EACCES); 1488 } 1489 return(0); 1490 } 1491 1492 /* 1493 * If NLC_STICKY is set only the owner may delete or rename a file. 1494 * This bit is typically set on /tmp. 1495 * 1496 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1497 * the specific delete or rename case. For deletions and renames we 1498 * usually just care about directory permissions, not file permissions. 
1499 */ 1500 if ((nflags & NLC_STICKY) && 1501 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1502 return(EACCES); 1503 } 1504 1505 /* 1506 * Check group perms 1507 */ 1508 vmode >>= 3; 1509 for (i = 0; i < cred->cr_ngroups; ++i) { 1510 if (va->va_gid == cred->cr_groups[i]) { 1511 if ((vmode & va->va_mode) != vmode) 1512 return(EACCES); 1513 return(0); 1514 } 1515 } 1516 1517 /* 1518 * Check world perms 1519 */ 1520 vmode >>= 3; 1521 if ((vmode & va->va_mode) != vmode) 1522 return(EACCES); 1523 return(0); 1524 } 1525 1526 /* 1527 * Long-term (10-second interval) statistics collection 1528 */ 1529 static 1530 uint64_t 1531 collect_nlookup_callback(int n) 1532 { 1533 static uint64_t last_total; 1534 uint64_t save; 1535 uint64_t total; 1536 1537 total = 0; 1538 for (n = 0; n < ncpus; ++n) { 1539 globaldata_t gd = globaldata_find(n); 1540 struct nchstats *sp; 1541 1542 if ((sp = gd->gd_nchstats) != NULL) 1543 total += sp->ncs_longhits + sp->ncs_longmiss; 1544 } 1545 save = total; 1546 total = total - last_total; 1547 last_total = save; 1548 1549 return total; 1550 } 1551 1552 static 1553 void 1554 nlookup_collect_init(void *dummy __unused) 1555 { 1556 kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback, 1557 KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0)); 1558 } 1559 SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0); 1560