1 /* 2 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 */ 34 /* 35 * nlookup() is the 'new' namei interface. Rather than return directory and 36 * leaf vnodes (in various lock states) the new interface instead deals in 37 * namecache records. 
Namecache records may represent both a positive or 38 * a negative hit. The namespace is locked via the namecache record instead 39 * of via the vnode, and only the leaf namecache record (representing the 40 * filename) needs to be locked. 41 * 42 * This greatly improves filesystem parallelism and is a huge simplification 43 * of the API verses the old vnode locking / namei scheme. 44 * 45 * Filesystems must actively control the caching aspects of the namecache, 46 * and since namecache pointers are used as handles they are non-optional 47 * even for filesystems which do not generally wish to cache things. It is 48 * intended that a separate cache coherency API will be constructed to handle 49 * these issues. 50 */ 51 52 #include "opt_ktrace.h" 53 54 #include <sys/param.h> 55 #include <sys/systm.h> 56 #include <sys/kernel.h> 57 #include <sys/vnode.h> 58 #include <sys/mount.h> 59 #include <sys/filedesc.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/malloc.h> 64 #include <sys/stat.h> 65 #include <sys/objcache.h> 66 #include <sys/file.h> 67 #include <sys/kcollect.h> 68 69 #ifdef KTRACE 70 #include <sys/ktrace.h> 71 #endif 72 73 static int naccess(struct nchandle *nch, int vmode, struct ucred *cred, 74 int *stickyp); 75 76 /* 77 * Initialize a nlookup() structure, early error return for copyin faults 78 * or a degenerate empty string (which is not allowed). 79 * 80 * The first process proc0's credentials are used if the calling thread 81 * is not associated with a process context. 82 * 83 * MPSAFE 84 */ 85 int 86 nlookup_init(struct nlookupdata *nd, 87 const char *path, enum uio_seg seg, int flags) 88 { 89 size_t pathlen; 90 struct proc *p; 91 thread_t td; 92 int error; 93 94 td = curthread; 95 p = td->td_proc; 96 97 /* 98 * note: the pathlen set by copy*str() includes the terminating \0. 
99 */ 100 bzero(nd, sizeof(struct nlookupdata)); 101 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 102 nd->nl_flags |= NLC_HASBUF; 103 if (seg == UIO_SYSSPACE) 104 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 105 else 106 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 107 108 /* 109 * Don't allow empty pathnames. 110 * POSIX.1 requirement: "" is not a vaild file name. 111 */ 112 if (error == 0 && pathlen <= 1) 113 error = ENOENT; 114 115 if (error == 0) { 116 if (p && p->p_fd) { 117 cache_copy_ncdir(p, &nd->nl_nch); 118 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 119 if (p->p_fd->fd_njdir.ncp) 120 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 121 nd->nl_cred = td->td_ucred; 122 nd->nl_flags |= NLC_BORROWCRED | NLC_NCDIR; 123 } else { 124 cache_copy(&rootnch, &nd->nl_nch); 125 cache_copy(&nd->nl_nch, &nd->nl_rootnch); 126 cache_copy(&nd->nl_nch, &nd->nl_jailnch); 127 nd->nl_cred = proc0.p_ucred; 128 nd->nl_flags |= NLC_BORROWCRED; 129 } 130 nd->nl_td = td; 131 nd->nl_flags |= flags; 132 } else { 133 nlookup_done(nd); 134 } 135 return(error); 136 } 137 138 139 /* 140 * nlookup_init() for "at" family of syscalls. 141 * 142 * Works similarly to nlookup_init() but if path is relative and fd is not 143 * AT_FDCWD, path is interpreted relative to the directory pointed to by fd. 144 * In this case, the file entry pointed to by fd is ref'ed and returned in 145 * *fpp. 146 * 147 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 148 * and release the ref to the file entry. 
149 */ 150 int 151 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 152 const char *path, enum uio_seg seg, int flags) 153 { 154 struct thread *td = curthread; 155 struct proc *p = td->td_proc; 156 struct file* fp; 157 struct vnode *vp; 158 int error; 159 160 *fpp = NULL; 161 162 if ((error = nlookup_init(nd, path, seg, flags)) != 0) { 163 return (error); 164 } 165 166 if (nd->nl_path[0] != '/' && fd != AT_FDCWD) { 167 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 168 goto done; 169 vp = (struct vnode*)fp->f_data; 170 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 171 fdrop(fp); 172 fp = NULL; 173 error = ENOTDIR; 174 goto done; 175 } 176 if (nd->nl_flags & NLC_NCDIR) { 177 cache_drop_ncdir(&nd->nl_nch); 178 nd->nl_flags &= ~NLC_NCDIR; 179 } else { 180 cache_drop(&nd->nl_nch); 181 } 182 cache_copy(&fp->f_nchandle, &nd->nl_nch); 183 *fpp = fp; 184 } 185 186 187 done: 188 if (error) 189 nlookup_done(nd); 190 return (error); 191 192 } 193 194 /* 195 * This works similarly to nlookup_init() but does not assume a process 196 * context. rootnch is always chosen for the root directory and the cred 197 * and starting directory are supplied in arguments. 198 */ 199 int 200 nlookup_init_raw(struct nlookupdata *nd, 201 const char *path, enum uio_seg seg, int flags, 202 struct ucred *cred, struct nchandle *ncstart) 203 { 204 size_t pathlen; 205 thread_t td; 206 int error; 207 208 td = curthread; 209 210 bzero(nd, sizeof(struct nlookupdata)); 211 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 212 nd->nl_flags |= NLC_HASBUF; 213 if (seg == UIO_SYSSPACE) 214 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 215 else 216 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 217 218 /* 219 * Don't allow empty pathnames. 220 * POSIX.1 requirement: "" is not a vaild file name. 
221 */ 222 if (error == 0 && pathlen <= 1) 223 error = ENOENT; 224 225 if (error == 0) { 226 cache_copy(ncstart, &nd->nl_nch); 227 cache_copy(&rootnch, &nd->nl_rootnch); 228 cache_copy(&rootnch, &nd->nl_jailnch); 229 nd->nl_cred = crhold(cred); 230 nd->nl_td = td; 231 nd->nl_flags |= flags; 232 } else { 233 nlookup_done(nd); 234 } 235 return(error); 236 } 237 238 /* 239 * This works similarly to nlookup_init_raw() but does not rely 240 * on rootnch being initialized yet. 241 */ 242 int 243 nlookup_init_root(struct nlookupdata *nd, 244 const char *path, enum uio_seg seg, int flags, 245 struct ucred *cred, struct nchandle *ncstart, 246 struct nchandle *ncroot) 247 { 248 size_t pathlen; 249 thread_t td; 250 int error; 251 252 td = curthread; 253 254 bzero(nd, sizeof(struct nlookupdata)); 255 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 256 nd->nl_flags |= NLC_HASBUF; 257 if (seg == UIO_SYSSPACE) 258 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 259 else 260 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 261 262 /* 263 * Don't allow empty pathnames. 264 * POSIX.1 requirement: "" is not a vaild file name. 265 */ 266 if (error == 0 && pathlen <= 1) 267 error = ENOENT; 268 269 if (error == 0) { 270 cache_copy(ncstart, &nd->nl_nch); 271 cache_copy(ncroot, &nd->nl_rootnch); 272 cache_copy(ncroot, &nd->nl_jailnch); 273 nd->nl_cred = crhold(cred); 274 nd->nl_td = td; 275 nd->nl_flags |= flags; 276 } else { 277 nlookup_done(nd); 278 } 279 return(error); 280 } 281 282 #if 0 283 /* 284 * Set a different credential; this credential will be used by future 285 * operations performed on nd.nl_open_vp and nlookupdata structure. 
286 */ 287 void 288 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 289 { 290 KKASSERT(nd->nl_cred != NULL); 291 292 if (nd->nl_cred != cred) { 293 cred = crhold(cred); 294 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 295 crfree(nd->nl_cred); 296 nd->nl_flags &= ~NLC_BORROWCRED; 297 nd->nl_cred = cred; 298 } 299 } 300 #endif 301 302 /* 303 * Cleanup a nlookupdata structure after we are through with it. This may 304 * be called on any nlookupdata structure initialized with nlookup_init(). 305 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 306 * returns an error, even if as a consumer you believe you have taken all 307 * dynamic elements out of the nlookupdata structure. 308 */ 309 void 310 nlookup_done(struct nlookupdata *nd) 311 { 312 if (nd->nl_nch.ncp) { 313 if (nd->nl_flags & NLC_NCPISLOCKED) { 314 nd->nl_flags &= ~NLC_NCPISLOCKED; 315 cache_unlock(&nd->nl_nch); 316 } 317 if (nd->nl_flags & NLC_NCDIR) { 318 cache_drop_ncdir(&nd->nl_nch); 319 nd->nl_flags &= ~NLC_NCDIR; 320 } else { 321 cache_drop(&nd->nl_nch); /* NULL's out the nch */ 322 } 323 } 324 if (nd->nl_rootnch.ncp) 325 cache_drop_and_cache(&nd->nl_rootnch); 326 if (nd->nl_jailnch.ncp) 327 cache_drop_and_cache(&nd->nl_jailnch); 328 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 329 objcache_put(namei_oc, nd->nl_path); 330 nd->nl_path = NULL; 331 } 332 if (nd->nl_cred) { 333 if ((nd->nl_flags & NLC_BORROWCRED) == 0) 334 crfree(nd->nl_cred); 335 nd->nl_cred = NULL; 336 nd->nl_flags &= ~NLC_BORROWCRED; 337 } 338 if (nd->nl_open_vp) { 339 if (nd->nl_flags & NLC_LOCKVP) { 340 vn_unlock(nd->nl_open_vp); 341 nd->nl_flags &= ~NLC_LOCKVP; 342 } 343 vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL); 344 nd->nl_open_vp = NULL; 345 } 346 if (nd->nl_dvp) { 347 vrele(nd->nl_dvp); 348 nd->nl_dvp = NULL; 349 } 350 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 351 } 352 353 /* 354 * Works similarly to nlookup_done() when nd initialized with 355 * 
nlookup_init_at(). 356 */ 357 void 358 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 359 { 360 nlookup_done(nd); 361 if (fp != NULL) 362 fdrop(fp); 363 } 364 365 void 366 nlookup_zero(struct nlookupdata *nd) 367 { 368 bzero(nd, sizeof(struct nlookupdata)); 369 } 370 371 /* 372 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 373 * if an error occured. 374 * 375 * Note that the returned ncp is not checked for permissions, though VEXEC 376 * is checked on the directory path leading up to the result. The caller 377 * must call naccess() to check the permissions of the returned leaf. 378 */ 379 struct nchandle 380 nlookup_simple(const char *str, enum uio_seg seg, 381 int niflags, int *error) 382 { 383 struct nlookupdata nd; 384 struct nchandle nch; 385 386 *error = nlookup_init(&nd, str, seg, niflags); 387 if (*error == 0) { 388 if ((*error = nlookup(&nd)) == 0) { 389 nch = nd.nl_nch; /* keep hold ref from structure */ 390 cache_zero(&nd.nl_nch); /* and NULL out */ 391 } else { 392 cache_zero(&nch); 393 } 394 nlookup_done(&nd); 395 } else { 396 cache_zero(&nch); 397 } 398 return(nch); 399 } 400 401 /* 402 * Returns non-zero if the path element is the last element 403 */ 404 static 405 int 406 islastelement(const char *ptr) 407 { 408 while (*ptr == '/') 409 ++ptr; 410 return (*ptr == 0); 411 } 412 413 /* 414 * Returns non-zero if we need to lock the namecache element 415 * exclusively. Unless otherwise requested by NLC_SHAREDLOCK, 416 * the last element of the namecache lookup will be locked 417 * exclusively. 418 * 419 * NOTE: Even if we return on-zero, an unresolved namecache record 420 * will always be locked exclusively. 421 */ 422 static __inline 423 int 424 wantsexcllock(struct nlookupdata *nd, const char *ptr) 425 { 426 if ((nd->nl_flags & NLC_SHAREDLOCK) == 0) 427 return(islastelement(ptr)); 428 return(0); 429 } 430 431 432 /* 433 * Do a generic nlookup. 
Note that the passed nd is not nlookup_done()'d 434 * on return, even if an error occurs. If no error occurs or NLC_CREATE 435 * is flagged and ENOENT is returned, then the returned nl_nch is always 436 * referenced and locked exclusively. 437 * 438 * WARNING: For any general error other than ENOENT w/NLC_CREATE, the 439 * resulting nl_nch may or may not be locked and if locked 440 * might be locked either shared or exclusive. 441 * 442 * Intermediate directory elements, including the current directory, require 443 * execute (search) permission. nlookup does not examine the access 444 * permissions on the returned element. 445 * 446 * If NLC_CREATE is set the last directory must allow node creation, 447 * and an error code of 0 will be returned for a non-existent 448 * target (not ENOENT). 449 * 450 * If NLC_RENAME_DST is set the last directory must allow node deletion, 451 * plus the sticky check is made, and an error code of 0 will be returned 452 * for a non-existent target (not ENOENT). 453 * 454 * If NLC_DELETE is set the last directory must allow node deletion, 455 * plus the sticky check is made. 456 * 457 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode 458 * of the returned entry. The vnode will be referenced, but not locked, 459 * and will be released by nlookup_done() along with everything else. 460 * 461 * NOTE: As an optimization we attempt to obtain a shared namecache lock 462 * on any intermediate elements. On success, the returned element 463 * is ALWAYS locked exclusively. 
464 */ 465 int 466 nlookup(struct nlookupdata *nd) 467 { 468 globaldata_t gd = mycpu; 469 struct nlcomponent nlc; 470 struct nchandle nch; 471 struct nchandle par; 472 struct nchandle nctmp; 473 struct mount *mp; 474 struct vnode *hvp; /* hold to prevent recyclement */ 475 int wasdotordotdot; /* 0 = ordinary name, 1 = ".", 2 = ".." */ 476 char *ptr; 477 char *nptr; 478 int error; 479 int len; 480 int dflags; 481 int hit = 1; /* cleared when a component is found unresolved; selects ncs_longhits vs ncs_longmiss below */ 482 int saveflag = nd->nl_flags & ~NLC_NCDIR; /* entry-time flags (minus NLC_NCDIR), re-applied before the retry */ 483 boolean_t doretry = FALSE; /* set when cache_resolve() returns ESTALE */ 484 boolean_t inretry = FALSE; /* TRUE once the single retry pass has started */ 485 486 nlookup_start: 487 #ifdef KTRACE 488 if (KTRPOINT(nd->nl_td, KTR_NAMEI)) 489 ktrnamei(nd->nl_td->td_lwp, nd->nl_path); 490 #endif 491 bzero(&nlc, sizeof(nlc)); 492 493 /* 494 * Setup for the loop. The current working namecache element is 495 * always at least referenced. We lock it as required, but always 496 * return a locked, resolved namecache entry. 497 */ 498 nd->nl_loopcnt = 0; 499 if (nd->nl_dvp) { 500 vrele(nd->nl_dvp); 501 nd->nl_dvp = NULL; 502 } 503 ptr = nd->nl_path; 504 505 /* 506 * Loop on the path components. At the top of the loop nd->nl_nch 507 * is ref'd and unlocked and represents our current position. 508 */ 509 for (;;) { 510 /* 511 * Make sure nl_nch is locked so we can access the vnode, resolution 512 * state, etc. 513 */ 514 if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) { 515 nd->nl_flags |= NLC_NCPISLOCKED; 516 cache_lock_maybe_shared(&nd->nl_nch, wantsexcllock(nd, ptr)); 517 } 518 519 /* 520 * Check if the root directory should replace the current 521 * directory. This is done at the start of a translation 522 * or after a symbolic link has been found. In other cases 523 * ptr will never be pointing at a '/'. 
524 */ 525 if (*ptr == '/') { 526 do { 527 ++ptr; 528 } while (*ptr == '/'); 529 cache_unlock(&nd->nl_nch); 530 cache_get_maybe_shared(&nd->nl_rootnch, &nch, 531 wantsexcllock(nd, ptr)); 532 if (nd->nl_flags & NLC_NCDIR) { 533 cache_drop_ncdir(&nd->nl_nch); 534 nd->nl_flags &= ~NLC_NCDIR; 535 } else { 536 cache_drop(&nd->nl_nch); 537 } 538 nd->nl_nch = nch; /* remains locked */ 539 540 /* 541 * Fast-track termination. There is no parent directory of 542 * the root in the same mount from the point of view of 543 * the caller so return EACCES if NLC_REFDVP is specified, 544 * and EEXIST if NLC_CREATE is also specified. 545 * e.g. 'rmdir /' or 'mkdir /' are not allowed. 546 */ 547 if (*ptr == 0) { 548 if (nd->nl_flags & NLC_REFDVP) 549 error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES; 550 else 551 error = 0; 552 break; 553 } 554 continue; 555 } 556 557 /* 558 * Pre-calculate next path component so we can check whether the 559 * current component directory is the last directory in the path 560 * or not. 561 */ 562 for (nptr = ptr; *nptr && *nptr != '/'; ++nptr) 563 ; 564 565 /* 566 * Check directory search permissions (nd->nl_nch is locked & refd). 567 * This will load dflags to obtain directory-special permissions to 568 * be checked along with the last component. 569 * 570 * We only need to pass-in &dflags for the second-to-last component. 571 * Optimize by passing-in NULL for any prior components, which may 572 * allow the code to bypass the naccess() call. 573 */ 574 dflags = 0; 575 if (*nptr == '/') 576 error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, NULL); 577 else 578 error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags); 579 if (error) 580 break; 581 582 /* 583 * Extract the next (or last) path component. Path components are 584 * limited to 255 characters. 
585 */ 586 nlc.nlc_nameptr = ptr; 587 nlc.nlc_namelen = nptr - ptr; 588 ptr = nptr; 589 if (nlc.nlc_namelen >= 256) { 590 error = ENAMETOOLONG; 591 break; 592 } 593 594 /* 595 * Lookup the path component in the cache, creating an unresolved 596 * entry if necessary. We have to handle "." and ".." as special 597 * cases. 598 * 599 * When handling ".." we have to detect a traversal back through a 600 * mount point. If we are at the root, ".." just returns the root. 601 * 602 * When handling "." or ".." we also have to recalculate dflags 603 * since our dflags will be for some sub-directory instead of the 604 * parent dir. 605 * 606 * This subsection returns a locked, refd 'nch' unless it errors out, 607 * and an unlocked but still ref'd nd->nl_nch. 608 * 609 * The namecache topology is not allowed to be disconnected, so 610 * encountering a NULL parent will generate EINVAL. This typically 611 * occurs when a directory is removed out from under a process. 612 * 613 * WARNING! The unlocking of nd->nl_nch is sensitive code. 614 */ 615 KKASSERT(nd->nl_flags & NLC_NCPISLOCKED); 616 617 if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') { 618 cache_unlock(&nd->nl_nch); 619 nd->nl_flags &= ~NLC_NCPISLOCKED; 620 cache_get_maybe_shared(&nd->nl_nch, &nch, wantsexcllock(nd, ptr)); 621 wasdotordotdot = 1; 622 } else if (nlc.nlc_namelen == 2 && 623 nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') { 624 if (nd->nl_nch.mount == nd->nl_rootnch.mount && 625 nd->nl_nch.ncp == nd->nl_rootnch.ncp 626 ) { 627 /* 628 * ".." at the root returns the root 629 */ 630 cache_unlock(&nd->nl_nch); 631 nd->nl_flags &= ~NLC_NCPISLOCKED; 632 cache_get_maybe_shared(&nd->nl_nch, &nch, 633 wantsexcllock(nd, ptr)); 634 } else { 635 /* 636 * Locate the parent ncp. If we are at the root of a 637 * filesystem mount we have to skip to the mounted-on 638 * point in the underlying filesystem. 639 * 640 * Expect the parent to always be good since the 641 * mountpoint doesn't go away. XXX hack. 
cache_get() 642 * requires the ncp to already have a ref as a safety. 643 * 644 * However, a process which has been broken out of a chroot 645 * will wind up with a NULL parent if it tries to '..' above 646 * the real root, deal with the case. Note that this does 647 * not protect us from a jail breakout, it just stops a panic 648 * if the jail-broken process tries to '..' past the real 649 * root. 650 */ 651 nctmp = nd->nl_nch; 652 while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) { 653 nctmp = nctmp.mount->mnt_ncmounton; 654 if (nctmp.ncp == NULL) 655 break; 656 } 657 if (nctmp.ncp == NULL) { 658 if (curthread->td_proc) { 659 kprintf("vfs_nlookup: '..' traverse broke " 660 "jail: pid %d (%s)\n", 661 curthread->td_proc->p_pid, 662 curthread->td_comm); 663 } 664 nctmp = nd->nl_rootnch; 665 } else { 666 nctmp.ncp = nctmp.ncp->nc_parent; 667 } 668 cache_hold(&nctmp); 669 cache_unlock(&nd->nl_nch); 670 nd->nl_flags &= ~NLC_NCPISLOCKED; 671 cache_get_maybe_shared(&nctmp, &nch, wantsexcllock(nd, ptr)); 672 cache_drop(&nctmp); /* NOTE: zero's nctmp */ 673 } 674 wasdotordotdot = 2; 675 } else { 676 /* 677 * Must unlock nl_nch when traversing down the path. However, 678 * the child ncp has not yet been found/created and the parent's 679 * child list might be empty. Thus releasing the lock can 680 * allow a race whereby the parent ncp's vnode is recycled. 681 * This case can occur especially when maxvnodes is set very low. 682 * 683 * We need the parent's ncp to remain resolved for all normal 684 * filesystem activities, so we vhold() the vp during the lookup 685 * to prevent recyclement due to vnlru / maxvnodes. 686 * 687 * If we race an unlink or rename the ncp might be marked 688 * DESTROYED after resolution, requiring a retry. 
689 */ 690 if ((hvp = nd->nl_nch.ncp->nc_vp) != NULL) 691 vhold(hvp); 692 cache_unlock(&nd->nl_nch); 693 nd->nl_flags &= ~NLC_NCPISLOCKED; 694 error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc, 695 wantsexcllock(nd, ptr), &nch); 696 if (error == EWOULDBLOCK) { 697 nch = cache_nlookup(&nd->nl_nch, &nlc); 698 if (nch.ncp->nc_flag & NCF_UNRESOLVED) 699 hit = 0; 700 for (;;) { 701 error = cache_resolve(&nch, nd->nl_cred); 702 if (error != EAGAIN && 703 (nch.ncp->nc_flag & NCF_DESTROYED) == 0) { 704 if (error == ESTALE) { 705 if (!inretry) 706 error = ENOENT; 707 doretry = TRUE; 708 } 709 break; 710 } 711 kprintf("[diagnostic] nlookup: relookup %*.*s\n", 712 nch.ncp->nc_nlen, nch.ncp->nc_nlen, 713 nch.ncp->nc_name); 714 cache_put(&nch); 715 nch = cache_nlookup(&nd->nl_nch, &nlc); 716 } 717 } 718 if (hvp) 719 vdrop(hvp); 720 wasdotordotdot = 0; 721 } 722 723 /* 724 * If the last component was "." or ".." our dflags no longer 725 * represents the parent directory and we have to explicitly 726 * look it up. 727 * 728 * Expect the parent to be good since nch is locked. 729 */ 730 if (wasdotordotdot && error == 0) { 731 dflags = 0; 732 if ((par.ncp = nch.ncp->nc_parent) != NULL) { 733 par.mount = nch.mount; 734 cache_hold(&par); 735 cache_lock_maybe_shared(&par, wantsexcllock(nd, ptr)); 736 error = naccess(&par, 0, nd->nl_cred, &dflags); 737 cache_put(&par); 738 } 739 } 740 741 /* 742 * [end of subsection] 743 * 744 * nch is locked and referenced. 745 * nd->nl_nch is unlocked and referenced. 746 * 747 * nl_nch must be unlocked or we could chain lock to the root 748 * if a resolve gets stuck (e.g. in NFS). 749 */ 750 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 751 752 /* 753 * Resolve the namespace if necessary. The ncp returned by 754 * cache_nlookup() is referenced and locked. 755 * 756 * XXX neither '.' nor '..' should return EAGAIN since they were 757 * previously resolved and thus cannot be newly created ncp's. 
758 */ 759 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 760 hit = 0; 761 error = cache_resolve(&nch, nd->nl_cred); 762 if (error == ESTALE) { 763 if (!inretry) 764 error = ENOENT; 765 doretry = TRUE; 766 } 767 KKASSERT(error != EAGAIN); 768 } else { 769 error = nch.ncp->nc_error; 770 } 771 772 /* 773 * Early completion. ENOENT is not an error if this is the last 774 * component and NLC_CREATE or NLC_RENAME (rename target) was 775 * requested. Note that ncp->nc_error is left as ENOENT in that 776 * case, which we check later on. 777 * 778 * Also handle invalid '.' or '..' components terminating a path 779 * for a create/rename/delete. The standard requires this and pax 780 * pretty stupidly depends on it. 781 */ 782 if (islastelement(ptr)) { 783 if (error == ENOENT && 784 (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST)) 785 ) { 786 if (nd->nl_flags & NLC_NFS_RDONLY) { 787 error = EROFS; 788 } else { 789 error = naccess(&nch, nd->nl_flags | dflags, 790 nd->nl_cred, NULL); 791 } 792 } 793 if (error == 0 && wasdotordotdot && 794 (nd->nl_flags & (NLC_CREATE | NLC_DELETE | 795 NLC_RENAME_SRC | NLC_RENAME_DST))) { 796 /* 797 * POSIX junk 798 */ 799 if (nd->nl_flags & NLC_CREATE) 800 error = EEXIST; 801 else if (nd->nl_flags & NLC_DELETE) 802 error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY; 803 else 804 error = EINVAL; 805 } 806 } 807 808 /* 809 * Early completion on error. 810 */ 811 if (error) { 812 cache_put(&nch); 813 break; 814 } 815 816 /* 817 * If the element is a symlink and it is either not the last 818 * element or it is the last element and we are allowed to 819 * follow symlinks, resolve the symlink. 820 */ 821 if ((nch.ncp->nc_flag & NCF_ISSYMLINK) && 822 (*ptr || (nd->nl_flags & NLC_FOLLOW)) 823 ) { 824 if (nd->nl_loopcnt++ >= MAXSYMLINKS) { 825 error = ELOOP; 826 cache_put(&nch); 827 break; 828 } 829 error = nreadsymlink(nd, &nch, &nlc); 830 cache_put(&nch); 831 if (error) 832 break; 833 834 /* 835 * Concatenate trailing path elements onto the returned symlink. 
836 * Note that if the path component (ptr) is not exhausted, it 837 * will begin with a '/', so we do not have to add another one. 838 * 839 * The symlink may not be empty. 840 */ 841 len = strlen(ptr); 842 if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) { 843 error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT; 844 objcache_put(namei_oc, nlc.nlc_nameptr); 845 break; 846 } 847 bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1); 848 if (nd->nl_flags & NLC_HASBUF) 849 objcache_put(namei_oc, nd->nl_path); 850 nd->nl_path = nlc.nlc_nameptr; 851 nd->nl_flags |= NLC_HASBUF; 852 ptr = nd->nl_path; 853 854 /* 855 * Go back up to the top to resolve any initial '/'s in the 856 * symlink. 857 */ 858 continue; 859 } 860 861 /* 862 * If the element is a directory and we are crossing a mount point, 863 * Locate the mount. 864 */ 865 while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 866 (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 && 867 (mp = cache_findmount(&nch)) != NULL 868 ) { 869 struct vnode *tdp; 870 int vfs_do_busy = 0; 871 872 /* 873 * VFS must be busied before the namecache entry is locked, 874 * but we don't want to waste time calling vfs_busy() if the 875 * mount point is already resolved. 
876 */ 877 again: 878 cache_put(&nch); 879 if (vfs_do_busy) { 880 while (vfs_busy(mp, 0)) { 881 if (mp->mnt_kern_flag & MNTK_UNMOUNT) { 882 kprintf("nlookup: warning umount race avoided\n"); 883 cache_dropmount(mp); 884 error = EBUSY; 885 vfs_do_busy = 0; 886 goto double_break; 887 } 888 } 889 } 890 cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch, 891 wantsexcllock(nd, ptr)); 892 893 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 894 if (vfs_do_busy == 0) { 895 vfs_do_busy = 1; 896 goto again; 897 } 898 error = VFS_ROOT(mp, &tdp); 899 vfs_unbusy(mp); 900 vfs_do_busy = 0; 901 if (error) { 902 cache_dropmount(mp); 903 break; 904 } 905 cache_setvp(&nch, tdp); 906 vput(tdp); 907 } 908 if (vfs_do_busy) 909 vfs_unbusy(mp); 910 cache_dropmount(mp); 911 } 912 913 if (error) { 914 cache_put(&nch); 915 double_break: /* entered by goto after nch was already cache_put() at 'again' */ 916 break; 917 } 918 919 /* 920 * Skip any slashes to get to the next element. If there 921 * are any slashes at all the current element must be a 922 * directory or, in the create case, intended to become a directory. 923 * If it isn't we break without incrementing ptr and fall through 924 * to the failure case below. 925 */ 926 while (*ptr == '/') { 927 if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 && 928 !(nd->nl_flags & NLC_WILLBEDIR) 929 ) { 930 break; 931 } 932 ++ptr; 933 } 934 935 /* 936 * Continuation case: additional elements and the current 937 * element is a directory. 938 */ 939 if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) { 940 if (nd->nl_flags & NLC_NCDIR) { 941 cache_drop_ncdir(&nd->nl_nch); 942 nd->nl_flags &= ~NLC_NCDIR; 943 } else { 944 cache_drop(&nd->nl_nch); 945 } 946 cache_unlock(&nch); 947 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 948 nd->nl_nch = nch; 949 continue; 950 } 951 952 /* 953 * Failure case: additional elements and the current element 954 * is not a directory 955 */ 956 if (*ptr) { 957 cache_put(&nch); 958 error = ENOTDIR; 959 break; 960 } 961 962 /* 963 * Successful lookup of last element. 
964 * 965 * Check permissions if the target exists. If the target does not 966 * exist directory permissions were already tested in the early 967 * completion code above. 968 * 969 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY 970 * if the file is marked append-only, and NLC_STICKY if the directory 971 * containing the file is sticky. 972 */ 973 if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) { 974 error = naccess(&nch, nd->nl_flags | dflags, 975 nd->nl_cred, NULL); 976 if (error) { 977 cache_put(&nch); 978 break; 979 } 980 } 981 982 /* 983 * Termination: no more elements. 984 * 985 * If NLC_REFDVP is set acquire a referenced parent dvp. 986 */ 987 if (nd->nl_flags & NLC_REFDVP) { 988 cache_lock(&nd->nl_nch); 989 error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp); 990 cache_unlock(&nd->nl_nch); 991 if (error) { 992 kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp); 993 cache_put(&nch); 994 break; 995 } 996 } 997 if (nd->nl_flags & NLC_NCDIR) { 998 cache_drop_ncdir(&nd->nl_nch); 999 nd->nl_flags &= ~NLC_NCDIR; 1000 } else { 1001 cache_drop(&nd->nl_nch); 1002 } 1003 nd->nl_nch = nch; 1004 nd->nl_flags |= NLC_NCPISLOCKED; 1005 error = 0; 1006 break; 1007 } 1008 1009 if (hit) 1010 ++gd->gd_nchstats->ncs_longhits; 1011 else 1012 ++gd->gd_nchstats->ncs_longmiss; 1013 1014 if (nd->nl_flags & NLC_NCPISLOCKED) 1015 KKASSERT(cache_lockstatus(&nd->nl_nch) > 0); 1016 1017 /* 1018 * Retry the whole thing if doretry flag is set, but only once. 1019 * autofs(5) may mount another filesystem under its root directory 1020 * while resolving a path. 1021 */ 1022 if (doretry && !inretry) { 1023 inretry = TRUE; 1024 nd->nl_flags &= NLC_NCDIR; /* keep only the current NLC_NCDIR bit ... */ 1025 nd->nl_flags |= saveflag; /* ... and put back the flags seen on entry */ 1026 goto nlookup_start; 1027 } 1028 1029 /* 1030 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit 1031 * (ncp->nc_error will be ENOENT), but we will still return an error 1032 * code of 0. 
1033 */ 1034 return(error); 1035 } 1036 1037 /* 1038 * Resolve a mount point's glue ncp. This ncp creates the illusion 1039 * of continuity in the namecache tree by connecting the ncp related to the 1040 * vnode under the mount to the ncp related to the mount's root vnode. 1041 * 1042 * If no error occurred a locked, ref'd ncp is stored in *nch. On a VFS_ROOT() failure the handle is released again with cache_put() and the error is returned. 1043 */ 1044 int 1045 nlookup_mp(struct mount *mp, struct nchandle *nch) 1046 { 1047 struct vnode *vp; 1048 int error; 1049 1050 error = 0; 1051 cache_get(&mp->mnt_ncmountpt, nch); 1052 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 1053 while (vfs_busy(mp, 0)) /* spin until the mount can be busied */ 1054 ; 1055 error = VFS_ROOT(mp, &vp); 1056 vfs_unbusy(mp); 1057 if (error) { 1058 cache_put(nch); 1059 } else { 1060 cache_setvp(nch, vp); 1061 vput(vp); 1062 } 1063 } 1064 return(error); 1065 } 1066 1067 /* 1068 * Read the contents of a symlink, allocate a path buffer out of the 1069 * namei_oc and initialize the supplied nlcomponent with the result. 1070 * 1071 * If an error occurs no buffer will be allocated or returned in the nlc. 
1072 */ 1073 int 1074 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 1075 struct nlcomponent *nlc) 1076 { 1077 struct vnode *vp; 1078 struct iovec aiov; 1079 struct uio auio; 1080 int linklen; 1081 int error; 1082 char *cp; 1083 1084 nlc->nlc_nameptr = NULL; 1085 nlc->nlc_namelen = 0; 1086 if (nch->ncp->nc_vp == NULL) 1087 return(ENOENT); 1088 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 1089 return(error); 1090 cp = objcache_get(namei_oc, M_WAITOK); 1091 aiov.iov_base = cp; 1092 aiov.iov_len = MAXPATHLEN; 1093 auio.uio_iov = &aiov; 1094 auio.uio_iovcnt = 1; 1095 auio.uio_offset = 0; 1096 auio.uio_rw = UIO_READ; 1097 auio.uio_segflg = UIO_SYSSPACE; 1098 auio.uio_td = nd->nl_td; 1099 auio.uio_resid = MAXPATHLEN - 1; 1100 error = VOP_READLINK(vp, &auio, nd->nl_cred); 1101 if (error) 1102 goto fail; 1103 linklen = MAXPATHLEN - 1 - auio.uio_resid; 1104 if (varsym_enable) { 1105 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 1106 if (linklen < 0) { 1107 error = ENAMETOOLONG; 1108 goto fail; 1109 } 1110 } 1111 cp[linklen] = 0; 1112 nlc->nlc_nameptr = cp; 1113 nlc->nlc_namelen = linklen; 1114 vput(vp); 1115 return(0); 1116 fail: 1117 objcache_put(namei_oc, cp); 1118 vput(vp); 1119 return(error); 1120 } 1121 1122 /* 1123 * Check access [XXX cache vattr!] [XXX quota] 1124 * 1125 * Generally check the NLC_* access bits. All specified bits must pass 1126 * for this function to return 0. 1127 * 1128 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 1129 * access, otherwise it must exist. No error is returned in this case. 1130 * 1131 * The file must not exist if NLC_EXCL is specified. 1132 * 1133 * Directory permissions in general are tested for NLC_CREATE if the file 1134 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 1135 * whether the file exists or not. 
1136 * 1137 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 1138 * the latter is only tested if the target exists. 1139 * 1140 * The passed ncp must be referenced and locked. If it is already resolved 1141 * it may be locked shared but otherwise should be locked exclusively. 1142 */ 1143 1144 #define S_WXOK_MASK (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 1145 1146 static int 1147 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) 1148 { 1149 struct vnode *vp; 1150 struct vattr va; 1151 struct namecache *ncp; 1152 int error; 1153 int cflags; 1154 1155 KKASSERT(cache_lockstatus(nch) > 0); 1156 1157 ncp = nch->ncp; 1158 if (ncp->nc_flag & NCF_UNRESOLVED) { 1159 cache_resolve(nch, cred); 1160 ncp = nch->ncp; 1161 } 1162 error = ncp->nc_error; 1163 1164 /* 1165 * Directory permissions checks. Silently ignore ENOENT if these 1166 * tests pass. It isn't an error. 1167 * 1168 * We can safely resolve ncp->nc_parent because ncp is currently 1169 * locked. 1170 */ 1171 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 1172 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 1173 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 1174 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 1175 (nflags & NLC_RENAME_DST) 1176 ) { 1177 struct nchandle par; 1178 1179 if ((par.ncp = ncp->nc_parent) == NULL) { 1180 if (error != EAGAIN) 1181 error = EINVAL; 1182 } else if (error == 0 || error == ENOENT) { 1183 par.mount = nch->mount; 1184 cache_hold(&par); 1185 cache_lock_maybe_shared(&par, 0); 1186 error = naccess(&par, NLC_WRITE, cred, NULL); 1187 cache_put(&par); 1188 } 1189 } 1190 } 1191 1192 /* 1193 * NLC_EXCL check. Target file must not exist. 1194 */ 1195 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 1196 error = EEXIST; 1197 1198 /* 1199 * Try to short-cut the vnode operation for intermediate directory 1200 * components. 
This is a major SMP win because it avoids having 1201 * to execute a lot of code for intermediate directory components, 1202 * including shared refs and locks on intermediate directory vnodes. 1203 * 1204 * We can only do this if the caller does not need nflagsp. 1205 */ 1206 if (error == 0 && nflagsp == NULL && 1207 nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) { 1208 return 0; 1209 } 1210 1211 /* 1212 * Get the vnode attributes so we can do the rest of our checks. 1213 * 1214 * NOTE: We only call naccess_va() if the target exists. 1215 */ 1216 if (error == 0) { 1217 error = cache_vget(nch, cred, LK_SHARED, &vp); 1218 if (error == ENOENT) { 1219 /* 1220 * Silently zero-out ENOENT if creating or renaming 1221 * (rename target). It isn't an error. 1222 */ 1223 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1224 error = 0; 1225 } else if (error == 0) { 1226 /* 1227 * Get the vnode attributes and check for illegal O_TRUNC 1228 * requests and read-only mounts. 1229 * 1230 * NOTE: You can still open devices on read-only mounts for 1231 * writing. 1232 * 1233 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1234 * check on the parent directory above. 1235 * 1236 * XXX cache the va in the namecache or in the vnode 1237 */ 1238 error = VOP_GETATTR(vp, &va); 1239 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1240 switch(va.va_type) { 1241 case VREG: 1242 case VDATABASE: 1243 case VCHR: 1244 case VBLK: 1245 case VFIFO: 1246 break; 1247 case VDIR: 1248 error = EISDIR; 1249 break; 1250 default: 1251 error = EINVAL; 1252 break; 1253 } 1254 } 1255 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1256 (vp->v_mount->mnt_flag & MNT_RDONLY) 1257 ) { 1258 switch(va.va_type) { 1259 case VDIR: 1260 case VLNK: 1261 case VREG: 1262 case VDATABASE: 1263 error = EROFS; 1264 break; 1265 default: 1266 break; 1267 } 1268 } 1269 vput(vp); 1270 1271 /* 1272 * Check permissions based on file attributes. 
The passed 1273 * flags (*nflagsp) are modified with feedback based on 1274 * special attributes and requirements. 1275 */ 1276 if (error == 0) { 1277 /* 1278 * Adjust the returned (*nflagsp) if non-NULL. 1279 */ 1280 if (nflagsp) { 1281 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 1282 *nflagsp |= NLC_STICKY; 1283 if (va.va_flags & APPEND) 1284 *nflagsp |= NLC_APPENDONLY; 1285 if (va.va_flags & IMMUTABLE) 1286 *nflagsp |= NLC_IMMUTABLE; 1287 } 1288 1289 /* 1290 * NCF_WXOK can be set for world-searchable directories. 1291 * 1292 * XXX When we implement capabilities this code would also 1293 * need a cap check, or only set the flag if there are no 1294 * capabilities. 1295 */ 1296 cflags = 0; 1297 if (va.va_type == VDIR && 1298 (va.va_mode & S_WXOK_MASK) == S_WXOK_MASK) { 1299 cflags |= NCF_WXOK; 1300 } 1301 1302 /* 1303 * Track swapcache management flags in the namecache. 1304 * 1305 * Calculate the flags based on the current vattr info 1306 * and recalculate the inherited flags from the parent 1307 * (the original cache linkage may have occurred without 1308 * getattrs and thus have stale flags). 1309 */ 1310 if (va.va_flags & SF_NOCACHE) 1311 cflags |= NCF_SF_NOCACHE; 1312 if (va.va_flags & UF_CACHE) 1313 cflags |= NCF_UF_CACHE; 1314 if (ncp->nc_parent) { 1315 if (ncp->nc_parent->nc_flag & 1316 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1317 cflags |= NCF_SF_PNOCACHE; 1318 } 1319 if (ncp->nc_parent->nc_flag & 1320 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1321 cflags |= NCF_UF_PCACHE; 1322 } 1323 } 1324 1325 /* 1326 * We're not supposed to update nc_flag when holding a shared 1327 * lock, but we allow the case for certain flags. Note that 1328 * holding an exclusive lock allows updating nc_flag without 1329 * atomics. nc_flag is not allowe to be updated at all unless 1330 * a shared or exclusive lock is held. 
1331 */ 1332 atomic_clear_short(&ncp->nc_flag, 1333 (NCF_SF_NOCACHE | NCF_UF_CACHE | 1334 NCF_SF_PNOCACHE | NCF_UF_PCACHE | 1335 NCF_WXOK) & ~cflags); 1336 atomic_set_short(&ncp->nc_flag, cflags); 1337 1338 /* 1339 * Process general access. 1340 */ 1341 error = naccess_va(&va, nflags, cred); 1342 } 1343 } 1344 } 1345 return(error); 1346 } 1347 1348 /* 1349 * Check the requested access against the given vattr using cred. 1350 */ 1351 int 1352 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 1353 { 1354 int i; 1355 int vmode; 1356 1357 /* 1358 * Test the immutable bit. Creations, deletions, renames (source 1359 * or destination) are not allowed. chown/chmod/other is also not 1360 * allowed but is handled by SETATTR. Hardlinks to the immutable 1361 * file are allowed. 1362 * 1363 * If the directory is set to immutable then creations, deletions, 1364 * renames (source or dest) and hardlinks to files within the directory 1365 * are not allowed, and regular files opened through the directory may 1366 * not be written to or truncated (unless a special device). 1367 * 1368 * NOTE! New hardlinks to immutable files work but new hardlinks to 1369 * files, immutable or not, sitting inside an immutable directory are 1370 * not allowed. As always if the file is hardlinked via some other 1371 * path additional hardlinks may be possible even if the file is marked 1372 * immutable. The sysop needs to create a closure by checking the hard 1373 * link count. Once closure is achieved you are good, and security 1374 * scripts should check link counts anyway. 1375 * 1376 * Writes and truncations are only allowed on special devices. 
1377 */ 1378 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1379 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1380 return (EPERM); 1381 if (nflags & (NLC_CREATE | NLC_DELETE | 1382 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1383 return (EPERM); 1384 } 1385 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1386 switch(va->va_type) { 1387 case VDIR: 1388 return (EISDIR); 1389 case VLNK: 1390 case VREG: 1391 case VDATABASE: 1392 return (EPERM); 1393 default: 1394 break; 1395 } 1396 } 1397 } 1398 1399 /* 1400 * Test the no-unlink and append-only bits for opens, rename targets, 1401 * and deletions. These bits are not tested for creations or 1402 * rename sources. 1403 * 1404 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1405 * If you do not wish this you must also set NOUNLINK. 1406 * 1407 * If the governing directory is marked APPEND-only it implies 1408 * NOUNLINK for all entries in the directory. 1409 */ 1410 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1411 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1412 ) { 1413 return (EPERM); 1414 } 1415 1416 /* 1417 * A file marked append-only may not be deleted but can be renamed. 1418 */ 1419 if ((va->va_flags & APPEND) && 1420 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1421 ) { 1422 return (EPERM); 1423 } 1424 1425 /* 1426 * A file marked append-only which is opened for writing must also 1427 * be opened O_APPEND. 1428 */ 1429 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1430 if (nflags & NLC_TRUNCATE) 1431 return (EPERM); 1432 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1433 if ((nflags & NLC_APPEND) == 0) 1434 return (EPERM); 1435 } 1436 } 1437 1438 /* 1439 * root gets universal access 1440 */ 1441 if (cred->cr_uid == 0) 1442 return(0); 1443 1444 /* 1445 * Check owner perms. 1446 * 1447 * If NLC_OWN is set the owner of the file is allowed no matter when 1448 * the owner-mode bits say (utimes). 
1449 */ 1450 vmode = 0; 1451 if (nflags & NLC_READ) 1452 vmode |= S_IRUSR; 1453 if (nflags & NLC_WRITE) 1454 vmode |= S_IWUSR; 1455 if (nflags & NLC_EXEC) 1456 vmode |= S_IXUSR; 1457 1458 if (cred->cr_uid == va->va_uid) { 1459 if ((nflags & NLC_OWN) == 0) { 1460 if ((vmode & va->va_mode) != vmode) 1461 return(EACCES); 1462 } 1463 return(0); 1464 } 1465 1466 /* 1467 * If NLC_STICKY is set only the owner may delete or rename a file. 1468 * This bit is typically set on /tmp. 1469 * 1470 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1471 * the specific delete or rename case. For deletions and renames we 1472 * usually just care about directory permissions, not file permissions. 1473 */ 1474 if ((nflags & NLC_STICKY) && 1475 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1476 return(EACCES); 1477 } 1478 1479 /* 1480 * Check group perms 1481 */ 1482 vmode >>= 3; 1483 for (i = 0; i < cred->cr_ngroups; ++i) { 1484 if (va->va_gid == cred->cr_groups[i]) { 1485 if ((vmode & va->va_mode) != vmode) 1486 return(EACCES); 1487 return(0); 1488 } 1489 } 1490 1491 /* 1492 * Check world perms 1493 */ 1494 vmode >>= 3; 1495 if ((vmode & va->va_mode) != vmode) 1496 return(EACCES); 1497 return(0); 1498 } 1499 1500 /* 1501 * Long-term (10-second interval) statistics collection 1502 */ 1503 static 1504 uint64_t 1505 collect_nlookup_callback(int n) 1506 { 1507 static uint64_t last_total; 1508 uint64_t save; 1509 uint64_t total; 1510 1511 total = 0; 1512 for (n = 0; n < ncpus; ++n) { 1513 globaldata_t gd = globaldata_find(n); 1514 struct nchstats *sp; 1515 1516 if ((sp = gd->gd_nchstats) != NULL) 1517 total += sp->ncs_longhits + sp->ncs_longmiss; 1518 } 1519 save = total; 1520 total = total - last_total; 1521 last_total = save; 1522 1523 return total; 1524 } 1525 1526 static 1527 void 1528 nlookup_collect_init(void *dummy __unused) 1529 { 1530 kcollect_register(KCOLLECT_NLOOKUP, "nlookup", collect_nlookup_callback, 1531 
KCOLLECT_SCALE(KCOLLECT_NLOOKUP_FORMAT, 0)); 1532 } 1533 SYSINIT(collect_nlookup, SI_SUB_PROP, SI_ORDER_ANY, nlookup_collect_init, 0); 1534