/*
 * Copyright (c) 2004 The DragonFly Project. All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in
 * the documentation and/or other materials provided with the
 * distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 * contributors may be used to endorse or promote products derived
 * from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * nlookup() is the 'new' namei interface. Rather than return directory and
 * leaf vnodes (in various lock states) the new interface instead deals in
 * namecache records.
Namecache records may represent both a positive or 38 * a negative hit. The namespace is locked via the namecache record instead 39 * of via the vnode, and only the leaf namecache record (representing the 40 * filename) needs to be locked. 41 * 42 * This greatly improves filesystem parallelism and is a huge simplification 43 * of the API verses the old vnode locking / namei scheme. 44 * 45 * Filesystems must actively control the caching aspects of the namecache, 46 * and since namecache pointers are used as handles they are non-optional 47 * even for filesystems which do not generally wish to cache things. It is 48 * intended that a separate cache coherency API will be constructed to handle 49 * these issues. 50 */ 51 52 #include "opt_ktrace.h" 53 54 #include <sys/param.h> 55 #include <sys/systm.h> 56 #include <sys/kernel.h> 57 #include <sys/vnode.h> 58 #include <sys/mount.h> 59 #include <sys/filedesc.h> 60 #include <sys/proc.h> 61 #include <sys/namei.h> 62 #include <sys/nlookup.h> 63 #include <sys/malloc.h> 64 #include <sys/stat.h> 65 #include <sys/objcache.h> 66 #include <sys/file.h> 67 68 #ifdef KTRACE 69 #include <sys/ktrace.h> 70 #endif 71 72 static int naccess(struct nchandle *nch, int vmode, struct ucred *cred, 73 int *stickyp); 74 75 /* 76 * Initialize a nlookup() structure, early error return for copyin faults 77 * or a degenerate empty string (which is not allowed). 78 * 79 * The first process proc0's credentials are used if the calling thread 80 * is not associated with a process context. 81 * 82 * MPSAFE 83 */ 84 int 85 nlookup_init(struct nlookupdata *nd, 86 const char *path, enum uio_seg seg, int flags) 87 { 88 size_t pathlen; 89 struct proc *p; 90 thread_t td; 91 int error; 92 93 td = curthread; 94 p = td->td_proc; 95 96 /* 97 * note: the pathlen set by copy*str() includes the terminating \0. 
98 */ 99 bzero(nd, sizeof(struct nlookupdata)); 100 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 101 nd->nl_flags |= NLC_HASBUF; 102 if (seg == UIO_SYSSPACE) 103 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 104 else 105 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 106 107 /* 108 * Don't allow empty pathnames. 109 * POSIX.1 requirement: "" is not a vaild file name. 110 */ 111 if (error == 0 && pathlen <= 1) 112 error = ENOENT; 113 114 if (error == 0) { 115 if (p && p->p_fd) { 116 cache_copy_ncdir(p, &nd->nl_nch); 117 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 118 if (p->p_fd->fd_njdir.ncp) 119 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 120 nd->nl_cred = td->td_ucred; 121 nd->nl_flags |= NLC_BORROWCRED | NLC_NCDIR; 122 } else { 123 cache_copy(&rootnch, &nd->nl_nch); 124 cache_copy(&nd->nl_nch, &nd->nl_rootnch); 125 cache_copy(&nd->nl_nch, &nd->nl_jailnch); 126 nd->nl_cred = proc0.p_ucred; 127 nd->nl_flags |= NLC_BORROWCRED; 128 } 129 nd->nl_td = td; 130 nd->nl_flags |= flags; 131 } else { 132 nlookup_done(nd); 133 } 134 return(error); 135 } 136 137 138 /* 139 * nlookup_init() for "at" family of syscalls. 140 * 141 * Works similarly to nlookup_init() but if path is relative and fd is not 142 * AT_FDCWD, path is interpreted relative to the directory pointed to by fd. 143 * In this case, the file entry pointed to by fd is ref'ed and returned in 144 * *fpp. 145 * 146 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 147 * and release the ref to the file entry. 
148 */ 149 int 150 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 151 const char *path, enum uio_seg seg, int flags) 152 { 153 struct thread *td = curthread; 154 struct proc *p = td->td_proc; 155 struct file* fp; 156 struct vnode *vp; 157 int error; 158 159 *fpp = NULL; 160 161 if ((error = nlookup_init(nd, path, seg, flags)) != 0) { 162 return (error); 163 } 164 165 if (nd->nl_path[0] != '/' && fd != AT_FDCWD) { 166 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 167 goto done; 168 vp = (struct vnode*)fp->f_data; 169 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 170 fdrop(fp); 171 fp = NULL; 172 error = ENOTDIR; 173 goto done; 174 } 175 if (nd->nl_flags & NLC_NCDIR) { 176 cache_drop_ncdir(&nd->nl_nch); 177 nd->nl_flags &= ~NLC_NCDIR; 178 } else { 179 cache_drop(&nd->nl_nch); 180 } 181 cache_copy(&fp->f_nchandle, &nd->nl_nch); 182 *fpp = fp; 183 } 184 185 186 done: 187 if (error) 188 nlookup_done(nd); 189 return (error); 190 191 } 192 193 /* 194 * This works similarly to nlookup_init() but does not assume a process 195 * context. rootnch is always chosen for the root directory and the cred 196 * and starting directory are supplied in arguments. 197 */ 198 int 199 nlookup_init_raw(struct nlookupdata *nd, 200 const char *path, enum uio_seg seg, int flags, 201 struct ucred *cred, struct nchandle *ncstart) 202 { 203 size_t pathlen; 204 thread_t td; 205 int error; 206 207 td = curthread; 208 209 bzero(nd, sizeof(struct nlookupdata)); 210 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 211 nd->nl_flags |= NLC_HASBUF; 212 if (seg == UIO_SYSSPACE) 213 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 214 else 215 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 216 217 /* 218 * Don't allow empty pathnames. 219 * POSIX.1 requirement: "" is not a vaild file name. 
220 */ 221 if (error == 0 && pathlen <= 1) 222 error = ENOENT; 223 224 if (error == 0) { 225 cache_copy(ncstart, &nd->nl_nch); 226 cache_copy(&rootnch, &nd->nl_rootnch); 227 cache_copy(&rootnch, &nd->nl_jailnch); 228 nd->nl_cred = crhold(cred); 229 nd->nl_td = td; 230 nd->nl_flags |= flags; 231 } else { 232 nlookup_done(nd); 233 } 234 return(error); 235 } 236 237 /* 238 * This works similarly to nlookup_init_raw() but does not rely 239 * on rootnch being initialized yet. 240 */ 241 int 242 nlookup_init_root(struct nlookupdata *nd, 243 const char *path, enum uio_seg seg, int flags, 244 struct ucred *cred, struct nchandle *ncstart, 245 struct nchandle *ncroot) 246 { 247 size_t pathlen; 248 thread_t td; 249 int error; 250 251 td = curthread; 252 253 bzero(nd, sizeof(struct nlookupdata)); 254 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 255 nd->nl_flags |= NLC_HASBUF; 256 if (seg == UIO_SYSSPACE) 257 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 258 else 259 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 260 261 /* 262 * Don't allow empty pathnames. 263 * POSIX.1 requirement: "" is not a vaild file name. 264 */ 265 if (error == 0 && pathlen <= 1) 266 error = ENOENT; 267 268 if (error == 0) { 269 cache_copy(ncstart, &nd->nl_nch); 270 cache_copy(ncroot, &nd->nl_rootnch); 271 cache_copy(ncroot, &nd->nl_jailnch); 272 nd->nl_cred = crhold(cred); 273 nd->nl_td = td; 274 nd->nl_flags |= flags; 275 } else { 276 nlookup_done(nd); 277 } 278 return(error); 279 } 280 281 #if 0 282 /* 283 * Set a different credential; this credential will be used by future 284 * operations performed on nd.nl_open_vp and nlookupdata structure. 
*/
/* NOTE: this function is currently compiled out by the enclosing #if 0. */
void
nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred)
{
	KKASSERT(nd->nl_cred != NULL);

	if (nd->nl_cred != cred) {
		/* Take our own reference before letting go of the old cred */
		cred = crhold(cred);
		/* A borrowed cred was never referenced by us; do not free it */
		if ((nd->nl_flags & NLC_BORROWCRED) == 0)
			crfree(nd->nl_cred);
		nd->nl_flags &= ~NLC_BORROWCRED;
		nd->nl_cred = cred;
	}
}
#endif

/*
 * Cleanup a nlookupdata structure after we are through with it. This may
 * be called on any nlookupdata structure initialized with nlookup_init().
 * Calling nlookup_done() is mandatory in all cases except where nlookup_init()
 * returns an error, even if as a consumer you believe you have taken all
 * dynamic elements out of the nlookupdata structure.
 *
 * Every resource still recorded in nd is released and the corresponding
 * field is cleared: the current/root/jail namecache handles, the path
 * buffer, the credential, any open vnode and the referenced directory
 * vnode. nl_flags is zero on return.
 */
void
nlookup_done(struct nlookupdata *nd)
{
	if (nd->nl_nch.ncp) {
		/* Unlock first if the lookup left the handle locked */
		if (nd->nl_flags & NLC_NCPISLOCKED) {
			nd->nl_flags &= ~NLC_NCPISLOCKED;
			cache_unlock(&nd->nl_nch);
		}
		/* Drop with the routine matching how the ref was obtained */
		if (nd->nl_flags & NLC_NCDIR) {
			cache_drop_ncdir(&nd->nl_nch);
			nd->nl_flags &= ~NLC_NCDIR;
		} else {
			cache_drop(&nd->nl_nch);	/* NULL's out the nch */
		}
	}
	if (nd->nl_rootnch.ncp)
		cache_drop_and_cache(&nd->nl_rootnch);
	if (nd->nl_jailnch.ncp)
		cache_drop_and_cache(&nd->nl_jailnch);
	/* Only return the path buffer if it came from namei_oc (NLC_HASBUF) */
	if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) {
		objcache_put(namei_oc, nd->nl_path);
		nd->nl_path = NULL;
	}
	if (nd->nl_cred) {
		/* Borrowed credentials are not ours to free */
		if ((nd->nl_flags & NLC_BORROWCRED) == 0)
			crfree(nd->nl_cred);
		nd->nl_cred = NULL;
		nd->nl_flags &= ~NLC_BORROWCRED;
	}
	if (nd->nl_open_vp) {
		/* The vnode must be unlocked before it is closed */
		if (nd->nl_flags & NLC_LOCKVP) {
			vn_unlock(nd->nl_open_vp);
			nd->nl_flags &= ~NLC_LOCKVP;
		}
		vn_close(nd->nl_open_vp, nd->nl_vp_fmode, NULL);
		nd->nl_open_vp = NULL;
	}
	if (nd->nl_dvp) {
		vrele(nd->nl_dvp);
		nd->nl_dvp = NULL;
	}
	nd->nl_flags = 0;	/* clear remaining flags (just clear everything) */
}

/*
 * Works similarly to nlookup_done() when nd initialized with
nlookup_init_at(). 355 */ 356 void 357 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 358 { 359 nlookup_done(nd); 360 if (fp != NULL) 361 fdrop(fp); 362 } 363 364 void 365 nlookup_zero(struct nlookupdata *nd) 366 { 367 bzero(nd, sizeof(struct nlookupdata)); 368 } 369 370 /* 371 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 372 * if an error occured. 373 * 374 * Note that the returned ncp is not checked for permissions, though VEXEC 375 * is checked on the directory path leading up to the result. The caller 376 * must call naccess() to check the permissions of the returned leaf. 377 */ 378 struct nchandle 379 nlookup_simple(const char *str, enum uio_seg seg, 380 int niflags, int *error) 381 { 382 struct nlookupdata nd; 383 struct nchandle nch; 384 385 *error = nlookup_init(&nd, str, seg, niflags); 386 if (*error == 0) { 387 if ((*error = nlookup(&nd)) == 0) { 388 nch = nd.nl_nch; /* keep hold ref from structure */ 389 cache_zero(&nd.nl_nch); /* and NULL out */ 390 } else { 391 cache_zero(&nch); 392 } 393 nlookup_done(&nd); 394 } else { 395 cache_zero(&nch); 396 } 397 return(nch); 398 } 399 400 /* 401 * Returns non-zero if the path element is the last element 402 */ 403 static 404 int 405 islastelement(const char *ptr) 406 { 407 while (*ptr == '/') 408 ++ptr; 409 return (*ptr == 0); 410 } 411 412 /* 413 * Returns non-zero if we need to lock the namecache element 414 * exclusively. Unless otherwise requested by NLC_SHAREDLOCK, 415 * the last element of the namecache lookup will be locked 416 * exclusively. 417 * 418 * NOTE: Even if we return on-zero, an unresolved namecache record 419 * will always be locked exclusively. 420 */ 421 static __inline 422 int 423 wantsexcllock(struct nlookupdata *nd, const char *ptr) 424 { 425 if ((nd->nl_flags & NLC_SHAREDLOCK) == 0) 426 return(islastelement(ptr)); 427 return(0); 428 } 429 430 431 /* 432 * Do a generic nlookup. 
* Note that the passed nd is not nlookup_done()'d
 * on return, even if an error occurs. If no error occurs or NLC_CREATE
 * is flagged and ENOENT is returned, then the returned nl_nch is always
 * referenced and locked exclusively.
 *
 * WARNING: For any general error other than ENOENT w/NLC_CREATE, the
 *	    resulting nl_nch may or may not be locked and if locked
 *	    might be locked either shared or exclusive.
 *
 * Intermediate directory elements, including the current directory, require
 * execute (search) permission. nlookup does not examine the access
 * permissions on the returned element.
 *
 * If NLC_CREATE is set the last directory must allow node creation,
 * and an error code of 0 will be returned for a nonexistent
 * target (not ENOENT).
 *
 * If NLC_RENAME_DST is set the last directory must allow node deletion,
 * plus the sticky check is made, and an error code of 0 will be returned
 * for a nonexistent target (not ENOENT).
 *
 * If NLC_DELETE is set the last directory must allow node deletion,
 * plus the sticky check is made.
 *
 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode
 * of the returned entry. The vnode will be referenced, but not locked,
 * and will be released by nlookup_done() along with everything else.
 *
 * NOTE: As an optimization we attempt to obtain a shared namecache lock
 *	 on any intermediate elements. On success, the returned element
 *	 is ALWAYS locked exclusively.
 */
int
nlookup(struct nlookupdata *nd)
{
	globaldata_t gd = mycpu;
	struct nlcomponent nlc;
	struct nchandle nch;
	struct nchandle par;
	struct nchandle nctmp;
	struct mount *mp;
	struct vnode *hvp;		/* hold to prevent recyclement */
	int wasdotordotdot;		/* 0 = normal, 1 = ".", 2 = ".." */
	char *ptr;			/* current position in nd->nl_path */
	int error;
	int len;
	int dflags;
	int hit = 1;			/* cleared if anything had to be resolved */
	int saveflag = nd->nl_flags & ~NLC_NCDIR; /* flags to restore on retry */
	boolean_t doretry = FALSE;	/* set when cache_resolve() saw ESTALE */
	boolean_t inretry = FALSE;	/* TRUE during the one permitted retry */

nlookup_start:
#ifdef KTRACE
	if (KTRPOINT(nd->nl_td, KTR_NAMEI))
		ktrnamei(nd->nl_td->td_lwp, nd->nl_path);
#endif
	bzero(&nlc, sizeof(nlc));

	/*
	 * Setup for the loop. The current working namecache element is
	 * always at least referenced. We lock it as required, but always
	 * return a locked, resolved namecache entry.
	 */
	nd->nl_loopcnt = 0;
	if (nd->nl_dvp) {
		vrele(nd->nl_dvp);
		nd->nl_dvp = NULL;
	}
	ptr = nd->nl_path;

	/*
	 * Loop on the path components. At the top of the loop nd->nl_nch
	 * is ref'd and unlocked and represents our current position.
	 */
	for (;;) {
		/*
		 * Make sure nl_nch is locked so we can access the vnode,
		 * resolution state, etc.
		 */
		if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) {
			nd->nl_flags |= NLC_NCPISLOCKED;
			cache_lock_maybe_shared(&nd->nl_nch, wantsexcllock(nd, ptr));
		}

		/*
		 * Check if the root directory should replace the current
		 * directory. This is done at the start of a translation
		 * or after a symbolic link has been found. In other cases
		 * ptr will never be pointing at a '/'.
		 */
		if (*ptr == '/') {
			do {
				++ptr;
			} while (*ptr == '/');
			cache_unlock(&nd->nl_nch);
			cache_get_maybe_shared(&nd->nl_rootnch, &nch,
					       wantsexcllock(nd, ptr));
			if (nd->nl_flags & NLC_NCDIR) {
				cache_drop_ncdir(&nd->nl_nch);
				nd->nl_flags &= ~NLC_NCDIR;
			} else {
				cache_drop(&nd->nl_nch);
			}
			nd->nl_nch = nch;		/* remains locked */

			/*
			 * Fast-track termination. There is no parent directory of
			 * the root in the same mount from the point of view of
			 * the caller so return EACCES if NLC_REFDVP is specified,
			 * and EEXIST if NLC_CREATE is also specified.
			 * e.g. 'rmdir /' or 'mkdir /' are not allowed.
			 */
			if (*ptr == 0) {
				if (nd->nl_flags & NLC_REFDVP)
					error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES;
				else
					error = 0;
				break;
			}
			continue;
		}

		/*
		 * Check directory search permissions (nd->nl_nch is locked & refd)
		 */
		dflags = 0;
		error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags);
		if (error)
			break;

		/*
		 * Extract the path component. Path components are limited to
		 * 255 characters.
		 */
		nlc.nlc_nameptr = ptr;
		while (*ptr && *ptr != '/')
			++ptr;
		nlc.nlc_namelen = ptr - nlc.nlc_nameptr;
		if (nlc.nlc_namelen >= 256) {
			error = ENAMETOOLONG;
			break;
		}

		/*
		 * Lookup the path component in the cache, creating an unresolved
		 * entry if necessary. We have to handle "." and ".." as special
		 * cases.
		 *
		 * When handling ".." we have to detect a traversal back through a
		 * mount point. If we are at the root, ".." just returns the root.
		 *
		 * When handling "." or ".." we also have to recalculate dflags
		 * since our dflags will be for some sub-directory instead of the
		 * parent dir.
		 *
		 * This subsection returns a locked, refd 'nch' unless it errors out,
		 * and an unlocked but still ref'd nd->nl_nch.
		 *
		 * The namecache topology is not allowed to be disconnected, so
		 * encountering a NULL parent will generate EINVAL. This typically
		 * occurs when a directory is removed out from under a process.
		 *
		 * WARNING! The unlocking of nd->nl_nch is sensitive code.
		 */
		KKASSERT(nd->nl_flags & NLC_NCPISLOCKED);

		if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') {
			/* "." - re-acquire the current element as nch */
			cache_unlock(&nd->nl_nch);
			nd->nl_flags &= ~NLC_NCPISLOCKED;
			cache_get_maybe_shared(&nd->nl_nch, &nch, wantsexcllock(nd, ptr));
			wasdotordotdot = 1;
		} else if (nlc.nlc_namelen == 2 &&
			   nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') {
			if (nd->nl_nch.mount == nd->nl_rootnch.mount &&
			    nd->nl_nch.ncp == nd->nl_rootnch.ncp
			) {
				/*
				 * ".." at the root returns the root
				 */
				cache_unlock(&nd->nl_nch);
				nd->nl_flags &= ~NLC_NCPISLOCKED;
				cache_get_maybe_shared(&nd->nl_nch, &nch,
						       wantsexcllock(nd, ptr));
			} else {
				/*
				 * Locate the parent ncp. If we are at the root of a
				 * filesystem mount we have to skip to the mounted-on
				 * point in the underlying filesystem.
				 *
				 * Expect the parent to always be good since the
				 * mountpoint doesn't go away. XXX hack. cache_get()
				 * requires the ncp to already have a ref as a safety.
				 *
				 * However, a process which has been broken out of a chroot
				 * will wind up with a NULL parent if it tries to '..' above
				 * the real root, deal with the case. Note that this does
				 * not protect us from a jail breakout, it just stops a panic
				 * if the jail-broken process tries to '..' past the real
				 * root.
				 */
				nctmp = nd->nl_nch;
				while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) {
					nctmp = nctmp.mount->mnt_ncmounton;
					if (nctmp.ncp == NULL)
						break;
				}
				if (nctmp.ncp == NULL) {
					if (curthread->td_proc) {
						kprintf("vfs_nlookup: '..' traverse broke "
							"jail: pid %d (%s)\n",
							curthread->td_proc->p_pid,
							curthread->td_comm);
					}
					/* Fall back to the lookup's root directory */
					nctmp = nd->nl_rootnch;
				} else {
					nctmp.ncp = nctmp.ncp->nc_parent;
				}
				cache_hold(&nctmp);
				cache_unlock(&nd->nl_nch);
				nd->nl_flags &= ~NLC_NCPISLOCKED;
				cache_get_maybe_shared(&nctmp, &nch, wantsexcllock(nd, ptr));
				cache_drop(&nctmp);		/* NOTE: zero's nctmp */
			}
			wasdotordotdot = 2;
		} else {
			/*
			 * Must unlock nl_nch when traversing down the path. However,
			 * the child ncp has not yet been found/created and the parent's
			 * child list might be empty. Thus releasing the lock can
			 * allow a race whereby the parent ncp's vnode is recycled.
			 * This case can occur especially when maxvnodes is set very low.
			 *
			 * We need the parent's ncp to remain resolved for all normal
			 * filesystem activities, so we vhold() the vp during the lookup
			 * to prevent recyclement due to vnlru / maxvnodes.
			 *
			 * If we race an unlink or rename the ncp might be marked
			 * DESTROYED after resolution, requiring a retry.
			 */
			if ((hvp = nd->nl_nch.ncp->nc_vp) != NULL)
				vhold(hvp);
			cache_unlock(&nd->nl_nch);
			nd->nl_flags &= ~NLC_NCPISLOCKED;
			error = cache_nlookup_maybe_shared(&nd->nl_nch, &nlc,
							   wantsexcllock(nd, ptr), &nch);
			if (error == EWOULDBLOCK) {
				/*
				 * The maybe-shared lookup could not complete; fall
				 * back to cache_nlookup() and resolve the entry here,
				 * re-looking it up for as long as the resolve returns
				 * EAGAIN or the entry is flagged NCF_DESTROYED.
				 */
				nch = cache_nlookup(&nd->nl_nch, &nlc);
				if (nch.ncp->nc_flag & NCF_UNRESOLVED)
					hit = 0;
				for (;;) {
					error = cache_resolve(&nch, nd->nl_cred);
					if (error != EAGAIN &&
					    (nch.ncp->nc_flag & NCF_DESTROYED) == 0) {
						if (error == ESTALE) {
							/* first pass: report ENOENT, then retry */
							if (!inretry)
								error = ENOENT;
							doretry = TRUE;
						}
						break;
					}
					kprintf("[diagnostic] nlookup: relookup %*.*s\n",
						nch.ncp->nc_nlen, nch.ncp->nc_nlen,
						nch.ncp->nc_name);
					cache_put(&nch);
					nch = cache_nlookup(&nd->nl_nch, &nlc);
				}
			}
			if (hvp)
				vdrop(hvp);
			wasdotordotdot = 0;
		}

		/*
		 * If the last component was "." or ".." our dflags no longer
		 * represents the parent directory and we have to explicitly
		 * look it up.
		 *
		 * Expect the parent to be good since nch is locked.
		 */
		if (wasdotordotdot && error == 0) {
			dflags = 0;
			if ((par.ncp = nch.ncp->nc_parent) != NULL) {
				par.mount = nch.mount;
				cache_hold(&par);
				cache_lock_maybe_shared(&par, wantsexcllock(nd, ptr));
				error = naccess(&par, 0, nd->nl_cred, &dflags);
				cache_put(&par);
			}
		}

		/*
		 * [end of subsection]
		 *
		 * nch is locked and referenced.
		 * nd->nl_nch is unlocked and referenced.
		 *
		 * nl_nch must be unlocked or we could chain lock to the root
		 * if a resolve gets stuck (e.g. in NFS).
		 */
		KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);

		/*
		 * Resolve the namespace if necessary. The ncp returned by
		 * cache_nlookup() is referenced and locked.
		 *
		 * XXX neither '.' nor '..' should return EAGAIN since they were
		 * previously resolved and thus cannot be newly created ncp's.
		 */
		if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
			hit = 0;
			error = cache_resolve(&nch, nd->nl_cred);
			if (error == ESTALE) {
				if (!inretry)
					error = ENOENT;
				doretry = TRUE;
			}
			KKASSERT(error != EAGAIN);
		} else {
			error = nch.ncp->nc_error;
		}

		/*
		 * Early completion. ENOENT is not an error if this is the last
		 * component and NLC_CREATE or NLC_RENAME (rename target) was
		 * requested. Note that ncp->nc_error is left as ENOENT in that
		 * case, which we check later on.
		 *
		 * Also handle invalid '.' or '..' components terminating a path
		 * for a create/rename/delete. The standard requires this and pax
		 * pretty stupidly depends on it.
		 */
		if (islastelement(ptr)) {
			if (error == ENOENT &&
			    (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST))
			) {
				if (nd->nl_flags & NLC_NFS_RDONLY) {
					error = EROFS;
				} else {
					error = naccess(&nch, nd->nl_flags | dflags,
							nd->nl_cred, NULL);
				}
			}
			if (error == 0 && wasdotordotdot &&
			    (nd->nl_flags & (NLC_CREATE | NLC_DELETE |
					     NLC_RENAME_SRC | NLC_RENAME_DST))) {
				/*
				 * POSIX junk
				 */
				if (nd->nl_flags & NLC_CREATE)
					error = EEXIST;
				else if (nd->nl_flags & NLC_DELETE)
					error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY;
				else
					error = EINVAL;
			}
		}

		/*
		 * Early completion on error.
		 */
		if (error) {
			cache_put(&nch);
			break;
		}

		/*
		 * If the element is a symlink and it is either not the last
		 * element or it is the last element and we are allowed to
		 * follow symlinks, resolve the symlink.
		 */
		if ((nch.ncp->nc_flag & NCF_ISSYMLINK) &&
		    (*ptr || (nd->nl_flags & NLC_FOLLOW))
		) {
			if (nd->nl_loopcnt++ >= MAXSYMLINKS) {
				error = ELOOP;
				cache_put(&nch);
				break;
			}
			error = nreadsymlink(nd, &nch, &nlc);
			cache_put(&nch);
			if (error)
				break;

			/*
			 * Concatenate trailing path elements onto the returned symlink.
			 * Note that if the path component (ptr) is not exhausted, it
			 * will begin with a '/', so we do not have to add another one.
			 *
			 * The symlink may not be empty.
			 */
			len = strlen(ptr);
			if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) {
				error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT;
				/* nreadsymlink()'s buffer is ours to release */
				objcache_put(namei_oc, nlc.nlc_nameptr);
				break;
			}
			bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1);
			/* The symlink buffer now replaces the old path buffer */
			if (nd->nl_flags & NLC_HASBUF)
				objcache_put(namei_oc, nd->nl_path);
			nd->nl_path = nlc.nlc_nameptr;
			nd->nl_flags |= NLC_HASBUF;
			ptr = nd->nl_path;

			/*
			 * Go back up to the top to resolve any initial '/'s in the
			 * symlink.
			 */
			continue;
		}

		/*
		 * If the element is a directory and we are crossing a mount point,
		 * Locate the mount.
		 */
		while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) &&
		       (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 &&
		       (mp = cache_findmount(&nch)) != NULL
		) {
			struct vnode *tdp;
			int vfs_do_busy = 0;

			/*
			 * VFS must be busied before the namecache entry is locked,
			 * but we don't want to waste time calling vfs_busy() if the
			 * mount point is already resolved.
			 */
again:
			cache_put(&nch);
			if (vfs_do_busy) {
				while (vfs_busy(mp, 0)) {
					if (mp->mnt_kern_flag & MNTK_UNMOUNT) {
						kprintf("nlookup: warning umount race avoided\n");
						cache_dropmount(mp);
						error = EBUSY;
						vfs_do_busy = 0;
						/* nch was already released above */
						goto double_break;
					}
				}
			}
			cache_get_maybe_shared(&mp->mnt_ncmountpt, &nch,
					       wantsexcllock(nd, ptr));

			if (nch.ncp->nc_flag & NCF_UNRESOLVED) {
				/* Unresolved: redo the sequence with the VFS busied */
				if (vfs_do_busy == 0) {
					vfs_do_busy = 1;
					goto again;
				}
				error = VFS_ROOT(mp, &tdp);
				vfs_unbusy(mp);
				vfs_do_busy = 0;
				if (error) {
					cache_dropmount(mp);
					break;
				}
				cache_setvp(&nch, tdp);
				vput(tdp);
			}
			if (vfs_do_busy)
				vfs_unbusy(mp);
			cache_dropmount(mp);
		}

		if (error) {
			cache_put(&nch);
double_break:
			break;
		}

		/*
		 * Skip any slashes to get to the next element. If there
		 * are any slashes at all the current element must be a
		 * directory or, in the create case, intended to become a directory.
		 * If it isn't we break without incrementing ptr and fall through
		 * to the failure case below.
		 */
		while (*ptr == '/') {
			if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 &&
			    !(nd->nl_flags & NLC_WILLBEDIR)
			) {
				break;
			}
			++ptr;
		}

		/*
		 * Continuation case: additional elements and the current
		 * element is a directory.
		 */
		if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) {
			if (nd->nl_flags & NLC_NCDIR) {
				cache_drop_ncdir(&nd->nl_nch);
				nd->nl_flags &= ~NLC_NCDIR;
			} else {
				cache_drop(&nd->nl_nch);
			}
			cache_unlock(&nch);
			KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0);
			nd->nl_nch = nch;
			continue;
		}

		/*
		 * Failure case: additional elements and the current element
		 * is not a directory
		 */
		if (*ptr) {
			cache_put(&nch);
			error = ENOTDIR;
			break;
		}

		/*
		 * Successful lookup of last element.
		 *
		 * Check permissions if the target exists. If the target does not
		 * exist directory permissions were already tested in the early
		 * completion code above.
		 *
		 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY
		 * if the file is marked append-only, and NLC_STICKY if the directory
		 * containing the file is sticky.
		 */
		if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) {
			error = naccess(&nch, nd->nl_flags | dflags,
					nd->nl_cred, NULL);
			if (error) {
				cache_put(&nch);
				break;
			}
		}

		/*
		 * Termination: no more elements.
		 *
		 * If NLC_REFDVP is set acquire a referenced parent dvp.
		 */
		if (nd->nl_flags & NLC_REFDVP) {
			cache_lock(&nd->nl_nch);
			error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp);
			cache_unlock(&nd->nl_nch);
			if (error) {
				kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp);
				cache_put(&nch);
				break;
			}
		}
		/* Hand the locked leaf over to nd in place of the parent */
		if (nd->nl_flags & NLC_NCDIR) {
			cache_drop_ncdir(&nd->nl_nch);
			nd->nl_flags &= ~NLC_NCDIR;
		} else {
			cache_drop(&nd->nl_nch);
		}
		nd->nl_nch = nch;
		nd->nl_flags |= NLC_NCPISLOCKED;
		error = 0;
		break;
	}

	/* Per-cpu namecache statistics */
	if (hit)
		++gd->gd_nchstats->ncs_longhits;
	else
		++gd->gd_nchstats->ncs_longmiss;

	if (nd->nl_flags & NLC_NCPISLOCKED)
		KKASSERT(cache_lockstatus(&nd->nl_nch) > 0);

	/*
	 * Retry the whole thing if doretry flag is set, but only once.
	 * autofs(5) may mount another filesystem under its root directory
	 * while resolving a path.
	 *
	 * All flags other than NLC_NCDIR are reset to their value on entry;
	 * NLC_NCDIR keeps its current state.
	 *
	 * NOTE(review): the retry restarts parsing at nd->nl_path but leaves
	 * nd->nl_nch wherever the first pass stopped -- confirm this is
	 * intended for relative paths.
	 */
	if (doretry && !inretry) {
		inretry = TRUE;
		nd->nl_flags &= NLC_NCDIR;
		nd->nl_flags |= saveflag;
		goto nlookup_start;
	}

	/*
	 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit
	 * (ncp->nc_error will be ENOENT), but we will still return an error
	 * code of 0.
	 */
	return(error);
}

/*
 * Resolve a mount point's glue ncp. This ncp creates the illusion
 * of continuity in the namecache tree by connecting the ncp related to the
 * vnode under the mount to the ncp related to the mount's root vnode.
 *
 * If no error occurred a locked, ref'd ncp is stored in *nch.
1025 */ 1026 int 1027 nlookup_mp(struct mount *mp, struct nchandle *nch) 1028 { 1029 struct vnode *vp; 1030 int error; 1031 1032 error = 0; 1033 cache_get(&mp->mnt_ncmountpt, nch); 1034 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 1035 while (vfs_busy(mp, 0)) 1036 ; 1037 error = VFS_ROOT(mp, &vp); 1038 vfs_unbusy(mp); 1039 if (error) { 1040 cache_put(nch); 1041 } else { 1042 cache_setvp(nch, vp); 1043 vput(vp); 1044 } 1045 } 1046 return(error); 1047 } 1048 1049 /* 1050 * Read the contents of a symlink, allocate a path buffer out of the 1051 * namei_oc and initialize the supplied nlcomponent with the result. 1052 * 1053 * If an error occurs no buffer will be allocated or returned in the nlc. 1054 */ 1055 int 1056 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 1057 struct nlcomponent *nlc) 1058 { 1059 struct vnode *vp; 1060 struct iovec aiov; 1061 struct uio auio; 1062 int linklen; 1063 int error; 1064 char *cp; 1065 1066 nlc->nlc_nameptr = NULL; 1067 nlc->nlc_namelen = 0; 1068 if (nch->ncp->nc_vp == NULL) 1069 return(ENOENT); 1070 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 1071 return(error); 1072 cp = objcache_get(namei_oc, M_WAITOK); 1073 aiov.iov_base = cp; 1074 aiov.iov_len = MAXPATHLEN; 1075 auio.uio_iov = &aiov; 1076 auio.uio_iovcnt = 1; 1077 auio.uio_offset = 0; 1078 auio.uio_rw = UIO_READ; 1079 auio.uio_segflg = UIO_SYSSPACE; 1080 auio.uio_td = nd->nl_td; 1081 auio.uio_resid = MAXPATHLEN - 1; 1082 error = VOP_READLINK(vp, &auio, nd->nl_cred); 1083 if (error) 1084 goto fail; 1085 linklen = MAXPATHLEN - 1 - auio.uio_resid; 1086 if (varsym_enable) { 1087 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 1088 if (linklen < 0) { 1089 error = ENAMETOOLONG; 1090 goto fail; 1091 } 1092 } 1093 cp[linklen] = 0; 1094 nlc->nlc_nameptr = cp; 1095 nlc->nlc_namelen = linklen; 1096 vput(vp); 1097 return(0); 1098 fail: 1099 objcache_put(namei_oc, cp); 1100 vput(vp); 1101 return(error); 1102 } 1103 1104 /* 1105 * Check access [XXX cache 
vattr!] [XXX quota] 1106 * 1107 * Generally check the NLC_* access bits. All specified bits must pass 1108 * for this function to return 0. 1109 * 1110 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 1111 * access, otherwise it must exist. No error is returned in this case. 1112 * 1113 * The file must not exist if NLC_EXCL is specified. 1114 * 1115 * Directory permissions in general are tested for NLC_CREATE if the file 1116 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 1117 * whether the file exists or not. 1118 * 1119 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 1120 * the latter is only tested if the target exists. 1121 * 1122 * The passed ncp must be referenced and locked. If it is already resolved 1123 * it may be locked shared but otherwise should be locked exclusively. 1124 */ 1125 1126 #define S_WXOK_MASK (S_IRUSR|S_IXUSR|S_IRGRP|S_IXGRP|S_IROTH|S_IXOTH) 1127 1128 static int 1129 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) 1130 { 1131 struct vnode *vp; 1132 struct vattr va; 1133 struct namecache *ncp; 1134 int error; 1135 int cflags; 1136 1137 KKASSERT(cache_lockstatus(nch) > 0); 1138 1139 ncp = nch->ncp; 1140 if (ncp->nc_flag & NCF_UNRESOLVED) { 1141 cache_resolve(nch, cred); 1142 ncp = nch->ncp; 1143 } 1144 error = ncp->nc_error; 1145 1146 /* 1147 * Directory permissions checks. Silently ignore ENOENT if these 1148 * tests pass. It isn't an error. 1149 * 1150 * We can safely resolve ncp->nc_parent because ncp is currently 1151 * locked. 
1152 */ 1153 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 1154 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 1155 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 1156 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 1157 (nflags & NLC_RENAME_DST) 1158 ) { 1159 struct nchandle par; 1160 1161 if ((par.ncp = ncp->nc_parent) == NULL) { 1162 if (error != EAGAIN) 1163 error = EINVAL; 1164 } else if (error == 0 || error == ENOENT) { 1165 par.mount = nch->mount; 1166 cache_hold(&par); 1167 cache_lock_maybe_shared(&par, 0); 1168 error = naccess(&par, NLC_WRITE, cred, NULL); 1169 cache_put(&par); 1170 } 1171 } 1172 } 1173 1174 /* 1175 * NLC_EXCL check. Target file must not exist. 1176 */ 1177 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 1178 error = EEXIST; 1179 1180 /* 1181 * Try to short-cut the vnode operation for intermediate directory 1182 * components. This is a major SMP win because it avoids having 1183 * to execute a lot of code for intermediate directory components, 1184 * including shared refs and locks on intermediate directory vnodes. 1185 */ 1186 if (error == 0 && nflags == NLC_EXEC && (ncp->nc_flag & NCF_WXOK)) { 1187 return 0; 1188 } 1189 1190 /* 1191 * Get the vnode attributes so we can do the rest of our checks. 1192 * 1193 * NOTE: We only call naccess_va() if the target exists. 1194 */ 1195 if (error == 0) { 1196 error = cache_vget(nch, cred, LK_SHARED, &vp); 1197 if (error == ENOENT) { 1198 /* 1199 * Silently zero-out ENOENT if creating or renaming 1200 * (rename target). It isn't an error. 1201 */ 1202 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1203 error = 0; 1204 } else if (error == 0) { 1205 /* 1206 * Get the vnode attributes and check for illegal O_TRUNC 1207 * requests and read-only mounts. 1208 * 1209 * NOTE: You can still open devices on read-only mounts for 1210 * writing. 1211 * 1212 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1213 * check on the parent directory above. 
1214 * 1215 * XXX cache the va in the namecache or in the vnode 1216 */ 1217 error = VOP_GETATTR(vp, &va); 1218 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1219 switch(va.va_type) { 1220 case VREG: 1221 case VDATABASE: 1222 case VCHR: 1223 case VBLK: 1224 case VFIFO: 1225 break; 1226 case VDIR: 1227 error = EISDIR; 1228 break; 1229 default: 1230 error = EINVAL; 1231 break; 1232 } 1233 } 1234 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1235 (vp->v_mount->mnt_flag & MNT_RDONLY) 1236 ) { 1237 switch(va.va_type) { 1238 case VDIR: 1239 case VLNK: 1240 case VREG: 1241 case VDATABASE: 1242 error = EROFS; 1243 break; 1244 default: 1245 break; 1246 } 1247 } 1248 vput(vp); 1249 1250 /* 1251 * Check permissions based on file attributes. The passed 1252 * flags (*nflagsp) are modified with feedback based on 1253 * special attributes and requirements. 1254 */ 1255 if (error == 0) { 1256 /* 1257 * Adjust the returned (*nflagsp) if non-NULL. 1258 */ 1259 if (nflagsp) { 1260 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 1261 *nflagsp |= NLC_STICKY; 1262 if (va.va_flags & APPEND) 1263 *nflagsp |= NLC_APPENDONLY; 1264 if (va.va_flags & IMMUTABLE) 1265 *nflagsp |= NLC_IMMUTABLE; 1266 } 1267 1268 /* 1269 * NCF_WXOK can be set for world-searchable directories. 1270 * 1271 * XXX When we implement capabilities this code would also 1272 * need a cap check, or only set the flag if there are no 1273 * capabilities. 1274 */ 1275 cflags = 0; 1276 if (va.va_type == VDIR && 1277 (va.va_mode & S_WXOK_MASK) == S_WXOK_MASK) { 1278 cflags |= NCF_WXOK; 1279 } 1280 1281 /* 1282 * Track swapcache management flags in the namecache. 1283 * 1284 * Calculate the flags based on the current vattr info 1285 * and recalculate the inherited flags from the parent 1286 * (the original cache linkage may have occurred without 1287 * getattrs and thus have stale flags). 
1288 */ 1289 if (va.va_flags & SF_NOCACHE) 1290 cflags |= NCF_SF_NOCACHE; 1291 if (va.va_flags & UF_CACHE) 1292 cflags |= NCF_UF_CACHE; 1293 if (ncp->nc_parent) { 1294 if (ncp->nc_parent->nc_flag & 1295 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1296 cflags |= NCF_SF_PNOCACHE; 1297 } 1298 if (ncp->nc_parent->nc_flag & 1299 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1300 cflags |= NCF_UF_PCACHE; 1301 } 1302 } 1303 1304 /* 1305 * We're not supposed to update nc_flag when holding a shared 1306 * lock, but we allow the case for certain flags. Note that 1307 * holding an exclusive lock allows updating nc_flag without 1308 * atomics. nc_flag is not allowe to be updated at all unless 1309 * a shared or exclusive lock is held. 1310 */ 1311 atomic_clear_short(&ncp->nc_flag, 1312 (NCF_SF_NOCACHE | NCF_UF_CACHE | 1313 NCF_SF_PNOCACHE | NCF_UF_PCACHE | 1314 NCF_WXOK) & ~cflags); 1315 atomic_set_short(&ncp->nc_flag, cflags); 1316 1317 /* 1318 * Process general access. 1319 */ 1320 error = naccess_va(&va, nflags, cred); 1321 } 1322 } 1323 } 1324 return(error); 1325 } 1326 1327 /* 1328 * Check the requested access against the given vattr using cred. 1329 */ 1330 int 1331 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 1332 { 1333 int i; 1334 int vmode; 1335 1336 /* 1337 * Test the immutable bit. Creations, deletions, renames (source 1338 * or destination) are not allowed. chown/chmod/other is also not 1339 * allowed but is handled by SETATTR. Hardlinks to the immutable 1340 * file are allowed. 1341 * 1342 * If the directory is set to immutable then creations, deletions, 1343 * renames (source or dest) and hardlinks to files within the directory 1344 * are not allowed, and regular files opened through the directory may 1345 * not be written to or truncated (unless a special device). 1346 * 1347 * NOTE! New hardlinks to immutable files work but new hardlinks to 1348 * files, immutable or not, sitting inside an immutable directory are 1349 * not allowed. 
As always if the file is hardlinked via some other 1350 * path additional hardlinks may be possible even if the file is marked 1351 * immutable. The sysop needs to create a closure by checking the hard 1352 * link count. Once closure is achieved you are good, and security 1353 * scripts should check link counts anyway. 1354 * 1355 * Writes and truncations are only allowed on special devices. 1356 */ 1357 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1358 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1359 return (EPERM); 1360 if (nflags & (NLC_CREATE | NLC_DELETE | 1361 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1362 return (EPERM); 1363 } 1364 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1365 switch(va->va_type) { 1366 case VDIR: 1367 return (EISDIR); 1368 case VLNK: 1369 case VREG: 1370 case VDATABASE: 1371 return (EPERM); 1372 default: 1373 break; 1374 } 1375 } 1376 } 1377 1378 /* 1379 * Test the no-unlink and append-only bits for opens, rename targets, 1380 * and deletions. These bits are not tested for creations or 1381 * rename sources. 1382 * 1383 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1384 * If you do not wish this you must also set NOUNLINK. 1385 * 1386 * If the governing directory is marked APPEND-only it implies 1387 * NOUNLINK for all entries in the directory. 1388 */ 1389 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1390 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1391 ) { 1392 return (EPERM); 1393 } 1394 1395 /* 1396 * A file marked append-only may not be deleted but can be renamed. 1397 */ 1398 if ((va->va_flags & APPEND) && 1399 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1400 ) { 1401 return (EPERM); 1402 } 1403 1404 /* 1405 * A file marked append-only which is opened for writing must also 1406 * be opened O_APPEND. 
1407 */ 1408 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1409 if (nflags & NLC_TRUNCATE) 1410 return (EPERM); 1411 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1412 if ((nflags & NLC_APPEND) == 0) 1413 return (EPERM); 1414 } 1415 } 1416 1417 /* 1418 * root gets universal access 1419 */ 1420 if (cred->cr_uid == 0) 1421 return(0); 1422 1423 /* 1424 * Check owner perms. 1425 * 1426 * If NLC_OWN is set the owner of the file is allowed no matter when 1427 * the owner-mode bits say (utimes). 1428 */ 1429 vmode = 0; 1430 if (nflags & NLC_READ) 1431 vmode |= S_IRUSR; 1432 if (nflags & NLC_WRITE) 1433 vmode |= S_IWUSR; 1434 if (nflags & NLC_EXEC) 1435 vmode |= S_IXUSR; 1436 1437 if (cred->cr_uid == va->va_uid) { 1438 if ((nflags & NLC_OWN) == 0) { 1439 if ((vmode & va->va_mode) != vmode) 1440 return(EACCES); 1441 } 1442 return(0); 1443 } 1444 1445 /* 1446 * If NLC_STICKY is set only the owner may delete or rename a file. 1447 * This bit is typically set on /tmp. 1448 * 1449 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1450 * the specific delete or rename case. For deletions and renames we 1451 * usually just care about directory permissions, not file permissions. 1452 */ 1453 if ((nflags & NLC_STICKY) && 1454 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1455 return(EACCES); 1456 } 1457 1458 /* 1459 * Check group perms 1460 */ 1461 vmode >>= 3; 1462 for (i = 0; i < cred->cr_ngroups; ++i) { 1463 if (va->va_gid == cred->cr_groups[i]) { 1464 if ((vmode & va->va_mode) != vmode) 1465 return(EACCES); 1466 return(0); 1467 } 1468 } 1469 1470 /* 1471 * Check world perms 1472 */ 1473 vmode >>= 3; 1474 if ((vmode & va->va_mode) != vmode) 1475 return(EACCES); 1476 return(0); 1477 } 1478 1479