1 /* 2 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/kern/vfs_nlookup.c,v 1.25 2008/07/19 04:43:33 dillon Exp $ 35 */ 36 /* 37 * nlookup() is the 'new' namei interface. Rather then return directory and 38 * leaf vnodes (in various lock states) the new interface instead deals in 39 * namecache records. Namecache records may represent both a positive or 40 * a negative hit. The namespace is locked via the namecache record instead 41 * of via the vnode, and only the leaf namecache record (representing the 42 * filename) needs to be locked. 43 * 44 * This greatly improves filesystem parallelism and is a huge simplification 45 * of the API verses the old vnode locking / namei scheme. 46 * 47 * Filesystems must actively control the caching aspects of the namecache, 48 * and since namecache pointers are used as handles they are non-optional 49 * even for filesystems which do not generally wish to cache things. It is 50 * intended that a separate cache coherency API will be constructed to handle 51 * these issues. 52 */ 53 54 #include "opt_ktrace.h" 55 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/kernel.h> 59 #include <sys/vnode.h> 60 #include <sys/mount.h> 61 #include <sys/filedesc.h> 62 #include <sys/proc.h> 63 #include <sys/namei.h> 64 #include <sys/nlookup.h> 65 #include <sys/malloc.h> 66 #include <sys/stat.h> 67 #include <sys/objcache.h> 68 69 #ifdef KTRACE 70 #include <sys/ktrace.h> 71 #endif 72 73 /* 74 * Initialize a nlookup() structure, early error return for copyin faults 75 * or a degenerate empty string (which is not allowed). 76 * 77 * The first process proc0's credentials are used if the calling thread 78 * is not associated with a process context. 79 */ 80 int 81 nlookup_init(struct nlookupdata *nd, 82 const char *path, enum uio_seg seg, int flags) 83 { 84 size_t pathlen; 85 struct proc *p; 86 thread_t td; 87 int error; 88 89 td = curthread; 90 p = td->td_proc; 91 92 /* 93 * note: the pathlen set by copy*str() includes the terminating \0. 94 */ 95 bzero(nd, sizeof(struct nlookupdata)); 96 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 97 nd->nl_flags |= NLC_HASBUF; 98 if (seg == UIO_SYSSPACE) 99 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 100 else 101 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 102 103 /* 104 * Don't allow empty pathnames. 105 * POSIX.1 requirement: "" is not a vaild file name. 106 */ 107 if (error == 0 && pathlen <= 1) 108 error = ENOENT; 109 110 if (error == 0) { 111 if (p && p->p_fd) { 112 cache_copy(&p->p_fd->fd_ncdir, &nd->nl_nch); 113 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 114 if (p->p_fd->fd_njdir.ncp) 115 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 116 nd->nl_cred = crhold(p->p_ucred); 117 } else { 118 cache_copy(&rootnch, &nd->nl_nch); 119 cache_copy(&nd->nl_nch, &nd->nl_rootnch); 120 cache_copy(&nd->nl_nch, &nd->nl_jailnch); 121 nd->nl_cred = crhold(proc0.p_ucred); 122 } 123 nd->nl_td = td; 124 nd->nl_flags |= flags; 125 } else { 126 nlookup_done(nd); 127 } 128 return(error); 129 } 130 131 /* 132 * This works similarly to nlookup_init() but does not assume a process 133 * context. rootnch is always chosen for the root directory and the cred 134 * and starting directory are supplied in arguments. 135 */ 136 int 137 nlookup_init_raw(struct nlookupdata *nd, 138 const char *path, enum uio_seg seg, int flags, 139 struct ucred *cred, struct nchandle *ncstart) 140 { 141 size_t pathlen; 142 thread_t td; 143 int error; 144 145 td = curthread; 146 147 bzero(nd, sizeof(struct nlookupdata)); 148 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 149 nd->nl_flags |= NLC_HASBUF; 150 if (seg == UIO_SYSSPACE) 151 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 152 else 153 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 154 155 /* 156 * Don't allow empty pathnames. 157 * POSIX.1 requirement: "" is not a vaild file name. 158 */ 159 if (error == 0 && pathlen <= 1) 160 error = ENOENT; 161 162 if (error == 0) { 163 cache_copy(ncstart, &nd->nl_nch); 164 cache_copy(&rootnch, &nd->nl_rootnch); 165 cache_copy(&rootnch, &nd->nl_jailnch); 166 nd->nl_cred = crhold(cred); 167 nd->nl_td = td; 168 nd->nl_flags |= flags; 169 } else { 170 nlookup_done(nd); 171 } 172 return(error); 173 } 174 175 /* 176 * Set a different credential; this credential will be used by future 177 * operations performed on nd.nl_open_vp and nlookupdata structure. 178 */ 179 void 180 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 181 { 182 KKASSERT(nd->nl_cred != NULL); 183 184 if (nd->nl_cred != cred) { 185 cred = crhold(cred); 186 crfree(nd->nl_cred); 187 nd->nl_cred = cred; 188 } 189 } 190 191 /* 192 * Cleanup a nlookupdata structure after we are through with it. This may 193 * be called on any nlookupdata structure initialized with nlookup_init(). 194 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 195 * returns an error, even if as a consumer you believe you have taken all 196 * dynamic elements out of the nlookupdata structure. 197 */ 198 void 199 nlookup_done(struct nlookupdata *nd) 200 { 201 if (nd->nl_nch.ncp) { 202 if (nd->nl_flags & NLC_NCPISLOCKED) { 203 nd->nl_flags &= ~NLC_NCPISLOCKED; 204 cache_unlock(&nd->nl_nch); 205 } 206 cache_drop(&nd->nl_nch); 207 } 208 if (nd->nl_rootnch.ncp) 209 cache_drop(&nd->nl_rootnch); 210 if (nd->nl_jailnch.ncp) 211 cache_drop(&nd->nl_jailnch); 212 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 213 objcache_put(namei_oc, nd->nl_path); 214 nd->nl_path = NULL; 215 } 216 if (nd->nl_cred) { 217 crfree(nd->nl_cred); 218 nd->nl_cred = NULL; 219 } 220 if (nd->nl_open_vp) { 221 if (nd->nl_flags & NLC_LOCKVP) { 222 vn_unlock(nd->nl_open_vp); 223 nd->nl_flags &= ~NLC_LOCKVP; 224 } 225 vn_close(nd->nl_open_vp, nd->nl_vp_fmode); 226 nd->nl_open_vp = NULL; 227 } 228 if (nd->nl_dvp) { 229 vrele(nd->nl_dvp); 230 nd->nl_dvp = NULL; 231 } 232 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 233 } 234 235 void 236 nlookup_zero(struct nlookupdata *nd) 237 { 238 bzero(nd, sizeof(struct nlookupdata)); 239 } 240 241 /* 242 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 243 * if an error occured. 244 * 245 * Note that the returned ncp is not checked for permissions, though VEXEC 246 * is checked on the directory path leading up to the result. The caller 247 * must call naccess() to check the permissions of the returned leaf. 248 */ 249 struct nchandle 250 nlookup_simple(const char *str, enum uio_seg seg, 251 int niflags, int *error) 252 { 253 struct nlookupdata nd; 254 struct nchandle nch; 255 256 *error = nlookup_init(&nd, str, seg, niflags); 257 if (*error == 0) { 258 if ((*error = nlookup(&nd)) == 0) { 259 nch = nd.nl_nch; /* keep hold ref from structure */ 260 cache_zero(&nd.nl_nch); /* and NULL out */ 261 } else { 262 cache_zero(&nch); 263 } 264 nlookup_done(&nd); 265 } else { 266 cache_zero(&nch); 267 } 268 return(nch); 269 } 270 271 /* 272 * Do a generic nlookup. Note that the passed nd is not nlookup_done()'d 273 * on return, even if an error occurs. If no error occurs the returned 274 * nl_nch is always referenced and locked, otherwise it may or may not be. 275 * 276 * Intermediate directory elements, including the current directory, require 277 * execute (search) permission. nlookup does not examine the access 278 * permissions on the returned element. 279 * 280 * If NLC_CREATE is set the last directory must allow node creation, 281 * and an error code of 0 will be returned for a non-existant 282 * target (not ENOENT). 283 * 284 * If NLC_RENAME_DST is set the last directory mut allow node deletion, 285 * plus the sticky check is made, and an error code of 0 will be returned 286 * for a non-existant target (not ENOENT). 287 * 288 * If NLC_DELETE is set the last directory mut allow node deletion, 289 * plus the sticky check is made. 290 * 291 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode 292 * of the returned entry. The vnode will be referenced, but not locked, 293 * and will be released by nlookup_done() along with everything else. 294 */ 295 int 296 nlookup(struct nlookupdata *nd) 297 { 298 struct nlcomponent nlc; 299 struct nchandle nch; 300 struct nchandle par; 301 struct mount *mp; 302 int wasdotordotdot; 303 char *ptr; 304 char *xptr; 305 int error; 306 int len; 307 int dflags; 308 309 #ifdef KTRACE 310 if (KTRPOINT(nd->nl_td, KTR_NAMEI)) 311 ktrnamei(nd->nl_td->td_lwp, nd->nl_path); 312 #endif 313 bzero(&nlc, sizeof(nlc)); 314 315 /* 316 * Setup for the loop. The current working namecache element must 317 * be in a refd + unlocked state. This typically the case on entry except 318 * when stringing nlookup()'s along in a chain, since nlookup() always 319 * returns nl_nch in a locked state. 320 */ 321 nd->nl_loopcnt = 0; 322 if (nd->nl_flags & NLC_NCPISLOCKED) { 323 nd->nl_flags &= ~NLC_NCPISLOCKED; 324 cache_unlock(&nd->nl_nch); 325 } 326 if (nd->nl_dvp ) { 327 vrele(nd->nl_dvp); 328 nd->nl_dvp = NULL; 329 } 330 ptr = nd->nl_path; 331 332 /* 333 * Loop on the path components. At the top of the loop nd->nl_nch 334 * is ref'd and unlocked and represents our current position. 335 */ 336 for (;;) { 337 /* 338 * Check if the root directory should replace the current 339 * directory. This is done at the start of a translation 340 * or after a symbolic link has been found. In other cases 341 * ptr will never be pointing at a '/'. 342 */ 343 if (*ptr == '/') { 344 do { 345 ++ptr; 346 } while (*ptr == '/'); 347 cache_copy(&nd->nl_rootnch, &nch); 348 cache_drop(&nd->nl_nch); 349 nd->nl_nch = nch; 350 351 /* 352 * Fast-track termination. There is no parent directory of 353 * the root in the same mount from the point of view of 354 * the caller so return EPERM if NLC_REFDVP is specified. 355 * e.g. 'rmdir /' is not allowed. 356 */ 357 if (*ptr == 0) { 358 if (nd->nl_flags & NLC_REFDVP) { 359 error = EPERM; 360 } else { 361 cache_lock(&nd->nl_nch); 362 nd->nl_flags |= NLC_NCPISLOCKED; 363 error = 0; 364 } 365 break; 366 } 367 continue; 368 } 369 370 /* 371 * Check directory search permissions. 372 */ 373 dflags = 0; 374 if ((error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags)) != 0) 375 break; 376 377 /* 378 * Extract the path component. Path components are limited to 379 * 255 characters. 380 */ 381 nlc.nlc_nameptr = ptr; 382 while (*ptr && *ptr != '/') 383 ++ptr; 384 nlc.nlc_namelen = ptr - nlc.nlc_nameptr; 385 if (nlc.nlc_namelen >= 256) { 386 error = ENAMETOOLONG; 387 break; 388 } 389 390 /* 391 * Lookup the path component in the cache, creating an unresolved 392 * entry if necessary. We have to handle "." and ".." as special 393 * cases. 394 * 395 * When handling ".." we have to detect a traversal back through a 396 * mount point. If we are at the root, ".." just returns the root. 397 * 398 * When handling "." or ".." we also have to recalculate dflags 399 * since our dflags will be for some sub-directory instead of the 400 * parent dir. 401 * 402 * This subsection returns a locked, refd 'nch' unless it errors out. 403 * The namecache topology is not allowed to be disconnected, so 404 * encountering a NULL parent will generate EINVAL. This typically 405 * occurs when a directory is removed out from under a process. 406 */ 407 if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') { 408 cache_get(&nd->nl_nch, &nch); 409 wasdotordotdot = 1; 410 } else if (nlc.nlc_namelen == 2 && 411 nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') { 412 if (nd->nl_nch.mount == nd->nl_rootnch.mount && 413 nd->nl_nch.ncp == nd->nl_rootnch.ncp 414 ) { 415 /* 416 * ".." at the root returns the root 417 */ 418 cache_get(&nd->nl_nch, &nch); 419 } else { 420 /* 421 * Locate the parent ncp. If we are at the root of a 422 * filesystem mount we have to skip to the mounted-on 423 * point in the underlying filesystem. 424 */ 425 nch = nd->nl_nch; 426 while (nch.ncp == nch.mount->mnt_ncmountpt.ncp) 427 nch = nch.mount->mnt_ncmounton; 428 nch.ncp = nch.ncp->nc_parent; 429 KKASSERT(nch.ncp != NULL); 430 cache_get(&nch, &nch); 431 } 432 wasdotordotdot = 2; 433 } else { 434 nch = cache_nlookup(&nd->nl_nch, &nlc); 435 while ((error = cache_resolve(&nch, nd->nl_cred)) == EAGAIN) { 436 kprintf("[diagnostic] nlookup: relookup %*.*s\n", 437 nch.ncp->nc_nlen, nch.ncp->nc_nlen, nch.ncp->nc_name); 438 cache_put(&nch); 439 nch = cache_nlookup(&nd->nl_nch, &nlc); 440 } 441 wasdotordotdot = 0; 442 } 443 444 /* 445 * If the last component was "." or ".." our dflags no longer 446 * represents the parent directory and we have to explicitly 447 * look it up. 448 */ 449 if (wasdotordotdot && error == 0) { 450 dflags = 0; 451 if ((par.ncp = nch.ncp->nc_parent) != NULL) { 452 par.mount = nch.mount; 453 cache_hold(&par); 454 dflags = 0; 455 error = naccess(&par, 0, nd->nl_cred, &dflags); 456 cache_drop(&par); 457 } 458 } 459 460 /* 461 * [end of subsection] ncp is locked and ref'd. nd->nl_nch is ref'd 462 */ 463 464 /* 465 * Resolve the namespace if necessary. The ncp returned by 466 * cache_nlookup() is referenced and locked. 467 * 468 * XXX neither '.' nor '..' should return EAGAIN since they were 469 * previously resolved and thus cannot be newly created ncp's. 470 */ 471 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 472 error = cache_resolve(&nch, nd->nl_cred); 473 KKASSERT(error != EAGAIN); 474 } else { 475 error = nch.ncp->nc_error; 476 } 477 478 /* 479 * Early completion. ENOENT is not an error if this is the last 480 * component and NLC_CREATE or NLC_RENAME (rename target) was 481 * requested. Note that ncp->nc_error is left as ENOENT in that 482 * case, which we check later on. 483 * 484 * Also handle invalid '.' or '..' components terminating a path 485 * for a create/rename/delete. The standard requires this and pax 486 * pretty stupidly depends on it. 487 */ 488 for (xptr = ptr; *xptr == '/'; ++xptr) 489 ; 490 if (*xptr == 0) { 491 if (error == ENOENT && 492 (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST)) 493 ) { 494 if (nd->nl_flags & NLC_NFS_RDONLY) { 495 error = EROFS; 496 } else { 497 error = naccess(&nch, nd->nl_flags | dflags, 498 nd->nl_cred, NULL); 499 } 500 } 501 if (error == 0 && wasdotordotdot && 502 (nd->nl_flags & (NLC_CREATE | NLC_DELETE | 503 NLC_RENAME_SRC | NLC_RENAME_DST))) { 504 /* 505 * POSIX junk 506 */ 507 if (nd->nl_flags & NLC_CREATE) 508 error = EEXIST; 509 else if (nd->nl_flags & NLC_DELETE) 510 error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY; 511 else 512 error = EINVAL; 513 } 514 } 515 516 /* 517 * Early completion on error. 518 */ 519 if (error) { 520 cache_put(&nch); 521 break; 522 } 523 524 /* 525 * If the element is a symlink and it is either not the last 526 * element or it is the last element and we are allowed to 527 * follow symlinks, resolve the symlink. 528 */ 529 if ((nch.ncp->nc_flag & NCF_ISSYMLINK) && 530 (*ptr || (nd->nl_flags & NLC_FOLLOW)) 531 ) { 532 if (nd->nl_loopcnt++ >= MAXSYMLINKS) { 533 error = ELOOP; 534 cache_put(&nch); 535 break; 536 } 537 error = nreadsymlink(nd, &nch, &nlc); 538 cache_put(&nch); 539 if (error) 540 break; 541 542 /* 543 * Concatenate trailing path elements onto the returned symlink. 544 * Note that if the path component (ptr) is not exhausted, it 545 * will being with a '/', so we do not have to add another one. 546 * 547 * The symlink may not be empty. 548 */ 549 len = strlen(ptr); 550 if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) { 551 error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT; 552 objcache_put(namei_oc, nlc.nlc_nameptr); 553 break; 554 } 555 bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1); 556 if (nd->nl_flags & NLC_HASBUF) 557 objcache_put(namei_oc, nd->nl_path); 558 nd->nl_path = nlc.nlc_nameptr; 559 nd->nl_flags |= NLC_HASBUF; 560 ptr = nd->nl_path; 561 562 /* 563 * Go back up to the top to resolve any initial '/'s in the 564 * symlink. 565 */ 566 continue; 567 } 568 569 /* 570 * If the element is a directory and we are crossing a mount point, 571 * Locate the mount. 572 */ 573 while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 574 (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 && 575 (mp = cache_findmount(&nch)) != NULL 576 ) { 577 struct vnode *tdp; 578 579 cache_put(&nch); 580 cache_get(&mp->mnt_ncmountpt, &nch); 581 582 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 583 while (vfs_busy(mp, 0)) 584 ; 585 error = VFS_ROOT(mp, &tdp); 586 vfs_unbusy(mp); 587 if (error) 588 break; 589 cache_setvp(&nch, tdp); 590 vput(tdp); 591 } 592 } 593 if (error) { 594 cache_put(&nch); 595 break; 596 } 597 598 /* 599 * Skip any slashes to get to the next element. If there 600 * are any slashes at all the current element must be a 601 * directory or, in the create case, intended to become a directory. 602 * If it isn't we break without incrementing ptr and fall through 603 * to the failure case below. 604 */ 605 while (*ptr == '/') { 606 if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 && 607 !(nd->nl_flags & NLC_WILLBEDIR) 608 ) { 609 break; 610 } 611 ++ptr; 612 } 613 614 /* 615 * Continuation case: additional elements and the current 616 * element is a directory. 617 */ 618 if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) { 619 cache_drop(&nd->nl_nch); 620 cache_unlock(&nch); 621 nd->nl_nch = nch; 622 continue; 623 } 624 625 /* 626 * Failure case: additional elements and the current element 627 * is not a directory 628 */ 629 if (*ptr) { 630 cache_put(&nch); 631 error = ENOTDIR; 632 break; 633 } 634 635 /* 636 * Successful lookup of last element. 637 * 638 * Check permissions if the target exists. If the target does not 639 * exist directory permissions were already tested in the early 640 * completion code above. 641 * 642 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY 643 * if the file is marked append-only, and NLC_STICKY if the directory 644 * containing the file is sticky. 645 */ 646 if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) { 647 error = naccess(&nch, nd->nl_flags | dflags, 648 nd->nl_cred, NULL); 649 if (error) { 650 cache_put(&nch); 651 break; 652 } 653 } 654 655 /* 656 * Termination: no more elements. 657 * 658 * If NLC_REFDVP is set acquire a referenced parent dvp. 659 */ 660 if (nd->nl_flags & NLC_REFDVP) { 661 error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp); 662 if (error) { 663 kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp); 664 cache_put(&nch); 665 break; 666 } 667 } 668 cache_drop(&nd->nl_nch); 669 nd->nl_nch = nch; 670 nd->nl_flags |= NLC_NCPISLOCKED; 671 error = 0; 672 break; 673 } 674 675 /* 676 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit 677 * (ncp->nc_error will be ENOENT), but we will still return an error 678 * code of 0. 679 */ 680 return(error); 681 } 682 683 /* 684 * Resolve a mount point's glue ncp. This ncp connects creates the illusion 685 * of continuity in the namecache tree by connecting the ncp related to the 686 * vnode under the mount to the ncp related to the mount's root vnode. 687 * 688 * If no error occured a locked, ref'd ncp is stored in *ncpp. 689 */ 690 int 691 nlookup_mp(struct mount *mp, struct nchandle *nch) 692 { 693 struct vnode *vp; 694 int error; 695 696 error = 0; 697 cache_get(&mp->mnt_ncmountpt, nch); 698 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 699 while (vfs_busy(mp, 0)) 700 ; 701 error = VFS_ROOT(mp, &vp); 702 vfs_unbusy(mp); 703 if (error) { 704 cache_put(nch); 705 } else { 706 cache_setvp(nch, vp); 707 vput(vp); 708 } 709 } 710 return(error); 711 } 712 713 /* 714 * Read the contents of a symlink, allocate a path buffer out of the 715 * namei_oc and initialize the supplied nlcomponent with the result. 716 * 717 * If an error occurs no buffer will be allocated or returned in the nlc. 718 */ 719 int 720 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 721 struct nlcomponent *nlc) 722 { 723 struct vnode *vp; 724 struct iovec aiov; 725 struct uio auio; 726 int linklen; 727 int error; 728 char *cp; 729 730 nlc->nlc_nameptr = NULL; 731 nlc->nlc_namelen = 0; 732 if (nch->ncp->nc_vp == NULL) 733 return(ENOENT); 734 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 735 return(error); 736 cp = objcache_get(namei_oc, M_WAITOK); 737 aiov.iov_base = cp; 738 aiov.iov_len = MAXPATHLEN; 739 auio.uio_iov = &aiov; 740 auio.uio_iovcnt = 1; 741 auio.uio_offset = 0; 742 auio.uio_rw = UIO_READ; 743 auio.uio_segflg = UIO_SYSSPACE; 744 auio.uio_td = nd->nl_td; 745 auio.uio_resid = MAXPATHLEN - 1; 746 error = VOP_READLINK(vp, &auio, nd->nl_cred); 747 if (error) 748 goto fail; 749 linklen = MAXPATHLEN - 1 - auio.uio_resid; 750 if (varsym_enable) { 751 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 752 if (linklen < 0) { 753 error = ENAMETOOLONG; 754 goto fail; 755 } 756 } 757 cp[linklen] = 0; 758 nlc->nlc_nameptr = cp; 759 nlc->nlc_namelen = linklen; 760 vput(vp); 761 return(0); 762 fail: 763 objcache_put(namei_oc, cp); 764 vput(vp); 765 return(error); 766 } 767 768 /* 769 * Check access [XXX cache vattr!] [XXX quota] 770 * 771 * Generally check the NLC_* access bits. All specified bits must pass 772 * for this function to return 0. 773 * 774 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 775 * access, otherwise it must exist. No error is returned in this case. 776 * 777 * The file must not exist if NLC_EXCL is specified. 778 * 779 * Directory permissions in general are tested for NLC_CREATE if the file 780 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 781 * whether the file exists or not. 782 * 783 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 784 * the latter is only tested if the target exists. 785 * 786 * The passed ncp may or may not be locked. The caller should use a 787 * locked ncp on leaf lookups, especially for NLC_CREATE, NLC_RENAME_DST, 788 * NLC_DELETE, and NLC_EXCL checks. 789 */ 790 int 791 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) 792 { 793 struct nchandle par; 794 struct vnode *vp; 795 struct vattr va; 796 int error; 797 int sticky; 798 799 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 800 cache_lock(nch); 801 cache_resolve(nch, cred); 802 cache_unlock(nch); 803 } 804 error = nch->ncp->nc_error; 805 806 /* 807 * Directory permissions checks. Silently ignore ENOENT if these 808 * tests pass. It isn't an error. 809 */ 810 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 811 if (((nflags & NLC_CREATE) && nch->ncp->nc_vp == NULL) || 812 ((nflags & NLC_DELETE) && nch->ncp->nc_vp != NULL) || 813 ((nflags & NLC_RENAME_SRC) && nch->ncp->nc_vp != NULL) || 814 (nflags & NLC_RENAME_DST) 815 ) { 816 if ((par.ncp = nch->ncp->nc_parent) == NULL) { 817 if (error != EAGAIN) 818 error = EINVAL; 819 } else if (error == 0 || error == ENOENT) { 820 par.mount = nch->mount; 821 cache_hold(&par); 822 sticky = 0; 823 error = naccess(&par, NLC_WRITE, cred, NULL); 824 cache_drop(&par); 825 } 826 } 827 } 828 829 /* 830 * NLC_EXCL check. Target file must not exist. 831 */ 832 if (error == 0 && (nflags & NLC_EXCL) && nch->ncp->nc_vp != NULL) 833 error = EEXIST; 834 835 /* 836 * Get the vnode attributes so we can do the rest of our checks. 837 * 838 * NOTE: We only call naccess_va() if the target exists. 839 */ 840 if (error == 0) { 841 error = cache_vget(nch, cred, LK_SHARED, &vp); 842 if (error == ENOENT) { 843 /* 844 * Silently zero-out ENOENT if creating or renaming 845 * (rename target). It isn't an error. 846 */ 847 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 848 error = 0; 849 } else if (error == 0) { 850 /* 851 * Get the vnode attributes and check for illegal O_TRUNC 852 * requests and read-only mounts. 853 * 854 * NOTE: You can still open devices on read-only mounts for 855 * writing. 856 * 857 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 858 * check on the parent directory above. 859 * 860 * XXX cache the va in the namecache or in the vnode 861 */ 862 error = VOP_GETATTR(vp, &va); 863 if (error == 0 && (nflags & NLC_TRUNCATE)) { 864 switch(va.va_type) { 865 case VREG: 866 case VDATABASE: 867 case VCHR: 868 case VBLK: 869 break; 870 case VDIR: 871 error = EISDIR; 872 break; 873 default: 874 error = EINVAL; 875 break; 876 } 877 } 878 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 879 (vp->v_mount->mnt_flag & MNT_RDONLY) 880 ) { 881 switch(va.va_type) { 882 case VDIR: 883 case VLNK: 884 case VREG: 885 case VDATABASE: 886 error = EROFS; 887 break; 888 default: 889 break; 890 } 891 } 892 vput(vp); 893 894 /* 895 * Check permissions based on file attributes. The passed 896 * flags (*nflagsp) are modified with feedback based on 897 * special attributes and requirements. 898 */ 899 if (error == 0) { 900 /* 901 * Adjust the returned (*nflagsp) if non-NULL. 902 */ 903 if (nflagsp) { 904 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 905 *nflagsp |= NLC_STICKY; 906 if (va.va_flags & APPEND) 907 *nflagsp |= NLC_APPENDONLY; 908 if (va.va_flags & IMMUTABLE) 909 *nflagsp |= NLC_IMMUTABLE; 910 } 911 912 /* 913 * Process general access. 914 */ 915 error = naccess_va(&va, nflags, cred); 916 } 917 } 918 } 919 return(error); 920 } 921 922 /* 923 * Check the requested access against the given vattr using cred. 924 */ 925 int 926 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 927 { 928 int i; 929 int vmode; 930 931 /* 932 * Test the immutable bit. Creations, deletions, renames (source 933 * or destination) are not allowed. chown/chmod/other is also not 934 * allowed but is handled by SETATTR. Hardlinks to the immutable 935 * file are allowed. 936 * 937 * If the directory is set to immutable then creations, deletions, 938 * renames (source or dest) and hardlinks to files within the directory 939 * are not allowed, and regular files opened through the directory may 940 * not be written to or truncated (unless a special device). 941 * 942 * NOTE! New hardlinks to immutable files work but new hardlinks to 943 * files, immutable or not, sitting inside an immutable directory are 944 * not allowed. As always if the file is hardlinked via some other 945 * path additional hardlinks may be possible even if the file is marked 946 * immutable. The sysop needs to create a closure by checking the hard 947 * link count. Once closure is achieved you are good, and security 948 * scripts should check link counts anyway. 949 * 950 * Writes and truncations are only allowed on special devices. 951 */ 952 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 953 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 954 return (EPERM); 955 if (nflags & (NLC_CREATE | NLC_DELETE | 956 NLC_RENAME_SRC | NLC_RENAME_DST)) { 957 return (EPERM); 958 } 959 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 960 switch(va->va_type) { 961 case VDIR: 962 return (EISDIR); 963 case VLNK: 964 case VREG: 965 case VDATABASE: 966 return (EPERM); 967 default: 968 break; 969 } 970 } 971 } 972 973 /* 974 * Test the no-unlink and append-only bits for opens, rename targets, 975 * and deletions. These bits are not tested for creations or 976 * rename sources. 977 * 978 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 979 * If you do not wish this you must also set NOUNLINK. 980 * 981 * If the governing directory is marked APPEND-only it implies 982 * NOUNLINK for all entries in the directory. 983 */ 984 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 985 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 986 ) { 987 return (EPERM); 988 } 989 990 /* 991 * A file marked append-only may not be deleted but can be renamed. 992 */ 993 if ((va->va_flags & APPEND) && 994 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 995 ) { 996 return (EPERM); 997 } 998 999 /* 1000 * A file marked append-only which is opened for writing must also 1001 * be opened O_APPEND. 1002 */ 1003 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1004 if (nflags & NLC_TRUNCATE) 1005 return (EPERM); 1006 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1007 if ((nflags & NLC_APPEND) == 0) 1008 return (EPERM); 1009 } 1010 } 1011 1012 /* 1013 * root gets universal access 1014 */ 1015 if (cred->cr_uid == 0) 1016 return(0); 1017 1018 /* 1019 * Check owner perms. 1020 * 1021 * If NLC_OWN is set the owner of the file is allowed no matter when 1022 * the owner-mode bits say (utimes). 1023 */ 1024 vmode = 0; 1025 if (nflags & NLC_READ) 1026 vmode |= S_IRUSR; 1027 if (nflags & NLC_WRITE) 1028 vmode |= S_IWUSR; 1029 if (nflags & NLC_EXEC) 1030 vmode |= S_IXUSR; 1031 1032 if (cred->cr_uid == va->va_uid) { 1033 if ((nflags & NLC_OWN) == 0) { 1034 if ((vmode & va->va_mode) != vmode) 1035 return(EACCES); 1036 } 1037 return(0); 1038 } 1039 1040 /* 1041 * If NLC_STICKY is set only the owner may delete or rename a file. 1042 * This bit is typically set on /tmp. 1043 * 1044 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1045 * the specific delete or rename case. For deletions and renames we 1046 * usually just care about directory permissions, not file permissions. 1047 */ 1048 if ((nflags & NLC_STICKY) && 1049 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1050 return(EACCES); 1051 } 1052 1053 /* 1054 * Check group perms 1055 */ 1056 vmode >>= 3; 1057 for (i = 0; i < cred->cr_ngroups; ++i) { 1058 if (va->va_gid == cred->cr_groups[i]) { 1059 if ((vmode & va->va_mode) != vmode) 1060 return(EACCES); 1061 return(0); 1062 } 1063 } 1064 1065 /* 1066 * Check world perms 1067 */ 1068 vmode >>= 3; 1069 if ((vmode & va->va_mode) != vmode) 1070 return(EACCES); 1071 return(0); 1072 } 1073 1074