/*
 * Copyright (c) 2004 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/vfs_nlookup.c,v 1.25 2008/07/19 04:43:33 dillon Exp $
 */
/*
 * nlookup() is the 'new' namei interface.
Rather then return directory and 38 * leaf vnodes (in various lock states) the new interface instead deals in 39 * namecache records. Namecache records may represent both a positive or 40 * a negative hit. The namespace is locked via the namecache record instead 41 * of via the vnode, and only the leaf namecache record (representing the 42 * filename) needs to be locked. 43 * 44 * This greatly improves filesystem parallelism and is a huge simplification 45 * of the API verses the old vnode locking / namei scheme. 46 * 47 * Filesystems must actively control the caching aspects of the namecache, 48 * and since namecache pointers are used as handles they are non-optional 49 * even for filesystems which do not generally wish to cache things. It is 50 * intended that a separate cache coherency API will be constructed to handle 51 * these issues. 52 */ 53 54 #include "opt_ktrace.h" 55 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/kernel.h> 59 #include <sys/vnode.h> 60 #include <sys/mount.h> 61 #include <sys/filedesc.h> 62 #include <sys/proc.h> 63 #include <sys/namei.h> 64 #include <sys/nlookup.h> 65 #include <sys/malloc.h> 66 #include <sys/stat.h> 67 #include <sys/objcache.h> 68 #include <sys/file.h> 69 70 #ifdef KTRACE 71 #include <sys/ktrace.h> 72 #endif 73 74 /* 75 * Initialize a nlookup() structure, early error return for copyin faults 76 * or a degenerate empty string (which is not allowed). 77 * 78 * The first process proc0's credentials are used if the calling thread 79 * is not associated with a process context. 80 */ 81 int 82 nlookup_init(struct nlookupdata *nd, 83 const char *path, enum uio_seg seg, int flags) 84 { 85 size_t pathlen; 86 struct proc *p; 87 thread_t td; 88 int error; 89 90 td = curthread; 91 p = td->td_proc; 92 93 /* 94 * note: the pathlen set by copy*str() includes the terminating \0. 
95 */ 96 bzero(nd, sizeof(struct nlookupdata)); 97 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 98 nd->nl_flags |= NLC_HASBUF; 99 if (seg == UIO_SYSSPACE) 100 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 101 else 102 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 103 104 /* 105 * Don't allow empty pathnames. 106 * POSIX.1 requirement: "" is not a vaild file name. 107 */ 108 if (error == 0 && pathlen <= 1) 109 error = ENOENT; 110 111 if (error == 0) { 112 if (p && p->p_fd) { 113 cache_copy(&p->p_fd->fd_ncdir, &nd->nl_nch); 114 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 115 if (p->p_fd->fd_njdir.ncp) 116 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 117 nd->nl_cred = crhold(p->p_ucred); 118 } else { 119 cache_copy(&rootnch, &nd->nl_nch); 120 cache_copy(&nd->nl_nch, &nd->nl_rootnch); 121 cache_copy(&nd->nl_nch, &nd->nl_jailnch); 122 nd->nl_cred = crhold(proc0.p_ucred); 123 } 124 nd->nl_td = td; 125 nd->nl_flags |= flags; 126 } else { 127 nlookup_done(nd); 128 } 129 return(error); 130 } 131 132 133 /* 134 * nlookup_init() for "at" family of syscalls. 135 * 136 * Works similarly to nlookup_init() but if path is relative and fd is not 137 * AT_FDCWD, path is interpreted relative to the directory pointed to by fd. 138 * In this case, the file entry pointed to by fd is ref'ed and returned in 139 * *fpp. 140 * 141 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 142 * and release the ref to the file entry. 
143 */ 144 int 145 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 146 const char *path, enum uio_seg seg, int flags) 147 { 148 struct thread *td = curthread; 149 struct proc *p = td->td_proc; 150 struct file* fp; 151 struct vnode *vp; 152 int error; 153 154 *fpp = NULL; 155 156 if ((error = nlookup_init(nd, path, seg, flags)) != 0) { 157 return (error); 158 } 159 160 if (nd->nl_path[0] != '/' && fd != AT_FDCWD) { 161 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 162 goto done; 163 vp = (struct vnode*)fp->f_data; 164 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 165 fdrop(fp); 166 fp = NULL; 167 error = ENOTDIR; 168 goto done; 169 } 170 cache_drop(&nd->nl_nch); 171 cache_copy(&fp->f_nchandle, &nd->nl_nch); 172 *fpp = fp; 173 } 174 175 176 done: 177 if (error) 178 nlookup_done(nd); 179 return (error); 180 181 } 182 183 /* 184 * This works similarly to nlookup_init() but does not assume a process 185 * context. rootnch is always chosen for the root directory and the cred 186 * and starting directory are supplied in arguments. 187 */ 188 int 189 nlookup_init_raw(struct nlookupdata *nd, 190 const char *path, enum uio_seg seg, int flags, 191 struct ucred *cred, struct nchandle *ncstart) 192 { 193 size_t pathlen; 194 thread_t td; 195 int error; 196 197 td = curthread; 198 199 bzero(nd, sizeof(struct nlookupdata)); 200 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 201 nd->nl_flags |= NLC_HASBUF; 202 if (seg == UIO_SYSSPACE) 203 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 204 else 205 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 206 207 /* 208 * Don't allow empty pathnames. 209 * POSIX.1 requirement: "" is not a vaild file name. 
210 */ 211 if (error == 0 && pathlen <= 1) 212 error = ENOENT; 213 214 if (error == 0) { 215 cache_copy(ncstart, &nd->nl_nch); 216 cache_copy(&rootnch, &nd->nl_rootnch); 217 cache_copy(&rootnch, &nd->nl_jailnch); 218 nd->nl_cred = crhold(cred); 219 nd->nl_td = td; 220 nd->nl_flags |= flags; 221 } else { 222 nlookup_done(nd); 223 } 224 return(error); 225 } 226 227 /* 228 * Set a different credential; this credential will be used by future 229 * operations performed on nd.nl_open_vp and nlookupdata structure. 230 */ 231 void 232 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 233 { 234 KKASSERT(nd->nl_cred != NULL); 235 236 if (nd->nl_cred != cred) { 237 cred = crhold(cred); 238 crfree(nd->nl_cred); 239 nd->nl_cred = cred; 240 } 241 } 242 243 /* 244 * Cleanup a nlookupdata structure after we are through with it. This may 245 * be called on any nlookupdata structure initialized with nlookup_init(). 246 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 247 * returns an error, even if as a consumer you believe you have taken all 248 * dynamic elements out of the nlookupdata structure. 
249 */ 250 void 251 nlookup_done(struct nlookupdata *nd) 252 { 253 if (nd->nl_nch.ncp) { 254 if (nd->nl_flags & NLC_NCPISLOCKED) { 255 nd->nl_flags &= ~NLC_NCPISLOCKED; 256 cache_unlock(&nd->nl_nch); 257 } 258 cache_drop(&nd->nl_nch); 259 } 260 if (nd->nl_rootnch.ncp) 261 cache_drop(&nd->nl_rootnch); 262 if (nd->nl_jailnch.ncp) 263 cache_drop(&nd->nl_jailnch); 264 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 265 objcache_put(namei_oc, nd->nl_path); 266 nd->nl_path = NULL; 267 } 268 if (nd->nl_cred) { 269 crfree(nd->nl_cred); 270 nd->nl_cred = NULL; 271 } 272 if (nd->nl_open_vp) { 273 if (nd->nl_flags & NLC_LOCKVP) { 274 vn_unlock(nd->nl_open_vp); 275 nd->nl_flags &= ~NLC_LOCKVP; 276 } 277 vn_close(nd->nl_open_vp, nd->nl_vp_fmode); 278 nd->nl_open_vp = NULL; 279 } 280 if (nd->nl_dvp) { 281 vrele(nd->nl_dvp); 282 nd->nl_dvp = NULL; 283 } 284 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 285 } 286 287 /* 288 * Works similarly to nlookup_done() when nd initialized with 289 * nlookup_init_at(). 290 */ 291 void 292 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 293 { 294 nlookup_done(nd); 295 if (fp != NULL) 296 fdrop(fp); 297 } 298 299 void 300 nlookup_zero(struct nlookupdata *nd) 301 { 302 bzero(nd, sizeof(struct nlookupdata)); 303 } 304 305 /* 306 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 307 * if an error occured. 308 * 309 * Note that the returned ncp is not checked for permissions, though VEXEC 310 * is checked on the directory path leading up to the result. The caller 311 * must call naccess() to check the permissions of the returned leaf. 
312 */ 313 struct nchandle 314 nlookup_simple(const char *str, enum uio_seg seg, 315 int niflags, int *error) 316 { 317 struct nlookupdata nd; 318 struct nchandle nch; 319 320 *error = nlookup_init(&nd, str, seg, niflags); 321 if (*error == 0) { 322 if ((*error = nlookup(&nd)) == 0) { 323 nch = nd.nl_nch; /* keep hold ref from structure */ 324 cache_zero(&nd.nl_nch); /* and NULL out */ 325 } else { 326 cache_zero(&nch); 327 } 328 nlookup_done(&nd); 329 } else { 330 cache_zero(&nch); 331 } 332 return(nch); 333 } 334 335 /* 336 * Do a generic nlookup. Note that the passed nd is not nlookup_done()'d 337 * on return, even if an error occurs. If no error occurs the returned 338 * nl_nch is always referenced and locked, otherwise it may or may not be. 339 * 340 * Intermediate directory elements, including the current directory, require 341 * execute (search) permission. nlookup does not examine the access 342 * permissions on the returned element. 343 * 344 * If NLC_CREATE is set the last directory must allow node creation, 345 * and an error code of 0 will be returned for a non-existant 346 * target (not ENOENT). 347 * 348 * If NLC_RENAME_DST is set the last directory mut allow node deletion, 349 * plus the sticky check is made, and an error code of 0 will be returned 350 * for a non-existant target (not ENOENT). 351 * 352 * If NLC_DELETE is set the last directory mut allow node deletion, 353 * plus the sticky check is made. 354 * 355 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode 356 * of the returned entry. The vnode will be referenced, but not locked, 357 * and will be released by nlookup_done() along with everything else. 
358 */ 359 int 360 nlookup(struct nlookupdata *nd) 361 { 362 struct nlcomponent nlc; 363 struct nchandle nch; 364 struct nchandle par; 365 struct mount *mp; 366 int wasdotordotdot; 367 char *ptr; 368 char *xptr; 369 int error; 370 int len; 371 int dflags; 372 373 #ifdef KTRACE 374 if (KTRPOINT(nd->nl_td, KTR_NAMEI)) 375 ktrnamei(nd->nl_td->td_lwp, nd->nl_path); 376 #endif 377 bzero(&nlc, sizeof(nlc)); 378 379 /* 380 * Setup for the loop. The current working namecache element must 381 * be in a refd + unlocked state. This typically the case on entry except 382 * when stringing nlookup()'s along in a chain, since nlookup() always 383 * returns nl_nch in a locked state. 384 */ 385 nd->nl_loopcnt = 0; 386 if (nd->nl_flags & NLC_NCPISLOCKED) { 387 nd->nl_flags &= ~NLC_NCPISLOCKED; 388 cache_unlock(&nd->nl_nch); 389 } 390 if (nd->nl_dvp ) { 391 vrele(nd->nl_dvp); 392 nd->nl_dvp = NULL; 393 } 394 ptr = nd->nl_path; 395 396 /* 397 * Loop on the path components. At the top of the loop nd->nl_nch 398 * is ref'd and unlocked and represents our current position. 399 */ 400 for (;;) { 401 /* 402 * Check if the root directory should replace the current 403 * directory. This is done at the start of a translation 404 * or after a symbolic link has been found. In other cases 405 * ptr will never be pointing at a '/'. 406 */ 407 if (*ptr == '/') { 408 do { 409 ++ptr; 410 } while (*ptr == '/'); 411 cache_copy(&nd->nl_rootnch, &nch); 412 cache_drop(&nd->nl_nch); 413 nd->nl_nch = nch; 414 415 /* 416 * Fast-track termination. There is no parent directory of 417 * the root in the same mount from the point of view of 418 * the caller so return EPERM if NLC_REFDVP is specified. 419 * e.g. 'rmdir /' is not allowed. 420 */ 421 if (*ptr == 0) { 422 if (nd->nl_flags & NLC_REFDVP) { 423 error = EPERM; 424 } else { 425 cache_lock(&nd->nl_nch); 426 nd->nl_flags |= NLC_NCPISLOCKED; 427 error = 0; 428 } 429 break; 430 } 431 continue; 432 } 433 434 /* 435 * Check directory search permissions. 
436 */ 437 dflags = 0; 438 if ((error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags)) != 0) 439 break; 440 441 /* 442 * Extract the path component. Path components are limited to 443 * 255 characters. 444 */ 445 nlc.nlc_nameptr = ptr; 446 while (*ptr && *ptr != '/') 447 ++ptr; 448 nlc.nlc_namelen = ptr - nlc.nlc_nameptr; 449 if (nlc.nlc_namelen >= 256) { 450 error = ENAMETOOLONG; 451 break; 452 } 453 454 /* 455 * Lookup the path component in the cache, creating an unresolved 456 * entry if necessary. We have to handle "." and ".." as special 457 * cases. 458 * 459 * When handling ".." we have to detect a traversal back through a 460 * mount point. If we are at the root, ".." just returns the root. 461 * 462 * When handling "." or ".." we also have to recalculate dflags 463 * since our dflags will be for some sub-directory instead of the 464 * parent dir. 465 * 466 * This subsection returns a locked, refd 'nch' unless it errors out. 467 * The namecache topology is not allowed to be disconnected, so 468 * encountering a NULL parent will generate EINVAL. This typically 469 * occurs when a directory is removed out from under a process. 470 */ 471 if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') { 472 cache_get(&nd->nl_nch, &nch); 473 wasdotordotdot = 1; 474 } else if (nlc.nlc_namelen == 2 && 475 nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') { 476 if (nd->nl_nch.mount == nd->nl_rootnch.mount && 477 nd->nl_nch.ncp == nd->nl_rootnch.ncp 478 ) { 479 /* 480 * ".." at the root returns the root 481 */ 482 cache_get(&nd->nl_nch, &nch); 483 } else { 484 /* 485 * Locate the parent ncp. If we are at the root of a 486 * filesystem mount we have to skip to the mounted-on 487 * point in the underlying filesystem. 
488 */ 489 nch = nd->nl_nch; 490 while (nch.ncp == nch.mount->mnt_ncmountpt.ncp) 491 nch = nch.mount->mnt_ncmounton; 492 nch.ncp = nch.ncp->nc_parent; 493 KKASSERT(nch.ncp != NULL); 494 cache_get(&nch, &nch); 495 } 496 wasdotordotdot = 2; 497 } else { 498 nch = cache_nlookup(&nd->nl_nch, &nlc); 499 while ((error = cache_resolve(&nch, nd->nl_cred)) == EAGAIN) { 500 kprintf("[diagnostic] nlookup: relookup %*.*s\n", 501 nch.ncp->nc_nlen, nch.ncp->nc_nlen, nch.ncp->nc_name); 502 cache_put(&nch); 503 nch = cache_nlookup(&nd->nl_nch, &nlc); 504 } 505 wasdotordotdot = 0; 506 } 507 508 /* 509 * If the last component was "." or ".." our dflags no longer 510 * represents the parent directory and we have to explicitly 511 * look it up. 512 */ 513 if (wasdotordotdot && error == 0) { 514 dflags = 0; 515 if ((par.ncp = nch.ncp->nc_parent) != NULL) { 516 par.mount = nch.mount; 517 cache_hold(&par); 518 dflags = 0; 519 error = naccess(&par, 0, nd->nl_cred, &dflags); 520 cache_drop(&par); 521 } 522 } 523 524 /* 525 * [end of subsection] ncp is locked and ref'd. nd->nl_nch is ref'd 526 */ 527 528 /* 529 * Resolve the namespace if necessary. The ncp returned by 530 * cache_nlookup() is referenced and locked. 531 * 532 * XXX neither '.' nor '..' should return EAGAIN since they were 533 * previously resolved and thus cannot be newly created ncp's. 534 */ 535 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 536 error = cache_resolve(&nch, nd->nl_cred); 537 KKASSERT(error != EAGAIN); 538 } else { 539 error = nch.ncp->nc_error; 540 } 541 542 /* 543 * Early completion. ENOENT is not an error if this is the last 544 * component and NLC_CREATE or NLC_RENAME (rename target) was 545 * requested. Note that ncp->nc_error is left as ENOENT in that 546 * case, which we check later on. 547 * 548 * Also handle invalid '.' or '..' components terminating a path 549 * for a create/rename/delete. The standard requires this and pax 550 * pretty stupidly depends on it. 
551 */ 552 for (xptr = ptr; *xptr == '/'; ++xptr) 553 ; 554 if (*xptr == 0) { 555 if (error == ENOENT && 556 (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST)) 557 ) { 558 if (nd->nl_flags & NLC_NFS_RDONLY) { 559 error = EROFS; 560 } else { 561 error = naccess(&nch, nd->nl_flags | dflags, 562 nd->nl_cred, NULL); 563 } 564 } 565 if (error == 0 && wasdotordotdot && 566 (nd->nl_flags & (NLC_CREATE | NLC_DELETE | 567 NLC_RENAME_SRC | NLC_RENAME_DST))) { 568 /* 569 * POSIX junk 570 */ 571 if (nd->nl_flags & NLC_CREATE) 572 error = EEXIST; 573 else if (nd->nl_flags & NLC_DELETE) 574 error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY; 575 else 576 error = EINVAL; 577 } 578 } 579 580 /* 581 * Early completion on error. 582 */ 583 if (error) { 584 cache_put(&nch); 585 break; 586 } 587 588 /* 589 * If the element is a symlink and it is either not the last 590 * element or it is the last element and we are allowed to 591 * follow symlinks, resolve the symlink. 592 */ 593 if ((nch.ncp->nc_flag & NCF_ISSYMLINK) && 594 (*ptr || (nd->nl_flags & NLC_FOLLOW)) 595 ) { 596 if (nd->nl_loopcnt++ >= MAXSYMLINKS) { 597 error = ELOOP; 598 cache_put(&nch); 599 break; 600 } 601 error = nreadsymlink(nd, &nch, &nlc); 602 cache_put(&nch); 603 if (error) 604 break; 605 606 /* 607 * Concatenate trailing path elements onto the returned symlink. 608 * Note that if the path component (ptr) is not exhausted, it 609 * will being with a '/', so we do not have to add another one. 610 * 611 * The symlink may not be empty. 612 */ 613 len = strlen(ptr); 614 if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) { 615 error = nlc.nlc_namelen ? 
ENAMETOOLONG : ENOENT; 616 objcache_put(namei_oc, nlc.nlc_nameptr); 617 break; 618 } 619 bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1); 620 if (nd->nl_flags & NLC_HASBUF) 621 objcache_put(namei_oc, nd->nl_path); 622 nd->nl_path = nlc.nlc_nameptr; 623 nd->nl_flags |= NLC_HASBUF; 624 ptr = nd->nl_path; 625 626 /* 627 * Go back up to the top to resolve any initial '/'s in the 628 * symlink. 629 */ 630 continue; 631 } 632 633 /* 634 * If the element is a directory and we are crossing a mount point, 635 * Locate the mount. 636 */ 637 while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 638 (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 && 639 (mp = cache_findmount(&nch)) != NULL 640 ) { 641 struct vnode *tdp; 642 643 cache_put(&nch); 644 cache_get(&mp->mnt_ncmountpt, &nch); 645 646 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 647 while (vfs_busy(mp, 0)) 648 ; 649 error = VFS_ROOT(mp, &tdp); 650 vfs_unbusy(mp); 651 if (error) 652 break; 653 cache_setvp(&nch, tdp); 654 vput(tdp); 655 } 656 } 657 if (error) { 658 cache_put(&nch); 659 break; 660 } 661 662 /* 663 * Skip any slashes to get to the next element. If there 664 * are any slashes at all the current element must be a 665 * directory or, in the create case, intended to become a directory. 666 * If it isn't we break without incrementing ptr and fall through 667 * to the failure case below. 668 */ 669 while (*ptr == '/') { 670 if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 && 671 !(nd->nl_flags & NLC_WILLBEDIR) 672 ) { 673 break; 674 } 675 ++ptr; 676 } 677 678 /* 679 * Continuation case: additional elements and the current 680 * element is a directory. 681 */ 682 if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) { 683 cache_drop(&nd->nl_nch); 684 cache_unlock(&nch); 685 nd->nl_nch = nch; 686 continue; 687 } 688 689 /* 690 * Failure case: additional elements and the current element 691 * is not a directory 692 */ 693 if (*ptr) { 694 cache_put(&nch); 695 error = ENOTDIR; 696 break; 697 } 698 699 /* 700 * Successful lookup of last element. 
701 * 702 * Check permissions if the target exists. If the target does not 703 * exist directory permissions were already tested in the early 704 * completion code above. 705 * 706 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY 707 * if the file is marked append-only, and NLC_STICKY if the directory 708 * containing the file is sticky. 709 */ 710 if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) { 711 error = naccess(&nch, nd->nl_flags | dflags, 712 nd->nl_cred, NULL); 713 if (error) { 714 cache_put(&nch); 715 break; 716 } 717 } 718 719 /* 720 * Termination: no more elements. 721 * 722 * If NLC_REFDVP is set acquire a referenced parent dvp. 723 */ 724 if (nd->nl_flags & NLC_REFDVP) { 725 error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp); 726 if (error) { 727 kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp); 728 cache_put(&nch); 729 break; 730 } 731 } 732 cache_drop(&nd->nl_nch); 733 nd->nl_nch = nch; 734 nd->nl_flags |= NLC_NCPISLOCKED; 735 error = 0; 736 break; 737 } 738 739 /* 740 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit 741 * (ncp->nc_error will be ENOENT), but we will still return an error 742 * code of 0. 743 */ 744 return(error); 745 } 746 747 /* 748 * Resolve a mount point's glue ncp. This ncp connects creates the illusion 749 * of continuity in the namecache tree by connecting the ncp related to the 750 * vnode under the mount to the ncp related to the mount's root vnode. 751 * 752 * If no error occured a locked, ref'd ncp is stored in *ncpp. 
753 */ 754 int 755 nlookup_mp(struct mount *mp, struct nchandle *nch) 756 { 757 struct vnode *vp; 758 int error; 759 760 error = 0; 761 cache_get(&mp->mnt_ncmountpt, nch); 762 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 763 while (vfs_busy(mp, 0)) 764 ; 765 error = VFS_ROOT(mp, &vp); 766 vfs_unbusy(mp); 767 if (error) { 768 cache_put(nch); 769 } else { 770 cache_setvp(nch, vp); 771 vput(vp); 772 } 773 } 774 return(error); 775 } 776 777 /* 778 * Read the contents of a symlink, allocate a path buffer out of the 779 * namei_oc and initialize the supplied nlcomponent with the result. 780 * 781 * If an error occurs no buffer will be allocated or returned in the nlc. 782 */ 783 int 784 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 785 struct nlcomponent *nlc) 786 { 787 struct vnode *vp; 788 struct iovec aiov; 789 struct uio auio; 790 int linklen; 791 int error; 792 char *cp; 793 794 nlc->nlc_nameptr = NULL; 795 nlc->nlc_namelen = 0; 796 if (nch->ncp->nc_vp == NULL) 797 return(ENOENT); 798 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 799 return(error); 800 cp = objcache_get(namei_oc, M_WAITOK); 801 aiov.iov_base = cp; 802 aiov.iov_len = MAXPATHLEN; 803 auio.uio_iov = &aiov; 804 auio.uio_iovcnt = 1; 805 auio.uio_offset = 0; 806 auio.uio_rw = UIO_READ; 807 auio.uio_segflg = UIO_SYSSPACE; 808 auio.uio_td = nd->nl_td; 809 auio.uio_resid = MAXPATHLEN - 1; 810 error = VOP_READLINK(vp, &auio, nd->nl_cred); 811 if (error) 812 goto fail; 813 linklen = MAXPATHLEN - 1 - auio.uio_resid; 814 if (varsym_enable) { 815 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 816 if (linklen < 0) { 817 error = ENAMETOOLONG; 818 goto fail; 819 } 820 } 821 cp[linklen] = 0; 822 nlc->nlc_nameptr = cp; 823 nlc->nlc_namelen = linklen; 824 vput(vp); 825 return(0); 826 fail: 827 objcache_put(namei_oc, cp); 828 vput(vp); 829 return(error); 830 } 831 832 /* 833 * Check access [XXX cache vattr!] [XXX quota] 834 * 835 * Generally check the NLC_* access bits. 
All specified bits must pass 836 * for this function to return 0. 837 * 838 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 839 * access, otherwise it must exist. No error is returned in this case. 840 * 841 * The file must not exist if NLC_EXCL is specified. 842 * 843 * Directory permissions in general are tested for NLC_CREATE if the file 844 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 845 * whether the file exists or not. 846 * 847 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 848 * the latter is only tested if the target exists. 849 * 850 * The passed ncp may or may not be locked. The caller should use a 851 * locked ncp on leaf lookups, especially for NLC_CREATE, NLC_RENAME_DST, 852 * NLC_DELETE, and NLC_EXCL checks. 853 */ 854 int 855 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) 856 { 857 struct nchandle par; 858 struct vnode *vp; 859 struct vattr va; 860 int error; 861 int sticky; 862 863 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 864 cache_lock(nch); 865 cache_resolve(nch, cred); 866 cache_unlock(nch); 867 } 868 error = nch->ncp->nc_error; 869 870 /* 871 * Directory permissions checks. Silently ignore ENOENT if these 872 * tests pass. It isn't an error. 873 */ 874 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 875 if (((nflags & NLC_CREATE) && nch->ncp->nc_vp == NULL) || 876 ((nflags & NLC_DELETE) && nch->ncp->nc_vp != NULL) || 877 ((nflags & NLC_RENAME_SRC) && nch->ncp->nc_vp != NULL) || 878 (nflags & NLC_RENAME_DST) 879 ) { 880 if ((par.ncp = nch->ncp->nc_parent) == NULL) { 881 if (error != EAGAIN) 882 error = EINVAL; 883 } else if (error == 0 || error == ENOENT) { 884 par.mount = nch->mount; 885 cache_hold(&par); 886 sticky = 0; 887 error = naccess(&par, NLC_WRITE, cred, NULL); 888 cache_drop(&par); 889 } 890 } 891 } 892 893 /* 894 * NLC_EXCL check. Target file must not exist. 
895 */ 896 if (error == 0 && (nflags & NLC_EXCL) && nch->ncp->nc_vp != NULL) 897 error = EEXIST; 898 899 /* 900 * Get the vnode attributes so we can do the rest of our checks. 901 * 902 * NOTE: We only call naccess_va() if the target exists. 903 */ 904 if (error == 0) { 905 error = cache_vget(nch, cred, LK_SHARED, &vp); 906 if (error == ENOENT) { 907 /* 908 * Silently zero-out ENOENT if creating or renaming 909 * (rename target). It isn't an error. 910 */ 911 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 912 error = 0; 913 } else if (error == 0) { 914 /* 915 * Get the vnode attributes and check for illegal O_TRUNC 916 * requests and read-only mounts. 917 * 918 * NOTE: You can still open devices on read-only mounts for 919 * writing. 920 * 921 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 922 * check on the parent directory above. 923 * 924 * XXX cache the va in the namecache or in the vnode 925 */ 926 error = VOP_GETATTR(vp, &va); 927 if (error == 0 && (nflags & NLC_TRUNCATE)) { 928 switch(va.va_type) { 929 case VREG: 930 case VDATABASE: 931 case VCHR: 932 case VBLK: 933 case VFIFO: 934 break; 935 case VDIR: 936 error = EISDIR; 937 break; 938 default: 939 error = EINVAL; 940 break; 941 } 942 } 943 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 944 (vp->v_mount->mnt_flag & MNT_RDONLY) 945 ) { 946 switch(va.va_type) { 947 case VDIR: 948 case VLNK: 949 case VREG: 950 case VDATABASE: 951 error = EROFS; 952 break; 953 default: 954 break; 955 } 956 } 957 vput(vp); 958 959 /* 960 * Check permissions based on file attributes. The passed 961 * flags (*nflagsp) are modified with feedback based on 962 * special attributes and requirements. 963 */ 964 if (error == 0) { 965 /* 966 * Adjust the returned (*nflagsp) if non-NULL. 
967 */ 968 if (nflagsp) { 969 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 970 *nflagsp |= NLC_STICKY; 971 if (va.va_flags & APPEND) 972 *nflagsp |= NLC_APPENDONLY; 973 if (va.va_flags & IMMUTABLE) 974 *nflagsp |= NLC_IMMUTABLE; 975 } 976 977 /* 978 * Process general access. 979 */ 980 error = naccess_va(&va, nflags, cred); 981 } 982 } 983 } 984 return(error); 985 } 986 987 /* 988 * Check the requested access against the given vattr using cred. 989 */ 990 int 991 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 992 { 993 int i; 994 int vmode; 995 996 /* 997 * Test the immutable bit. Creations, deletions, renames (source 998 * or destination) are not allowed. chown/chmod/other is also not 999 * allowed but is handled by SETATTR. Hardlinks to the immutable 1000 * file are allowed. 1001 * 1002 * If the directory is set to immutable then creations, deletions, 1003 * renames (source or dest) and hardlinks to files within the directory 1004 * are not allowed, and regular files opened through the directory may 1005 * not be written to or truncated (unless a special device). 1006 * 1007 * NOTE! New hardlinks to immutable files work but new hardlinks to 1008 * files, immutable or not, sitting inside an immutable directory are 1009 * not allowed. As always if the file is hardlinked via some other 1010 * path additional hardlinks may be possible even if the file is marked 1011 * immutable. The sysop needs to create a closure by checking the hard 1012 * link count. Once closure is achieved you are good, and security 1013 * scripts should check link counts anyway. 1014 * 1015 * Writes and truncations are only allowed on special devices. 
1016 */ 1017 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1018 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1019 return (EPERM); 1020 if (nflags & (NLC_CREATE | NLC_DELETE | 1021 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1022 return (EPERM); 1023 } 1024 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1025 switch(va->va_type) { 1026 case VDIR: 1027 return (EISDIR); 1028 case VLNK: 1029 case VREG: 1030 case VDATABASE: 1031 return (EPERM); 1032 default: 1033 break; 1034 } 1035 } 1036 } 1037 1038 /* 1039 * Test the no-unlink and append-only bits for opens, rename targets, 1040 * and deletions. These bits are not tested for creations or 1041 * rename sources. 1042 * 1043 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1044 * If you do not wish this you must also set NOUNLINK. 1045 * 1046 * If the governing directory is marked APPEND-only it implies 1047 * NOUNLINK for all entries in the directory. 1048 */ 1049 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1050 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1051 ) { 1052 return (EPERM); 1053 } 1054 1055 /* 1056 * A file marked append-only may not be deleted but can be renamed. 1057 */ 1058 if ((va->va_flags & APPEND) && 1059 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1060 ) { 1061 return (EPERM); 1062 } 1063 1064 /* 1065 * A file marked append-only which is opened for writing must also 1066 * be opened O_APPEND. 1067 */ 1068 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1069 if (nflags & NLC_TRUNCATE) 1070 return (EPERM); 1071 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1072 if ((nflags & NLC_APPEND) == 0) 1073 return (EPERM); 1074 } 1075 } 1076 1077 /* 1078 * root gets universal access 1079 */ 1080 if (cred->cr_uid == 0) 1081 return(0); 1082 1083 /* 1084 * Check owner perms. 1085 * 1086 * If NLC_OWN is set the owner of the file is allowed no matter when 1087 * the owner-mode bits say (utimes). 
1088 */ 1089 vmode = 0; 1090 if (nflags & NLC_READ) 1091 vmode |= S_IRUSR; 1092 if (nflags & NLC_WRITE) 1093 vmode |= S_IWUSR; 1094 if (nflags & NLC_EXEC) 1095 vmode |= S_IXUSR; 1096 1097 if (cred->cr_uid == va->va_uid) { 1098 if ((nflags & NLC_OWN) == 0) { 1099 if ((vmode & va->va_mode) != vmode) 1100 return(EACCES); 1101 } 1102 return(0); 1103 } 1104 1105 /* 1106 * If NLC_STICKY is set only the owner may delete or rename a file. 1107 * This bit is typically set on /tmp. 1108 * 1109 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1110 * the specific delete or rename case. For deletions and renames we 1111 * usually just care about directory permissions, not file permissions. 1112 */ 1113 if ((nflags & NLC_STICKY) && 1114 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1115 return(EACCES); 1116 } 1117 1118 /* 1119 * Check group perms 1120 */ 1121 vmode >>= 3; 1122 for (i = 0; i < cred->cr_ngroups; ++i) { 1123 if (va->va_gid == cred->cr_groups[i]) { 1124 if ((vmode & va->va_mode) != vmode) 1125 return(EACCES); 1126 return(0); 1127 } 1128 } 1129 1130 /* 1131 * Check world perms 1132 */ 1133 vmode >>= 3; 1134 if ((vmode & va->va_mode) != vmode) 1135 return(EACCES); 1136 return(0); 1137 } 1138 1139