1 /* 2 * Copyright (c) 2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sys/kern/vfs_nlookup.c,v 1.25 2008/07/19 04:43:33 dillon Exp $ 35 */ 36 /* 37 * nlookup() is the 'new' namei interface. Rather then return directory and 38 * leaf vnodes (in various lock states) the new interface instead deals in 39 * namecache records. Namecache records may represent both a positive or 40 * a negative hit. The namespace is locked via the namecache record instead 41 * of via the vnode, and only the leaf namecache record (representing the 42 * filename) needs to be locked. 43 * 44 * This greatly improves filesystem parallelism and is a huge simplification 45 * of the API verses the old vnode locking / namei scheme. 46 * 47 * Filesystems must actively control the caching aspects of the namecache, 48 * and since namecache pointers are used as handles they are non-optional 49 * even for filesystems which do not generally wish to cache things. It is 50 * intended that a separate cache coherency API will be constructed to handle 51 * these issues. 52 */ 53 54 #include "opt_ktrace.h" 55 56 #include <sys/param.h> 57 #include <sys/systm.h> 58 #include <sys/kernel.h> 59 #include <sys/vnode.h> 60 #include <sys/mount.h> 61 #include <sys/filedesc.h> 62 #include <sys/proc.h> 63 #include <sys/namei.h> 64 #include <sys/nlookup.h> 65 #include <sys/malloc.h> 66 #include <sys/stat.h> 67 #include <sys/objcache.h> 68 #include <sys/file.h> 69 70 #ifdef KTRACE 71 #include <sys/ktrace.h> 72 #endif 73 74 static int naccess(struct nchandle *nch, int vmode, struct ucred *cred, 75 int *stickyp); 76 77 /* 78 * Initialize a nlookup() structure, early error return for copyin faults 79 * or a degenerate empty string (which is not allowed). 80 * 81 * The first process proc0's credentials are used if the calling thread 82 * is not associated with a process context. 83 * 84 * MPSAFE 85 */ 86 int 87 nlookup_init(struct nlookupdata *nd, 88 const char *path, enum uio_seg seg, int flags) 89 { 90 size_t pathlen; 91 struct proc *p; 92 thread_t td; 93 int error; 94 95 td = curthread; 96 p = td->td_proc; 97 98 /* 99 * note: the pathlen set by copy*str() includes the terminating \0. 100 */ 101 bzero(nd, sizeof(struct nlookupdata)); 102 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 103 nd->nl_flags |= NLC_HASBUF; 104 if (seg == UIO_SYSSPACE) 105 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 106 else 107 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 108 109 /* 110 * Don't allow empty pathnames. 111 * POSIX.1 requirement: "" is not a vaild file name. 112 */ 113 if (error == 0 && pathlen <= 1) 114 error = ENOENT; 115 116 if (error == 0) { 117 if (p && p->p_fd) { 118 cache_copy(&p->p_fd->fd_ncdir, &nd->nl_nch); 119 cache_copy(&p->p_fd->fd_nrdir, &nd->nl_rootnch); 120 if (p->p_fd->fd_njdir.ncp) 121 cache_copy(&p->p_fd->fd_njdir, &nd->nl_jailnch); 122 nd->nl_cred = crhold(p->p_ucred); 123 } else { 124 cache_copy(&rootnch, &nd->nl_nch); 125 cache_copy(&nd->nl_nch, &nd->nl_rootnch); 126 cache_copy(&nd->nl_nch, &nd->nl_jailnch); 127 nd->nl_cred = crhold(proc0.p_ucred); 128 } 129 nd->nl_td = td; 130 nd->nl_flags |= flags; 131 } else { 132 nlookup_done(nd); 133 } 134 return(error); 135 } 136 137 138 /* 139 * nlookup_init() for "at" family of syscalls. 140 * 141 * Works similarly to nlookup_init() but if path is relative and fd is not 142 * AT_FDCWD, path is interpreted relative to the directory pointed to by fd. 143 * In this case, the file entry pointed to by fd is ref'ed and returned in 144 * *fpp. 145 * 146 * If the call succeeds, nlookup_done_at() must be called to clean-up the nd 147 * and release the ref to the file entry. 148 */ 149 int 150 nlookup_init_at(struct nlookupdata *nd, struct file **fpp, int fd, 151 const char *path, enum uio_seg seg, int flags) 152 { 153 struct thread *td = curthread; 154 struct proc *p = td->td_proc; 155 struct file* fp; 156 struct vnode *vp; 157 int error; 158 159 *fpp = NULL; 160 161 if ((error = nlookup_init(nd, path, seg, flags)) != 0) { 162 return (error); 163 } 164 165 if (nd->nl_path[0] != '/' && fd != AT_FDCWD) { 166 if ((error = holdvnode(p->p_fd, fd, &fp)) != 0) 167 goto done; 168 vp = (struct vnode*)fp->f_data; 169 if (vp->v_type != VDIR || fp->f_nchandle.ncp == NULL) { 170 fdrop(fp); 171 fp = NULL; 172 error = ENOTDIR; 173 goto done; 174 } 175 cache_drop(&nd->nl_nch); 176 cache_copy(&fp->f_nchandle, &nd->nl_nch); 177 *fpp = fp; 178 } 179 180 181 done: 182 if (error) 183 nlookup_done(nd); 184 return (error); 185 186 } 187 188 /* 189 * This works similarly to nlookup_init() but does not assume a process 190 * context. rootnch is always chosen for the root directory and the cred 191 * and starting directory are supplied in arguments. 192 */ 193 int 194 nlookup_init_raw(struct nlookupdata *nd, 195 const char *path, enum uio_seg seg, int flags, 196 struct ucred *cred, struct nchandle *ncstart) 197 { 198 size_t pathlen; 199 thread_t td; 200 int error; 201 202 td = curthread; 203 204 bzero(nd, sizeof(struct nlookupdata)); 205 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 206 nd->nl_flags |= NLC_HASBUF; 207 if (seg == UIO_SYSSPACE) 208 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 209 else 210 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 211 212 /* 213 * Don't allow empty pathnames. 214 * POSIX.1 requirement: "" is not a vaild file name. 215 */ 216 if (error == 0 && pathlen <= 1) 217 error = ENOENT; 218 219 if (error == 0) { 220 cache_copy(ncstart, &nd->nl_nch); 221 cache_copy(&rootnch, &nd->nl_rootnch); 222 cache_copy(&rootnch, &nd->nl_jailnch); 223 nd->nl_cred = crhold(cred); 224 nd->nl_td = td; 225 nd->nl_flags |= flags; 226 } else { 227 nlookup_done(nd); 228 } 229 return(error); 230 } 231 232 /* 233 * This works similarly to nlookup_init_raw() but does not rely 234 * on rootnch being initialized yet. 235 */ 236 int 237 nlookup_init_root(struct nlookupdata *nd, 238 const char *path, enum uio_seg seg, int flags, 239 struct ucred *cred, struct nchandle *ncstart, 240 struct nchandle *ncroot) 241 { 242 size_t pathlen; 243 thread_t td; 244 int error; 245 246 td = curthread; 247 248 bzero(nd, sizeof(struct nlookupdata)); 249 nd->nl_path = objcache_get(namei_oc, M_WAITOK); 250 nd->nl_flags |= NLC_HASBUF; 251 if (seg == UIO_SYSSPACE) 252 error = copystr(path, nd->nl_path, MAXPATHLEN, &pathlen); 253 else 254 error = copyinstr(path, nd->nl_path, MAXPATHLEN, &pathlen); 255 256 /* 257 * Don't allow empty pathnames. 258 * POSIX.1 requirement: "" is not a vaild file name. 259 */ 260 if (error == 0 && pathlen <= 1) 261 error = ENOENT; 262 263 if (error == 0) { 264 cache_copy(ncstart, &nd->nl_nch); 265 cache_copy(ncroot, &nd->nl_rootnch); 266 cache_copy(ncroot, &nd->nl_jailnch); 267 nd->nl_cred = crhold(cred); 268 nd->nl_td = td; 269 nd->nl_flags |= flags; 270 } else { 271 nlookup_done(nd); 272 } 273 return(error); 274 } 275 276 /* 277 * Set a different credential; this credential will be used by future 278 * operations performed on nd.nl_open_vp and nlookupdata structure. 279 */ 280 void 281 nlookup_set_cred(struct nlookupdata *nd, struct ucred *cred) 282 { 283 KKASSERT(nd->nl_cred != NULL); 284 285 if (nd->nl_cred != cred) { 286 cred = crhold(cred); 287 crfree(nd->nl_cred); 288 nd->nl_cred = cred; 289 } 290 } 291 292 /* 293 * Cleanup a nlookupdata structure after we are through with it. This may 294 * be called on any nlookupdata structure initialized with nlookup_init(). 295 * Calling nlookup_done() is mandatory in all cases except where nlookup_init() 296 * returns an error, even if as a consumer you believe you have taken all 297 * dynamic elements out of the nlookupdata structure. 298 */ 299 void 300 nlookup_done(struct nlookupdata *nd) 301 { 302 if (nd->nl_nch.ncp) { 303 if (nd->nl_flags & NLC_NCPISLOCKED) { 304 nd->nl_flags &= ~NLC_NCPISLOCKED; 305 cache_unlock(&nd->nl_nch); 306 } 307 cache_drop(&nd->nl_nch); /* NULL's out the nch */ 308 } 309 if (nd->nl_rootnch.ncp) 310 cache_drop(&nd->nl_rootnch); 311 if (nd->nl_jailnch.ncp) 312 cache_drop(&nd->nl_jailnch); 313 if ((nd->nl_flags & NLC_HASBUF) && nd->nl_path) { 314 objcache_put(namei_oc, nd->nl_path); 315 nd->nl_path = NULL; 316 } 317 if (nd->nl_cred) { 318 crfree(nd->nl_cred); 319 nd->nl_cred = NULL; 320 } 321 if (nd->nl_open_vp) { 322 if (nd->nl_flags & NLC_LOCKVP) { 323 vn_unlock(nd->nl_open_vp); 324 nd->nl_flags &= ~NLC_LOCKVP; 325 } 326 vn_close(nd->nl_open_vp, nd->nl_vp_fmode); 327 nd->nl_open_vp = NULL; 328 } 329 if (nd->nl_dvp) { 330 vrele(nd->nl_dvp); 331 nd->nl_dvp = NULL; 332 } 333 nd->nl_flags = 0; /* clear remaining flags (just clear everything) */ 334 } 335 336 /* 337 * Works similarly to nlookup_done() when nd initialized with 338 * nlookup_init_at(). 339 */ 340 void 341 nlookup_done_at(struct nlookupdata *nd, struct file *fp) 342 { 343 nlookup_done(nd); 344 if (fp != NULL) 345 fdrop(fp); 346 } 347 348 void 349 nlookup_zero(struct nlookupdata *nd) 350 { 351 bzero(nd, sizeof(struct nlookupdata)); 352 } 353 354 /* 355 * Simple all-in-one nlookup. Returns a locked namecache structure or NULL 356 * if an error occured. 357 * 358 * Note that the returned ncp is not checked for permissions, though VEXEC 359 * is checked on the directory path leading up to the result. The caller 360 * must call naccess() to check the permissions of the returned leaf. 361 */ 362 struct nchandle 363 nlookup_simple(const char *str, enum uio_seg seg, 364 int niflags, int *error) 365 { 366 struct nlookupdata nd; 367 struct nchandle nch; 368 369 *error = nlookup_init(&nd, str, seg, niflags); 370 if (*error == 0) { 371 if ((*error = nlookup(&nd)) == 0) { 372 nch = nd.nl_nch; /* keep hold ref from structure */ 373 cache_zero(&nd.nl_nch); /* and NULL out */ 374 } else { 375 cache_zero(&nch); 376 } 377 nlookup_done(&nd); 378 } else { 379 cache_zero(&nch); 380 } 381 return(nch); 382 } 383 384 /* 385 * Do a generic nlookup. Note that the passed nd is not nlookup_done()'d 386 * on return, even if an error occurs. If no error occurs the returned 387 * nl_nch is always referenced and locked, otherwise it may or may not be. 388 * 389 * Intermediate directory elements, including the current directory, require 390 * execute (search) permission. nlookup does not examine the access 391 * permissions on the returned element. 392 * 393 * If NLC_CREATE is set the last directory must allow node creation, 394 * and an error code of 0 will be returned for a non-existant 395 * target (not ENOENT). 396 * 397 * If NLC_RENAME_DST is set the last directory mut allow node deletion, 398 * plus the sticky check is made, and an error code of 0 will be returned 399 * for a non-existant target (not ENOENT). 400 * 401 * If NLC_DELETE is set the last directory mut allow node deletion, 402 * plus the sticky check is made. 403 * 404 * If NLC_REFDVP is set nd->nl_dvp will be set to the directory vnode 405 * of the returned entry. The vnode will be referenced, but not locked, 406 * and will be released by nlookup_done() along with everything else. 407 */ 408 int 409 nlookup(struct nlookupdata *nd) 410 { 411 globaldata_t gd = mycpu; 412 struct nlcomponent nlc; 413 struct nchandle nch; 414 struct nchandle par; 415 struct nchandle nctmp; 416 struct mount *mp; 417 int wasdotordotdot; 418 char *ptr; 419 char *xptr; 420 int error; 421 int len; 422 int dflags; 423 int hit = 1; 424 425 #ifdef KTRACE 426 if (KTRPOINT(nd->nl_td, KTR_NAMEI)) 427 ktrnamei(nd->nl_td->td_lwp, nd->nl_path); 428 #endif 429 bzero(&nlc, sizeof(nlc)); 430 431 /* 432 * Setup for the loop. The current working namecache element is 433 * always at least referenced. We lock it as required, but always 434 * return a locked, resolved namecache entry. 435 */ 436 nd->nl_loopcnt = 0; 437 if (nd->nl_dvp) { 438 vrele(nd->nl_dvp); 439 nd->nl_dvp = NULL; 440 } 441 ptr = nd->nl_path; 442 443 /* 444 * Loop on the path components. At the top of the loop nd->nl_nch 445 * is ref'd and unlocked and represents our current position. 446 */ 447 for (;;) { 448 /* 449 * Make sure nl_nch is locked so we can access the vnode, resolution 450 * state, etc. 451 */ 452 if ((nd->nl_flags & NLC_NCPISLOCKED) == 0) { 453 nd->nl_flags |= NLC_NCPISLOCKED; 454 cache_lock(&nd->nl_nch); 455 } 456 457 /* 458 * Check if the root directory should replace the current 459 * directory. This is done at the start of a translation 460 * or after a symbolic link has been found. In other cases 461 * ptr will never be pointing at a '/'. 462 */ 463 if (*ptr == '/') { 464 do { 465 ++ptr; 466 } while (*ptr == '/'); 467 cache_get(&nd->nl_rootnch, &nch); 468 cache_put(&nd->nl_nch); 469 nd->nl_nch = nch; /* remains locked */ 470 471 /* 472 * Fast-track termination. There is no parent directory of 473 * the root in the same mount from the point of view of 474 * the caller so return EACCES if NLC_REFDVP is specified, 475 * and EEXIST if NLC_CREATE is also specified. 476 * e.g. 'rmdir /' or 'mkdir /' are not allowed. 477 */ 478 if (*ptr == 0) { 479 if (nd->nl_flags & NLC_REFDVP) 480 error = (nd->nl_flags & NLC_CREATE) ? EEXIST : EACCES; 481 else 482 error = 0; 483 break; 484 } 485 continue; 486 } 487 488 /* 489 * Check directory search permissions. 490 */ 491 dflags = 0; 492 error = naccess(&nd->nl_nch, NLC_EXEC, nd->nl_cred, &dflags); 493 if (error) 494 break; 495 496 /* 497 * Extract the path component. Path components are limited to 498 * 255 characters. 499 */ 500 nlc.nlc_nameptr = ptr; 501 while (*ptr && *ptr != '/') 502 ++ptr; 503 nlc.nlc_namelen = ptr - nlc.nlc_nameptr; 504 if (nlc.nlc_namelen >= 256) { 505 error = ENAMETOOLONG; 506 break; 507 } 508 509 /* 510 * Lookup the path component in the cache, creating an unresolved 511 * entry if necessary. We have to handle "." and ".." as special 512 * cases. 513 * 514 * When handling ".." we have to detect a traversal back through a 515 * mount point. If we are at the root, ".." just returns the root. 516 * 517 * When handling "." or ".." we also have to recalculate dflags 518 * since our dflags will be for some sub-directory instead of the 519 * parent dir. 520 * 521 * This subsection returns a locked, refd 'nch' unless it errors out. 522 * The namecache topology is not allowed to be disconnected, so 523 * encountering a NULL parent will generate EINVAL. This typically 524 * occurs when a directory is removed out from under a process. 525 */ 526 if (nlc.nlc_namelen == 1 && nlc.nlc_nameptr[0] == '.') { 527 cache_get(&nd->nl_nch, &nch); 528 wasdotordotdot = 1; 529 } else if (nlc.nlc_namelen == 2 && 530 nlc.nlc_nameptr[0] == '.' && nlc.nlc_nameptr[1] == '.') { 531 if (nd->nl_nch.mount == nd->nl_rootnch.mount && 532 nd->nl_nch.ncp == nd->nl_rootnch.ncp 533 ) { 534 /* 535 * ".." at the root returns the root 536 */ 537 cache_get(&nd->nl_nch, &nch); 538 } else { 539 /* 540 * Locate the parent ncp. If we are at the root of a 541 * filesystem mount we have to skip to the mounted-on 542 * point in the underlying filesystem. 543 * 544 * Expect the parent to always be good since the 545 * mountpoint doesn't go away. XXX hack. cache_get() 546 * requires the ncp to already have a ref as a safety. 547 */ 548 nctmp = nd->nl_nch; 549 while (nctmp.ncp == nctmp.mount->mnt_ncmountpt.ncp) 550 nctmp = nctmp.mount->mnt_ncmounton; 551 nctmp.ncp = nctmp.ncp->nc_parent; 552 KKASSERT(nctmp.ncp != NULL); 553 cache_hold(&nctmp); 554 cache_get(&nctmp, &nch); 555 cache_drop(&nctmp); /* NOTE: zero's nctmp */ 556 } 557 wasdotordotdot = 2; 558 } else { 559 /* 560 * Must unlock nl_nch when traversing down the path. 561 */ 562 cache_unlock(&nd->nl_nch); 563 nd->nl_flags &= ~NLC_NCPISLOCKED; 564 nch = cache_nlookup(&nd->nl_nch, &nlc); 565 if (nch.ncp->nc_flag & NCF_UNRESOLVED) 566 hit = 0; 567 while ((error = cache_resolve(&nch, nd->nl_cred)) == EAGAIN) { 568 kprintf("[diagnostic] nlookup: relookup %*.*s\n", 569 nch.ncp->nc_nlen, nch.ncp->nc_nlen, nch.ncp->nc_name); 570 cache_put(&nch); 571 nch = cache_nlookup(&nd->nl_nch, &nlc); 572 } 573 wasdotordotdot = 0; 574 } 575 576 /* 577 * If the last component was "." or ".." our dflags no longer 578 * represents the parent directory and we have to explicitly 579 * look it up. 580 * 581 * Expect the parent to be good since nch is locked. 582 */ 583 if (wasdotordotdot && error == 0) { 584 dflags = 0; 585 if ((par.ncp = nch.ncp->nc_parent) != NULL) { 586 par.mount = nch.mount; 587 cache_hold(&par); 588 cache_lock(&par); 589 error = naccess(&par, 0, nd->nl_cred, &dflags); 590 cache_put(&par); 591 } 592 } 593 if (nd->nl_flags & NLC_NCPISLOCKED) { 594 cache_unlock(&nd->nl_nch); 595 nd->nl_flags &= ~NLC_NCPISLOCKED; 596 } 597 598 /* 599 * [end of subsection] 600 * 601 * nch is locked and referenced. 602 * nd->nl_nch is unlocked and referenced. 603 * 604 * nl_nch must be unlocked or we could chain lock to the root 605 * if a resolve gets stuck (e.g. in NFS). 606 */ 607 608 /* 609 * Resolve the namespace if necessary. The ncp returned by 610 * cache_nlookup() is referenced and locked. 611 * 612 * XXX neither '.' nor '..' should return EAGAIN since they were 613 * previously resolved and thus cannot be newly created ncp's. 614 */ 615 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 616 hit = 0; 617 error = cache_resolve(&nch, nd->nl_cred); 618 KKASSERT(error != EAGAIN); 619 } else { 620 error = nch.ncp->nc_error; 621 } 622 623 /* 624 * Early completion. ENOENT is not an error if this is the last 625 * component and NLC_CREATE or NLC_RENAME (rename target) was 626 * requested. Note that ncp->nc_error is left as ENOENT in that 627 * case, which we check later on. 628 * 629 * Also handle invalid '.' or '..' components terminating a path 630 * for a create/rename/delete. The standard requires this and pax 631 * pretty stupidly depends on it. 632 */ 633 for (xptr = ptr; *xptr == '/'; ++xptr) 634 ; 635 if (*xptr == 0) { 636 if (error == ENOENT && 637 (nd->nl_flags & (NLC_CREATE | NLC_RENAME_DST)) 638 ) { 639 if (nd->nl_flags & NLC_NFS_RDONLY) { 640 error = EROFS; 641 } else { 642 error = naccess(&nch, nd->nl_flags | dflags, 643 nd->nl_cred, NULL); 644 } 645 } 646 if (error == 0 && wasdotordotdot && 647 (nd->nl_flags & (NLC_CREATE | NLC_DELETE | 648 NLC_RENAME_SRC | NLC_RENAME_DST))) { 649 /* 650 * POSIX junk 651 */ 652 if (nd->nl_flags & NLC_CREATE) 653 error = EEXIST; 654 else if (nd->nl_flags & NLC_DELETE) 655 error = (wasdotordotdot == 1) ? EINVAL : ENOTEMPTY; 656 else 657 error = EINVAL; 658 } 659 } 660 661 /* 662 * Early completion on error. 663 */ 664 if (error) { 665 cache_put(&nch); 666 break; 667 } 668 669 /* 670 * If the element is a symlink and it is either not the last 671 * element or it is the last element and we are allowed to 672 * follow symlinks, resolve the symlink. 673 */ 674 if ((nch.ncp->nc_flag & NCF_ISSYMLINK) && 675 (*ptr || (nd->nl_flags & NLC_FOLLOW)) 676 ) { 677 if (nd->nl_loopcnt++ >= MAXSYMLINKS) { 678 error = ELOOP; 679 cache_put(&nch); 680 break; 681 } 682 error = nreadsymlink(nd, &nch, &nlc); 683 cache_put(&nch); 684 if (error) 685 break; 686 687 /* 688 * Concatenate trailing path elements onto the returned symlink. 689 * Note that if the path component (ptr) is not exhausted, it 690 * will being with a '/', so we do not have to add another one. 691 * 692 * The symlink may not be empty. 693 */ 694 len = strlen(ptr); 695 if (nlc.nlc_namelen == 0 || nlc.nlc_namelen + len >= MAXPATHLEN) { 696 error = nlc.nlc_namelen ? ENAMETOOLONG : ENOENT; 697 objcache_put(namei_oc, nlc.nlc_nameptr); 698 break; 699 } 700 bcopy(ptr, nlc.nlc_nameptr + nlc.nlc_namelen, len + 1); 701 if (nd->nl_flags & NLC_HASBUF) 702 objcache_put(namei_oc, nd->nl_path); 703 nd->nl_path = nlc.nlc_nameptr; 704 nd->nl_flags |= NLC_HASBUF; 705 ptr = nd->nl_path; 706 707 /* 708 * Go back up to the top to resolve any initial '/'s in the 709 * symlink. 710 */ 711 continue; 712 } 713 714 /* 715 * If the element is a directory and we are crossing a mount point, 716 * Locate the mount. 717 */ 718 while ((nch.ncp->nc_flag & NCF_ISMOUNTPT) && 719 (nd->nl_flags & NLC_NOCROSSMOUNT) == 0 && 720 (mp = cache_findmount(&nch)) != NULL 721 ) { 722 struct vnode *tdp; 723 724 cache_put(&nch); 725 cache_get(&mp->mnt_ncmountpt, &nch); 726 727 if (nch.ncp->nc_flag & NCF_UNRESOLVED) { 728 while (vfs_busy(mp, 0)) 729 ; 730 error = VFS_ROOT(mp, &tdp); 731 vfs_unbusy(mp); 732 if (error) 733 break; 734 cache_setvp(&nch, tdp); 735 vput(tdp); 736 } 737 } 738 if (error) { 739 cache_put(&nch); 740 break; 741 } 742 743 /* 744 * Skip any slashes to get to the next element. If there 745 * are any slashes at all the current element must be a 746 * directory or, in the create case, intended to become a directory. 747 * If it isn't we break without incrementing ptr and fall through 748 * to the failure case below. 749 */ 750 while (*ptr == '/') { 751 if ((nch.ncp->nc_flag & NCF_ISDIR) == 0 && 752 !(nd->nl_flags & NLC_WILLBEDIR) 753 ) { 754 break; 755 } 756 ++ptr; 757 } 758 759 /* 760 * Continuation case: additional elements and the current 761 * element is a directory. 762 */ 763 if (*ptr && (nch.ncp->nc_flag & NCF_ISDIR)) { 764 cache_drop(&nd->nl_nch); 765 cache_unlock(&nch); 766 KKASSERT((nd->nl_flags & NLC_NCPISLOCKED) == 0); 767 nd->nl_nch = nch; 768 continue; 769 } 770 771 /* 772 * Failure case: additional elements and the current element 773 * is not a directory 774 */ 775 if (*ptr) { 776 cache_put(&nch); 777 error = ENOTDIR; 778 break; 779 } 780 781 /* 782 * Successful lookup of last element. 783 * 784 * Check permissions if the target exists. If the target does not 785 * exist directory permissions were already tested in the early 786 * completion code above. 787 * 788 * nd->nl_flags will be adjusted on return with NLC_APPENDONLY 789 * if the file is marked append-only, and NLC_STICKY if the directory 790 * containing the file is sticky. 791 */ 792 if (nch.ncp->nc_vp && (nd->nl_flags & NLC_ALLCHKS)) { 793 error = naccess(&nch, nd->nl_flags | dflags, 794 nd->nl_cred, NULL); 795 if (error) { 796 cache_put(&nch); 797 break; 798 } 799 } 800 801 /* 802 * Termination: no more elements. 803 * 804 * If NLC_REFDVP is set acquire a referenced parent dvp. 805 */ 806 if (nd->nl_flags & NLC_REFDVP) { 807 cache_lock(&nd->nl_nch); 808 error = cache_vref(&nd->nl_nch, nd->nl_cred, &nd->nl_dvp); 809 cache_unlock(&nd->nl_nch); 810 if (error) { 811 kprintf("NLC_REFDVP: Cannot ref dvp of %p\n", nch.ncp); 812 cache_put(&nch); 813 break; 814 } 815 } 816 cache_drop(&nd->nl_nch); 817 nd->nl_nch = nch; 818 nd->nl_flags |= NLC_NCPISLOCKED; 819 error = 0; 820 break; 821 } 822 823 if (hit) 824 ++gd->gd_nchstats->ncs_longhits; 825 else 826 ++gd->gd_nchstats->ncs_longmiss; 827 828 /* 829 * NOTE: If NLC_CREATE was set the ncp may represent a negative hit 830 * (ncp->nc_error will be ENOENT), but we will still return an error 831 * code of 0. 832 */ 833 return(error); 834 } 835 836 /* 837 * Resolve a mount point's glue ncp. This ncp connects creates the illusion 838 * of continuity in the namecache tree by connecting the ncp related to the 839 * vnode under the mount to the ncp related to the mount's root vnode. 840 * 841 * If no error occured a locked, ref'd ncp is stored in *ncpp. 842 */ 843 int 844 nlookup_mp(struct mount *mp, struct nchandle *nch) 845 { 846 struct vnode *vp; 847 int error; 848 849 error = 0; 850 cache_get(&mp->mnt_ncmountpt, nch); 851 if (nch->ncp->nc_flag & NCF_UNRESOLVED) { 852 while (vfs_busy(mp, 0)) 853 ; 854 error = VFS_ROOT(mp, &vp); 855 vfs_unbusy(mp); 856 if (error) { 857 cache_put(nch); 858 } else { 859 cache_setvp(nch, vp); 860 vput(vp); 861 } 862 } 863 return(error); 864 } 865 866 /* 867 * Read the contents of a symlink, allocate a path buffer out of the 868 * namei_oc and initialize the supplied nlcomponent with the result. 869 * 870 * If an error occurs no buffer will be allocated or returned in the nlc. 871 */ 872 int 873 nreadsymlink(struct nlookupdata *nd, struct nchandle *nch, 874 struct nlcomponent *nlc) 875 { 876 struct vnode *vp; 877 struct iovec aiov; 878 struct uio auio; 879 int linklen; 880 int error; 881 char *cp; 882 883 nlc->nlc_nameptr = NULL; 884 nlc->nlc_namelen = 0; 885 if (nch->ncp->nc_vp == NULL) 886 return(ENOENT); 887 if ((error = cache_vget(nch, nd->nl_cred, LK_SHARED, &vp)) != 0) 888 return(error); 889 cp = objcache_get(namei_oc, M_WAITOK); 890 aiov.iov_base = cp; 891 aiov.iov_len = MAXPATHLEN; 892 auio.uio_iov = &aiov; 893 auio.uio_iovcnt = 1; 894 auio.uio_offset = 0; 895 auio.uio_rw = UIO_READ; 896 auio.uio_segflg = UIO_SYSSPACE; 897 auio.uio_td = nd->nl_td; 898 auio.uio_resid = MAXPATHLEN - 1; 899 error = VOP_READLINK(vp, &auio, nd->nl_cred); 900 if (error) 901 goto fail; 902 linklen = MAXPATHLEN - 1 - auio.uio_resid; 903 if (varsym_enable) { 904 linklen = varsymreplace(cp, linklen, MAXPATHLEN - 1); 905 if (linklen < 0) { 906 error = ENAMETOOLONG; 907 goto fail; 908 } 909 } 910 cp[linklen] = 0; 911 nlc->nlc_nameptr = cp; 912 nlc->nlc_namelen = linklen; 913 vput(vp); 914 return(0); 915 fail: 916 objcache_put(namei_oc, cp); 917 vput(vp); 918 return(error); 919 } 920 921 /* 922 * Check access [XXX cache vattr!] [XXX quota] 923 * 924 * Generally check the NLC_* access bits. All specified bits must pass 925 * for this function to return 0. 926 * 927 * The file does not have to exist when checking NLC_CREATE or NLC_RENAME_DST 928 * access, otherwise it must exist. No error is returned in this case. 929 * 930 * The file must not exist if NLC_EXCL is specified. 931 * 932 * Directory permissions in general are tested for NLC_CREATE if the file 933 * does not exist, NLC_DELETE if the file does exist, and NLC_RENAME_DST 934 * whether the file exists or not. 935 * 936 * The directory sticky bit is tested for NLC_DELETE and NLC_RENAME_DST, 937 * the latter is only tested if the target exists. 938 * 939 * The passed ncp must be referenced and locked. 940 */ 941 int 942 naccess(struct nchandle *nch, int nflags, struct ucred *cred, int *nflagsp) 943 { 944 struct vnode *vp; 945 struct vattr va; 946 struct namecache *ncp; 947 int error; 948 int cflags; 949 950 ASSERT_NCH_LOCKED(nch); 951 ncp = nch->ncp; 952 if (ncp->nc_flag & NCF_UNRESOLVED) { 953 cache_resolve(nch, cred); 954 ncp = nch->ncp; 955 } 956 error = ncp->nc_error; 957 958 /* 959 * Directory permissions checks. Silently ignore ENOENT if these 960 * tests pass. It isn't an error. 961 * 962 * We can safely resolve ncp->nc_parent because ncp is currently 963 * locked. 964 */ 965 if (nflags & (NLC_CREATE | NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) { 966 if (((nflags & NLC_CREATE) && ncp->nc_vp == NULL) || 967 ((nflags & NLC_DELETE) && ncp->nc_vp != NULL) || 968 ((nflags & NLC_RENAME_SRC) && ncp->nc_vp != NULL) || 969 (nflags & NLC_RENAME_DST) 970 ) { 971 struct nchandle par; 972 973 if ((par.ncp = ncp->nc_parent) == NULL) { 974 if (error != EAGAIN) 975 error = EINVAL; 976 } else if (error == 0 || error == ENOENT) { 977 par.mount = nch->mount; 978 cache_hold(&par); 979 cache_lock(&par); 980 error = naccess(&par, NLC_WRITE, cred, NULL); 981 cache_put(&par); 982 } 983 } 984 } 985 986 /* 987 * NLC_EXCL check. Target file must not exist. 988 */ 989 if (error == 0 && (nflags & NLC_EXCL) && ncp->nc_vp != NULL) 990 error = EEXIST; 991 992 /* 993 * Get the vnode attributes so we can do the rest of our checks. 994 * 995 * NOTE: We only call naccess_va() if the target exists. 996 */ 997 if (error == 0) { 998 error = cache_vget(nch, cred, LK_SHARED, &vp); 999 if (error == ENOENT) { 1000 /* 1001 * Silently zero-out ENOENT if creating or renaming 1002 * (rename target). It isn't an error. 1003 */ 1004 if (nflags & (NLC_CREATE | NLC_RENAME_DST)) 1005 error = 0; 1006 } else if (error == 0) { 1007 /* 1008 * Get the vnode attributes and check for illegal O_TRUNC 1009 * requests and read-only mounts. 1010 * 1011 * NOTE: You can still open devices on read-only mounts for 1012 * writing. 1013 * 1014 * NOTE: creates/deletes/renames are handled by the NLC_WRITE 1015 * check on the parent directory above. 1016 * 1017 * XXX cache the va in the namecache or in the vnode 1018 */ 1019 error = VOP_GETATTR(vp, &va); 1020 if (error == 0 && (nflags & NLC_TRUNCATE)) { 1021 switch(va.va_type) { 1022 case VREG: 1023 case VDATABASE: 1024 case VCHR: 1025 case VBLK: 1026 case VFIFO: 1027 break; 1028 case VDIR: 1029 error = EISDIR; 1030 break; 1031 default: 1032 error = EINVAL; 1033 break; 1034 } 1035 } 1036 if (error == 0 && (nflags & NLC_WRITE) && vp->v_mount && 1037 (vp->v_mount->mnt_flag & MNT_RDONLY) 1038 ) { 1039 switch(va.va_type) { 1040 case VDIR: 1041 case VLNK: 1042 case VREG: 1043 case VDATABASE: 1044 error = EROFS; 1045 break; 1046 default: 1047 break; 1048 } 1049 } 1050 vput(vp); 1051 1052 /* 1053 * Check permissions based on file attributes. The passed 1054 * flags (*nflagsp) are modified with feedback based on 1055 * special attributes and requirements. 1056 */ 1057 if (error == 0) { 1058 /* 1059 * Adjust the returned (*nflagsp) if non-NULL. 1060 */ 1061 if (nflagsp) { 1062 if ((va.va_mode & VSVTX) && va.va_uid != cred->cr_uid) 1063 *nflagsp |= NLC_STICKY; 1064 if (va.va_flags & APPEND) 1065 *nflagsp |= NLC_APPENDONLY; 1066 if (va.va_flags & IMMUTABLE) 1067 *nflagsp |= NLC_IMMUTABLE; 1068 } 1069 1070 /* 1071 * Track swapcache management flags in the namecache. 1072 * 1073 * Calculate the flags based on the current vattr info 1074 * and recalculate the inherited flags from the parent 1075 * (the original cache linkage may have occurred without 1076 * getattrs and thus have stale flags). 1077 */ 1078 cflags = 0; 1079 if (va.va_flags & SF_NOCACHE) 1080 cflags |= NCF_SF_NOCACHE; 1081 if (va.va_flags & UF_CACHE) 1082 cflags |= NCF_UF_CACHE; 1083 if (ncp->nc_parent) { 1084 if (ncp->nc_parent->nc_flag & 1085 (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) { 1086 cflags |= NCF_SF_PNOCACHE; 1087 } 1088 if (ncp->nc_parent->nc_flag & 1089 (NCF_UF_CACHE | NCF_UF_PCACHE)) { 1090 cflags |= NCF_UF_PCACHE; 1091 } 1092 } 1093 ncp->nc_flag &= ~(NCF_SF_NOCACHE | NCF_UF_CACHE | 1094 NCF_SF_PNOCACHE | NCF_UF_PCACHE); 1095 ncp->nc_flag |= cflags; 1096 1097 /* 1098 * Process general access. 1099 */ 1100 error = naccess_va(&va, nflags, cred); 1101 } 1102 } 1103 } 1104 return(error); 1105 } 1106 1107 /* 1108 * Check the requested access against the given vattr using cred. 1109 */ 1110 int 1111 naccess_va(struct vattr *va, int nflags, struct ucred *cred) 1112 { 1113 int i; 1114 int vmode; 1115 1116 /* 1117 * Test the immutable bit. Creations, deletions, renames (source 1118 * or destination) are not allowed. chown/chmod/other is also not 1119 * allowed but is handled by SETATTR. Hardlinks to the immutable 1120 * file are allowed. 1121 * 1122 * If the directory is set to immutable then creations, deletions, 1123 * renames (source or dest) and hardlinks to files within the directory 1124 * are not allowed, and regular files opened through the directory may 1125 * not be written to or truncated (unless a special device). 1126 * 1127 * NOTE! New hardlinks to immutable files work but new hardlinks to 1128 * files, immutable or not, sitting inside an immutable directory are 1129 * not allowed. As always if the file is hardlinked via some other 1130 * path additional hardlinks may be possible even if the file is marked 1131 * immutable. The sysop needs to create a closure by checking the hard 1132 * link count. Once closure is achieved you are good, and security 1133 * scripts should check link counts anyway. 1134 * 1135 * Writes and truncations are only allowed on special devices. 1136 */ 1137 if ((va->va_flags & IMMUTABLE) || (nflags & NLC_IMMUTABLE)) { 1138 if ((nflags & NLC_IMMUTABLE) && (nflags & NLC_HLINK)) 1139 return (EPERM); 1140 if (nflags & (NLC_CREATE | NLC_DELETE | 1141 NLC_RENAME_SRC | NLC_RENAME_DST)) { 1142 return (EPERM); 1143 } 1144 if (nflags & (NLC_WRITE | NLC_TRUNCATE)) { 1145 switch(va->va_type) { 1146 case VDIR: 1147 return (EISDIR); 1148 case VLNK: 1149 case VREG: 1150 case VDATABASE: 1151 return (EPERM); 1152 default: 1153 break; 1154 } 1155 } 1156 } 1157 1158 /* 1159 * Test the no-unlink and append-only bits for opens, rename targets, 1160 * and deletions. These bits are not tested for creations or 1161 * rename sources. 1162 * 1163 * Unlike FreeBSD we allow a file with APPEND set to be renamed. 1164 * If you do not wish this you must also set NOUNLINK. 1165 * 1166 * If the governing directory is marked APPEND-only it implies 1167 * NOUNLINK for all entries in the directory. 1168 */ 1169 if (((va->va_flags & NOUNLINK) || (nflags & NLC_APPENDONLY)) && 1170 (nflags & (NLC_DELETE | NLC_RENAME_SRC | NLC_RENAME_DST)) 1171 ) { 1172 return (EPERM); 1173 } 1174 1175 /* 1176 * A file marked append-only may not be deleted but can be renamed. 1177 */ 1178 if ((va->va_flags & APPEND) && 1179 (nflags & (NLC_DELETE | NLC_RENAME_DST)) 1180 ) { 1181 return (EPERM); 1182 } 1183 1184 /* 1185 * A file marked append-only which is opened for writing must also 1186 * be opened O_APPEND. 1187 */ 1188 if ((va->va_flags & APPEND) && (nflags & (NLC_OPEN | NLC_TRUNCATE))) { 1189 if (nflags & NLC_TRUNCATE) 1190 return (EPERM); 1191 if ((nflags & (NLC_OPEN | NLC_WRITE)) == (NLC_OPEN | NLC_WRITE)) { 1192 if ((nflags & NLC_APPEND) == 0) 1193 return (EPERM); 1194 } 1195 } 1196 1197 /* 1198 * root gets universal access 1199 */ 1200 if (cred->cr_uid == 0) 1201 return(0); 1202 1203 /* 1204 * Check owner perms. 1205 * 1206 * If NLC_OWN is set the owner of the file is allowed no matter when 1207 * the owner-mode bits say (utimes). 1208 */ 1209 vmode = 0; 1210 if (nflags & NLC_READ) 1211 vmode |= S_IRUSR; 1212 if (nflags & NLC_WRITE) 1213 vmode |= S_IWUSR; 1214 if (nflags & NLC_EXEC) 1215 vmode |= S_IXUSR; 1216 1217 if (cred->cr_uid == va->va_uid) { 1218 if ((nflags & NLC_OWN) == 0) { 1219 if ((vmode & va->va_mode) != vmode) 1220 return(EACCES); 1221 } 1222 return(0); 1223 } 1224 1225 /* 1226 * If NLC_STICKY is set only the owner may delete or rename a file. 1227 * This bit is typically set on /tmp. 1228 * 1229 * Note that the NLC_READ/WRITE/EXEC bits are not typically set in 1230 * the specific delete or rename case. For deletions and renames we 1231 * usually just care about directory permissions, not file permissions. 1232 */ 1233 if ((nflags & NLC_STICKY) && 1234 (nflags & (NLC_RENAME_SRC | NLC_RENAME_DST | NLC_DELETE))) { 1235 return(EACCES); 1236 } 1237 1238 /* 1239 * Check group perms 1240 */ 1241 vmode >>= 3; 1242 for (i = 0; i < cred->cr_ngroups; ++i) { 1243 if (va->va_gid == cred->cr_groups[i]) { 1244 if ((vmode & va->va_mode) != vmode) 1245 return(EACCES); 1246 return(0); 1247 } 1248 } 1249 1250 /* 1251 * Check world perms 1252 */ 1253 vmode >>= 3; 1254 if ((vmode & va->va_mode) != vmode) 1255 return(EACCES); 1256 return(0); 1257 } 1258 1259