1 /*- 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 4. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)vfs_lookup.c 8.4 (Berkeley) 2/16/94 35 */ 36 37 #include <sys/cdefs.h> 38 __FBSDID("$FreeBSD$"); 39 40 #include "opt_kdtrace.h" 41 #include "opt_ktrace.h" 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/fcntl.h> 47 #include <sys/jail.h> 48 #include <sys/lock.h> 49 #include <sys/mutex.h> 50 #include <sys/namei.h> 51 #include <sys/vnode.h> 52 #include <sys/mount.h> 53 #include <sys/filedesc.h> 54 #include <sys/proc.h> 55 #include <sys/sdt.h> 56 #include <sys/syscallsubr.h> 57 #include <sys/sysctl.h> 58 #ifdef KTRACE 59 #include <sys/ktrace.h> 60 #endif 61 62 #include <security/audit/audit.h> 63 #include <security/mac/mac_framework.h> 64 65 #include <vm/uma.h> 66 67 #define NAMEI_DIAGNOSTIC 1 68 #undef NAMEI_DIAGNOSTIC 69 70 SDT_PROVIDER_DECLARE(vfs); 71 SDT_PROBE_DEFINE3(vfs, namei, lookup, entry, entry, "struct vnode *", "char *", 72 "unsigned long"); 73 SDT_PROBE_DEFINE2(vfs, namei, lookup, return, return, "int", "struct vnode *"); 74 75 /* 76 * Allocation zone for namei 77 */ 78 uma_zone_t namei_zone; 79 /* 80 * Placeholder vnode for mp traversal 81 */ 82 static struct vnode *vp_crossmp; 83 84 static void 85 nameiinit(void *dummy __unused) 86 { 87 88 namei_zone = uma_zcreate("NAMEI", MAXPATHLEN, NULL, NULL, NULL, NULL, 89 UMA_ALIGN_PTR, 0); 90 getnewvnode("crossmp", NULL, &dead_vnodeops, &vp_crossmp); 91 vn_lock(vp_crossmp, LK_EXCLUSIVE); 92 VN_LOCK_ASHARE(vp_crossmp); 93 VOP_UNLOCK(vp_crossmp, 0); 94 } 95 SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); 96 97 static int lookup_shared = 1; 98 SYSCTL_INT(_vfs, OID_AUTO, lookup_shared, CTLFLAG_RW, &lookup_shared, 0, 99 "Enables/Disables shared locks for path name translation"); 100 TUNABLE_INT("vfs.lookup_shared", &lookup_shared); 101 102 /* 103 * Convert a pathname into a pointer to a locked vnode. 104 * 105 * The FOLLOW flag is set when symbolic links are to be followed 106 * when they occur at the end of the name translation process. 107 * Symbolic links are always followed for all other pathname 108 * components other than the last. 109 * 110 * The segflg defines whether the name is to be copied from user 111 * space or kernel space. 112 * 113 * Overall outline of namei: 114 * 115 * copy in name 116 * get starting directory 117 * while (!done && !error) { 118 * call lookup to search path. 119 * if symbolic link, massage name in buffer and continue 120 * } 121 */ 122 int 123 namei(struct nameidata *ndp) 124 { 125 struct filedesc *fdp; /* pointer to file descriptor state */ 126 char *cp; /* pointer into pathname argument */ 127 struct vnode *dp; /* the directory we are searching */ 128 struct iovec aiov; /* uio for reading symbolic links */ 129 struct uio auio; 130 int error, linklen; 131 struct componentname *cnp = &ndp->ni_cnd; 132 struct thread *td = cnp->cn_thread; 133 struct proc *p = td->td_proc; 134 int vfslocked; 135 136 KASSERT((cnp->cn_flags & MPSAFE) != 0 || mtx_owned(&Giant) != 0, 137 ("NOT MPSAFE and Giant not held")); 138 ndp->ni_cnd.cn_cred = ndp->ni_cnd.cn_thread->td_ucred; 139 KASSERT(cnp->cn_cred && p, ("namei: bad cred/proc")); 140 KASSERT((cnp->cn_nameiop & (~OPMASK)) == 0, 141 ("namei: nameiop contaminated with flags")); 142 KASSERT((cnp->cn_flags & OPMASK) == 0, 143 ("namei: flags contaminated with nameiops")); 144 if (!lookup_shared) 145 cnp->cn_flags &= ~LOCKSHARED; 146 fdp = p->p_fd; 147 148 /* We will set this ourselves if we need it. */ 149 cnp->cn_flags &= ~TRAILINGSLASH; 150 151 /* 152 * Get a buffer for the name to be translated, and copy the 153 * name into the buffer. 154 */ 155 if ((cnp->cn_flags & HASBUF) == 0) 156 cnp->cn_pnbuf = uma_zalloc(namei_zone, M_WAITOK); 157 if (ndp->ni_segflg == UIO_SYSSPACE) 158 error = copystr(ndp->ni_dirp, cnp->cn_pnbuf, 159 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 160 else 161 error = copyinstr(ndp->ni_dirp, cnp->cn_pnbuf, 162 MAXPATHLEN, (size_t *)&ndp->ni_pathlen); 163 164 if (error == 0) { 165 /* 166 * If we are auditing the kernel pathname, save the user 167 * pathname. 168 */ 169 if (cnp->cn_flags & AUDITVNODE1) 170 AUDIT_ARG_UPATH1(td, cnp->cn_pnbuf); 171 if (cnp->cn_flags & AUDITVNODE2) 172 AUDIT_ARG_UPATH2(td, cnp->cn_pnbuf); 173 } 174 175 /* 176 * Don't allow empty pathnames. 177 */ 178 if (!error && *cnp->cn_pnbuf == '\0') 179 error = ENOENT; 180 181 if (error) { 182 uma_zfree(namei_zone, cnp->cn_pnbuf); 183 #ifdef DIAGNOSTIC 184 cnp->cn_pnbuf = NULL; 185 cnp->cn_nameptr = NULL; 186 #endif 187 ndp->ni_vp = NULL; 188 return (error); 189 } 190 ndp->ni_loopcnt = 0; 191 #ifdef KTRACE 192 if (KTRPOINT(td, KTR_NAMEI)) { 193 KASSERT(cnp->cn_thread == curthread, 194 ("namei not using curthread")); 195 ktrnamei(cnp->cn_pnbuf); 196 } 197 #endif 198 /* 199 * Get starting point for the translation. 200 */ 201 FILEDESC_SLOCK(fdp); 202 ndp->ni_rootdir = fdp->fd_rdir; 203 ndp->ni_topdir = fdp->fd_jdir; 204 205 dp = NULL; 206 if (cnp->cn_pnbuf[0] != '/') { 207 if (ndp->ni_startdir != NULL) { 208 dp = ndp->ni_startdir; 209 error = 0; 210 } else if (ndp->ni_dirfd != AT_FDCWD) { 211 if (cnp->cn_flags & AUDITVNODE1) 212 AUDIT_ARG_ATFD1(ndp->ni_dirfd); 213 if (cnp->cn_flags & AUDITVNODE2) 214 AUDIT_ARG_ATFD2(ndp->ni_dirfd); 215 error = fgetvp(td, ndp->ni_dirfd, &dp); 216 } 217 if (error != 0 || dp != NULL) { 218 FILEDESC_SUNLOCK(fdp); 219 if (error == 0 && dp->v_type != VDIR) { 220 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 221 vrele(dp); 222 VFS_UNLOCK_GIANT(vfslocked); 223 error = ENOTDIR; 224 } 225 } 226 if (error) { 227 uma_zfree(namei_zone, cnp->cn_pnbuf); 228 #ifdef DIAGNOSTIC 229 cnp->cn_pnbuf = NULL; 230 cnp->cn_nameptr = NULL; 231 #endif 232 return (error); 233 } 234 } 235 if (dp == NULL) { 236 dp = fdp->fd_cdir; 237 VREF(dp); 238 FILEDESC_SUNLOCK(fdp); 239 if (ndp->ni_startdir != NULL) { 240 vfslocked = VFS_LOCK_GIANT(ndp->ni_startdir->v_mount); 241 vrele(ndp->ni_startdir); 242 VFS_UNLOCK_GIANT(vfslocked); 243 } 244 } 245 SDT_PROBE(vfs, namei, lookup, entry, dp, cnp->cn_pnbuf, 246 cnp->cn_flags, 0, 0); 247 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 248 for (;;) { 249 /* 250 * Check if root directory should replace current directory. 251 * Done at start of translation and after symbolic link. 252 */ 253 cnp->cn_nameptr = cnp->cn_pnbuf; 254 if (*(cnp->cn_nameptr) == '/') { 255 vrele(dp); 256 VFS_UNLOCK_GIANT(vfslocked); 257 while (*(cnp->cn_nameptr) == '/') { 258 cnp->cn_nameptr++; 259 ndp->ni_pathlen--; 260 } 261 dp = ndp->ni_rootdir; 262 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 263 VREF(dp); 264 } 265 if (vfslocked) 266 ndp->ni_cnd.cn_flags |= GIANTHELD; 267 ndp->ni_startdir = dp; 268 error = lookup(ndp); 269 if (error) { 270 uma_zfree(namei_zone, cnp->cn_pnbuf); 271 #ifdef DIAGNOSTIC 272 cnp->cn_pnbuf = NULL; 273 cnp->cn_nameptr = NULL; 274 #endif 275 SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 276 0, 0); 277 return (error); 278 } 279 vfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 280 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 281 /* 282 * If not a symbolic link, we're done. 283 */ 284 if ((cnp->cn_flags & ISSYMLINK) == 0) { 285 if ((cnp->cn_flags & (SAVENAME | SAVESTART)) == 0) { 286 uma_zfree(namei_zone, cnp->cn_pnbuf); 287 #ifdef DIAGNOSTIC 288 cnp->cn_pnbuf = NULL; 289 cnp->cn_nameptr = NULL; 290 #endif 291 } else 292 cnp->cn_flags |= HASBUF; 293 294 if ((cnp->cn_flags & MPSAFE) == 0) { 295 VFS_UNLOCK_GIANT(vfslocked); 296 } else if (vfslocked) 297 ndp->ni_cnd.cn_flags |= GIANTHELD; 298 SDT_PROBE(vfs, namei, lookup, return, 0, ndp->ni_vp, 299 0, 0, 0); 300 return (0); 301 } 302 if (ndp->ni_loopcnt++ >= MAXSYMLINKS) { 303 error = ELOOP; 304 break; 305 } 306 #ifdef MAC 307 if ((cnp->cn_flags & NOMACCHECK) == 0) { 308 error = mac_vnode_check_readlink(td->td_ucred, 309 ndp->ni_vp); 310 if (error) 311 break; 312 } 313 #endif 314 if (ndp->ni_pathlen > 1) 315 cp = uma_zalloc(namei_zone, M_WAITOK); 316 else 317 cp = cnp->cn_pnbuf; 318 aiov.iov_base = cp; 319 aiov.iov_len = MAXPATHLEN; 320 auio.uio_iov = &aiov; 321 auio.uio_iovcnt = 1; 322 auio.uio_offset = 0; 323 auio.uio_rw = UIO_READ; 324 auio.uio_segflg = UIO_SYSSPACE; 325 auio.uio_td = (struct thread *)0; 326 auio.uio_resid = MAXPATHLEN; 327 error = VOP_READLINK(ndp->ni_vp, &auio, cnp->cn_cred); 328 if (error) { 329 if (ndp->ni_pathlen > 1) 330 uma_zfree(namei_zone, cp); 331 break; 332 } 333 linklen = MAXPATHLEN - auio.uio_resid; 334 if (linklen == 0) { 335 if (ndp->ni_pathlen > 1) 336 uma_zfree(namei_zone, cp); 337 error = ENOENT; 338 break; 339 } 340 if (linklen + ndp->ni_pathlen >= MAXPATHLEN) { 341 if (ndp->ni_pathlen > 1) 342 uma_zfree(namei_zone, cp); 343 error = ENAMETOOLONG; 344 break; 345 } 346 if (ndp->ni_pathlen > 1) { 347 bcopy(ndp->ni_next, cp + linklen, ndp->ni_pathlen); 348 uma_zfree(namei_zone, cnp->cn_pnbuf); 349 cnp->cn_pnbuf = cp; 350 } else 351 cnp->cn_pnbuf[linklen] = '\0'; 352 ndp->ni_pathlen += linklen; 353 vput(ndp->ni_vp); 354 dp = ndp->ni_dvp; 355 } 356 uma_zfree(namei_zone, cnp->cn_pnbuf); 357 #ifdef DIAGNOSTIC 358 cnp->cn_pnbuf = NULL; 359 cnp->cn_nameptr = NULL; 360 #endif 361 vput(ndp->ni_vp); 362 ndp->ni_vp = NULL; 363 vrele(ndp->ni_dvp); 364 VFS_UNLOCK_GIANT(vfslocked); 365 SDT_PROBE(vfs, namei, lookup, return, error, NULL, 0, 0, 0); 366 return (error); 367 } 368 369 static int 370 compute_cn_lkflags(struct mount *mp, int lkflags) 371 { 372 373 if (mp == NULL || 374 ((lkflags & LK_SHARED) && !(mp->mnt_kern_flag & MNTK_LOOKUP_SHARED))) { 375 lkflags &= ~LK_SHARED; 376 lkflags |= LK_EXCLUSIVE; 377 } 378 return (lkflags); 379 } 380 381 static __inline int 382 needs_exclusive_leaf(struct mount *mp, int flags) 383 { 384 385 /* 386 * Intermediate nodes can use shared locks, we only need to 387 * force an exclusive lock for leaf nodes. 388 */ 389 if ((flags & (ISLASTCN | LOCKLEAF)) != (ISLASTCN | LOCKLEAF)) 390 return (0); 391 392 /* Always use exclusive locks if LOCKSHARED isn't set. */ 393 if (!(flags & LOCKSHARED)) 394 return (1); 395 396 /* 397 * For lookups during open(), if the mount point supports 398 * extended shared operations, then use a shared lock for the 399 * leaf node, otherwise use an exclusive lock. 400 */ 401 if (flags & ISOPEN) { 402 if (mp != NULL && 403 (mp->mnt_kern_flag & MNTK_EXTENDED_SHARED)) 404 return (0); 405 else 406 return (1); 407 } 408 409 /* 410 * Lookup requests outside of open() that specify LOCKSHARED 411 * only need a shared lock on the leaf vnode. 412 */ 413 return (0); 414 } 415 416 /* 417 * Search a pathname. 418 * This is a very central and rather complicated routine. 419 * 420 * The pathname is pointed to by ni_ptr and is of length ni_pathlen. 421 * The starting directory is taken from ni_startdir. The pathname is 422 * descended until done, or a symbolic link is encountered. The variable 423 * ni_more is clear if the path is completed; it is set to one if a 424 * symbolic link needing interpretation is encountered. 425 * 426 * The flag argument is LOOKUP, CREATE, RENAME, or DELETE depending on 427 * whether the name is to be looked up, created, renamed, or deleted. 428 * When CREATE, RENAME, or DELETE is specified, information usable in 429 * creating, renaming, or deleting a directory entry may be calculated. 430 * If flag has LOCKPARENT or'ed into it, the parent directory is returned 431 * locked. If flag has WANTPARENT or'ed into it, the parent directory is 432 * returned unlocked. Otherwise the parent directory is not returned. If 433 * the target of the pathname exists and LOCKLEAF is or'ed into the flag 434 * the target is returned locked, otherwise it is returned unlocked. 435 * When creating or renaming and LOCKPARENT is specified, the target may not 436 * be ".". When deleting and LOCKPARENT is specified, the target may be ".". 437 * 438 * Overall outline of lookup: 439 * 440 * dirloop: 441 * identify next component of name at ndp->ni_ptr 442 * handle degenerate case where name is null string 443 * if .. and crossing mount points and on mounted filesys, find parent 444 * call VOP_LOOKUP routine for next component name 445 * directory vnode returned in ni_dvp, unlocked unless LOCKPARENT set 446 * component vnode returned in ni_vp (if it exists), locked. 447 * if result vnode is mounted on and crossing mount points, 448 * find mounted on vnode 449 * if more components of name, do next level at dirloop 450 * return the answer in ni_vp, locked if LOCKLEAF set 451 * if LOCKPARENT set, return locked parent in ni_dvp 452 * if WANTPARENT set, return unlocked parent in ni_dvp 453 */ 454 int 455 lookup(struct nameidata *ndp) 456 { 457 char *cp; /* pointer into pathname argument */ 458 struct vnode *dp = 0; /* the directory we are searching */ 459 struct vnode *tdp; /* saved dp */ 460 struct mount *mp; /* mount table entry */ 461 struct prison *pr; 462 int docache; /* == 0 do not cache last component */ 463 int wantparent; /* 1 => wantparent or lockparent flag */ 464 int rdonly; /* lookup read-only flag bit */ 465 int error = 0; 466 int dpunlocked = 0; /* dp has already been unlocked */ 467 struct componentname *cnp = &ndp->ni_cnd; 468 int vfslocked; /* VFS Giant state for child */ 469 int dvfslocked; /* VFS Giant state for parent */ 470 int tvfslocked; 471 int lkflags_save; 472 473 /* 474 * Setup: break out flag bits into variables. 475 */ 476 dvfslocked = (ndp->ni_cnd.cn_flags & GIANTHELD) != 0; 477 vfslocked = 0; 478 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 479 wantparent = cnp->cn_flags & (LOCKPARENT | WANTPARENT); 480 KASSERT(cnp->cn_nameiop == LOOKUP || wantparent, 481 ("CREATE, DELETE, RENAME require LOCKPARENT or WANTPARENT.")); 482 docache = (cnp->cn_flags & NOCACHE) ^ NOCACHE; 483 if (cnp->cn_nameiop == DELETE || 484 (wantparent && cnp->cn_nameiop != CREATE && 485 cnp->cn_nameiop != LOOKUP)) 486 docache = 0; 487 rdonly = cnp->cn_flags & RDONLY; 488 cnp->cn_flags &= ~ISSYMLINK; 489 ndp->ni_dvp = NULL; 490 /* 491 * We use shared locks until we hit the parent of the last cn then 492 * we adjust based on the requesting flags. 493 */ 494 if (lookup_shared) 495 cnp->cn_lkflags = LK_SHARED; 496 else 497 cnp->cn_lkflags = LK_EXCLUSIVE; 498 dp = ndp->ni_startdir; 499 ndp->ni_startdir = NULLVP; 500 vn_lock(dp, 501 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | LK_RETRY)); 502 503 dirloop: 504 /* 505 * Search a new directory. 506 * 507 * The last component of the filename is left accessible via 508 * cnp->cn_nameptr for callers that need the name. Callers needing 509 * the name set the SAVENAME flag. When done, they assume 510 * responsibility for freeing the pathname buffer. 511 */ 512 cnp->cn_consume = 0; 513 for (cp = cnp->cn_nameptr; *cp != 0 && *cp != '/'; cp++) 514 continue; 515 cnp->cn_namelen = cp - cnp->cn_nameptr; 516 if (cnp->cn_namelen > NAME_MAX) { 517 error = ENAMETOOLONG; 518 goto bad; 519 } 520 #ifdef NAMEI_DIAGNOSTIC 521 { char c = *cp; 522 *cp = '\0'; 523 printf("{%s}: ", cnp->cn_nameptr); 524 *cp = c; } 525 #endif 526 ndp->ni_pathlen -= cnp->cn_namelen; 527 ndp->ni_next = cp; 528 529 /* 530 * Replace multiple slashes by a single slash and trailing slashes 531 * by a null. This must be done before VOP_LOOKUP() because some 532 * fs's don't know about trailing slashes. Remember if there were 533 * trailing slashes to handle symlinks, existing non-directories 534 * and non-existing files that won't be directories specially later. 535 */ 536 while (*cp == '/' && (cp[1] == '/' || cp[1] == '\0')) { 537 cp++; 538 ndp->ni_pathlen--; 539 if (*cp == '\0') { 540 *ndp->ni_next = '\0'; 541 cnp->cn_flags |= TRAILINGSLASH; 542 } 543 } 544 ndp->ni_next = cp; 545 546 cnp->cn_flags |= MAKEENTRY; 547 if (*cp == '\0' && docache == 0) 548 cnp->cn_flags &= ~MAKEENTRY; 549 if (cnp->cn_namelen == 2 && 550 cnp->cn_nameptr[1] == '.' && cnp->cn_nameptr[0] == '.') 551 cnp->cn_flags |= ISDOTDOT; 552 else 553 cnp->cn_flags &= ~ISDOTDOT; 554 if (*ndp->ni_next == 0) 555 cnp->cn_flags |= ISLASTCN; 556 else 557 cnp->cn_flags &= ~ISLASTCN; 558 559 if ((cnp->cn_flags & ISLASTCN) != 0 && 560 cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.' && 561 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 562 error = EINVAL; 563 goto bad; 564 } 565 566 /* 567 * Check for degenerate name (e.g. / or "") 568 * which is a way of talking about a directory, 569 * e.g. like "/." or ".". 570 */ 571 if (cnp->cn_nameptr[0] == '\0') { 572 if (dp->v_type != VDIR) { 573 error = ENOTDIR; 574 goto bad; 575 } 576 if (cnp->cn_nameiop != LOOKUP) { 577 error = EISDIR; 578 goto bad; 579 } 580 if (wantparent) { 581 ndp->ni_dvp = dp; 582 VREF(dp); 583 } 584 ndp->ni_vp = dp; 585 586 if (cnp->cn_flags & AUDITVNODE1) 587 AUDIT_ARG_VNODE1(dp); 588 else if (cnp->cn_flags & AUDITVNODE2) 589 AUDIT_ARG_VNODE2(dp); 590 591 if (!(cnp->cn_flags & (LOCKPARENT | LOCKLEAF))) 592 VOP_UNLOCK(dp, 0); 593 /* XXX This should probably move to the top of function. */ 594 if (cnp->cn_flags & SAVESTART) 595 panic("lookup: SAVESTART"); 596 goto success; 597 } 598 599 /* 600 * Handle "..": four special cases. 601 * 1. Return an error if this is the last component of 602 * the name and the operation is DELETE or RENAME. 603 * 2. If at root directory (e.g. after chroot) 604 * or at absolute root directory 605 * then ignore it so can't get out. 606 * 3. If this vnode is the root of a mounted 607 * filesystem, then replace it with the 608 * vnode which was mounted on so we take the 609 * .. in the other filesystem. 610 * 4. If the vnode is the top directory of 611 * the jail or chroot, don't let them out. 612 */ 613 if (cnp->cn_flags & ISDOTDOT) { 614 if ((cnp->cn_flags & ISLASTCN) != 0 && 615 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 616 error = EINVAL; 617 goto bad; 618 } 619 for (;;) { 620 for (pr = cnp->cn_cred->cr_prison; pr != NULL; 621 pr = pr->pr_parent) 622 if (dp == pr->pr_root) 623 break; 624 if (dp == ndp->ni_rootdir || 625 dp == ndp->ni_topdir || 626 dp == rootvnode || 627 pr != NULL || 628 ((dp->v_vflag & VV_ROOT) != 0 && 629 (cnp->cn_flags & NOCROSSMOUNT) != 0)) { 630 ndp->ni_dvp = dp; 631 ndp->ni_vp = dp; 632 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 633 VREF(dp); 634 goto nextname; 635 } 636 if ((dp->v_vflag & VV_ROOT) == 0) 637 break; 638 if (dp->v_iflag & VI_DOOMED) { /* forced unmount */ 639 error = ENOENT; 640 goto bad; 641 } 642 tdp = dp; 643 dp = dp->v_mount->mnt_vnodecovered; 644 tvfslocked = dvfslocked; 645 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 646 VREF(dp); 647 vput(tdp); 648 VFS_UNLOCK_GIANT(tvfslocked); 649 vn_lock(dp, 650 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 651 LK_RETRY)); 652 } 653 } 654 655 /* 656 * We now have a segment name to search for, and a directory to search. 657 */ 658 unionlookup: 659 #ifdef MAC 660 if ((cnp->cn_flags & NOMACCHECK) == 0) { 661 error = mac_vnode_check_lookup(cnp->cn_thread->td_ucred, dp, 662 cnp); 663 if (error) 664 goto bad; 665 } 666 #endif 667 ndp->ni_dvp = dp; 668 ndp->ni_vp = NULL; 669 ASSERT_VOP_LOCKED(dp, "lookup"); 670 VNASSERT(vfslocked == 0, dp, ("lookup: vfslocked %d", vfslocked)); 671 /* 672 * If we have a shared lock we may need to upgrade the lock for the 673 * last operation. 674 */ 675 if (dp != vp_crossmp && 676 VOP_ISLOCKED(dp) == LK_SHARED && 677 (cnp->cn_flags & ISLASTCN) && (cnp->cn_flags & LOCKPARENT)) 678 vn_lock(dp, LK_UPGRADE|LK_RETRY); 679 /* 680 * If we're looking up the last component and we need an exclusive 681 * lock, adjust our lkflags. 682 */ 683 if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags)) 684 cnp->cn_lkflags = LK_EXCLUSIVE; 685 #ifdef NAMEI_DIAGNOSTIC 686 vprint("lookup in", dp); 687 #endif 688 lkflags_save = cnp->cn_lkflags; 689 cnp->cn_lkflags = compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags); 690 if ((error = VOP_LOOKUP(dp, &ndp->ni_vp, cnp)) != 0) { 691 cnp->cn_lkflags = lkflags_save; 692 KASSERT(ndp->ni_vp == NULL, ("leaf should be empty")); 693 #ifdef NAMEI_DIAGNOSTIC 694 printf("not found\n"); 695 #endif 696 if ((error == ENOENT) && 697 (dp->v_vflag & VV_ROOT) && (dp->v_mount != NULL) && 698 (dp->v_mount->mnt_flag & MNT_UNION)) { 699 tdp = dp; 700 dp = dp->v_mount->mnt_vnodecovered; 701 tvfslocked = dvfslocked; 702 dvfslocked = VFS_LOCK_GIANT(dp->v_mount); 703 VREF(dp); 704 vput(tdp); 705 VFS_UNLOCK_GIANT(tvfslocked); 706 vn_lock(dp, 707 compute_cn_lkflags(dp->v_mount, cnp->cn_lkflags | 708 LK_RETRY)); 709 goto unionlookup; 710 } 711 712 if (error != EJUSTRETURN) 713 goto bad; 714 /* 715 * At this point, we know we're at the end of the 716 * pathname. If creating / renaming, we can consider 717 * allowing the file or directory to be created / renamed, 718 * provided we're not on a read-only filesystem. 719 */ 720 if (rdonly) { 721 error = EROFS; 722 goto bad; 723 } 724 /* trailing slash only allowed for directories */ 725 if ((cnp->cn_flags & TRAILINGSLASH) && 726 !(cnp->cn_flags & WILLBEDIR)) { 727 error = ENOENT; 728 goto bad; 729 } 730 if ((cnp->cn_flags & LOCKPARENT) == 0) 731 VOP_UNLOCK(dp, 0); 732 /* 733 * We return with ni_vp NULL to indicate that the entry 734 * doesn't currently exist, leaving a pointer to the 735 * (possibly locked) directory vnode in ndp->ni_dvp. 736 */ 737 if (cnp->cn_flags & SAVESTART) { 738 ndp->ni_startdir = ndp->ni_dvp; 739 VREF(ndp->ni_startdir); 740 } 741 goto success; 742 } else 743 cnp->cn_lkflags = lkflags_save; 744 #ifdef NAMEI_DIAGNOSTIC 745 printf("found\n"); 746 #endif 747 /* 748 * Take into account any additional components consumed by 749 * the underlying filesystem. 750 */ 751 if (cnp->cn_consume > 0) { 752 cnp->cn_nameptr += cnp->cn_consume; 753 ndp->ni_next += cnp->cn_consume; 754 ndp->ni_pathlen -= cnp->cn_consume; 755 cnp->cn_consume = 0; 756 } 757 758 dp = ndp->ni_vp; 759 vfslocked = VFS_LOCK_GIANT(dp->v_mount); 760 761 /* 762 * Check to see if the vnode has been mounted on; 763 * if so find the root of the mounted filesystem. 764 */ 765 while (dp->v_type == VDIR && (mp = dp->v_mountedhere) && 766 (cnp->cn_flags & NOCROSSMOUNT) == 0) { 767 if (vfs_busy(mp, 0)) 768 continue; 769 vput(dp); 770 VFS_UNLOCK_GIANT(vfslocked); 771 vfslocked = VFS_LOCK_GIANT(mp); 772 if (dp != ndp->ni_dvp) 773 vput(ndp->ni_dvp); 774 else 775 vrele(ndp->ni_dvp); 776 VFS_UNLOCK_GIANT(dvfslocked); 777 dvfslocked = 0; 778 vref(vp_crossmp); 779 ndp->ni_dvp = vp_crossmp; 780 error = VFS_ROOT(mp, compute_cn_lkflags(mp, cnp->cn_lkflags), 781 &tdp); 782 vfs_unbusy(mp); 783 if (vn_lock(vp_crossmp, LK_SHARED | LK_NOWAIT)) 784 panic("vp_crossmp exclusively locked or reclaimed"); 785 if (error) { 786 dpunlocked = 1; 787 goto bad2; 788 } 789 ndp->ni_vp = dp = tdp; 790 } 791 792 /* 793 * Check for symbolic link 794 */ 795 if ((dp->v_type == VLNK) && 796 ((cnp->cn_flags & FOLLOW) || (cnp->cn_flags & TRAILINGSLASH) || 797 *ndp->ni_next == '/')) { 798 cnp->cn_flags |= ISSYMLINK; 799 if (dp->v_iflag & VI_DOOMED) { 800 /* 801 * We can't know whether the directory was mounted with 802 * NOSYMFOLLOW, so we can't follow safely. 803 */ 804 error = ENOENT; 805 goto bad2; 806 } 807 if (dp->v_mount->mnt_flag & MNT_NOSYMFOLLOW) { 808 error = EACCES; 809 goto bad2; 810 } 811 /* 812 * Symlink code always expects an unlocked dvp. 813 */ 814 if (ndp->ni_dvp != ndp->ni_vp) 815 VOP_UNLOCK(ndp->ni_dvp, 0); 816 goto success; 817 } 818 819 nextname: 820 /* 821 * Not a symbolic link that we will follow. Continue with the 822 * next component if there is any; otherwise, we're done. 823 */ 824 KASSERT((cnp->cn_flags & ISLASTCN) || *ndp->ni_next == '/', 825 ("lookup: invalid path state.")); 826 if (*ndp->ni_next == '/') { 827 cnp->cn_nameptr = ndp->ni_next; 828 while (*cnp->cn_nameptr == '/') { 829 cnp->cn_nameptr++; 830 ndp->ni_pathlen--; 831 } 832 if (ndp->ni_dvp != dp) 833 vput(ndp->ni_dvp); 834 else 835 vrele(ndp->ni_dvp); 836 VFS_UNLOCK_GIANT(dvfslocked); 837 dvfslocked = vfslocked; /* dp becomes dvp in dirloop */ 838 vfslocked = 0; 839 goto dirloop; 840 } 841 /* 842 * If we're processing a path with a trailing slash, 843 * check that the end result is a directory. 844 */ 845 if ((cnp->cn_flags & TRAILINGSLASH) && dp->v_type != VDIR) { 846 error = ENOTDIR; 847 goto bad2; 848 } 849 /* 850 * Disallow directory write attempts on read-only filesystems. 851 */ 852 if (rdonly && 853 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 854 error = EROFS; 855 goto bad2; 856 } 857 if (cnp->cn_flags & SAVESTART) { 858 ndp->ni_startdir = ndp->ni_dvp; 859 VREF(ndp->ni_startdir); 860 } 861 if (!wantparent) { 862 if (ndp->ni_dvp != dp) 863 vput(ndp->ni_dvp); 864 else 865 vrele(ndp->ni_dvp); 866 VFS_UNLOCK_GIANT(dvfslocked); 867 dvfslocked = 0; 868 } else if ((cnp->cn_flags & LOCKPARENT) == 0 && ndp->ni_dvp != dp) 869 VOP_UNLOCK(ndp->ni_dvp, 0); 870 871 if (cnp->cn_flags & AUDITVNODE1) 872 AUDIT_ARG_VNODE1(dp); 873 else if (cnp->cn_flags & AUDITVNODE2) 874 AUDIT_ARG_VNODE2(dp); 875 876 if ((cnp->cn_flags & LOCKLEAF) == 0) 877 VOP_UNLOCK(dp, 0); 878 success: 879 /* 880 * Because of lookup_shared we may have the vnode shared locked, but 881 * the caller may want it to be exclusively locked. 882 */ 883 if (needs_exclusive_leaf(dp->v_mount, cnp->cn_flags) && 884 VOP_ISLOCKED(dp) != LK_EXCLUSIVE) { 885 vn_lock(dp, LK_UPGRADE | LK_RETRY); 886 if (dp->v_iflag & VI_DOOMED) { 887 error = ENOENT; 888 goto bad2; 889 } 890 } 891 if (vfslocked && dvfslocked) 892 VFS_UNLOCK_GIANT(dvfslocked); /* Only need one */ 893 if (vfslocked || dvfslocked) 894 ndp->ni_cnd.cn_flags |= GIANTHELD; 895 return (0); 896 897 bad2: 898 if (dp != ndp->ni_dvp) 899 vput(ndp->ni_dvp); 900 else 901 vrele(ndp->ni_dvp); 902 bad: 903 if (!dpunlocked) 904 vput(dp); 905 VFS_UNLOCK_GIANT(vfslocked); 906 VFS_UNLOCK_GIANT(dvfslocked); 907 ndp->ni_cnd.cn_flags &= ~GIANTHELD; 908 ndp->ni_vp = NULL; 909 return (error); 910 } 911 912 /* 913 * relookup - lookup a path name component 914 * Used by lookup to re-acquire things. 915 */ 916 int 917 relookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp) 918 { 919 struct vnode *dp = 0; /* the directory we are searching */ 920 int wantparent; /* 1 => wantparent or lockparent flag */ 921 int rdonly; /* lookup read-only flag bit */ 922 int error = 0; 923 924 KASSERT(cnp->cn_flags & ISLASTCN, 925 ("relookup: Not given last component.")); 926 /* 927 * Setup: break out flag bits into variables. 928 */ 929 wantparent = cnp->cn_flags & (LOCKPARENT|WANTPARENT); 930 KASSERT(wantparent, ("relookup: parent not wanted.")); 931 rdonly = cnp->cn_flags & RDONLY; 932 cnp->cn_flags &= ~ISSYMLINK; 933 dp = dvp; 934 cnp->cn_lkflags = LK_EXCLUSIVE; 935 vn_lock(dp, LK_EXCLUSIVE | LK_RETRY); 936 937 /* 938 * Search a new directory. 939 * 940 * The last component of the filename is left accessible via 941 * cnp->cn_nameptr for callers that need the name. Callers needing 942 * the name set the SAVENAME flag. When done, they assume 943 * responsibility for freeing the pathname buffer. 944 */ 945 #ifdef NAMEI_DIAGNOSTIC 946 printf("{%s}: ", cnp->cn_nameptr); 947 #endif 948 949 /* 950 * Check for "" which represents the root directory after slash 951 * removal. 952 */ 953 if (cnp->cn_nameptr[0] == '\0') { 954 /* 955 * Support only LOOKUP for "/" because lookup() 956 * can't succeed for CREATE, DELETE and RENAME. 957 */ 958 KASSERT(cnp->cn_nameiop == LOOKUP, ("nameiop must be LOOKUP")); 959 KASSERT(dp->v_type == VDIR, ("dp is not a directory")); 960 961 if (!(cnp->cn_flags & LOCKLEAF)) 962 VOP_UNLOCK(dp, 0); 963 *vpp = dp; 964 /* XXX This should probably move to the top of function. */ 965 if (cnp->cn_flags & SAVESTART) 966 panic("lookup: SAVESTART"); 967 return (0); 968 } 969 970 if (cnp->cn_flags & ISDOTDOT) 971 panic ("relookup: lookup on dot-dot"); 972 973 /* 974 * We now have a segment name to search for, and a directory to search. 975 */ 976 #ifdef NAMEI_DIAGNOSTIC 977 vprint("search in:", dp); 978 #endif 979 if ((error = VOP_LOOKUP(dp, vpp, cnp)) != 0) { 980 KASSERT(*vpp == NULL, ("leaf should be empty")); 981 if (error != EJUSTRETURN) 982 goto bad; 983 /* 984 * If creating and at end of pathname, then can consider 985 * allowing file to be created. 986 */ 987 if (rdonly) { 988 error = EROFS; 989 goto bad; 990 } 991 /* ASSERT(dvp == ndp->ni_startdir) */ 992 if (cnp->cn_flags & SAVESTART) 993 VREF(dvp); 994 if ((cnp->cn_flags & LOCKPARENT) == 0) 995 VOP_UNLOCK(dp, 0); 996 /* 997 * We return with ni_vp NULL to indicate that the entry 998 * doesn't currently exist, leaving a pointer to the 999 * (possibly locked) directory vnode in ndp->ni_dvp. 1000 */ 1001 return (0); 1002 } 1003 1004 dp = *vpp; 1005 1006 /* 1007 * Disallow directory write attempts on read-only filesystems. 1008 */ 1009 if (rdonly && 1010 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) { 1011 if (dvp == dp) 1012 vrele(dvp); 1013 else 1014 vput(dvp); 1015 error = EROFS; 1016 goto bad; 1017 } 1018 /* 1019 * Set the parent lock/ref state to the requested state. 1020 */ 1021 if ((cnp->cn_flags & LOCKPARENT) == 0 && dvp != dp) { 1022 if (wantparent) 1023 VOP_UNLOCK(dvp, 0); 1024 else 1025 vput(dvp); 1026 } else if (!wantparent) 1027 vrele(dvp); 1028 /* 1029 * Check for symbolic link 1030 */ 1031 KASSERT(dp->v_type != VLNK || !(cnp->cn_flags & FOLLOW), 1032 ("relookup: symlink found.\n")); 1033 1034 /* ASSERT(dvp == ndp->ni_startdir) */ 1035 if (cnp->cn_flags & SAVESTART) 1036 VREF(dvp); 1037 1038 if ((cnp->cn_flags & LOCKLEAF) == 0) 1039 VOP_UNLOCK(dp, 0); 1040 return (0); 1041 bad: 1042 vput(dp); 1043 *vpp = NULL; 1044 return (error); 1045 } 1046 1047 /* 1048 * Free data allocated by namei(); see namei(9) for details. 1049 */ 1050 void 1051 NDFREE(struct nameidata *ndp, const u_int flags) 1052 { 1053 int unlock_dvp; 1054 int unlock_vp; 1055 1056 unlock_dvp = 0; 1057 unlock_vp = 0; 1058 1059 if (!(flags & NDF_NO_FREE_PNBUF) && 1060 (ndp->ni_cnd.cn_flags & HASBUF)) { 1061 uma_zfree(namei_zone, ndp->ni_cnd.cn_pnbuf); 1062 ndp->ni_cnd.cn_flags &= ~HASBUF; 1063 } 1064 if (!(flags & NDF_NO_VP_UNLOCK) && 1065 (ndp->ni_cnd.cn_flags & LOCKLEAF) && ndp->ni_vp) 1066 unlock_vp = 1; 1067 if (!(flags & NDF_NO_VP_RELE) && ndp->ni_vp) { 1068 if (unlock_vp) { 1069 vput(ndp->ni_vp); 1070 unlock_vp = 0; 1071 } else 1072 vrele(ndp->ni_vp); 1073 ndp->ni_vp = NULL; 1074 } 1075 if (unlock_vp) 1076 VOP_UNLOCK(ndp->ni_vp, 0); 1077 if (!(flags & NDF_NO_DVP_UNLOCK) && 1078 (ndp->ni_cnd.cn_flags & LOCKPARENT) && 1079 ndp->ni_dvp != ndp->ni_vp) 1080 unlock_dvp = 1; 1081 if (!(flags & NDF_NO_DVP_RELE) && 1082 (ndp->ni_cnd.cn_flags & (LOCKPARENT|WANTPARENT))) { 1083 if (unlock_dvp) { 1084 vput(ndp->ni_dvp); 1085 unlock_dvp = 0; 1086 } else 1087 vrele(ndp->ni_dvp); 1088 ndp->ni_dvp = NULL; 1089 } 1090 if (unlock_dvp) 1091 VOP_UNLOCK(ndp->ni_dvp, 0); 1092 if (!(flags & NDF_NO_STARTDIR_RELE) && 1093 (ndp->ni_cnd.cn_flags & SAVESTART)) { 1094 vrele(ndp->ni_startdir); 1095 ndp->ni_startdir = NULL; 1096 } 1097 } 1098 1099 /* 1100 * Determine if there is a suitable alternate filename under the specified 1101 * prefix for the specified path. If the create flag is set, then the 1102 * alternate prefix will be used so long as the parent directory exists. 1103 * This is used by the various compatiblity ABIs so that Linux binaries prefer 1104 * files under /compat/linux for example. The chosen path (whether under 1105 * the prefix or under /) is returned in a kernel malloc'd buffer pointed 1106 * to by pathbuf. The caller is responsible for free'ing the buffer from 1107 * the M_TEMP bucket if one is returned. 1108 */ 1109 int 1110 kern_alternate_path(struct thread *td, const char *prefix, const char *path, 1111 enum uio_seg pathseg, char **pathbuf, int create, int dirfd) 1112 { 1113 struct nameidata nd, ndroot; 1114 char *ptr, *buf, *cp; 1115 size_t len, sz; 1116 int error; 1117 1118 buf = (char *) malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 1119 *pathbuf = buf; 1120 1121 /* Copy the prefix into the new pathname as a starting point. */ 1122 len = strlcpy(buf, prefix, MAXPATHLEN); 1123 if (len >= MAXPATHLEN) { 1124 *pathbuf = NULL; 1125 free(buf, M_TEMP); 1126 return (EINVAL); 1127 } 1128 sz = MAXPATHLEN - len; 1129 ptr = buf + len; 1130 1131 /* Append the filename to the prefix. */ 1132 if (pathseg == UIO_SYSSPACE) 1133 error = copystr(path, ptr, sz, &len); 1134 else 1135 error = copyinstr(path, ptr, sz, &len); 1136 1137 if (error) { 1138 *pathbuf = NULL; 1139 free(buf, M_TEMP); 1140 return (error); 1141 } 1142 1143 /* Only use a prefix with absolute pathnames. */ 1144 if (*ptr != '/') { 1145 error = EINVAL; 1146 goto keeporig; 1147 } 1148 1149 if (dirfd != AT_FDCWD) { 1150 /* 1151 * We want the original because the "prefix" is 1152 * included in the already opened dirfd. 1153 */ 1154 bcopy(ptr, buf, len); 1155 return (0); 1156 } 1157 1158 /* 1159 * We know that there is a / somewhere in this pathname. 1160 * Search backwards for it, to find the file's parent dir 1161 * to see if it exists in the alternate tree. If it does, 1162 * and we want to create a file (cflag is set). We don't 1163 * need to worry about the root comparison in this case. 1164 */ 1165 1166 if (create) { 1167 for (cp = &ptr[len] - 1; *cp != '/'; cp--); 1168 *cp = '\0'; 1169 1170 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1171 error = namei(&nd); 1172 *cp = '/'; 1173 if (error != 0) 1174 goto keeporig; 1175 } else { 1176 NDINIT(&nd, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, buf, td); 1177 1178 error = namei(&nd); 1179 if (error != 0) 1180 goto keeporig; 1181 1182 /* 1183 * We now compare the vnode of the prefix to the one 1184 * vnode asked. If they resolve to be the same, then we 1185 * ignore the match so that the real root gets used. 1186 * This avoids the problem of traversing "../.." to find the 1187 * root directory and never finding it, because "/" resolves 1188 * to the emulation root directory. This is expensive :-( 1189 */ 1190 NDINIT(&ndroot, LOOKUP, FOLLOW | MPSAFE, UIO_SYSSPACE, prefix, 1191 td); 1192 1193 /* We shouldn't ever get an error from this namei(). */ 1194 error = namei(&ndroot); 1195 if (error == 0) { 1196 if (nd.ni_vp == ndroot.ni_vp) 1197 error = ENOENT; 1198 1199 NDFREE(&ndroot, NDF_ONLY_PNBUF); 1200 vrele(ndroot.ni_vp); 1201 VFS_UNLOCK_GIANT(NDHASGIANT(&ndroot)); 1202 } 1203 } 1204 1205 NDFREE(&nd, NDF_ONLY_PNBUF); 1206 vrele(nd.ni_vp); 1207 VFS_UNLOCK_GIANT(NDHASGIANT(&nd)); 1208 1209 keeporig: 1210 /* If there was an error, use the original path name. */ 1211 if (error) 1212 bcopy(ptr, buf, len); 1213 return (error); 1214 } 1215