1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>, 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Poul-Henning Kamp of the FreeBSD Project. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 39 * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $ 40 * $DragonFly: src/sys/kern/vfs_cache.c,v 1.12 2003/10/18 05:53:57 dillon Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysctl.h> 47 #include <sys/mount.h> 48 #include <sys/vnode.h> 49 #include <sys/malloc.h> 50 #include <sys/sysproto.h> 51 #include <sys/proc.h> 52 #include <sys/namei.h> 53 #include <sys/filedesc.h> 54 #include <sys/fnv_hash.h> 55 56 /* 57 * Random lookups in the cache are accomplished with a hash table using 58 * a hash key of (nc_src_vp, name). 59 * 60 * Negative entries may exist and correspond to structures where nc_vp 61 * is NULL. In a negative entry, NCF_WHITEOUT will be set if the entry 62 * corresponds to a whited-out directory entry (verses simply not finding the 63 * entry at all). 64 * 65 * Upon reaching the last segment of a path, if the reference is for DELETE, 66 * or NOCACHE is set (rewrite), and the name is located in the cache, it 67 * will be dropped. 68 */ 69 70 /* 71 * Structures associated with name cacheing. 72 */ 73 #define NCHHASH(hash) (&nchashtbl[(hash) & nchash]) 74 #define MINNEG 1024 75 76 static LIST_HEAD(nchashhead, namecache) *nchashtbl; /* Hash Table */ 77 static struct namecache_list ncneglist; /* instead of vnode */ 78 static struct namecache rootnamecache; /* Dummy node */ 79 80 static int nczapcheck; /* panic on bad release */ 81 SYSCTL_INT(_debug, OID_AUTO, nczapcheck, CTLFLAG_RW, &nczapcheck, 0, ""); 82 83 static u_long nchash; /* size of hash table */ 84 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, ""); 85 86 static u_long ncnegfactor = 16; /* ratio of negative entries */ 87 SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, ""); 88 89 static u_long numneg; /* number of cache entries allocated */ 90 SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, ""); 91 92 static u_long numcache; /* number of cache entries allocated */ 93 SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, ""); 94 95 static u_long numunres; /* number of unresolved entries */ 96 SYSCTL_ULONG(_debug, OID_AUTO, numunres, CTLFLAG_RD, &numunres, 0, ""); 97 98 struct nchstats nchstats; /* cache effectiveness statistics */ 99 100 SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), ""); 101 SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), ""); 102 103 /* 104 * The new name cache statistics 105 */ 106 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 107 #define STATNODE(mode, name, var) \ 108 SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, ""); 109 STATNODE(CTLFLAG_RD, numneg, &numneg); 110 STATNODE(CTLFLAG_RD, numcache, &numcache); 111 static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls); 112 static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits); 113 static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits); 114 static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks); 115 static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss); 116 static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap); 117 static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps); 118 static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits); 119 static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps); 120 static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits); 121 122 123 static void cache_zap(struct namecache *ncp); 124 125 MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 126 127 /* 128 * cache_hold() and cache_drop() prevent the premature deletion of a 129 * namecache entry but do not prevent operations (such as zapping) on 130 * that namecache entry. 131 */ 132 static __inline 133 struct namecache * 134 _cache_hold(struct namecache *ncp) 135 { 136 ++ncp->nc_refs; 137 return(ncp); 138 } 139 140 static __inline 141 void 142 _cache_drop(struct namecache *ncp) 143 { 144 KKASSERT(ncp->nc_refs > 0); 145 if (ncp->nc_refs == 1 && 146 (ncp->nc_flag & NCF_UNRESOLVED) && 147 TAILQ_EMPTY(&ncp->nc_list) 148 ) { 149 cache_zap(ncp); 150 } else { 151 --ncp->nc_refs; 152 } 153 } 154 155 struct namecache * 156 cache_hold(struct namecache *ncp) 157 { 158 return(_cache_hold(ncp)); 159 } 160 161 void 162 cache_drop(struct namecache *ncp) 163 { 164 _cache_drop(ncp); 165 } 166 167 /* 168 * Try to destroy a namecache entry. The entry is disassociated from its 169 * vnode or ncneglist and reverted to an UNRESOLVED state. 170 * 171 * Then, if there are no additional references to the ncp and we can 172 * successfully delete the children, the entry is also removed from the 173 * namecache hashlist / topology. 174 * 175 * References or undeletable children will prevent the entry from being 176 * removed from the topology. The entry may be revalidated (typically 177 * by cache_enter()) at a later time. Children remain because: 178 * 179 * + we have tried to delete a node rather then a leaf in the topology. 180 * + the presence of negative entries (we try to scrap these). 181 * + an entry or child has a non-zero ref count and cannot be scrapped. 182 * 183 * This function must be called with the ncp held and will drop the ref 184 * count during zapping. 185 */ 186 static void 187 cache_zap(struct namecache *ncp) 188 { 189 struct namecache *par; 190 struct vnode *vp; 191 192 /* 193 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED. 194 */ 195 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 196 ncp->nc_flag |= NCF_UNRESOLVED; 197 ++numunres; 198 if ((vp = ncp->nc_vp) != NULL) { 199 --numcache; 200 ncp->nc_vp = NULL; /* safety */ 201 TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode); 202 if (!TAILQ_EMPTY(&ncp->nc_list)) 203 vdrop(vp); 204 } else { 205 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode); 206 --numneg; 207 } 208 } 209 210 /* 211 * Try to scrap the entry and possibly tail-recurse on its parent. 212 * We only scrap unref'd (other then our ref) unresolved entries, 213 * we do not scrap 'live' entries. 214 */ 215 while (ncp->nc_flag & NCF_UNRESOLVED) { 216 /* 217 * Someone other then us has a ref, stop. 218 */ 219 if (ncp->nc_refs > 1) 220 goto done; 221 222 /* 223 * We have children, stop. 224 */ 225 if (!TAILQ_EMPTY(&ncp->nc_list)) 226 goto done; 227 228 /* 229 * Ok, we can completely destroy and free this entry. Sanity 230 * check it against our static rootnamecache structure, 231 * then remove it from the hash. 232 */ 233 KKASSERT(ncp != &rootnamecache); 234 235 if (ncp->nc_flag & NCF_HASHED) { 236 ncp->nc_flag &= ~NCF_HASHED; 237 LIST_REMOVE(ncp, nc_hash); 238 } 239 240 /* 241 * Unlink from its parent and free, then loop on the 242 * parent. XXX temp hack, in stage-3 parent is never NULL 243 */ 244 if ((par = ncp->nc_parent) != NULL) { 245 par = cache_hold(par); 246 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry); 247 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list)) 248 vdrop(par->nc_vp); 249 } 250 --numunres; 251 ncp->nc_refs = -1; /* safety */ 252 ncp->nc_parent = NULL; /* safety */ 253 if (ncp->nc_name) 254 free(ncp->nc_name, M_VFSCACHE); 255 free(ncp, M_VFSCACHE); 256 ncp = par; 257 if (par == NULL) /* temp hack */ 258 return; /* temp hack */ 259 } 260 done: 261 --ncp->nc_refs; 262 } 263 264 /* 265 * Lookup an entry in the cache 266 * 267 * Lookup is called with dvp pointing to the directory to search, 268 * cnp pointing to the name of the entry being sought. 269 * 270 * If the lookup succeeds, the vnode is returned in *vpp, and a 271 * status of -1 is returned. 272 * 273 * If the lookup determines that the name does not exist (negative cacheing), 274 * a status of ENOENT is returned. 275 * 276 * If the lookup fails, a status of zero is returned. 277 * 278 * Note that UNRESOLVED entries are ignored. They are not negative cache 279 * entries. 280 */ 281 int 282 cache_lookup(struct vnode *dvp, struct namecache *par, struct vnode **vpp, 283 struct namecache **ncpp, struct componentname *cnp) 284 { 285 struct namecache *ncp; 286 u_int32_t hash; 287 288 numcalls++; 289 290 if (cnp->cn_nameptr[0] == '.') { 291 if (cnp->cn_namelen == 1) { 292 *vpp = dvp; 293 dothits++; 294 numposhits++; /* include in total statistics */ 295 return (-1); 296 } 297 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 298 dotdothits++; 299 numposhits++; /* include in total statistics */ 300 if (dvp->v_dd->v_id != dvp->v_ddid || 301 (cnp->cn_flags & CNP_MAKEENTRY) == 0) { 302 dvp->v_ddid = 0; 303 return (0); 304 } 305 *vpp = dvp->v_dd; 306 return (-1); 307 } 308 } 309 310 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 311 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 312 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 313 numchecks++; 314 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 && 315 /* ncp->nc_parent == par instead of dvp STAGE-3 */ 316 ncp->nc_dvp_data == (uintptr_t)dvp && /* STAGE-2 only */ 317 ncp->nc_dvp_id == dvp->v_id && /* STAGE-2 only */ 318 ncp->nc_nlen == cnp->cn_namelen && 319 bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0 320 ) { 321 cache_hold(ncp); 322 break; 323 } 324 } 325 326 /* We failed to find an entry */ 327 if (ncp == NULL) { 328 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) { 329 nummisszap++; 330 } else { 331 nummiss++; 332 } 333 nchstats.ncs_miss++; 334 return (0); 335 } 336 337 /* We don't want to have an entry, so dump it */ 338 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) { 339 numposzaps++; 340 nchstats.ncs_badhits++; 341 cache_zap(ncp); 342 return (0); 343 } 344 345 /* We found a "positive" match, return the vnode */ 346 if (ncp->nc_vp) { 347 numposhits++; 348 nchstats.ncs_goodhits++; 349 *vpp = ncp->nc_vp; 350 cache_drop(ncp); 351 return (-1); 352 } 353 354 /* We found a negative match, and want to create it, so purge */ 355 if (cnp->cn_nameiop == NAMEI_CREATE) { 356 numnegzaps++; 357 nchstats.ncs_badhits++; 358 cache_zap(ncp); 359 return (0); 360 } 361 362 numneghits++; 363 364 /* 365 * We found a "negative" match, ENOENT notifies client of this match. 366 * The nc_flag field records whether this is a whiteout. Since there 367 * is no vnode we can use the vnode tailq link field with ncneglist. 368 */ 369 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode); 370 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode); 371 nchstats.ncs_neghits++; 372 if (ncp->nc_flag & NCF_WHITEOUT) 373 cnp->cn_flags |= CNP_ISWHITEOUT; 374 cache_drop(ncp); 375 return (ENOENT); 376 } 377 378 /* 379 * Generate a special linkage between the mount point and the root of the 380 * mounted filesystem in order to maintain the namecache topology across 381 * a mount point. The special linkage has a 0-length name component 382 * and sets NCF_MOUNTPT. 383 */ 384 void 385 cache_mount(struct vnode *dvp, struct vnode *tvp) 386 { 387 struct namecache *ncp; 388 struct namecache *par; 389 struct nchashhead *nchpp; 390 u_int32_t hash; 391 392 /* 393 * If a linkage already exists we do not have to do anything. 394 */ 395 hash = fnv_32_buf("", 0, FNV1_32_INIT); 396 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 397 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 398 numchecks++; 399 if (ncp->nc_vp == tvp && 400 ncp->nc_nlen == 0 && 401 /* ncp->nc_parent == par STAGE-3 par instad of dvp */ 402 ncp->nc_dvp_data == (uintptr_t)dvp && /* STAGE-2 only */ 403 ncp->nc_dvp_id == dvp->v_id /* STAGE-2 only */ 404 ) { 405 return; 406 } 407 } 408 409 /* 410 * STAGE-2 par can be NULL 411 * STAGE-3 par is passed as argument and cannot be NULL 412 */ 413 par = TAILQ_FIRST(&dvp->v_namecache); 414 415 /* 416 * Otherwise create a new linkage. 417 */ 418 ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK | M_ZERO); 419 420 TAILQ_INIT(&ncp->nc_list); 421 ncp->nc_flag = NCF_MOUNTPT; 422 if (par != NULL) { 423 /* STAGE-3 par never NULL */ 424 ncp->nc_parent = par; 425 if (TAILQ_EMPTY(&par->nc_list)) { 426 if (par->nc_vp) 427 vhold(par->nc_vp); 428 } 429 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry); 430 } 431 ncp->nc_dvp_data = (uintptr_t)dvp; /* STAGE-2 ONLY */ 432 ncp->nc_dvp_id = dvp->v_id; /* STAGE-2 ONLY */ 433 434 /* 435 * Linkup the target vnode. The target vnode is NULL if this is 436 * to be a negative cache entry. 437 */ 438 ++numcache; 439 ncp->nc_vp = tvp; 440 TAILQ_INSERT_HEAD(&tvp->v_namecache, ncp, nc_vnode); 441 #if 0 442 if (tvp->v_type == VDIR) { 443 vp->v_dd = dvp; 444 vp->v_ddid = dvp->v_id; 445 } 446 #endif 447 448 /* 449 * Hash table 450 */ 451 hash = fnv_32_buf("", 0, FNV1_32_INIT); 452 hash = fnv_32_buf(&ncp->nc_dvp_id, sizeof(ncp->nc_dvp_id), hash); 453 nchpp = NCHHASH(hash); 454 LIST_INSERT_HEAD(nchpp, ncp, nc_hash); 455 456 ncp->nc_flag |= NCF_HASHED; 457 } 458 459 /* 460 * Add an entry to the cache. 461 */ 462 void 463 cache_enter(struct vnode *dvp, struct namecache *par, struct vnode *vp, struct componentname *cnp) 464 { 465 struct namecache *ncp; 466 struct nchashhead *nchpp; 467 u_int32_t hash; 468 469 /* YYY use par */ 470 471 /* 472 * "." and ".." are degenerate cases, they are not added to the 473 * cache. 474 */ 475 if (cnp->cn_nameptr[0] == '.') { 476 if (cnp->cn_namelen == 1) { 477 return; 478 } 479 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 480 if (vp) { 481 dvp->v_dd = vp; 482 dvp->v_ddid = vp->v_id; 483 } else { 484 dvp->v_dd = dvp; 485 dvp->v_ddid = 0; 486 } 487 return; 488 } 489 } 490 491 if (nczapcheck) 492 printf("ENTER '%*.*s' %p ", (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr, vp); 493 494 /* 495 * Locate other entries associated with this vnode and zap them, 496 * because the purge code may not be able to find them due to 497 * the topology not yet being consistent. This is a temporary 498 * hack. 499 */ 500 if (vp) { 501 again: 502 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) { 503 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 504 cache_zap(cache_hold(ncp)); 505 goto again; 506 } 507 } 508 } 509 510 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 511 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 512 513 ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK | M_ZERO); 514 ncp->nc_name = malloc(cnp->cn_namelen, M_VFSCACHE, M_WAITOK); 515 TAILQ_INIT(&ncp->nc_list); 516 if (nczapcheck) 517 printf("alloc\n"); 518 519 /* 520 * Linkup the parent pointer, bump the parent vnode's hold 521 * count when we go from 0->1 children. 522 * 523 * STAGE-2 par may be NULL 524 * STAGE-3 par may not be NULL, nc_dvp_* removed 525 */ 526 par = TAILQ_FIRST(&dvp->v_namecache); 527 if (par != NULL) { 528 ncp->nc_parent = par; 529 if (TAILQ_EMPTY(&par->nc_list)) { 530 if (par->nc_vp) 531 vhold(par->nc_vp); 532 } 533 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry); 534 } 535 ncp->nc_dvp_data = (uintptr_t)dvp; 536 ncp->nc_dvp_id = dvp->v_id; 537 538 /* 539 * Add to the hash table 540 */ 541 ncp->nc_nlen = cnp->cn_namelen; 542 bcopy(cnp->cn_nameptr, ncp->nc_name, cnp->cn_namelen); 543 nchpp = NCHHASH(hash); 544 LIST_INSERT_HEAD(nchpp, ncp, nc_hash); 545 546 ncp->nc_flag |= NCF_HASHED; 547 548 /* 549 * Linkup the target vnode. The target vnode is NULL if this is 550 * to be a negative cache entry. 551 */ 552 ncp->nc_vp = vp; 553 if (vp == NULL) { 554 ++numneg; 555 TAILQ_INSERT_HEAD(&ncneglist, ncp, nc_vnode); 556 ncp->nc_flag &= ~NCF_WHITEOUT; 557 if (cnp->cn_flags & CNP_ISWHITEOUT) 558 ncp->nc_flag |= NCF_WHITEOUT; 559 } else { 560 ++numcache; 561 if (!TAILQ_EMPTY(&ncp->nc_list)) 562 vhold(vp); 563 TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode); 564 if (vp->v_type == VDIR) { 565 vp->v_dd = dvp; 566 vp->v_ddid = dvp->v_id; 567 } 568 } 569 570 /* 571 * Don't cache too many negative hits 572 */ 573 if (numneg > MINNEG && numneg * ncnegfactor > numcache) { 574 ncp = TAILQ_FIRST(&ncneglist); 575 KKASSERT(ncp != NULL); 576 cache_zap(cache_hold(ncp)); 577 } 578 } 579 580 /* 581 * Name cache initialization, from vfs_init() when we are booting 582 * 583 * rootnamecache is initialized such that it cannot be recursively deleted. 584 */ 585 void 586 nchinit(void) 587 { 588 TAILQ_INIT(&ncneglist); 589 nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash); 590 TAILQ_INIT(&rootnamecache.nc_list); 591 rootnamecache.nc_flag |= NCF_HASHED | NCF_ROOT | NCF_UNRESOLVED; 592 rootnamecache.nc_refs = 1; 593 } 594 595 /* 596 * vfs_cache_setroot() 597 * 598 * Create an association between the root of our namecache and 599 * the root vnode. This routine may be called several times during 600 * booting. 601 */ 602 void 603 vfs_cache_setroot(struct vnode *nvp) 604 { 605 KKASSERT(rootnamecache.nc_refs > 0); /* don't accidently free */ 606 cache_zap(cache_hold(&rootnamecache)); 607 608 rootnamecache.nc_vp = nvp; 609 rootnamecache.nc_flag &= ~NCF_UNRESOLVED; 610 if (nvp) { 611 ++numcache; 612 if (!TAILQ_EMPTY(&rootnamecache.nc_list)) 613 vhold(nvp); 614 TAILQ_INSERT_HEAD(&nvp->v_namecache, &rootnamecache, nc_vnode); 615 } else { 616 ++numneg; 617 TAILQ_INSERT_HEAD(&ncneglist, &rootnamecache, nc_vnode); 618 rootnamecache.nc_flag &= ~NCF_WHITEOUT; 619 } 620 } 621 622 /* 623 * Invalidate all namecache entries to a particular vnode as well as 624 * any direct children of that vnode in the namecache. This is a 625 * 'catch all' purge used by filesystems that do not know any better. 626 * 627 * A new vnode v_id is generated. Note that no vnode will ever have a 628 * v_id of 0. 629 * 630 * Note that the linkage between the vnode and its namecache entries will 631 * be removed, but the namecache entries themselves might stay put due to 632 * active references from elsewhere in the system or due to the existance of 633 * the children. The namecache topology is left intact even if we do not 634 * know what the vnode association is. Such entries will be marked 635 * NCF_UNRESOLVED. 636 * 637 * XXX: Only time and the size of v_id prevents this from failing: 638 * XXX: In theory we should hunt down all (struct vnode*, v_id) 639 * XXX: soft references and nuke them, at least on the global 640 * XXX: v_id wraparound. The period of resistance can be extended 641 * XXX: by incrementing each vnodes v_id individually instead of 642 * XXX: using the global v_id. 643 */ 644 void 645 cache_purge(struct vnode *vp) 646 { 647 static u_long nextid; 648 struct namecache *ncp; 649 struct namecache *scan; 650 651 /* 652 * Disassociate the vnode from its namecache entries along with 653 * (for historical reasons) any direct children. 654 */ 655 while ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL) { 656 cache_hold(ncp); 657 658 restart: /* YYY hack, fix me */ 659 TAILQ_FOREACH(scan, &ncp->nc_list, nc_entry) { 660 if ((scan->nc_flag & NCF_UNRESOLVED) == 0) { 661 cache_zap(cache_hold(scan)); 662 goto restart; 663 } 664 } 665 cache_zap(ncp); 666 } 667 668 /* 669 * Calculate a new unique id for ".." handling 670 */ 671 do { 672 nextid++; 673 } while (nextid == vp->v_id || nextid == 0); 674 vp->v_id = nextid; 675 vp->v_dd = vp; 676 vp->v_ddid = 0; 677 } 678 679 /* 680 * Flush all entries referencing a particular filesystem. 681 * 682 * Since we need to check it anyway, we will flush all the invalid 683 * entries at the same time. 684 */ 685 void 686 cache_purgevfs(struct mount *mp) 687 { 688 struct nchashhead *nchpp; 689 struct namecache *ncp, *nnp; 690 691 /* 692 * Scan hash tables for applicable entries. 693 */ 694 for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) { 695 ncp = LIST_FIRST(nchpp); 696 if (ncp) 697 cache_hold(ncp); 698 while (ncp) { 699 nnp = LIST_NEXT(ncp, nc_hash); 700 if (nnp) 701 cache_hold(nnp); 702 if (ncp->nc_vp && ncp->nc_vp->v_mount == mp) 703 cache_zap(ncp); 704 else 705 cache_drop(ncp); 706 ncp = nnp; 707 } 708 } 709 } 710 711 /* 712 * cache_leaf_test() 713 * 714 * Test whether the vnode is at a leaf in the nameicache tree. 715 * 716 * Returns 0 if it is a leaf, -1 if it isn't. 717 */ 718 int 719 cache_leaf_test(struct vnode *vp) 720 { 721 struct namecache *scan; 722 struct namecache *ncp; 723 724 TAILQ_FOREACH(scan, &vp->v_namecache, nc_vnode) { 725 TAILQ_FOREACH(ncp, &scan->nc_list, nc_entry) { 726 /* YYY && ncp->nc_vp->v_type == VDIR ? */ 727 if (ncp->nc_vp != NULL) 728 return(-1); 729 } 730 } 731 return(0); 732 } 733 734 /* 735 * Perform canonical checks and cache lookup and pass on to filesystem 736 * through the vop_cachedlookup only if needed. 737 * 738 * vop_lookup_args { 739 * struct vnode a_dvp; 740 * struct namecache *a_ncp; 741 * struct vnode **a_vpp; 742 * struct namecache **a_ncpp; 743 * struct componentname *a_cnp; 744 * } 745 */ 746 int 747 vfs_cache_lookup(struct vop_lookup_args *ap) 748 { 749 struct vnode *dvp, *vp; 750 int lockparent; 751 int error; 752 struct namecache *par = ap->a_par; 753 struct vnode **vpp = ap->a_vpp; 754 struct namecache **ncpp = ap->a_ncpp; 755 struct componentname *cnp = ap->a_cnp; 756 struct ucred *cred = cnp->cn_cred; 757 int flags = cnp->cn_flags; 758 struct thread *td = cnp->cn_td; 759 u_long vpid; /* capability number of vnode */ 760 761 *vpp = NULL; 762 if (ncpp) 763 *ncpp = NULL; 764 dvp = ap->a_dvp; 765 lockparent = flags & CNP_LOCKPARENT; 766 767 if (dvp->v_type != VDIR) 768 return (ENOTDIR); 769 770 if ((flags & CNP_ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) && 771 (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) { 772 return (EROFS); 773 } 774 775 error = VOP_ACCESS(dvp, VEXEC, cred, td); 776 777 if (error) 778 return (error); 779 780 error = cache_lookup(dvp, par, vpp, ncpp, cnp); 781 782 if (!error) 783 return (VOP_CACHEDLOOKUP(dvp, par, vpp, ncpp, cnp)); 784 785 if (error == ENOENT) 786 return (error); 787 788 vp = *vpp; 789 vpid = vp->v_id; 790 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 791 if (dvp == vp) { /* lookup on "." */ 792 VREF(vp); 793 error = 0; 794 } else if (flags & CNP_ISDOTDOT) { 795 VOP_UNLOCK(dvp, 0, td); 796 cnp->cn_flags |= CNP_PDIRUNLOCK; 797 error = vget(vp, LK_EXCLUSIVE, td); 798 if (!error && lockparent && (flags & CNP_ISLASTCN)) { 799 if ((error = vn_lock(dvp, LK_EXCLUSIVE, td)) == 0) 800 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 801 } 802 } else { 803 error = vget(vp, LK_EXCLUSIVE, td); 804 if (!lockparent || error || !(flags & CNP_ISLASTCN)) { 805 VOP_UNLOCK(dvp, 0, td); 806 cnp->cn_flags |= CNP_PDIRUNLOCK; 807 } 808 } 809 /* 810 * Check that the capability number did not change 811 * while we were waiting for the lock. 812 */ 813 if (!error) { 814 if (vpid == vp->v_id) 815 return (0); 816 vput(vp); 817 if (lockparent && dvp != vp && (flags & CNP_ISLASTCN)) { 818 VOP_UNLOCK(dvp, 0, td); 819 cnp->cn_flags |= CNP_PDIRUNLOCK; 820 } 821 } 822 if (cnp->cn_flags & CNP_PDIRUNLOCK) { 823 error = vn_lock(dvp, LK_EXCLUSIVE, td); 824 if (error) 825 return (error); 826 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 827 } 828 return (VOP_CACHEDLOOKUP(dvp, par, vpp, ncpp, cnp)); 829 } 830 831 static int disablecwd; 832 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, ""); 833 834 static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls); 835 static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1); 836 static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2); 837 static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3); 838 static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4); 839 static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound); 840 841 int 842 __getcwd(struct __getcwd_args *uap) 843 { 844 struct proc *p = curproc; 845 char *bp, *buf; 846 int error, i, slash_prefixed; 847 struct filedesc *fdp; 848 struct namecache *ncp; 849 struct vnode *vp; 850 851 numcwdcalls++; 852 if (disablecwd) 853 return (ENODEV); 854 if (uap->buflen < 2) 855 return (EINVAL); 856 if (uap->buflen > MAXPATHLEN) 857 uap->buflen = MAXPATHLEN; 858 buf = bp = malloc(uap->buflen, M_TEMP, M_WAITOK); 859 bp += uap->buflen - 1; 860 *bp = '\0'; 861 fdp = p->p_fd; 862 slash_prefixed = 0; 863 for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { 864 if (vp->v_flag & VROOT) { 865 if (vp->v_mount == NULL) { /* forced unmount */ 866 free(buf, M_TEMP); 867 return (EBADF); 868 } 869 vp = vp->v_mount->mnt_vnodecovered; 870 continue; 871 } 872 if (vp->v_dd->v_id != vp->v_ddid) { 873 numcwdfail1++; 874 free(buf, M_TEMP); 875 return (ENOTDIR); 876 } 877 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) { 878 /* ncp->nc_parent == par STAGE-3 */ 879 if (ncp->nc_dvp_data == (uintptr_t)vp->v_dd && 880 ncp->nc_dvp_id == vp->v_ddid) { 881 break; 882 } 883 } 884 if (ncp == NULL) { 885 numcwdfail2++; 886 free(buf, M_TEMP); 887 return (ENOENT); 888 } 889 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 890 if (bp == buf) { 891 numcwdfail4++; 892 free(buf, M_TEMP); 893 return (ENOMEM); 894 } 895 *--bp = ncp->nc_name[i]; 896 } 897 if (bp == buf) { 898 numcwdfail4++; 899 free(buf, M_TEMP); 900 return (ENOMEM); 901 } 902 *--bp = '/'; 903 slash_prefixed = 1; 904 vp = vp->v_dd; 905 } 906 if (!slash_prefixed) { 907 if (bp == buf) { 908 numcwdfail4++; 909 free(buf, M_TEMP); 910 return (ENOMEM); 911 } 912 *--bp = '/'; 913 } 914 numcwdfound++; 915 error = copyout(bp, uap->buf, strlen(bp) + 1); 916 free(buf, M_TEMP); 917 return (error); 918 } 919 920 /* 921 * Thus begins the fullpath magic. 922 */ 923 924 #undef STATNODE 925 #define STATNODE(name) \ 926 static u_int name; \ 927 SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "") 928 929 static int disablefullpath; 930 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, 931 &disablefullpath, 0, ""); 932 933 STATNODE(numfullpathcalls); 934 STATNODE(numfullpathfail1); 935 STATNODE(numfullpathfail2); 936 STATNODE(numfullpathfail3); 937 STATNODE(numfullpathfail4); 938 STATNODE(numfullpathfound); 939 940 int 941 textvp_fullpath(struct proc *p, char **retbuf, char **retfreebuf) 942 { 943 char *bp, *buf; 944 int i, slash_prefixed; 945 struct filedesc *fdp; 946 struct namecache *ncp; 947 struct vnode *vp, *textvp; 948 949 numfullpathcalls++; 950 if (disablefullpath) 951 return (ENODEV); 952 textvp = p->p_textvp; 953 if (textvp == NULL) 954 return (EINVAL); 955 buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); 956 bp = buf + MAXPATHLEN - 1; 957 *bp = '\0'; 958 fdp = p->p_fd; 959 slash_prefixed = 0; 960 for (vp = textvp; vp != fdp->fd_rdir && vp != rootvnode;) { 961 if (vp->v_flag & VROOT) { 962 if (vp->v_mount == NULL) { /* forced unmount */ 963 free(buf, M_TEMP); 964 return (EBADF); 965 } 966 vp = vp->v_mount->mnt_vnodecovered; 967 continue; 968 } 969 if (vp != textvp && vp->v_dd->v_id != vp->v_ddid) { 970 numfullpathfail1++; 971 free(buf, M_TEMP); 972 return (ENOTDIR); 973 } 974 TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) { 975 if (vp == textvp) 976 break; 977 /* ncp->nc_parent == par STAGE-3 */ 978 if (ncp->nc_dvp_data == (uintptr_t)vp->v_dd && 979 ncp->nc_dvp_id == vp->v_ddid) { 980 break; 981 } 982 } 983 if (ncp == NULL) { 984 numfullpathfail2++; 985 free(buf, M_TEMP); 986 return (ENOENT); 987 } 988 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 989 if (bp == buf) { 990 numfullpathfail4++; 991 free(buf, M_TEMP); 992 return (ENOMEM); 993 } 994 *--bp = ncp->nc_name[i]; 995 } 996 if (bp == buf) { 997 numfullpathfail4++; 998 free(buf, M_TEMP); 999 return (ENOMEM); 1000 } 1001 *--bp = '/'; 1002 slash_prefixed = 1; 1003 vp = vp->v_dd; 1004 } 1005 if (!slash_prefixed) { 1006 if (bp == buf) { 1007 numfullpathfail4++; 1008 free(buf, M_TEMP); 1009 return (ENOMEM); 1010 } 1011 *--bp = '/'; 1012 } 1013 numfullpathfound++; 1014 *retbuf = bp; 1015 *retfreebuf = buf; 1016 return (0); 1017 } 1018 1019