1 /* 2 * Copyright (c) 1989, 1993, 1995 3 * The Regents of the University of California. All rights reserved. 4 * Copyright (c) 2003 Matthew Dillon <dillon@backplane.com>, 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to Berkeley by 8 * Poul-Henning Kamp of the FreeBSD Project. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 39 * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $ 40 * $DragonFly: src/sys/kern/vfs_cache.c,v 1.24 2004/06/25 17:37:19 dillon Exp $ 41 */ 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/kernel.h> 46 #include <sys/sysctl.h> 47 #include <sys/mount.h> 48 #include <sys/vnode.h> 49 #include <sys/malloc.h> 50 #include <sys/sysproto.h> 51 #include <sys/proc.h> 52 #include <sys/namei.h> 53 #include <sys/filedesc.h> 54 #include <sys/fnv_hash.h> 55 #include <sys/globaldata.h> 56 #include <sys/kern_syscall.h> 57 58 /* 59 * Random lookups in the cache are accomplished with a hash table using 60 * a hash key of (nc_src_vp, name). 61 * 62 * Negative entries may exist and correspond to structures where nc_vp 63 * is NULL. In a negative entry, NCF_WHITEOUT will be set if the entry 64 * corresponds to a whited-out directory entry (verses simply not finding the 65 * entry at all). 66 * 67 * Upon reaching the last segment of a path, if the reference is for DELETE, 68 * or NOCACHE is set (rewrite), and the name is located in the cache, it 69 * will be dropped. 70 */ 71 72 /* 73 * Structures associated with name cacheing. 
 */

/* Hash chain head for a given 32 bit hash value (nchash is a power-of-2 mask) */
#define NCHHASH(hash)	(&nchashtbl[(hash) & nchash])
/* Minimum number of negative entries retained before ncnegfactor trimming */
#define MINNEG		1024

MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static struct namecache_list	ncneglist;		/* instead of vnode */
static struct namecache		rootnamecache;		/* Dummy node */

static int	nczapcheck;		/* panic on bad release */
SYSCTL_INT(_debug, OID_AUTO, nczapcheck, CTLFLAG_RW, &nczapcheck, 0, "");

static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");

static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");

static u_long	numneg;			/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");

static u_long	numcache;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");

static u_long	numunres;		/* number of unresolved entries */
SYSCTL_ULONG(_debug, OID_AUTO, numunres, CTLFLAG_RD, &numunres, 0, "");

SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

/* Per-cpu statistics, aggregated in user-land (see sysctl_nchstats below) */
struct nchstats nchstats[SMP_MAXCPU];

/*
 * Export VFS cache effectiveness statistics to user-land.
 *
 * The statistics are left for aggregation to user-land so
 * neat things can be achieved, like observing per-CPU cache
 * distribution.
 */
static int
sysctl_nchstats(SYSCTL_HANDLER_ARGS)
{
	struct globaldata *gd;
	int i, error;

	error = 0;
	/* Copy out one struct nchstats per cpu; stop on the first error */
	for (i = 0; i < ncpus; ++i) {
		gd = globaldata_find(i);
		if ((error = SYSCTL_OUT(req, (void *)&(*gd->gd_nchstats),
			sizeof(struct nchstats))))
			break;
	}

	return (error);
}
SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_nchstats, "S,nchstats", "VFS cache effectiveness statistics");

static void cache_zap(struct namecache *ncp);

/*
 * cache_hold() and cache_drop() prevent the premature deletion of a
 * namecache entry but do not prevent operations (such as zapping) on
 * that namecache entry.
 */
static __inline
struct namecache *
_cache_hold(struct namecache *ncp)
{
	++ncp->nc_refs;
	return(ncp);
}

/*
 * Release a reference.  If this is the last reference and the entry is
 * unresolved with no children, destroy it outright via cache_zap()
 * (which consumes the reference); otherwise just decrement the count.
 */
static __inline
void
_cache_drop(struct namecache *ncp)
{
	KKASSERT(ncp->nc_refs > 0);
	if (ncp->nc_refs == 1 &&
	    (ncp->nc_flag & NCF_UNRESOLVED) &&
	    TAILQ_EMPTY(&ncp->nc_list)
	) {
		cache_zap(ncp);
	} else {
		--ncp->nc_refs;
	}
}

struct namecache *
cache_hold(struct namecache *ncp)
{
	return(_cache_hold(ncp));
}

void
cache_drop(struct namecache *ncp)
{
	_cache_drop(ncp);
}

/*
 * Link ncp under par in the namecache topology.  The parent's vnode is
 * held when the parent transitions from having no children to having
 * children (the vnode hold count tracks "has namecache children").
 */
static void
cache_link_parent(struct namecache *ncp, struct namecache *par)
{
	KKASSERT(ncp->nc_parent == NULL);
	ncp->nc_parent = par;
	if (TAILQ_EMPTY(&par->nc_list)) {
		if (par->nc_vp)
			vhold(par->nc_vp);
	}
	TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
}

/*
 * Disconnect ncp from its parent, dropping the parent's vnode hold if
 * ncp was the last child.  A temporary ref on the parent keeps it alive
 * across the list manipulation.
 */
static void
cache_unlink_parent(struct namecache *ncp)
{
	struct namecache *par;

	if ((par = ncp->nc_parent) != NULL) {
		ncp->nc_parent = NULL;
		par = cache_hold(par);
		TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
		if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
			vdrop(par->nc_vp);
		cache_drop(par);
	}
}

/*
 * Allocate a new, zeroed namecache entry.  If vp is non-NULL the entry
 * is attached to the vnode's v_namecache list (positive entry),
 * otherwise it goes on the global negative-entry list.
 */
static struct namecache *
cache_alloc(struct vnode *vp)
{
	struct namecache *ncp;

	ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO);
	TAILQ_INIT(&ncp->nc_list);
	ncp->nc_vp = vp;
	if (vp != NULL) {
		TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
		++numcache;
	} else {
		TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
		++numneg;
	}
	return(ncp);
}

/*
 * Try to destroy a namecache entry.  The entry is disassociated from its
 * vnode or ncneglist and reverted to an UNRESOLVED state.
241 * 242 * Then, if there are no additional references to the ncp and we can 243 * successfully delete the children, the entry is also removed from the 244 * namecache hashlist / topology. 245 * 246 * References or undeletable children will prevent the entry from being 247 * removed from the topology. The entry may be revalidated (typically 248 * by cache_enter()) at a later time. Children remain because: 249 * 250 * + we have tried to delete a node rather then a leaf in the topology. 251 * + the presence of negative entries (we try to scrap these). 252 * + an entry or child has a non-zero ref count and cannot be scrapped. 253 * 254 * This function must be called with the ncp held and will drop the ref 255 * count during zapping. 256 */ 257 static void 258 cache_zap(struct namecache *ncp) 259 { 260 struct namecache *par; 261 struct vnode *vp; 262 263 /* 264 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED. 265 */ 266 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 267 ncp->nc_flag |= NCF_UNRESOLVED; 268 ++numunres; 269 if ((vp = ncp->nc_vp) != NULL) { 270 --numcache; 271 ncp->nc_vp = NULL; /* safety */ 272 TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode); 273 if (!TAILQ_EMPTY(&ncp->nc_list)) 274 vdrop(vp); 275 } else { 276 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode); 277 --numneg; 278 } 279 } 280 281 /* 282 * Try to scrap the entry and possibly tail-recurse on its parent. 283 * We only scrap unref'd (other then our ref) unresolved entries, 284 * we do not scrap 'live' entries. 285 */ 286 while (ncp->nc_flag & NCF_UNRESOLVED) { 287 /* 288 * Someone other then us has a ref, stop. 289 */ 290 if (ncp->nc_refs > 1) 291 goto done; 292 293 /* 294 * We have children, stop. 295 */ 296 if (!TAILQ_EMPTY(&ncp->nc_list)) 297 goto done; 298 299 /* 300 * Ok, we can completely destroy and free this entry. Sanity 301 * check it against our static rootnamecache structure, 302 * then remove it from the hash. 
303 */ 304 KKASSERT(ncp != &rootnamecache); 305 306 if (ncp->nc_flag & NCF_HASHED) { 307 ncp->nc_flag &= ~NCF_HASHED; 308 LIST_REMOVE(ncp, nc_hash); 309 } 310 311 /* 312 * Unlink from its parent and free, then loop on the 313 * parent. XXX temp hack, in stage-3 parent is never NULL 314 */ 315 if ((par = ncp->nc_parent) != NULL) { 316 par = cache_hold(par); 317 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry); 318 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list)) 319 vdrop(par->nc_vp); 320 } 321 --numunres; 322 ncp->nc_refs = -1; /* safety */ 323 ncp->nc_parent = NULL; /* safety */ 324 if (ncp->nc_name) 325 free(ncp->nc_name, M_VFSCACHE); 326 free(ncp, M_VFSCACHE); 327 ncp = par; 328 if (par == NULL) /* temp hack */ 329 return; /* temp hack */ 330 } 331 done: 332 --ncp->nc_refs; 333 } 334 335 /* 336 * Lookup an entry in the cache 337 * 338 * Lookup is called with dvp pointing to the directory to search, 339 * cnp pointing to the name of the entry being sought. 340 * 341 * If the lookup succeeds, the vnode is returned in *vpp, and a 342 * status of -1 is returned. 343 * 344 * If the lookup determines that the name does not exist (negative cacheing), 345 * a status of ENOENT is returned. 346 * 347 * If the lookup fails, a status of zero is returned. 348 * 349 * Note that UNRESOLVED entries are ignored. They are not negative cache 350 * entries. 351 */ 352 int 353 cache_lookup(struct vnode *dvp, struct namecache *par, struct vnode **vpp, 354 struct namecache **ncpp, struct componentname *cnp) 355 { 356 struct namecache *ncp; 357 u_int32_t hash; 358 globaldata_t gd = mycpu; 359 360 numcalls++; 361 362 /* 363 * Obtain the namecache entry associated with dvp, creating one if 364 * necessary. If we have to create one we have insufficient 365 * information to hash it or even supply the name, but we still 366 * need one so we can link it in. 367 * 368 * NOTE: in this stage of development, the passed 'par' is 369 * almost always NULL. 
370 */ 371 if (par == NULL) { 372 if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL) 373 par = cache_alloc(dvp); 374 } 375 376 /* 377 * Deal with "." and "..". In this stage of code development we leave 378 * the returned ncpp NULL. Note that if the namecache is disjoint, 379 * we won't find a vnode for "..". 380 */ 381 if (cnp->cn_nameptr[0] == '.') { 382 if (cnp->cn_namelen == 1) { 383 *vpp = dvp; 384 dothits++; 385 numposhits++; /* include in total statistics */ 386 return (-1); 387 } 388 if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') { 389 dotdothits++; 390 numposhits++; /* include in total statistics */ 391 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) 392 return (0); 393 if (par->nc_parent == NULL || 394 par->nc_parent->nc_vp == NULL) { 395 return (0); 396 } 397 *vpp = par->nc_parent->nc_vp; 398 return (-1); 399 } 400 } 401 402 /* 403 * Try to locate an existing entry 404 */ 405 hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT); 406 hash = fnv_32_buf(&par, sizeof(par), hash); 407 if (nczapcheck > 1) 408 printf("DVP %p/%p %08x %*.*s\n", dvp, par, hash, (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr); 409 restart: 410 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 411 numchecks++; 412 if (nczapcheck > 1) { 413 printf("TEST ncp par=%p %*.*s\n", 414 ncp->nc_parent, ncp->nc_nlen, ncp->nc_nlen, 415 ncp->nc_name); 416 } 417 418 /* 419 * Zap entries that have timed out. 420 */ 421 if (ncp->nc_timeout && 422 (int)(ncp->nc_timeout - ticks) < 0 423 ) { 424 if (nczapcheck > 1) 425 printf("TIMEOUT\n"); 426 cache_zap(cache_hold(ncp)); 427 goto restart; 428 } 429 430 /* 431 * Break out if we find a matching entry. UNRESOLVED entries 432 * never match (they are in the middle of being destroyed). 
433 */ 434 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 && 435 ncp->nc_parent == par && 436 ncp->nc_nlen == cnp->cn_namelen && 437 bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0 438 ) { 439 if (nczapcheck > 1) 440 printf("GOOD\n"); 441 cache_hold(ncp); 442 break; 443 } 444 } 445 446 /* 447 * If we failed to locate an entry, return 0 (indicates failure). 448 */ 449 if (ncp == NULL) { 450 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) { 451 nummisszap++; 452 } else { 453 nummiss++; 454 } 455 gd->gd_nchstats->ncs_miss++; 456 if (nczapcheck) { 457 printf("MISS %p/%p %*.*s/%*.*s\n", dvp, par, 458 par->nc_nlen, par->nc_nlen, (par->nc_name ? par->nc_name : ""), 459 (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr); 460 } 461 return (0); 462 } 463 464 /* 465 * If we found an entry, but we don't want to have one, we zap it. 466 */ 467 if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) { 468 numposzaps++; 469 gd->gd_nchstats->ncs_badhits++; 470 cache_zap(ncp); 471 return (0); 472 } 473 474 /* 475 * If the vnode is not NULL then return the positive match. 476 */ 477 if (ncp->nc_vp) { 478 numposhits++; 479 gd->gd_nchstats->ncs_goodhits++; 480 *vpp = ncp->nc_vp; 481 cache_drop(ncp); 482 return (-1); 483 } 484 485 /* 486 * If the vnode is NULL we found a negative match. If we want to 487 * create it, purge the negative match and return failure (as if 488 * we hadn't found a match in the first place). 489 */ 490 if (cnp->cn_nameiop == NAMEI_CREATE) { 491 numnegzaps++; 492 gd->gd_nchstats->ncs_badhits++; 493 cache_zap(ncp); 494 return (0); 495 } 496 497 numneghits++; 498 499 /* 500 * We found a "negative" match, ENOENT notifies client of this match. 501 * The nc_flag field records whether this is a whiteout. Since there 502 * is no vnode we can use the vnode tailq link field with ncneglist. 
503 */ 504 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode); 505 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode); 506 gd->gd_nchstats->ncs_neghits++; 507 if (ncp->nc_flag & NCF_WHITEOUT) 508 cnp->cn_flags |= CNP_ISWHITEOUT; 509 cache_drop(ncp); 510 return (ENOENT); 511 } 512 513 /* 514 * Generate a special linkage between the mount point and the root of the 515 * mounted filesystem in order to maintain the namecache topology across 516 * a mount point. The special linkage has a 0-length name component 517 * and sets NCF_MOUNTPT. 518 */ 519 void 520 cache_mount(struct vnode *dvp, struct vnode *tvp) 521 { 522 struct namecache *ncp; 523 struct namecache *par; 524 struct nchashhead *nchpp; 525 u_int32_t hash; 526 527 /* 528 * If a linkage already exists we do not have to do anything. 529 */ 530 hash = fnv_32_buf("", 0, FNV1_32_INIT); 531 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 532 LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) { 533 numchecks++; 534 if (ncp->nc_vp == tvp && 535 ncp->nc_nlen == 0 && 536 ncp->nc_parent && 537 ncp->nc_parent->nc_vp == dvp 538 ) { 539 return; 540 } 541 } 542 543 if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL) 544 par = cache_alloc(dvp); 545 546 /* 547 * Otherwise create a new linkage. 548 */ 549 ncp = cache_alloc(tvp); 550 ncp->nc_flag = NCF_MOUNTPT; 551 cache_link_parent(ncp, par); 552 553 /* 554 * Hash table 555 */ 556 hash = fnv_32_buf("", 0, FNV1_32_INIT); 557 hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash); 558 nchpp = NCHHASH(hash); 559 LIST_INSERT_HEAD(nchpp, ncp, nc_hash); 560 561 ncp->nc_flag |= NCF_HASHED; 562 } 563 564 /* 565 * Add an entry to the cache. 566 */ 567 void 568 cache_enter(struct vnode *dvp, struct namecache *par, struct vnode *vp, struct componentname *cnp) 569 { 570 struct namecache *ncp; 571 struct namecache *bpar; 572 struct nchashhead *nchpp; 573 u_int32_t hash; 574 char *name; 575 576 /* 577 * If the directory has no namecache entry we must associate one with 578 * it. 
	 * The name of the entry is not known so it isn't hashed.
	 */
	if (par == NULL) {
		if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
			par = cache_alloc(dvp);
	}

	/*
	 * This may be a bit confusing.  "." and ".." are 'virtual' entries.
	 * We do not actually create a namecache entry representing either.
	 * However, the ".." case is used to linkup a potentially disjoint
	 * directory with its parent, to disconnect a directory from its
	 * parent, or to change an existing linkage that may no longer be
	 * correct (as might occur when a subdirectory is renamed).
	 */

	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
		return;
	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.'
	) {
		if (vp == NULL) {
			/* disconnect par from its parent */
			if (par->nc_parent)
				cache_unlink_parent(par);
		} else {
			/* (re)link par under vp's namecache entry */
			if ((ncp = TAILQ_FIRST(&vp->v_namecache)) == NULL)
				ncp = cache_alloc(vp);
			cache_hold(par);
			if (par->nc_parent)
				cache_unlink_parent(par);
			cache_link_parent(par, ncp); /* ncp is parent of par */
			cache_drop(par);
		}
		return;
	}

	/*
	 * Locate other entries associated with this vnode and zap them,
	 * because the purge code may not be able to find them due to
	 * the topology not yet being consistent.  This is a temporary
	 * hack.  Zapping mutates the list, so rescan after each zap.
	 */
	if (vp) {
again:
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
				cache_zap(cache_hold(ncp));
				goto again;
			}
		}
	}

	/* hash key is (parent pointer, name), matching cache_lookup() */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	bpar = par;
	hash = fnv_32_buf(&bpar, sizeof(bpar), hash);

	if (nczapcheck > 1)
		printf("ENTER %p/%p %08x '%*.*s' %p ", dvp, par, hash, (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr, vp);

	/* nc_name is stored without a NUL terminator; nc_nlen is the length */
	name = malloc(cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
	ncp = cache_alloc(vp);
	if (nczapcheck > 1)
		printf("alloc\n");

	/*
	 * Set a timeout.  A computed expiry of 0 is bumped to 1 because 0
	 * means "no timeout" in cache_lookup().
	 */
	if (cnp->cn_flags & CNP_CACHETIMEOUT) {
		if ((ncp->nc_timeout = ticks + cnp->cn_timeout) == 0)
			ncp->nc_timeout = 1;
	}

	/*
	 * Linkup the parent pointer, bump the parent vnode's hold
	 * count when we go from 0->1 children.
	 */
	cache_link_parent(ncp, par);

	/*
	 * Add to the hash table
	 */
	ncp->nc_name = name;
	ncp->nc_nlen = cnp->cn_namelen;
	bcopy(cnp->cn_nameptr, ncp->nc_name, cnp->cn_namelen);
	nchpp = NCHHASH(hash);
	LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
	ncp->nc_flag |= NCF_HASHED;

	/*
	 * If the target vnode is NULL then this is a negative cache
	 * entry; record whether it represents a whiteout.
	 */
	if (vp == NULL) {
		ncp->nc_flag &= ~NCF_WHITEOUT;
		if (cnp->cn_flags & CNP_ISWHITEOUT)
			ncp->nc_flag |= NCF_WHITEOUT;
	}

	/*
	 * Don't cache too many negative hits; evict the oldest one.
	 */
	if (numneg > MINNEG && numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneglist);
		KKASSERT(ncp != NULL);
		cache_zap(cache_hold(ncp));
	}
}

/*
 * Name cache initialization, from vfsinit() when we are booting
 *
 * rootnamecache is initialized such that it cannot be recursively deleted.
 */
void
nchinit(void)
{
	int i;
	globaldata_t gd;

	/*
	 * initialise per-cpu namecache effectiveness statistics.
	 */
	for (i = 0; i < ncpus; ++i) {
		gd = globaldata_find(i);
		gd->gd_nchstats = &nchstats[i];
	}

	TAILQ_INIT(&ncneglist);
	nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash);
	TAILQ_INIT(&rootnamecache.nc_list);
	/* NCF_HASHED and a permanent ref prevent recursive deletion */
	rootnamecache.nc_flag |= NCF_HASHED | NCF_ROOT | NCF_UNRESOLVED;
	rootnamecache.nc_refs = 1;
}

/*
 * vfs_cache_setroot()
 *
 *	Create an association between the root of our namecache and
 *	the root vnode.  This routine may be called several times during
 *	booting.
 */
void
vfs_cache_setroot(struct vnode *nvp)
{
	KKASSERT(rootnamecache.nc_refs > 0);	/* don't accidentally free */
	cache_zap(cache_hold(&rootnamecache));

	rootnamecache.nc_vp = nvp;
	rootnamecache.nc_flag &= ~NCF_UNRESOLVED;
	if (nvp) {
		++numcache;
		/* vnode is held while the entry has children */
		if (!TAILQ_EMPTY(&rootnamecache.nc_list))
			vhold(nvp);
		TAILQ_INSERT_HEAD(&nvp->v_namecache, &rootnamecache, nc_vnode);
	} else {
		++numneg;
		TAILQ_INSERT_TAIL(&ncneglist, &rootnamecache, nc_vnode);
		rootnamecache.nc_flag &= ~NCF_WHITEOUT;
	}
}

/*
 * Invalidate all namecache entries to a particular vnode as well as
 * any direct children of that vnode in the namecache.  This is a
 * 'catch all' purge used by filesystems that do not know any better.
 *
 * A new vnode v_id is generated.  Note that no vnode will ever have a
 * v_id of 0.
 *
 * Note that the linkage between the vnode and its namecache entries will
 * be removed, but the namecache entries themselves might stay put due to
 * active references from elsewhere in the system or due to the existence of
 * the children.  The namecache topology is left intact even if we do not
 * know what the vnode association is.  Such entries will be marked
 * NCF_UNRESOLVED.
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnodes v_id individually instead of
 * XXX: using the global v_id.
 */
void
cache_purge(struct vnode *vp)
{
	static u_long nextid;
	struct namecache *ncp;
	struct namecache *scan;

	/*
	 * Disassociate the vnode from its namecache entries along with
	 * (for historical reasons) any direct children.
	 */
	while ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL) {
		cache_hold(ncp);

restart: /* YYY hack, fix me */
		TAILQ_FOREACH(scan, &ncp->nc_list, nc_entry) {
			if ((scan->nc_flag & NCF_UNRESOLVED) == 0) {
				cache_zap(cache_hold(scan));
				goto restart;
			}
		}
		cache_zap(ncp);
	}

	/*
	 * Calculate a new unique id for ".." handling
	 */
	do {
		nextid++;
	} while (nextid == vp->v_id || nextid == 0);
	vp->v_id = nextid;
}

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *nchpp;
	struct namecache *ncp, *nnp;

	/*
	 * Scan hash tables for applicable entries.  Both the current and
	 * next entry are held so zapping cannot free them out from under
	 * the iteration.
	 */
	for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) {
		ncp = LIST_FIRST(nchpp);
		if (ncp)
			cache_hold(ncp);
		while (ncp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (nnp)
				cache_hold(nnp);
			if (ncp->nc_vp && ncp->nc_vp->v_mount == mp)
				cache_zap(ncp);
			else
				cache_drop(ncp);
			ncp = nnp;
		}
	}
}

/*
 * cache_leaf_test()
 *
 *	Test whether the vnode is at a leaf in the namecache tree.
 *
 *	Returns 0 if it is a leaf, -1 if it isn't.
 */
int
cache_leaf_test(struct vnode *vp)
{
	struct namecache *scan;
	struct namecache *ncp;

	TAILQ_FOREACH(scan, &vp->v_namecache, nc_vnode) {
		TAILQ_FOREACH(ncp, &scan->nc_list, nc_entry) {
			/* YYY && ncp->nc_vp->v_type == VDIR ? */
			if (ncp->nc_vp != NULL)
				return(-1);
		}
	}
	return(0);
}

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 *
 *	vop_lookup_args {
 *		struct vnode a_dvp;
 *		struct namecache *a_par;
 *		struct vnode **a_vpp;
 *		struct namecache **a_ncpp;
 *		struct componentname *a_cnp;
 *	}
 */
int
vfs_cache_lookup(struct vop_lookup_args *ap)
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct namecache *par = ap->a_par;
	struct vnode **vpp = ap->a_vpp;
	struct namecache **ncpp = ap->a_ncpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_td;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	if (ncpp)
		*ncpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & CNP_LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* disallow DELETE/RENAME of the last component on a read-only fs */
	if ((flags & CNP_ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
		return (EROFS);
	}

	error = VOP_ACCESS(dvp, VEXEC, cred, td);

	if (error)
		return (error);

	error = cache_lookup(dvp, par, vpp, ncpp, cnp);

	/* cache miss (0): fall through to the filesystem's lookup */
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, par, vpp, ncpp, cnp));

	/* negative cache hit */
	if (error == ENOENT)
		return (error);

	/*
	 * Positive hit.  Acquire a lock/ref on the vnode; the unlock/relock
	 * choreography below follows the CNP_LOCKPARENT / CNP_ISDOTDOT
	 * locking protocol and records parent-unlocked state in
	 * CNP_PDIRUNLOCK.
	 */
	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~CNP_PDIRUNLOCK;
	if (dvp == vp) {	/* lookup on "."
				 */
		vref(vp);
		error = 0;
	} else if (flags & CNP_ISDOTDOT) {
		/* unlock child dir before locking parent to avoid deadlock */
		VOP_UNLOCK(dvp, NULL, 0, td);
		cnp->cn_flags |= CNP_PDIRUNLOCK;
		error = vget(vp, NULL, LK_EXCLUSIVE, td);
		if (!error && lockparent && (flags & CNP_ISLASTCN)) {
			if ((error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~CNP_PDIRUNLOCK;
		}
	} else {
		error = vget(vp, NULL, LK_EXCLUSIVE, td);
		if (!lockparent || error || !(flags & CNP_ISLASTCN)) {
			VOP_UNLOCK(dvp, NULL, 0, td);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		/* vnode was recycled; discard and redo via the filesystem */
		vput(vp);
		if (lockparent && dvp != vp && (flags & CNP_ISLASTCN)) {
			VOP_UNLOCK(dvp, NULL, 0, td);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
	}
	if (cnp->cn_flags & CNP_PDIRUNLOCK) {
		error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~CNP_PDIRUNLOCK;
	}
	return (VOP_CACHEDLOOKUP(dvp, par, vpp, ncpp, cnp));
}

static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");

static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);

/*
 * __getcwd() system call: copy the current directory path out to
 * user space, buffer-limited to MAXPATHLEN.
 */
int
__getcwd(struct __getcwd_args *uap)
{
	int buflen;
	int error;
	char *buf;
	char *bp;

	if (disablecwd)
		return (ENODEV);

	buflen = uap->buflen;
	if (buflen < 2)
		return (EINVAL);
	if (buflen > MAXPATHLEN)
		buflen = MAXPATHLEN;
	buf = malloc(buflen, M_TEMP, M_WAITOK);
	bp = kern_getcwd(buf, buflen, &error);
	if (error == 0)
		error = copyout(bp, uap->buf, strlen(bp) + 1);
	free(buf, M_TEMP);
	return (error);
}

/*
 * Build the current working directory path into buf by walking the
 * namecache from fd_cdir up towards the root, writing components
 * backwards from the end of the buffer.
 *
 * Returns a pointer into buf (NOT buf itself) on success and sets
 * *error to 0; returns NULL with *error set on failure (EBADF on a
 * forced unmount, ENOENT on a disconnected namecache, ENOMEM if the
 * buffer is too small).  The caller owns buf.
 */
char *
kern_getcwd(char *buf, size_t buflen, int *error)
{
	struct proc *p = curproc;
	char *bp;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	bp = buf;
	bp += buflen - 1;
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		/* at a mount point: hop to the covered vnode underneath */
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				*error = EBADF;
				return(NULL);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		/* find a namecache entry with a resolved parent and a name */
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
			    ncp->nc_nlen > 0) {
				break;
			}
		}
		if (ncp == NULL) {
			numcwdfail2++;
			*error = ENOENT;
			return(NULL);
		}
		/* prepend the component (nc_name is not NUL terminated) */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numcwdfail4++;
				*error = ENOMEM;
				return(NULL);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numcwdfail4++;
			*error = ENOMEM;
			return(NULL);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_parent->nc_vp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numcwdfail4++;
			*error = ENOMEM;
			return(NULL);
		}
		*--bp = '/';
	}
	numcwdfound++;
	*error = 0;
	return (bp);
}

/*
 * Thus begins the fullpath magic.
 */

#undef STATNODE
#define STATNODE(name)							\
	static u_int name;						\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
    &disablefullpath, 0, "");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * Resolve the full path of vnode vn (or p->p_textvp when vn is NULL)
 * relative to process p's root directory, using the same backwards
 * namecache walk as kern_getcwd().
 *
 * On success returns 0 with *retbuf pointing at the path inside the
 * MAXPATHLEN allocation returned in *freebuf; the caller must
 * free(*freebuf, M_TEMP).  On failure the buffer is freed here.
 */
int
vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);

	if (p == NULL)
		return (EINVAL);

	/* vn is NULL, client wants us to use p->p_textvp */
	if (vn == NULL) {
		if ((vn = p->p_textvp) == NULL)
			return (EINVAL);
	}

	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				free(buf, M_TEMP);
				return (EBADF);
			}
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
			    ncp->nc_nlen > 0) {
				break;
			}
		}
		if (ncp == NULL) {
			numfullpathfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numfullpathfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_parent->nc_vp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numfullpathfound++;
	*retbuf = bp;
	*freebuf = buf;
	return (0);
}