1 /* 2 * Copyright (c) 2003,2004 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * Copyright (c) 1989, 1993, 1995 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * Poul-Henning Kamp of the FreeBSD Project. 
39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. All advertising materials mentioning features or use of this software 49 * must display the following acknowledgement: 50 * This product includes software developed by the University of 51 * California, Berkeley and its contributors. 52 * 4. Neither the name of the University nor the names of its contributors 53 * may be used to endorse or promote products derived from this software 54 * without specific prior written permission. 55 * 56 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 57 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 58 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 59 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 60 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 61 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 62 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 63 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 64 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 65 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 66 * SUCH DAMAGE. 
67 * 68 * @(#)vfs_cache.c 8.5 (Berkeley) 3/22/95 69 * $FreeBSD: src/sys/kern/vfs_cache.c,v 1.42.2.6 2001/10/05 20:07:03 dillon Exp $ 70 * $DragonFly: src/sys/kern/vfs_cache.c,v 1.25 2004/07/16 05:51:10 dillon Exp $ 71 */ 72 73 #include <sys/param.h> 74 #include <sys/systm.h> 75 #include <sys/kernel.h> 76 #include <sys/sysctl.h> 77 #include <sys/mount.h> 78 #include <sys/vnode.h> 79 #include <sys/malloc.h> 80 #include <sys/sysproto.h> 81 #include <sys/proc.h> 82 #include <sys/namei.h> 83 #include <sys/filedesc.h> 84 #include <sys/fnv_hash.h> 85 #include <sys/globaldata.h> 86 #include <sys/kern_syscall.h> 87 88 /* 89 * Random lookups in the cache are accomplished with a hash table using 90 * a hash key of (nc_src_vp, name). 91 * 92 * Negative entries may exist and correspond to structures where nc_vp 93 * is NULL. In a negative entry, NCF_WHITEOUT will be set if the entry 94 * corresponds to a whited-out directory entry (verses simply not finding the 95 * entry at all). 96 * 97 * Upon reaching the last segment of a path, if the reference is for DELETE, 98 * or NOCACHE is set (rewrite), and the name is located in the cache, it 99 * will be dropped. 100 */ 101 102 /* 103 * Structures associated with name cacheing. 
 */

/* Map a name hash to its chain head; nchash is a power-of-2 mask. */
#define NCHHASH(hash)	(&nchashtbl[(hash) & nchash])
#define MINNEG		1024	/* keep at least this many negative entries */

MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries");

static LIST_HEAD(nchashhead, namecache) *nchashtbl;	/* Hash Table */
static struct namecache_list	ncneglist;		/* instead of vnode */
static struct namecache		rootnamecache;		/* Dummy node */

static int	nczapcheck;		/* panic on bad release */
SYSCTL_INT(_debug, OID_AUTO, nczapcheck, CTLFLAG_RW, &nczapcheck, 0, "");

static u_long	nchash;			/* size of hash table */
SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, "");

static u_long	ncnegfactor = 16;	/* ratio of negative entries */
SYSCTL_ULONG(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");

static u_long	numneg;		/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, "");

static u_long	numcache;	/* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");

static u_long	numunres;	/* number of unresolved entries */
SYSCTL_ULONG(_debug, OID_AUTO, numunres, CTLFLAG_RD, &numunres, 0, "");

SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), "");
SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), "");

/*
 * The new name cache statistics.  STATNODE declares a read-only u_long
 * counter under vfs.cache; it is #undef'd and redefined later in this
 * file for the fullpath statistics.
 */
SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics");
#define STATNODE(mode, name, var) \
	SYSCTL_ULONG(_vfs_cache, OID_AUTO, name, mode, var, 0, "");
STATNODE(CTLFLAG_RD, numneg, &numneg);
STATNODE(CTLFLAG_RD, numcache, &numcache);
static u_long numcalls; STATNODE(CTLFLAG_RD, numcalls, &numcalls);
static u_long dothits; STATNODE(CTLFLAG_RD, dothits, &dothits);
static u_long dotdothits; STATNODE(CTLFLAG_RD, dotdothits, &dotdothits);
static u_long numchecks; STATNODE(CTLFLAG_RD, numchecks, &numchecks);
static u_long nummiss; STATNODE(CTLFLAG_RD, nummiss, &nummiss);
static u_long nummisszap; STATNODE(CTLFLAG_RD, nummisszap, &nummisszap);
static u_long numposzaps; STATNODE(CTLFLAG_RD, numposzaps, &numposzaps);
static u_long numposhits; STATNODE(CTLFLAG_RD, numposhits, &numposhits);
static u_long numnegzaps; STATNODE(CTLFLAG_RD, numnegzaps, &numnegzaps);
static u_long numneghits; STATNODE(CTLFLAG_RD, numneghits, &numneghits);

struct nchstats nchstats[SMP_MAXCPU];
/*
 * Export VFS cache effectiveness statistics to user-land.
 *
 * The statistics are left for aggregation to user-land so
 * neat things can be achieved, like observing per-CPU cache
 * distribution.
 */
static int
sysctl_nchstats(SYSCTL_HANDLER_ARGS)
{
	struct globaldata *gd;
	int i, error;

	/* Copy out one struct nchstats per cpu; stop on the first error. */
	error = 0;
	for (i = 0; i < ncpus; ++i) {
		gd = globaldata_find(i);
		if ((error = SYSCTL_OUT(req, (void *)&(*gd->gd_nchstats),
			sizeof(struct nchstats))))
			break;
	}

	return (error);
}
SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD,
	0, 0, sysctl_nchstats, "S,nchstats", "VFS cache effectiveness statistics");

static void cache_zap(struct namecache *ncp);

/*
 * cache_hold() and cache_drop() prevent the premature deletion of a
 * namecache entry but do not prevent operations (such as zapping) on
 * that namecache entry.
 */
static __inline
struct namecache *
_cache_hold(struct namecache *ncp)
{
	++ncp->nc_refs;
	return(ncp);
}

/*
 * Drop a reference.  If this is the last reference on an unresolved,
 * childless entry the entry is destroyed outright via cache_zap()
 * (which consumes our reference), otherwise just decrement nc_refs.
 */
static __inline
void
_cache_drop(struct namecache *ncp)
{
	KKASSERT(ncp->nc_refs > 0);
	if (ncp->nc_refs == 1 &&
	    (ncp->nc_flag & NCF_UNRESOLVED) &&
	    TAILQ_EMPTY(&ncp->nc_list)
	) {
		cache_zap(ncp);
	} else {
		--ncp->nc_refs;
	}
}

struct namecache *
cache_hold(struct namecache *ncp)
{
	return(_cache_hold(ncp));
}

void
cache_drop(struct namecache *ncp)
{
	_cache_drop(ncp);
}

/*
 * Link ncp under par.  The parent's vnode, if any, gets an extra hold
 * when the parent transitions from 0 to 1 children.
 */
static void
cache_link_parent(struct namecache *ncp, struct namecache *par)
{
	KKASSERT(ncp->nc_parent == NULL);
	ncp->nc_parent = par;
	if (TAILQ_EMPTY(&par->nc_list)) {
		if (par->nc_vp)
			vhold(par->nc_vp);
	}
	TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry);
}

/*
 * Disconnect ncp from its parent, releasing the parent vnode's hold
 * when the parent's child list becomes empty.  The temporary ref on
 * par keeps it stable across the removal.
 */
static void
cache_unlink_parent(struct namecache *ncp)
{
	struct namecache *par;

	if ((par = ncp->nc_parent) != NULL) {
		ncp->nc_parent = NULL;
		par = cache_hold(par);
		TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
		if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
			vdrop(par->nc_vp);
		cache_drop(par);
	}
}

/*
 * Allocate a new, zeroed namecache entry.  A non-NULL vp makes it a
 * positive entry on the vnode's list; a NULL vp makes it a negative
 * entry queued on ncneglist.
 */
static struct namecache *
cache_alloc(struct vnode *vp)
{
	struct namecache *ncp;

	ncp = malloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO);
	TAILQ_INIT(&ncp->nc_list);
	ncp->nc_vp = vp;
	if (vp != NULL) {
		TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode);
		++numcache;
	} else {
		TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
		++numneg;
	}
	return(ncp);
}

/*
 * Try to destroy a namecache entry.  The entry is disassociated from its
 * vnode or ncneglist and reverted to an UNRESOLVED state.
 *
 * Then, if there are no additional references to the ncp and we can
 * successfully delete the children, the entry is also removed from the
 * namecache hashlist / topology.
 *
 * References or undeletable children will prevent the entry from being
 * removed from the topology.  The entry may be revalidated (typically
 * by cache_enter()) at a later time.  Children remain because:
 *
 *	+ we have tried to delete a node rather than a leaf in the topology.
 *	+ the presence of negative entries (we try to scrap these).
 *	+ an entry or child has a non-zero ref count and cannot be scrapped.
 *
 * This function must be called with the ncp held and will drop the ref
 * count during zapping.
 */
static void
cache_zap(struct namecache *ncp)
{
	struct namecache *par;
	struct vnode *vp;

	/*
	 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED.
	 */
	if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
		ncp->nc_flag |= NCF_UNRESOLVED;
		++numunres;
		if ((vp = ncp->nc_vp) != NULL) {
			--numcache;
			ncp->nc_vp = NULL;	/* safety */
			TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode);
			/*
			 * Release the hold acquired in cache_link_parent()
			 * for a parent with children.
			 */
			if (!TAILQ_EMPTY(&ncp->nc_list))
				vdrop(vp);
		} else {
			TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
			--numneg;
		}
	}

	/*
	 * Try to scrap the entry and possibly tail-recurse on its parent.
	 * We only scrap unref'd (other than our ref) unresolved entries,
	 * we do not scrap 'live' entries.
	 */
	while (ncp->nc_flag & NCF_UNRESOLVED) {
		/*
		 * Someone other than us has a ref, stop.
		 */
		if (ncp->nc_refs > 1)
			goto done;

		/*
		 * We have children, stop.
		 */
		if (!TAILQ_EMPTY(&ncp->nc_list))
			goto done;

		/*
		 * Ok, we can completely destroy and free this entry.  Sanity
		 * check it against our static rootnamecache structure,
		 * then remove it from the hash.
		 */
		KKASSERT(ncp != &rootnamecache);

		if (ncp->nc_flag & NCF_HASHED) {
			ncp->nc_flag &= ~NCF_HASHED;
			LIST_REMOVE(ncp, nc_hash);
		}

		/*
		 * Unlink from its parent and free, then loop on the
		 * parent.  XXX temp hack, in stage-3 parent is never NULL
		 */
		if ((par = ncp->nc_parent) != NULL) {
			par = cache_hold(par);
			TAILQ_REMOVE(&par->nc_list, ncp, nc_entry);
			if (par->nc_vp && TAILQ_EMPTY(&par->nc_list))
				vdrop(par->nc_vp);
		}
		--numunres;
		ncp->nc_refs = -1;	/* safety */
		ncp->nc_parent = NULL;	/* safety */
		if (ncp->nc_name)
			free(ncp->nc_name, M_VFSCACHE);
		free(ncp, M_VFSCACHE);
		ncp = par;		/* loop: parent holds our extra ref */
		if (par == NULL)	/* temp hack */
			return;		/* temp hack */
	}
done:
	--ncp->nc_refs;
}

/*
 * Lookup an entry in the cache
 *
 * Lookup is called with dvp pointing to the directory to search,
 * cnp pointing to the name of the entry being sought.
 *
 * If the lookup succeeds, the vnode is returned in *vpp, and a
 * status of -1 is returned.
 *
 * If the lookup determines that the name does not exist (negative cacheing),
 * a status of ENOENT is returned.
 *
 * If the lookup fails, a status of zero is returned.
 *
 * Note that UNRESOLVED entries are ignored.  They are not negative cache
 * entries.
 */
int
cache_lookup(struct vnode *dvp, struct namecache *par, struct vnode **vpp,
		struct namecache **ncpp, struct componentname *cnp)
{
	struct namecache *ncp;
	u_int32_t hash;
	globaldata_t gd = mycpu;

	numcalls++;

	/*
	 * Obtain the namecache entry associated with dvp, creating one if
	 * necessary.  If we have to create one we have insufficient
	 * information to hash it or even supply the name, but we still
	 * need one so we can link it in.
	 *
	 * NOTE: in this stage of development, the passed 'par' is
	 * almost always NULL.
	 */
	if (par == NULL) {
		if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
			par = cache_alloc(dvp);
	}

	/*
	 * Deal with "." and "..".  In this stage of code development we leave
	 * the returned ncpp NULL.  Note that if the namecache is disjoint,
	 * we won't find a vnode for "..".
	 */
	if (cnp->cn_nameptr[0] == '.') {
		if (cnp->cn_namelen == 1) {
			*vpp = dvp;
			dothits++;
			numposhits++;	/* include in total statistics */
			return (-1);
		}
		if (cnp->cn_namelen == 2 && cnp->cn_nameptr[1] == '.') {
			dotdothits++;
			numposhits++;	/* include in total statistics */
			if ((cnp->cn_flags & CNP_MAKEENTRY) == 0)
				return (0);
			if (par->nc_parent == NULL ||
			    par->nc_parent->nc_vp == NULL) {
				return (0);
			}
			*vpp = par->nc_parent->nc_vp;
			return (-1);
		}
	}

	/*
	 * Try to locate an existing entry.  The hash covers the name and
	 * the parent namecache pointer (not the vnode).
	 */
	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	hash = fnv_32_buf(&par, sizeof(par), hash);
	if (nczapcheck > 1)
		printf("DVP %p/%p %08x %*.*s\n", dvp, par, hash, (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr);
restart:
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (nczapcheck > 1) {
			printf("TEST ncp par=%p %*.*s\n",
				ncp->nc_parent, ncp->nc_nlen, ncp->nc_nlen,
				ncp->nc_name);
		}

		/*
		 * Zap entries that have timed out.  Zapping modifies the
		 * hash chain, so rescan from the top.
		 */
		if (ncp->nc_timeout &&
		    (int)(ncp->nc_timeout - ticks) < 0
		) {
			if (nczapcheck > 1)
				printf("TIMEOUT\n");
			cache_zap(cache_hold(ncp));
			goto restart;
		}

		/*
		 * Break out if we find a matching entry.  UNRESOLVED entries
		 * never match (they are in the middle of being destroyed).
		 */
		if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 &&
		    ncp->nc_parent == par &&
		    ncp->nc_nlen == cnp->cn_namelen &&
		    bcmp(ncp->nc_name, cnp->cn_nameptr, ncp->nc_nlen) == 0
		) {
			if (nczapcheck > 1)
				printf("GOOD\n");
			cache_hold(ncp);
			break;
		}
	}

	/*
	 * If we failed to locate an entry, return 0 (indicates failure).
	 */
	if (ncp == NULL) {
		if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
			nummisszap++;
		} else {
			nummiss++;
		}
		gd->gd_nchstats->ncs_miss++;
		if (nczapcheck) {
			printf("MISS %p/%p %*.*s/%*.*s\n", dvp, par,
			    par->nc_nlen, par->nc_nlen, (par->nc_name ? par->nc_name : ""),
			    (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr);
		}
		return (0);
	}

	/*
	 * If we found an entry, but we don't want to have one, we zap it.
	 */
	if ((cnp->cn_flags & CNP_MAKEENTRY) == 0) {
		numposzaps++;
		gd->gd_nchstats->ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	/*
	 * If the vnode is not NULL then return the positive match.
	 */
	if (ncp->nc_vp) {
		numposhits++;
		gd->gd_nchstats->ncs_goodhits++;
		*vpp = ncp->nc_vp;
		cache_drop(ncp);
		return (-1);
	}

	/*
	 * If the vnode is NULL we found a negative match.  If we want to
	 * create it, purge the negative match and return failure (as if
	 * we hadn't found a match in the first place).
	 */
	if (cnp->cn_nameiop == NAMEI_CREATE) {
		numnegzaps++;
		gd->gd_nchstats->ncs_badhits++;
		cache_zap(ncp);
		return (0);
	}

	numneghits++;

	/*
	 * We found a "negative" match, ENOENT notifies client of this match.
	 * The nc_flag field records whether this is a whiteout.  Since there
	 * is no vnode we can use the vnode tailq link field with ncneglist.
	 * Requeueing at the tail implements LRU for negative-entry trimming.
	 */
	TAILQ_REMOVE(&ncneglist, ncp, nc_vnode);
	TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode);
	gd->gd_nchstats->ncs_neghits++;
	if (ncp->nc_flag & NCF_WHITEOUT)
		cnp->cn_flags |= CNP_ISWHITEOUT;
	cache_drop(ncp);
	return (ENOENT);
}

/*
 * Generate a special linkage between the mount point and the root of the
 * mounted filesystem in order to maintain the namecache topology across
 * a mount point.  The special linkage has a 0-length name component
 * and sets NCF_MOUNTPT.
 */
void
cache_mount(struct vnode *dvp, struct vnode *tvp)
{
	struct namecache *ncp;
	struct namecache *par;
	struct nchashhead *nchpp;
	u_int32_t hash;

	/*
	 * If a linkage already exists we do not have to do anything.
	 * NOTE: this hashes the empty name plus dvp->v_id, unlike the
	 * (name, parent-pointer) hash used by cache_lookup().
	 */
	hash = fnv_32_buf("", 0, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	LIST_FOREACH(ncp, (NCHHASH(hash)), nc_hash) {
		numchecks++;
		if (ncp->nc_vp == tvp &&
		    ncp->nc_nlen == 0 &&
		    ncp->nc_parent &&
		    ncp->nc_parent->nc_vp == dvp
		) {
			return;
		}
	}

	if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
		par = cache_alloc(dvp);

	/*
	 * Otherwise create a new linkage.
	 */
	ncp = cache_alloc(tvp);
	ncp->nc_flag = NCF_MOUNTPT;
	cache_link_parent(ncp, par);

	/*
	 * Hash table
	 */
	hash = fnv_32_buf("", 0, FNV1_32_INIT);
	hash = fnv_32_buf(&dvp->v_id, sizeof(dvp->v_id), hash);
	nchpp = NCHHASH(hash);
	LIST_INSERT_HEAD(nchpp, ncp, nc_hash);

	ncp->nc_flag |= NCF_HASHED;
}

/*
 * Add an entry to the cache.
 */
void
cache_enter(struct vnode *dvp, struct namecache *par, struct vnode *vp, struct componentname *cnp)
{
	struct namecache *ncp;
	struct namecache *bpar;
	struct nchashhead *nchpp;
	u_int32_t hash;
	char *name;

	/*
	 * If the directory has no namecache entry we must associate one with
	 * it.
 * The name of the entry is not known so it isn't hashed.
	 */
	if (par == NULL) {
		if ((par = TAILQ_FIRST(&dvp->v_namecache)) == NULL)
			par = cache_alloc(dvp);
	}

	/*
	 * This may be a bit confusing.  "." and ".." are 'virtual' entries.
	 * We do not actually create a namecache entry representing either.
	 * However, the ".." case is used to linkup a potentially disjoint
	 * directory with its parent, to disconnect a directory from its
	 * parent, or to change an existing linkage that may no longer be
	 * correct (as might occur when a subdirectory is renamed).
	 */

	if (cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.')
		return;
	if (cnp->cn_namelen == 2 && cnp->cn_nameptr[0] == '.' &&
	    cnp->cn_nameptr[1] == '.'
	) {
		if (vp == NULL) {
			/* disconnect dvp from its (stale) parent */
			if (par->nc_parent)
				cache_unlink_parent(par);
		} else {
			/* (re)link dvp underneath vp's namecache entry */
			if ((ncp = TAILQ_FIRST(&vp->v_namecache)) == NULL)
				ncp = cache_alloc(vp);
			cache_hold(par);
			if (par->nc_parent)
				cache_unlink_parent(par);
			cache_link_parent(par, ncp); /* ncp is parent of par */
			cache_drop(par);
		}
		return;
	}

	/*
	 * Locate other entries associated with this vnode and zap them,
	 * because the purge code may not be able to find them due to
	 * the topology not yet being consistent.  This is a temporary
	 * hack.
	 */
	if (vp) {
again:
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) {
				cache_zap(cache_hold(ncp));
				goto again;
			}
		}
	}

	hash = fnv_32_buf(cnp->cn_nameptr, cnp->cn_namelen, FNV1_32_INIT);
	bpar = par;
	hash = fnv_32_buf(&bpar, sizeof(bpar), hash);

	if (nczapcheck > 1)
	    printf("ENTER %p/%p %08x '%*.*s' %p ", dvp, par, hash, (int)cnp->cn_namelen, (int)cnp->cn_namelen, cnp->cn_nameptr, vp);


	/* nc_name is not NUL terminated; nc_nlen bounds all uses */
	name = malloc(cnp->cn_namelen, M_VFSCACHE, M_WAITOK);
	ncp = cache_alloc(vp);
	if (nczapcheck > 1)
	    printf("alloc\n");

	/*
	 * Set a timeout.  A nc_timeout of 0 means "no timeout", so avoid
	 * storing 0 if ticks + cn_timeout happens to wrap to it.
	 */
	if (cnp->cn_flags & CNP_CACHETIMEOUT) {
		if ((ncp->nc_timeout = ticks + cnp->cn_timeout) == 0)
			ncp->nc_timeout = 1;
	}

	/*
	 * Linkup the parent pointer, bump the parent vnode's hold
	 * count when we go from 0->1 children.
	 */
	cache_link_parent(ncp, par);

	/*
	 * Add to the hash table
	 */
	ncp->nc_name = name;
	ncp->nc_nlen = cnp->cn_namelen;
	bcopy(cnp->cn_nameptr, ncp->nc_name, cnp->cn_namelen);
	nchpp = NCHHASH(hash);
	LIST_INSERT_HEAD(nchpp, ncp, nc_hash);
	ncp->nc_flag |= NCF_HASHED;

	/*
	 * If the target vnode is NULL then this is to be a negative cache
	 * entry; record whether it represents a whiteout.
	 */
	if (vp == NULL) {
		ncp->nc_flag &= ~NCF_WHITEOUT;
		if (cnp->cn_flags & CNP_ISWHITEOUT)
			ncp->nc_flag |= NCF_WHITEOUT;
	}

	/*
	 * Don't cache too many negative hits; trim the oldest (LRU head).
	 */
	if (numneg > MINNEG && numneg * ncnegfactor > numcache) {
		ncp = TAILQ_FIRST(&ncneglist);
		KKASSERT(ncp != NULL);
		cache_zap(cache_hold(ncp));
	}
}

/*
 * Name cache initialization, from vfsinit() when we are booting
 *
 * rootnamecache is initialized such that it cannot be recursively deleted.
 */
void
nchinit(void)
{
	int i;
	globaldata_t gd;

	/* initialise per-cpu namecache effectiveness statistics.
	 */
	for (i = 0; i < ncpus; ++i) {
		gd = globaldata_find(i);
		gd->gd_nchstats = &nchstats[i];
	}

	TAILQ_INIT(&ncneglist);
	nchashtbl = hashinit(desiredvnodes*2, M_VFSCACHE, &nchash);
	TAILQ_INIT(&rootnamecache.nc_list);
	rootnamecache.nc_flag |= NCF_HASHED | NCF_ROOT | NCF_UNRESOLVED;
	rootnamecache.nc_refs = 1;	/* permanent ref, never freed */
}

/*
 * vfs_cache_setroot()
 *
 *	Create an association between the root of our namecache and
 *	the root vnode.  This routine may be called several times during
 *	booting.
 */
void
vfs_cache_setroot(struct vnode *nvp)
{
	KKASSERT(rootnamecache.nc_refs > 0);	/* don't accidentally free */
	cache_zap(cache_hold(&rootnamecache));

	rootnamecache.nc_vp = nvp;
	rootnamecache.nc_flag &= ~NCF_UNRESOLVED;
	if (nvp) {
		++numcache;
		if (!TAILQ_EMPTY(&rootnamecache.nc_list))
			vhold(nvp);
		TAILQ_INSERT_HEAD(&nvp->v_namecache, &rootnamecache, nc_vnode);
	} else {
		++numneg;
		TAILQ_INSERT_TAIL(&ncneglist, &rootnamecache, nc_vnode);
		rootnamecache.nc_flag &= ~NCF_WHITEOUT;
	}
}

/*
 * Invalidate all namecache entries to a particular vnode as well as
 * any direct children of that vnode in the namecache.  This is a
 * 'catch all' purge used by filesystems that do not know any better.
 *
 * A new vnode v_id is generated.  Note that no vnode will ever have a
 * v_id of 0.
 *
 * Note that the linkage between the vnode and its namecache entries will
 * be removed, but the namecache entries themselves might stay put due to
 * active references from elsewhere in the system or due to the existence of
 * the children.  The namecache topology is left intact even if we do not
 * know what the vnode association is.  Such entries will be marked
 * NCF_UNRESOLVED.
 *
 * XXX: Only time and the size of v_id prevents this from failing:
 * XXX: In theory we should hunt down all (struct vnode*, v_id)
 * XXX: soft references and nuke them, at least on the global
 * XXX: v_id wraparound.  The period of resistance can be extended
 * XXX: by incrementing each vnodes v_id individually instead of
 * XXX: using the global v_id.
 */
void
cache_purge(struct vnode *vp)
{
	static u_long nextid;
	struct namecache *ncp;
	struct namecache *scan;

	/*
	 * Disassociate the vnode from its namecache entries along with
	 * (for historical reasons) any direct children.
	 */
	while ((ncp = TAILQ_FIRST(&vp->v_namecache)) != NULL) {
		cache_hold(ncp);

restart:	/* YYY hack, fix me */
		TAILQ_FOREACH(scan, &ncp->nc_list, nc_entry) {
			if ((scan->nc_flag & NCF_UNRESOLVED) == 0) {
				/* zapping mutates nc_list; restart the scan */
				cache_zap(cache_hold(scan));
				goto restart;
			}
		}
		cache_zap(ncp);
	}

	/*
	 * Calculate a new unique id for ".." handling
	 */
	do {
		nextid++;
	} while (nextid == vp->v_id || nextid == 0);
	vp->v_id = nextid;
}

/*
 * Flush all entries referencing a particular filesystem.
 *
 * Since we need to check it anyway, we will flush all the invalid
 * entries at the same time.
 */
void
cache_purgevfs(struct mount *mp)
{
	struct nchashhead *nchpp;
	struct namecache *ncp, *nnp;

	/*
	 * Scan hash tables for applicable entries.  The next entry is
	 * held before the current one is zapped/dropped so it cannot be
	 * freed out from under the iteration.
	 */
	for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) {
		ncp = LIST_FIRST(nchpp);
		if (ncp)
			cache_hold(ncp);
		while (ncp) {
			nnp = LIST_NEXT(ncp, nc_hash);
			if (nnp)
				cache_hold(nnp);
			if (ncp->nc_vp && ncp->nc_vp->v_mount == mp)
				cache_zap(ncp);
			else
				cache_drop(ncp);
			ncp = nnp;
		}
	}
}

/*
 * cache_leaf_test()
 *
 *	Test whether the vnode is at a leaf in the nameicache tree.
 *
 *	Returns 0 if it is a leaf, -1 if it isn't.
 */
int
cache_leaf_test(struct vnode *vp)
{
	struct namecache *scan;
	struct namecache *ncp;

	/* any resolved child under any of vp's namecache entries => not a leaf */
	TAILQ_FOREACH(scan, &vp->v_namecache, nc_vnode) {
		TAILQ_FOREACH(ncp, &scan->nc_list, nc_entry) {
			/* YYY && ncp->nc_vp->v_type == VDIR ? */
			if (ncp->nc_vp != NULL)
				return(-1);
		}
	}
	return(0);
}

/*
 * Perform canonical checks and cache lookup and pass on to filesystem
 * through the vop_cachedlookup only if needed.
 *
 *	vop_lookup_args {
 *		struct vnode a_dvp;
 *		struct namecache *a_ncp;
 *		struct vnode **a_vpp;
 *		struct namecache **a_ncpp;
 *		struct componentname *a_cnp;
 *	}
 */
int
vfs_cache_lookup(struct vop_lookup_args *ap)
{
	struct vnode *dvp, *vp;
	int lockparent;
	int error;
	struct namecache *par = ap->a_par;
	struct vnode **vpp = ap->a_vpp;
	struct namecache **ncpp = ap->a_ncpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	struct thread *td = cnp->cn_td;
	u_long vpid;	/* capability number of vnode */

	*vpp = NULL;
	if (ncpp)
		*ncpp = NULL;
	dvp = ap->a_dvp;
	lockparent = flags & CNP_LOCKPARENT;

	if (dvp->v_type != VDIR)
		return (ENOTDIR);

	/* modifying ops on the last component of a read-only fs fail early */
	if ((flags & CNP_ISLASTCN) && (dvp->v_mount->mnt_flag & MNT_RDONLY) &&
	    (cnp->cn_nameiop == NAMEI_DELETE || cnp->cn_nameiop == NAMEI_RENAME)) {
		return (EROFS);
	}

	error = VOP_ACCESS(dvp, VEXEC, cred, td);

	if (error)
		return (error);

	error = cache_lookup(dvp, par, vpp, ncpp, cnp);

	/* cache miss (0): fall through to the filesystem's lookup */
	if (!error)
		return (VOP_CACHEDLOOKUP(dvp, par, vpp, ncpp, cnp));

	/* negative cache hit */
	if (error == ENOENT)
		return (error);

	/*
	 * Positive hit.  Lock the result per the LOCKPARENT/ISDOTDOT
	 * protocol, tracking whether dvp is unlocked via CNP_PDIRUNLOCK.
	 */
	vp = *vpp;
	vpid = vp->v_id;
	cnp->cn_flags &= ~CNP_PDIRUNLOCK;
	if (dvp == vp) {	/* lookup on "." */
		vref(vp);
		error = 0;
	} else if (flags & CNP_ISDOTDOT) {
		/* unlock child-first to avoid deadlock when going up */
		VOP_UNLOCK(dvp, NULL, 0, td);
		cnp->cn_flags |= CNP_PDIRUNLOCK;
		error = vget(vp, NULL, LK_EXCLUSIVE, td);
		if (!error && lockparent && (flags & CNP_ISLASTCN)) {
			if ((error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td)) == 0)
				cnp->cn_flags &= ~CNP_PDIRUNLOCK;
		}
	} else {
		error = vget(vp, NULL, LK_EXCLUSIVE, td);
		if (!lockparent || error || !(flags & CNP_ISLASTCN)) {
			VOP_UNLOCK(dvp, NULL, 0, td);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
	}
	/*
	 * Check that the capability number did not change
	 * while we were waiting for the lock.
	 */
	if (!error) {
		if (vpid == vp->v_id)
			return (0);
		/* vnode was recycled; drop it and redo via the filesystem */
		vput(vp);
		if (lockparent && dvp != vp && (flags & CNP_ISLASTCN)) {
			VOP_UNLOCK(dvp, NULL, 0, td);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
	}
	if (cnp->cn_flags & CNP_PDIRUNLOCK) {
		error = vn_lock(dvp, NULL, LK_EXCLUSIVE, td);
		if (error)
			return (error);
		cnp->cn_flags &= ~CNP_PDIRUNLOCK;
	}
	return (VOP_CACHEDLOOKUP(dvp, par, vpp, ncpp, cnp));
}

static int disablecwd;
SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, "");

static u_long numcwdcalls; STATNODE(CTLFLAG_RD, numcwdcalls, &numcwdcalls);
static u_long numcwdfail1; STATNODE(CTLFLAG_RD, numcwdfail1, &numcwdfail1);
static u_long numcwdfail2; STATNODE(CTLFLAG_RD, numcwdfail2, &numcwdfail2);
static u_long numcwdfail3; STATNODE(CTLFLAG_RD, numcwdfail3, &numcwdfail3);
static u_long numcwdfail4; STATNODE(CTLFLAG_RD, numcwdfail4, &numcwdfail4);
static u_long numcwdfound; STATNODE(CTLFLAG_RD, numcwdfound, &numcwdfound);

/*
 * __getcwd() system call: copy the current working directory path of
 * the calling process out to uap->buf (at most uap->buflen bytes).
 */
int
__getcwd(struct __getcwd_args *uap)
{
	int buflen;
	int error;
	char *buf;
	char *bp;

	if (disablecwd)
		return (ENODEV);

	buflen = uap->buflen;
	if (buflen < 2)
		return (EINVAL);
	if (buflen > MAXPATHLEN)
		buflen = MAXPATHLEN;

	buf = malloc(buflen, M_TEMP, M_WAITOK);
	bp = kern_getcwd(buf, buflen, &error);
	if (error == 0)
		error = copyout(bp, uap->buf, strlen(bp) + 1);
	free(buf, M_TEMP);
	return (error);
}

/*
 * Build the current working directory path of curproc by walking the
 * namecache from fd_cdir up towards fd_rdir / rootvnode, writing name
 * components backwards into buf.
 *
 * Returns a pointer into buf (the start of the NUL-terminated path) on
 * success, or NULL with *error set (EBADF on a forced unmount, ENOENT
 * when the namecache chain is broken, ENOMEM when buf is too small).
 */
char *
kern_getcwd(char *buf, size_t buflen, int *error)
{
	struct proc *p = curproc;
	char *bp;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numcwdcalls++;
	bp = buf;
	bp += buflen - 1;	/* build the path from the end of buf */
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				*error = EBADF;
				return(NULL);
			}
			/* cross the mount point downward */
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		/* find an entry that still knows its parent and name */
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
			    ncp->nc_nlen > 0) {
				break;
			}
		}
		if (ncp == NULL) {
			numcwdfail2++;
			*error = ENOENT;
			return(NULL);
		}
		/* prepend the component name, then a '/' */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numcwdfail4++;
				*error = ENOMEM;
				return(NULL);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numcwdfail4++;
			*error = ENOMEM;
			return(NULL);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_parent->nc_vp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numcwdfail4++;
			*error = ENOMEM;
			return(NULL);
		}
		*--bp = '/';
	}
	numcwdfound++;
	*error = 0;
	return (bp);
}

/*
 * Thus begins the fullpath magic.
 */

#undef	STATNODE
#define STATNODE(name)							\
	static u_int name;						\
	SYSCTL_UINT(_vfs_cache, OID_AUTO, name, CTLFLAG_RD, &name, 0, "")

static int disablefullpath;
SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW,
    &disablefullpath, 0, "");

STATNODE(numfullpathcalls);
STATNODE(numfullpathfail1);
STATNODE(numfullpathfail2);
STATNODE(numfullpathfail3);
STATNODE(numfullpathfail4);
STATNODE(numfullpathfound);

/*
 * Resolve the full path of vnode 'vn' (or p->p_textvp when vn is NULL)
 * relative to process p's root directory, via the namecache.  Uses the
 * same backwards-construction walk as kern_getcwd().
 *
 * On success returns 0 with *retbuf pointing at the path (inside the
 * allocation returned via *freebuf, which the caller must free with
 * free(*freebuf, M_TEMP)).  On failure the buffer is released here.
 */
int
vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf)
{
	char *bp, *buf;
	int i, slash_prefixed;
	struct filedesc *fdp;
	struct namecache *ncp;
	struct vnode *vp;

	numfullpathcalls++;
	if (disablefullpath)
		return (ENODEV);

	if (p == NULL)
		return (EINVAL);

	/* vn is NULL, client wants us to use p->p_textvp */
	if (vn == NULL) {
		if ((vn = p->p_textvp) == NULL)
			return (EINVAL);
	}

	buf = malloc(MAXPATHLEN, M_TEMP, M_WAITOK);
	bp = buf + MAXPATHLEN - 1;	/* build the path from the end */
	*bp = '\0';
	fdp = p->p_fd;
	slash_prefixed = 0;
	for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) {
		if (vp->v_flag & VROOT) {
			if (vp->v_mount == NULL) {	/* forced unmount */
				free(buf, M_TEMP);
				return (EBADF);
			}
			/* cross the mount point downward */
			vp = vp->v_mount->mnt_vnodecovered;
			continue;
		}
		/* find an entry that still knows its parent and name */
		TAILQ_FOREACH(ncp, &vp->v_namecache, nc_vnode) {
			if (ncp->nc_parent && ncp->nc_parent->nc_vp &&
			    ncp->nc_nlen > 0) {
				break;
			}
		}
		if (ncp == NULL) {
			numfullpathfail2++;
			free(buf, M_TEMP);
			return (ENOENT);
		}
		/* prepend the component name, then a '/' */
		for (i = ncp->nc_nlen - 1; i >= 0; i--) {
			if (bp == buf) {
				numfullpathfail4++;
				free(buf, M_TEMP);
				return (ENOMEM);
			}
			*--bp = ncp->nc_name[i];
		}
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
		slash_prefixed = 1;
		vp = ncp->nc_parent->nc_vp;
	}
	if (!slash_prefixed) {
		if (bp == buf) {
			numfullpathfail4++;
			free(buf, M_TEMP);
			return (ENOMEM);
		}
		*--bp = '/';
	}
	numfullpathfound++;
	*retbuf = bp;
	*freebuf = buf;
	return (0);
}