1 /* 2 * Copyright (c) 2003,2004,2009 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * Copyright (c) 1989, 1993, 1995 35 * The Regents of the University of California. All rights reserved. 36 * 37 * This code is derived from software contributed to Berkeley by 38 * Poul-Henning Kamp of the FreeBSD Project. 39 * 40 * Redistribution and use in source and binary forms, with or without 41 * modification, are permitted provided that the following conditions 42 * are met: 43 * 1. Redistributions of source code must retain the above copyright 44 * notice, this list of conditions and the following disclaimer. 45 * 2. Redistributions in binary form must reproduce the above copyright 46 * notice, this list of conditions and the following disclaimer in the 47 * documentation and/or other materials provided with the distribution. 48 * 3. Neither the name of the University nor the names of its contributors 49 * may be used to endorse or promote products derived from this software 50 * without specific prior written permission. 51 * 52 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 53 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 54 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 55 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 56 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 57 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 58 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 59 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 60 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 61 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 62 * SUCH DAMAGE. 63 */ 64 65 #include <sys/param.h> 66 #include <sys/systm.h> 67 #include <sys/kernel.h> 68 #include <sys/sysctl.h> 69 #include <sys/mount.h> 70 #include <sys/vnode.h> 71 #include <sys/malloc.h> 72 #include <sys/sysproto.h> 73 #include <sys/spinlock.h> 74 #include <sys/proc.h> 75 #include <sys/namei.h> 76 #include <sys/nlookup.h> 77 #include <sys/filedesc.h> 78 #include <sys/fnv_hash.h> 79 #include <sys/globaldata.h> 80 #include <sys/kern_syscall.h> 81 #include <sys/dirent.h> 82 #include <ddb/ddb.h> 83 84 #include <sys/sysref2.h> 85 #include <sys/spinlock2.h> 86 #include <sys/mplock2.h> 87 88 #define MAX_RECURSION_DEPTH 64 89 90 /* 91 * Random lookups in the cache are accomplished with a hash table using 92 * a hash key of (nc_src_vp, name). Each hash chain has its own spin lock. 93 * 94 * Negative entries may exist and correspond to resolved namecache 95 * structures where nc_vp is NULL. In a negative entry, NCF_WHITEOUT 96 * will be set if the entry corresponds to a whited-out directory entry 97 * (verses simply not finding the entry at all). ncneglist is locked 98 * with a global spinlock (ncspin). 99 * 100 * MPSAFE RULES: 101 * 102 * (1) A ncp must be referenced before it can be locked. 103 * 104 * (2) A ncp must be locked in order to modify it. 105 * 106 * (3) ncp locks are always ordered child -> parent. That may seem 107 * backwards but forward scans use the hash table and thus can hold 108 * the parent unlocked when traversing downward. 109 * 110 * This allows insert/rename/delete/dot-dot and other operations 111 * to use ncp->nc_parent links. 112 * 113 * This also prevents a locked up e.g. NFS node from creating a 114 * chain reaction all the way back to the root vnode / namecache. 115 * 116 * (4) parent linkages require both the parent and child to be locked. 117 */ 118 119 /* 120 * Structures associated with name cacheing. 121 */ 122 #define NCHHASH(hash) (&nchashtbl[(hash) & nchash]) 123 #define MINNEG 1024 124 #define MINPOS 1024 125 #define NCMOUNT_NUMCACHE 1009 /* prime number */ 126 127 MALLOC_DEFINE(M_VFSCACHE, "vfscache", "VFS name cache entries"); 128 129 LIST_HEAD(nchash_list, namecache); 130 131 struct nchash_head { 132 struct nchash_list list; 133 struct spinlock spin; 134 }; 135 136 struct ncmount_cache { 137 struct spinlock spin; 138 struct namecache *ncp; 139 struct mount *mp; 140 int isneg; /* if != 0 mp is originator and not target */ 141 }; 142 143 static struct nchash_head *nchashtbl; 144 static struct namecache_list ncneglist; 145 static struct spinlock ncspin; 146 static struct ncmount_cache ncmount_cache[NCMOUNT_NUMCACHE]; 147 148 /* 149 * ncvp_debug - debug cache_fromvp(). This is used by the NFS server 150 * to create the namecache infrastructure leading to a dangling vnode. 151 * 152 * 0 Only errors are reported 153 * 1 Successes are reported 154 * 2 Successes + the whole directory scan is reported 155 * 3 Force the directory scan code run as if the parent vnode did not 156 * have a namecache record, even if it does have one. 157 */ 158 static int ncvp_debug; 159 SYSCTL_INT(_debug, OID_AUTO, ncvp_debug, CTLFLAG_RW, &ncvp_debug, 0, 160 "Namecache debug level (0-3)"); 161 162 static u_long nchash; /* size of hash table */ 163 SYSCTL_ULONG(_debug, OID_AUTO, nchash, CTLFLAG_RD, &nchash, 0, 164 "Size of namecache hash table"); 165 166 static int ncnegflush = 10; /* burst for negative flush */ 167 SYSCTL_INT(_debug, OID_AUTO, ncnegflush, CTLFLAG_RW, &ncnegflush, 0, 168 "Batch flush negative entries"); 169 170 static int ncposflush = 10; /* burst for positive flush */ 171 SYSCTL_INT(_debug, OID_AUTO, ncposflush, CTLFLAG_RW, &ncposflush, 0, 172 "Batch flush positive entries"); 173 174 static int ncnegfactor = 16; /* ratio of negative entries */ 175 SYSCTL_INT(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, 176 "Ratio of namecache negative entries"); 177 178 static int nclockwarn; /* warn on locked entries in ticks */ 179 SYSCTL_INT(_debug, OID_AUTO, nclockwarn, CTLFLAG_RW, &nclockwarn, 0, 180 "Warn on locked namecache entries in ticks"); 181 182 static int numdefered; /* number of cache entries allocated */ 183 SYSCTL_INT(_debug, OID_AUTO, numdefered, CTLFLAG_RD, &numdefered, 0, 184 "Number of cache entries allocated"); 185 186 static int ncposlimit; /* number of cache entries allocated */ 187 SYSCTL_INT(_debug, OID_AUTO, ncposlimit, CTLFLAG_RW, &ncposlimit, 0, 188 "Number of cache entries allocated"); 189 190 static int ncp_shared_lock_disable = 0; 191 SYSCTL_INT(_debug, OID_AUTO, ncp_shared_lock_disable, CTLFLAG_RW, 192 &ncp_shared_lock_disable, 0, "Disable shared namecache locks"); 193 194 SYSCTL_INT(_debug, OID_AUTO, vnsize, CTLFLAG_RD, 0, sizeof(struct vnode), 195 "sizeof(struct vnode)"); 196 SYSCTL_INT(_debug, OID_AUTO, ncsize, CTLFLAG_RD, 0, sizeof(struct namecache), 197 "sizeof(struct namecache)"); 198 199 static int ncmount_cache_enable = 1; 200 SYSCTL_INT(_debug, OID_AUTO, ncmount_cache_enable, CTLFLAG_RW, 201 &ncmount_cache_enable, 0, "mount point cache"); 202 static long ncmount_cache_hit; 203 SYSCTL_LONG(_debug, OID_AUTO, ncmount_cache_hit, CTLFLAG_RW, 204 &ncmount_cache_hit, 0, "mpcache hits"); 205 static long ncmount_cache_miss; 206 SYSCTL_LONG(_debug, OID_AUTO, ncmount_cache_miss, CTLFLAG_RW, 207 &ncmount_cache_miss, 0, "mpcache misses"); 208 static long ncmount_cache_overwrite; 209 SYSCTL_LONG(_debug, OID_AUTO, ncmount_cache_overwrite, CTLFLAG_RW, 210 &ncmount_cache_overwrite, 0, "mpcache entry overwrites"); 211 212 static int cache_resolve_mp(struct mount *mp); 213 static struct vnode *cache_dvpref(struct namecache *ncp); 214 static void _cache_lock(struct namecache *ncp); 215 static void _cache_setunresolved(struct namecache *ncp); 216 static void _cache_cleanneg(int count); 217 static void _cache_cleanpos(int count); 218 static void _cache_cleandefered(void); 219 static void _cache_unlink(struct namecache *ncp); 220 221 /* 222 * The new name cache statistics 223 */ 224 SYSCTL_NODE(_vfs, OID_AUTO, cache, CTLFLAG_RW, 0, "Name cache statistics"); 225 static int numneg; 226 SYSCTL_INT(_vfs_cache, OID_AUTO, numneg, CTLFLAG_RD, &numneg, 0, 227 "Number of negative namecache entries"); 228 static int numcache; 229 SYSCTL_INT(_vfs_cache, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, 230 "Number of namecaches entries"); 231 static u_long numcalls; 232 SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcalls, CTLFLAG_RD, &numcalls, 0, 233 "Number of namecache lookups"); 234 static u_long numchecks; 235 SYSCTL_ULONG(_vfs_cache, OID_AUTO, numchecks, CTLFLAG_RD, &numchecks, 0, 236 "Number of checked entries in namecache lookups"); 237 238 struct nchstats nchstats[SMP_MAXCPU]; 239 /* 240 * Export VFS cache effectiveness statistics to user-land. 241 * 242 * The statistics are left for aggregation to user-land so 243 * neat things can be achieved, like observing per-CPU cache 244 * distribution. 245 */ 246 static int 247 sysctl_nchstats(SYSCTL_HANDLER_ARGS) 248 { 249 struct globaldata *gd; 250 int i, error; 251 252 error = 0; 253 for (i = 0; i < ncpus; ++i) { 254 gd = globaldata_find(i); 255 if ((error = SYSCTL_OUT(req, (void *)&(*gd->gd_nchstats), 256 sizeof(struct nchstats)))) 257 break; 258 } 259 260 return (error); 261 } 262 SYSCTL_PROC(_vfs_cache, OID_AUTO, nchstats, CTLTYPE_OPAQUE|CTLFLAG_RD, 263 0, 0, sysctl_nchstats, "S,nchstats", "VFS cache effectiveness statistics"); 264 265 static struct namecache *cache_zap(struct namecache *ncp, int nonblock); 266 267 /* 268 * Namespace locking. The caller must already hold a reference to the 269 * namecache structure in order to lock/unlock it. This function prevents 270 * the namespace from being created or destroyed by accessors other then 271 * the lock holder. 272 * 273 * Note that holding a locked namecache structure prevents other threads 274 * from making namespace changes (e.g. deleting or creating), prevents 275 * vnode association state changes by other threads, and prevents the 276 * namecache entry from being resolved or unresolved by other threads. 277 * 278 * An exclusive lock owner has full authority to associate/disassociate 279 * vnodes and resolve/unresolve the locked ncp. 280 * 281 * A shared lock owner only has authority to acquire the underlying vnode, 282 * if any. 283 * 284 * The primary lock field is nc_lockstatus. nc_locktd is set after the 285 * fact (when locking) or cleared prior to unlocking. 286 * 287 * WARNING! Holding a locked ncp will prevent a vnode from being destroyed 288 * or recycled, but it does NOT help you if the vnode had already 289 * initiated a recyclement. If this is important, use cache_get() 290 * rather then cache_lock() (and deal with the differences in the 291 * way the refs counter is handled). Or, alternatively, make an 292 * unconditional call to cache_validate() or cache_resolve() 293 * after cache_lock() returns. 294 */ 295 static 296 void 297 _cache_lock(struct namecache *ncp) 298 { 299 thread_t td; 300 int didwarn; 301 int error; 302 u_int count; 303 304 KKASSERT(ncp->nc_refs != 0); 305 didwarn = 0; 306 td = curthread; 307 308 for (;;) { 309 count = ncp->nc_lockstatus; 310 cpu_ccfence(); 311 312 if ((count & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == 0) { 313 if (atomic_cmpset_int(&ncp->nc_lockstatus, 314 count, count + 1)) { 315 /* 316 * The vp associated with a locked ncp must 317 * be held to prevent it from being recycled. 318 * 319 * WARNING! If VRECLAIMED is set the vnode 320 * could already be in the middle of a recycle. 321 * Callers must use cache_vref() or 322 * cache_vget() on the locked ncp to 323 * validate the vp or set the cache entry 324 * to unresolved. 325 * 326 * NOTE! vhold() is allowed if we hold a 327 * lock on the ncp (which we do). 328 */ 329 ncp->nc_locktd = td; 330 if (ncp->nc_vp) 331 vhold(ncp->nc_vp); 332 break; 333 } 334 /* cmpset failed */ 335 continue; 336 } 337 if (ncp->nc_locktd == td) { 338 KKASSERT((count & NC_SHLOCK_FLAG) == 0); 339 if (atomic_cmpset_int(&ncp->nc_lockstatus, 340 count, count + 1)) { 341 break; 342 } 343 /* cmpset failed */ 344 continue; 345 } 346 tsleep_interlock(&ncp->nc_locktd, 0); 347 if (atomic_cmpset_int(&ncp->nc_lockstatus, count, 348 count | NC_EXLOCK_REQ) == 0) { 349 /* cmpset failed */ 350 continue; 351 } 352 error = tsleep(&ncp->nc_locktd, PINTERLOCKED, 353 "clock", nclockwarn); 354 if (error == EWOULDBLOCK) { 355 if (didwarn == 0) { 356 didwarn = ticks; 357 kprintf("[diagnostic] cache_lock: " 358 "blocked on %p %08x", 359 ncp, count); 360 kprintf(" \"%*.*s\"\n", 361 ncp->nc_nlen, ncp->nc_nlen, 362 ncp->nc_name); 363 } 364 } 365 /* loop */ 366 } 367 if (didwarn) { 368 kprintf("[diagnostic] cache_lock: unblocked %*.*s after " 369 "%d secs\n", 370 ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name, 371 (int)(ticks - didwarn) / hz); 372 } 373 } 374 375 /* 376 * The shared lock works similarly to the exclusive lock except 377 * nc_locktd is left NULL and we need an interlock (VHOLD) to 378 * prevent vhold() races, since the moment our cmpset_int succeeds 379 * another cpu can come in and get its own shared lock. 380 * 381 * A critical section is needed to prevent interruption during the 382 * VHOLD interlock. 383 */ 384 static 385 void 386 _cache_lock_shared(struct namecache *ncp) 387 { 388 int didwarn; 389 int error; 390 u_int count; 391 392 KKASSERT(ncp->nc_refs != 0); 393 didwarn = 0; 394 395 for (;;) { 396 count = ncp->nc_lockstatus; 397 cpu_ccfence(); 398 399 if ((count & ~NC_SHLOCK_REQ) == 0) { 400 crit_enter(); 401 if (atomic_cmpset_int(&ncp->nc_lockstatus, 402 count, 403 (count + 1) | NC_SHLOCK_FLAG | 404 NC_SHLOCK_VHOLD)) { 405 /* 406 * The vp associated with a locked ncp must 407 * be held to prevent it from being recycled. 408 * 409 * WARNING! If VRECLAIMED is set the vnode 410 * could already be in the middle of a recycle. 411 * Callers must use cache_vref() or 412 * cache_vget() on the locked ncp to 413 * validate the vp or set the cache entry 414 * to unresolved. 415 * 416 * NOTE! vhold() is allowed if we hold a 417 * lock on the ncp (which we do). 418 */ 419 if (ncp->nc_vp) 420 vhold(ncp->nc_vp); 421 atomic_clear_int(&ncp->nc_lockstatus, 422 NC_SHLOCK_VHOLD); 423 crit_exit(); 424 break; 425 } 426 /* cmpset failed */ 427 crit_exit(); 428 continue; 429 } 430 431 /* 432 * If already held shared we can just bump the count, but 433 * only allow this if nobody is trying to get the lock 434 * exclusively. 435 * 436 * VHOLD is a bit of a hack. Even though we successfully 437 * added another shared ref, the cpu that got the first 438 * shared ref might not yet have held the vnode. 439 */ 440 if ((count & (NC_EXLOCK_REQ|NC_SHLOCK_FLAG)) == 441 NC_SHLOCK_FLAG) { 442 KKASSERT((count & ~(NC_EXLOCK_REQ | 443 NC_SHLOCK_REQ | 444 NC_SHLOCK_FLAG)) > 0); 445 if (atomic_cmpset_int(&ncp->nc_lockstatus, 446 count, count + 1)) { 447 while (ncp->nc_lockstatus & NC_SHLOCK_VHOLD) 448 cpu_pause(); 449 break; 450 } 451 continue; 452 } 453 tsleep_interlock(ncp, 0); 454 if (atomic_cmpset_int(&ncp->nc_lockstatus, count, 455 count | NC_SHLOCK_REQ) == 0) { 456 /* cmpset failed */ 457 continue; 458 } 459 error = tsleep(ncp, PINTERLOCKED, "clocksh", nclockwarn); 460 if (error == EWOULDBLOCK) { 461 if (didwarn == 0) { 462 didwarn = ticks; 463 kprintf("[diagnostic] cache_lock_shared: " 464 "blocked on %p %08x", 465 ncp, count); 466 kprintf(" \"%*.*s\"\n", 467 ncp->nc_nlen, ncp->nc_nlen, 468 ncp->nc_name); 469 } 470 } 471 /* loop */ 472 } 473 if (didwarn) { 474 kprintf("[diagnostic] cache_lock_shared: " 475 "unblocked %*.*s after %d secs\n", 476 ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name, 477 (int)(ticks - didwarn) / hz); 478 } 479 } 480 481 /* 482 * NOTE: nc_refs may be zero if the ncp is interlocked by circumstance, 483 * such as the case where one of its children is locked. 484 */ 485 static 486 int 487 _cache_lock_nonblock(struct namecache *ncp) 488 { 489 thread_t td; 490 u_int count; 491 492 td = curthread; 493 494 for (;;) { 495 count = ncp->nc_lockstatus; 496 497 if ((count & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == 0) { 498 if (atomic_cmpset_int(&ncp->nc_lockstatus, 499 count, count + 1)) { 500 /* 501 * The vp associated with a locked ncp must 502 * be held to prevent it from being recycled. 503 * 504 * WARNING! If VRECLAIMED is set the vnode 505 * could already be in the middle of a recycle. 506 * Callers must use cache_vref() or 507 * cache_vget() on the locked ncp to 508 * validate the vp or set the cache entry 509 * to unresolved. 510 * 511 * NOTE! vhold() is allowed if we hold a 512 * lock on the ncp (which we do). 513 */ 514 ncp->nc_locktd = td; 515 if (ncp->nc_vp) 516 vhold(ncp->nc_vp); 517 break; 518 } 519 /* cmpset failed */ 520 continue; 521 } 522 if (ncp->nc_locktd == td) { 523 if (atomic_cmpset_int(&ncp->nc_lockstatus, 524 count, count + 1)) { 525 break; 526 } 527 /* cmpset failed */ 528 continue; 529 } 530 return(EWOULDBLOCK); 531 } 532 return(0); 533 } 534 535 /* 536 * The shared lock works similarly to the exclusive lock except 537 * nc_locktd is left NULL and we need an interlock (VHOLD) to 538 * prevent vhold() races, since the moment our cmpset_int succeeds 539 * another cpu can come in and get its own shared lock. 540 * 541 * A critical section is needed to prevent interruption during the 542 * VHOLD interlock. 543 */ 544 static 545 int 546 _cache_lock_shared_nonblock(struct namecache *ncp) 547 { 548 u_int count; 549 550 for (;;) { 551 count = ncp->nc_lockstatus; 552 553 if ((count & ~NC_SHLOCK_REQ) == 0) { 554 crit_enter(); 555 if (atomic_cmpset_int(&ncp->nc_lockstatus, 556 count, 557 (count + 1) | NC_SHLOCK_FLAG | 558 NC_SHLOCK_VHOLD)) { 559 /* 560 * The vp associated with a locked ncp must 561 * be held to prevent it from being recycled. 562 * 563 * WARNING! If VRECLAIMED is set the vnode 564 * could already be in the middle of a recycle. 565 * Callers must use cache_vref() or 566 * cache_vget() on the locked ncp to 567 * validate the vp or set the cache entry 568 * to unresolved. 569 * 570 * NOTE! vhold() is allowed if we hold a 571 * lock on the ncp (which we do). 572 */ 573 if (ncp->nc_vp) 574 vhold(ncp->nc_vp); 575 atomic_clear_int(&ncp->nc_lockstatus, 576 NC_SHLOCK_VHOLD); 577 crit_exit(); 578 break; 579 } 580 /* cmpset failed */ 581 crit_exit(); 582 continue; 583 } 584 585 /* 586 * If already held shared we can just bump the count, but 587 * only allow this if nobody is trying to get the lock 588 * exclusively. 589 * 590 * VHOLD is a bit of a hack. Even though we successfully 591 * added another shared ref, the cpu that got the first 592 * shared ref might not yet have held the vnode. 593 */ 594 if ((count & (NC_EXLOCK_REQ|NC_SHLOCK_FLAG)) == 595 NC_SHLOCK_FLAG) { 596 KKASSERT((count & ~(NC_EXLOCK_REQ | 597 NC_SHLOCK_REQ | 598 NC_SHLOCK_FLAG)) > 0); 599 if (atomic_cmpset_int(&ncp->nc_lockstatus, 600 count, count + 1)) { 601 while (ncp->nc_lockstatus & NC_SHLOCK_VHOLD) 602 cpu_pause(); 603 break; 604 } 605 continue; 606 } 607 return(EWOULDBLOCK); 608 } 609 return(0); 610 } 611 612 /* 613 * Helper function 614 * 615 * NOTE: nc_refs can be 0 (degenerate case during _cache_drop). 616 * 617 * nc_locktd must be NULLed out prior to nc_lockstatus getting cleared. 618 */ 619 static 620 void 621 _cache_unlock(struct namecache *ncp) 622 { 623 thread_t td __debugvar = curthread; 624 u_int count; 625 u_int ncount; 626 struct vnode *dropvp; 627 628 KKASSERT(ncp->nc_refs >= 0); 629 KKASSERT((ncp->nc_lockstatus & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) > 0); 630 KKASSERT((ncp->nc_lockstatus & NC_SHLOCK_FLAG) || ncp->nc_locktd == td); 631 632 count = ncp->nc_lockstatus; 633 cpu_ccfence(); 634 635 /* 636 * Clear nc_locktd prior to the atomic op (excl lock only) 637 */ 638 if ((count & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == 1) 639 ncp->nc_locktd = NULL; 640 dropvp = NULL; 641 642 for (;;) { 643 if ((count & 644 ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ|NC_SHLOCK_FLAG)) == 1) { 645 dropvp = ncp->nc_vp; 646 if (count & NC_EXLOCK_REQ) 647 ncount = count & NC_SHLOCK_REQ; /* cnt->0 */ 648 else 649 ncount = 0; 650 651 if (atomic_cmpset_int(&ncp->nc_lockstatus, 652 count, ncount)) { 653 if (count & NC_EXLOCK_REQ) 654 wakeup(&ncp->nc_locktd); 655 else if (count & NC_SHLOCK_REQ) 656 wakeup(ncp); 657 break; 658 } 659 dropvp = NULL; 660 } else { 661 KKASSERT((count & NC_SHLOCK_VHOLD) == 0); 662 KKASSERT((count & ~(NC_EXLOCK_REQ | 663 NC_SHLOCK_REQ | 664 NC_SHLOCK_FLAG)) > 1); 665 if (atomic_cmpset_int(&ncp->nc_lockstatus, 666 count, count - 1)) { 667 break; 668 } 669 } 670 count = ncp->nc_lockstatus; 671 cpu_ccfence(); 672 } 673 674 /* 675 * Don't actually drop the vp until we successfully clean out 676 * the lock, otherwise we may race another shared lock. 677 */ 678 if (dropvp) 679 vdrop(dropvp); 680 } 681 682 static 683 int 684 _cache_lockstatus(struct namecache *ncp) 685 { 686 if (ncp->nc_locktd == curthread) 687 return(LK_EXCLUSIVE); 688 if (ncp->nc_lockstatus & NC_SHLOCK_FLAG) 689 return(LK_SHARED); 690 return(-1); 691 } 692 693 /* 694 * cache_hold() and cache_drop() prevent the premature deletion of a 695 * namecache entry but do not prevent operations (such as zapping) on 696 * that namecache entry. 697 * 698 * This routine may only be called from outside this source module if 699 * nc_refs is already at least 1. 700 * 701 * This is a rare case where callers are allowed to hold a spinlock, 702 * so we can't ourselves. 703 */ 704 static __inline 705 struct namecache * 706 _cache_hold(struct namecache *ncp) 707 { 708 atomic_add_int(&ncp->nc_refs, 1); 709 return(ncp); 710 } 711 712 /* 713 * Drop a cache entry, taking care to deal with races. 714 * 715 * For potential 1->0 transitions we must hold the ncp lock to safely 716 * test its flags. An unresolved entry with no children must be zapped 717 * to avoid leaks. 718 * 719 * The call to cache_zap() itself will handle all remaining races and 720 * will decrement the ncp's refs regardless. If we are resolved or 721 * have children nc_refs can safely be dropped to 0 without having to 722 * zap the entry. 723 * 724 * NOTE: cache_zap() will re-check nc_refs and nc_list in a MPSAFE fashion. 725 * 726 * NOTE: cache_zap() may return a non-NULL referenced parent which must 727 * be dropped in a loop. 728 */ 729 static __inline 730 void 731 _cache_drop(struct namecache *ncp) 732 { 733 int refs; 734 735 while (ncp) { 736 KKASSERT(ncp->nc_refs > 0); 737 refs = ncp->nc_refs; 738 739 if (refs == 1) { 740 if (_cache_lock_nonblock(ncp) == 0) { 741 ncp->nc_flag &= ~NCF_DEFEREDZAP; 742 if ((ncp->nc_flag & NCF_UNRESOLVED) && 743 TAILQ_EMPTY(&ncp->nc_list)) { 744 ncp = cache_zap(ncp, 1); 745 continue; 746 } 747 if (atomic_cmpset_int(&ncp->nc_refs, 1, 0)) { 748 _cache_unlock(ncp); 749 break; 750 } 751 _cache_unlock(ncp); 752 } 753 } else { 754 if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1)) 755 break; 756 } 757 cpu_pause(); 758 } 759 } 760 761 /* 762 * Link a new namecache entry to its parent and to the hash table. Be 763 * careful to avoid races if vhold() blocks in the future. 764 * 765 * Both ncp and par must be referenced and locked. 766 * 767 * NOTE: The hash table spinlock is held during this call, we can't do 768 * anything fancy. 769 */ 770 static void 771 _cache_link_parent(struct namecache *ncp, struct namecache *par, 772 struct nchash_head *nchpp) 773 { 774 KKASSERT(ncp->nc_parent == NULL); 775 ncp->nc_parent = par; 776 ncp->nc_head = nchpp; 777 778 /* 779 * Set inheritance flags. Note that the parent flags may be 780 * stale due to getattr potentially not having been run yet 781 * (it gets run during nlookup()'s). 782 */ 783 ncp->nc_flag &= ~(NCF_SF_PNOCACHE | NCF_UF_PCACHE); 784 if (par->nc_flag & (NCF_SF_NOCACHE | NCF_SF_PNOCACHE)) 785 ncp->nc_flag |= NCF_SF_PNOCACHE; 786 if (par->nc_flag & (NCF_UF_CACHE | NCF_UF_PCACHE)) 787 ncp->nc_flag |= NCF_UF_PCACHE; 788 789 LIST_INSERT_HEAD(&nchpp->list, ncp, nc_hash); 790 791 if (TAILQ_EMPTY(&par->nc_list)) { 792 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry); 793 /* 794 * Any vp associated with an ncp which has children must 795 * be held to prevent it from being recycled. 796 */ 797 if (par->nc_vp) 798 vhold(par->nc_vp); 799 } else { 800 TAILQ_INSERT_HEAD(&par->nc_list, ncp, nc_entry); 801 } 802 } 803 804 /* 805 * Remove the parent and hash associations from a namecache structure. 806 * If this is the last child of the parent the cache_drop(par) will 807 * attempt to recursively zap the parent. 808 * 809 * ncp must be locked. This routine will acquire a temporary lock on 810 * the parent as wlel as the appropriate hash chain. 811 */ 812 static void 813 _cache_unlink_parent(struct namecache *ncp) 814 { 815 struct namecache *par; 816 struct vnode *dropvp; 817 818 if ((par = ncp->nc_parent) != NULL) { 819 KKASSERT(ncp->nc_parent == par); 820 _cache_hold(par); 821 _cache_lock(par); 822 spin_lock(&ncp->nc_head->spin); 823 LIST_REMOVE(ncp, nc_hash); 824 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry); 825 dropvp = NULL; 826 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list)) 827 dropvp = par->nc_vp; 828 spin_unlock(&ncp->nc_head->spin); 829 ncp->nc_parent = NULL; 830 ncp->nc_head = NULL; 831 _cache_unlock(par); 832 _cache_drop(par); 833 834 /* 835 * We can only safely vdrop with no spinlocks held. 836 */ 837 if (dropvp) 838 vdrop(dropvp); 839 } 840 } 841 842 /* 843 * Allocate a new namecache structure. Most of the code does not require 844 * zero-termination of the string but it makes vop_compat_ncreate() easier. 845 */ 846 static struct namecache * 847 cache_alloc(int nlen) 848 { 849 struct namecache *ncp; 850 851 ncp = kmalloc(sizeof(*ncp), M_VFSCACHE, M_WAITOK|M_ZERO); 852 if (nlen) 853 ncp->nc_name = kmalloc(nlen + 1, M_VFSCACHE, M_WAITOK); 854 ncp->nc_nlen = nlen; 855 ncp->nc_flag = NCF_UNRESOLVED; 856 ncp->nc_error = ENOTCONN; /* needs to be resolved */ 857 ncp->nc_refs = 1; 858 859 TAILQ_INIT(&ncp->nc_list); 860 _cache_lock(ncp); 861 return(ncp); 862 } 863 864 /* 865 * Can only be called for the case where the ncp has never been 866 * associated with anything (so no spinlocks are needed). 867 */ 868 static void 869 _cache_free(struct namecache *ncp) 870 { 871 KKASSERT(ncp->nc_refs == 1 && ncp->nc_lockstatus == 1); 872 if (ncp->nc_name) 873 kfree(ncp->nc_name, M_VFSCACHE); 874 kfree(ncp, M_VFSCACHE); 875 } 876 877 /* 878 * [re]initialize a nchandle. 879 */ 880 void 881 cache_zero(struct nchandle *nch) 882 { 883 nch->ncp = NULL; 884 nch->mount = NULL; 885 } 886 887 /* 888 * Ref and deref a namecache structure. 889 * 890 * The caller must specify a stable ncp pointer, typically meaning the 891 * ncp is already referenced but this can also occur indirectly through 892 * e.g. holding a lock on a direct child. 893 * 894 * WARNING: Caller may hold an unrelated read spinlock, which means we can't 895 * use read spinlocks here. 896 * 897 * MPSAFE if nch is 898 */ 899 struct nchandle * 900 cache_hold(struct nchandle *nch) 901 { 902 _cache_hold(nch->ncp); 903 atomic_add_int(&nch->mount->mnt_refs, 1); 904 return(nch); 905 } 906 907 /* 908 * Create a copy of a namecache handle for an already-referenced 909 * entry. 910 * 911 * MPSAFE if nch is 912 */ 913 void 914 cache_copy(struct nchandle *nch, struct nchandle *target) 915 { 916 *target = *nch; 917 if (target->ncp) 918 _cache_hold(target->ncp); 919 atomic_add_int(&nch->mount->mnt_refs, 1); 920 } 921 922 /* 923 * MPSAFE if nch is 924 */ 925 void 926 cache_changemount(struct nchandle *nch, struct mount *mp) 927 { 928 atomic_add_int(&nch->mount->mnt_refs, -1); 929 nch->mount = mp; 930 atomic_add_int(&nch->mount->mnt_refs, 1); 931 } 932 933 void 934 cache_drop(struct nchandle *nch) 935 { 936 atomic_add_int(&nch->mount->mnt_refs, -1); 937 _cache_drop(nch->ncp); 938 nch->ncp = NULL; 939 nch->mount = NULL; 940 } 941 942 int 943 cache_lockstatus(struct nchandle *nch) 944 { 945 return(_cache_lockstatus(nch->ncp)); 946 } 947 948 void 949 cache_lock(struct nchandle *nch) 950 { 951 _cache_lock(nch->ncp); 952 } 953 954 void 955 cache_lock_maybe_shared(struct nchandle *nch, int excl) 956 { 957 struct namecache *ncp = nch->ncp; 958 959 if (ncp_shared_lock_disable || excl || 960 (ncp->nc_flag & NCF_UNRESOLVED)) { 961 _cache_lock(ncp); 962 } else { 963 _cache_lock_shared(ncp); 964 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 965 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED)) { 966 _cache_unlock(ncp); 967 _cache_lock(ncp); 968 } 969 } else { 970 _cache_unlock(ncp); 971 _cache_lock(ncp); 972 } 973 } 974 } 975 976 /* 977 * Relock nch1 given an unlocked nch1 and a locked nch2. The caller 978 * is responsible for checking both for validity on return as they 979 * may have become invalid. 980 * 981 * We have to deal with potential deadlocks here, just ping pong 982 * the lock until we get it (we will always block somewhere when 983 * looping so this is not cpu-intensive). 984 * 985 * which = 0 nch1 not locked, nch2 is locked 986 * which = 1 nch1 is locked, nch2 is not locked 987 */ 988 void 989 cache_relock(struct nchandle *nch1, struct ucred *cred1, 990 struct nchandle *nch2, struct ucred *cred2) 991 { 992 int which; 993 994 which = 0; 995 996 for (;;) { 997 if (which == 0) { 998 if (cache_lock_nonblock(nch1) == 0) { 999 cache_resolve(nch1, cred1); 1000 break; 1001 } 1002 cache_unlock(nch2); 1003 cache_lock(nch1); 1004 cache_resolve(nch1, cred1); 1005 which = 1; 1006 } else { 1007 if (cache_lock_nonblock(nch2) == 0) { 1008 cache_resolve(nch2, cred2); 1009 break; 1010 } 1011 cache_unlock(nch1); 1012 cache_lock(nch2); 1013 cache_resolve(nch2, cred2); 1014 which = 0; 1015 } 1016 } 1017 } 1018 1019 int 1020 cache_lock_nonblock(struct nchandle *nch) 1021 { 1022 return(_cache_lock_nonblock(nch->ncp)); 1023 } 1024 1025 void 1026 cache_unlock(struct nchandle *nch) 1027 { 1028 _cache_unlock(nch->ncp); 1029 } 1030 1031 /* 1032 * ref-and-lock, unlock-and-deref functions. 1033 * 1034 * This function is primarily used by nlookup. Even though cache_lock 1035 * holds the vnode, it is possible that the vnode may have already 1036 * initiated a recyclement. 1037 * 1038 * We want cache_get() to return a definitively usable vnode or a 1039 * definitively unresolved ncp. 1040 */ 1041 static 1042 struct namecache * 1043 _cache_get(struct namecache *ncp) 1044 { 1045 _cache_hold(ncp); 1046 _cache_lock(ncp); 1047 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED)) 1048 _cache_setunresolved(ncp); 1049 return(ncp); 1050 } 1051 1052 /* 1053 * Attempt to obtain a shared lock on the ncp. A shared lock will only 1054 * be obtained if the ncp is resolved and the vnode (if not ENOENT) is 1055 * valid. Otherwise an exclusive lock will be acquired instead. 1056 */ 1057 static 1058 struct namecache * 1059 _cache_get_maybe_shared(struct namecache *ncp, int excl) 1060 { 1061 if (ncp_shared_lock_disable || excl || 1062 (ncp->nc_flag & NCF_UNRESOLVED)) { 1063 return(_cache_get(ncp)); 1064 } 1065 _cache_hold(ncp); 1066 _cache_lock_shared(ncp); 1067 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 1068 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED)) { 1069 _cache_unlock(ncp); 1070 ncp = _cache_get(ncp); 1071 _cache_drop(ncp); 1072 } 1073 } else { 1074 _cache_unlock(ncp); 1075 ncp = _cache_get(ncp); 1076 _cache_drop(ncp); 1077 } 1078 return(ncp); 1079 } 1080 1081 /* 1082 * This is a special form of _cache_lock() which only succeeds if 1083 * it can get a pristine, non-recursive lock. The caller must have 1084 * already ref'd the ncp. 1085 * 1086 * On success the ncp will be locked, on failure it will not. The 1087 * ref count does not change either way. 1088 * 1089 * We want _cache_lock_special() (on success) to return a definitively 1090 * usable vnode or a definitively unresolved ncp. 1091 */ 1092 static int 1093 _cache_lock_special(struct namecache *ncp) 1094 { 1095 if (_cache_lock_nonblock(ncp) == 0) { 1096 if ((ncp->nc_lockstatus & 1097 ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == 1) { 1098 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED)) 1099 _cache_setunresolved(ncp); 1100 return(0); 1101 } 1102 _cache_unlock(ncp); 1103 } 1104 return(EWOULDBLOCK); 1105 } 1106 1107 /* 1108 * This function tries to get a shared lock but will back-off to an exclusive 1109 * lock if: 1110 * 1111 * (1) Some other thread is trying to obtain an exclusive lock 1112 * (to prevent the exclusive requester from getting livelocked out 1113 * by many shared locks). 1114 * 1115 * (2) The current thread already owns an exclusive lock (to avoid 1116 * deadlocking). 1117 * 1118 * WARNING! On machines with lots of cores we really want to try hard to 1119 * get a shared lock or concurrent path lookups can chain-react 1120 * into a very high-latency exclusive lock. 1121 */ 1122 static int 1123 _cache_lock_shared_special(struct namecache *ncp) 1124 { 1125 if (_cache_lock_shared_nonblock(ncp) == 0) { 1126 if ((ncp->nc_lockstatus & 1127 ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) == (NC_SHLOCK_FLAG | 1)) { 1128 if (ncp->nc_vp == NULL || 1129 (ncp->nc_vp->v_flag & VRECLAIMED) == 0) { 1130 return(0); 1131 } 1132 } 1133 _cache_unlock(ncp); 1134 return(EWOULDBLOCK); 1135 } 1136 if (ncp->nc_locktd == curthread) { 1137 _cache_lock(ncp); 1138 return(0); 1139 } 1140 _cache_lock_shared(ncp); 1141 return(0); 1142 } 1143 1144 1145 /* 1146 * NOTE: The same nchandle can be passed for both arguments. 1147 */ 1148 void 1149 cache_get(struct nchandle *nch, struct nchandle *target) 1150 { 1151 KKASSERT(nch->ncp->nc_refs > 0); 1152 target->mount = nch->mount; 1153 target->ncp = _cache_get(nch->ncp); 1154 atomic_add_int(&target->mount->mnt_refs, 1); 1155 } 1156 1157 void 1158 cache_get_maybe_shared(struct nchandle *nch, struct nchandle *target, int excl) 1159 { 1160 KKASSERT(nch->ncp->nc_refs > 0); 1161 target->mount = nch->mount; 1162 target->ncp = _cache_get_maybe_shared(nch->ncp, excl); 1163 atomic_add_int(&target->mount->mnt_refs, 1); 1164 } 1165 1166 /* 1167 * 1168 */ 1169 static __inline 1170 void 1171 _cache_put(struct namecache *ncp) 1172 { 1173 _cache_unlock(ncp); 1174 _cache_drop(ncp); 1175 } 1176 1177 /* 1178 * 1179 */ 1180 void 1181 cache_put(struct nchandle *nch) 1182 { 1183 atomic_add_int(&nch->mount->mnt_refs, -1); 1184 _cache_put(nch->ncp); 1185 nch->ncp = NULL; 1186 nch->mount = NULL; 1187 } 1188 1189 /* 1190 * Resolve an unresolved ncp by associating a vnode with it. If the 1191 * vnode is NULL, a negative cache entry is created. 1192 * 1193 * The ncp should be locked on entry and will remain locked on return. 1194 */ 1195 static 1196 void 1197 _cache_setvp(struct mount *mp, struct namecache *ncp, struct vnode *vp) 1198 { 1199 KKASSERT(ncp->nc_flag & NCF_UNRESOLVED); 1200 KKASSERT(_cache_lockstatus(ncp) == LK_EXCLUSIVE); 1201 1202 if (vp != NULL) { 1203 /* 1204 * Any vp associated with an ncp which has children must 1205 * be held. Any vp associated with a locked ncp must be held. 1206 */ 1207 if (!TAILQ_EMPTY(&ncp->nc_list)) 1208 vhold(vp); 1209 spin_lock(&vp->v_spin); 1210 ncp->nc_vp = vp; 1211 TAILQ_INSERT_HEAD(&vp->v_namecache, ncp, nc_vnode); 1212 spin_unlock(&vp->v_spin); 1213 if (ncp->nc_lockstatus & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) 1214 vhold(vp); 1215 1216 /* 1217 * Set auxiliary flags 1218 */ 1219 switch(vp->v_type) { 1220 case VDIR: 1221 ncp->nc_flag |= NCF_ISDIR; 1222 break; 1223 case VLNK: 1224 ncp->nc_flag |= NCF_ISSYMLINK; 1225 /* XXX cache the contents of the symlink */ 1226 break; 1227 default: 1228 break; 1229 } 1230 atomic_add_int(&numcache, 1); 1231 ncp->nc_error = 0; 1232 /* XXX: this is a hack to work-around the lack of a real pfs vfs 1233 * implementation*/ 1234 if (mp != NULL) 1235 if (strncmp(mp->mnt_stat.f_fstypename, "null", 5) == 0) 1236 vp->v_pfsmp = mp; 1237 } else { 1238 /* 1239 * When creating a negative cache hit we set the 1240 * namecache_gen. A later resolve will clean out the 1241 * negative cache hit if the mount point's namecache_gen 1242 * has changed. Used by devfs, could also be used by 1243 * other remote FSs. 1244 */ 1245 ncp->nc_vp = NULL; 1246 spin_lock(&ncspin); 1247 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode); 1248 ++numneg; 1249 spin_unlock(&ncspin); 1250 ncp->nc_error = ENOENT; 1251 if (mp) 1252 VFS_NCPGEN_SET(mp, ncp); 1253 } 1254 ncp->nc_flag &= ~(NCF_UNRESOLVED | NCF_DEFEREDZAP); 1255 } 1256 1257 /* 1258 * 1259 */ 1260 void 1261 cache_setvp(struct nchandle *nch, struct vnode *vp) 1262 { 1263 _cache_setvp(nch->mount, nch->ncp, vp); 1264 } 1265 1266 /* 1267 * 1268 */ 1269 void 1270 cache_settimeout(struct nchandle *nch, int nticks) 1271 { 1272 struct namecache *ncp = nch->ncp; 1273 1274 if ((ncp->nc_timeout = ticks + nticks) == 0) 1275 ncp->nc_timeout = 1; 1276 } 1277 1278 /* 1279 * Disassociate the vnode or negative-cache association and mark a 1280 * namecache entry as unresolved again. Note that the ncp is still 1281 * left in the hash table and still linked to its parent. 1282 * 1283 * The ncp should be locked and refd on entry and will remain locked and refd 1284 * on return. 1285 * 1286 * This routine is normally never called on a directory containing children. 1287 * However, NFS often does just that in its rename() code as a cop-out to 1288 * avoid complex namespace operations. This disconnects a directory vnode 1289 * from its namecache and can cause the OLDAPI and NEWAPI to get out of 1290 * sync. 1291 * 1292 */ 1293 static 1294 void 1295 _cache_setunresolved(struct namecache *ncp) 1296 { 1297 struct vnode *vp; 1298 1299 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 1300 ncp->nc_flag |= NCF_UNRESOLVED; 1301 ncp->nc_timeout = 0; 1302 ncp->nc_error = ENOTCONN; 1303 if ((vp = ncp->nc_vp) != NULL) { 1304 atomic_add_int(&numcache, -1); 1305 spin_lock(&vp->v_spin); 1306 ncp->nc_vp = NULL; 1307 TAILQ_REMOVE(&vp->v_namecache, ncp, nc_vnode); 1308 spin_unlock(&vp->v_spin); 1309 1310 /* 1311 * Any vp associated with an ncp with children is 1312 * held by that ncp. Any vp associated with a locked 1313 * ncp is held by that ncp. These conditions must be 1314 * undone when the vp is cleared out from the ncp. 1315 */ 1316 if (!TAILQ_EMPTY(&ncp->nc_list)) 1317 vdrop(vp); 1318 if (ncp->nc_lockstatus & ~(NC_EXLOCK_REQ|NC_SHLOCK_REQ)) 1319 vdrop(vp); 1320 } else { 1321 spin_lock(&ncspin); 1322 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode); 1323 --numneg; 1324 spin_unlock(&ncspin); 1325 } 1326 ncp->nc_flag &= ~(NCF_WHITEOUT|NCF_ISDIR|NCF_ISSYMLINK); 1327 } 1328 } 1329 1330 /* 1331 * The cache_nresolve() code calls this function to automatically 1332 * set a resolved cache element to unresolved if it has timed out 1333 * or if it is a negative cache hit and the mount point namecache_gen 1334 * has changed. 1335 */ 1336 static __inline int 1337 _cache_auto_unresolve_test(struct mount *mp, struct namecache *ncp) 1338 { 1339 /* 1340 * Try to zap entries that have timed out. We have 1341 * to be careful here because locked leafs may depend 1342 * on the vnode remaining intact in a parent, so only 1343 * do this under very specific conditions. 1344 */ 1345 if (ncp->nc_timeout && (int)(ncp->nc_timeout - ticks) < 0 && 1346 TAILQ_EMPTY(&ncp->nc_list)) { 1347 return 1; 1348 } 1349 1350 /* 1351 * If a resolved negative cache hit is invalid due to 1352 * the mount's namecache generation being bumped, zap it. 1353 */ 1354 if (ncp->nc_vp == NULL && VFS_NCPGEN_TEST(mp, ncp)) { 1355 return 1; 1356 } 1357 1358 /* 1359 * Otherwise we are good 1360 */ 1361 return 0; 1362 } 1363 1364 static __inline void 1365 _cache_auto_unresolve(struct mount *mp, struct namecache *ncp) 1366 { 1367 /* 1368 * Already in an unresolved state, nothing to do. 1369 */ 1370 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 1371 if (_cache_auto_unresolve_test(mp, ncp)) 1372 _cache_setunresolved(ncp); 1373 } 1374 } 1375 1376 /* 1377 * 1378 */ 1379 void 1380 cache_setunresolved(struct nchandle *nch) 1381 { 1382 _cache_setunresolved(nch->ncp); 1383 } 1384 1385 /* 1386 * Determine if we can clear NCF_ISMOUNTPT by scanning the mountlist 1387 * looking for matches. This flag tells the lookup code when it must 1388 * check for a mount linkage and also prevents the directories in question 1389 * from being deleted or renamed. 1390 */ 1391 static 1392 int 1393 cache_clrmountpt_callback(struct mount *mp, void *data) 1394 { 1395 struct nchandle *nch = data; 1396 1397 if (mp->mnt_ncmounton.ncp == nch->ncp) 1398 return(1); 1399 if (mp->mnt_ncmountpt.ncp == nch->ncp) 1400 return(1); 1401 return(0); 1402 } 1403 1404 /* 1405 * 1406 */ 1407 void 1408 cache_clrmountpt(struct nchandle *nch) 1409 { 1410 int count; 1411 1412 count = mountlist_scan(cache_clrmountpt_callback, nch, 1413 MNTSCAN_FORWARD|MNTSCAN_NOBUSY); 1414 if (count == 0) 1415 nch->ncp->nc_flag &= ~NCF_ISMOUNTPT; 1416 } 1417 1418 /* 1419 * Invalidate portions of the namecache topology given a starting entry. 1420 * The passed ncp is set to an unresolved state and: 1421 * 1422 * The passed ncp must be referencxed and locked. The routine may unlock 1423 * and relock ncp several times, and will recheck the children and loop 1424 * to catch races. When done the passed ncp will be returned with the 1425 * reference and lock intact. 1426 * 1427 * CINV_DESTROY - Set a flag in the passed ncp entry indicating 1428 * that the physical underlying nodes have been 1429 * destroyed... as in deleted. For example, when 1430 * a directory is removed. This will cause record 1431 * lookups on the name to no longer be able to find 1432 * the record and tells the resolver to return failure 1433 * rather then trying to resolve through the parent. 1434 * 1435 * The topology itself, including ncp->nc_name, 1436 * remains intact. 1437 * 1438 * This only applies to the passed ncp, if CINV_CHILDREN 1439 * is specified the children are not flagged. 1440 * 1441 * CINV_CHILDREN - Set all children (recursively) to an unresolved 1442 * state as well. 1443 * 1444 * Note that this will also have the side effect of 1445 * cleaning out any unreferenced nodes in the topology 1446 * from the leaves up as the recursion backs out. 1447 * 1448 * Note that the topology for any referenced nodes remains intact, but 1449 * the nodes will be marked as having been destroyed and will be set 1450 * to an unresolved state. 1451 * 1452 * It is possible for cache_inval() to race a cache_resolve(), meaning that 1453 * the namecache entry may not actually be invalidated on return if it was 1454 * revalidated while recursing down into its children. This code guarentees 1455 * that the node(s) will go through an invalidation cycle, but does not 1456 * guarentee that they will remain in an invalidated state. 1457 * 1458 * Returns non-zero if a revalidation was detected during the invalidation 1459 * recursion, zero otherwise. Note that since only the original ncp is 1460 * locked the revalidation ultimately can only indicate that the original ncp 1461 * *MIGHT* no have been reresolved. 1462 * 1463 * DEEP RECURSION HANDLING - If a recursive invalidation recurses deeply we 1464 * have to avoid blowing out the kernel stack. We do this by saving the 1465 * deep namecache node and aborting the recursion, then re-recursing at that 1466 * node using a depth-first algorithm in order to allow multiple deep 1467 * recursions to chain through each other, then we restart the invalidation 1468 * from scratch. 1469 */ 1470 1471 struct cinvtrack { 1472 struct namecache *resume_ncp; 1473 int depth; 1474 }; 1475 1476 static int _cache_inval_internal(struct namecache *, int, struct cinvtrack *); 1477 1478 static 1479 int 1480 _cache_inval(struct namecache *ncp, int flags) 1481 { 1482 struct cinvtrack track; 1483 struct namecache *ncp2; 1484 int r; 1485 1486 track.depth = 0; 1487 track.resume_ncp = NULL; 1488 1489 for (;;) { 1490 r = _cache_inval_internal(ncp, flags, &track); 1491 if (track.resume_ncp == NULL) 1492 break; 1493 kprintf("Warning: deep namecache recursion at %s\n", 1494 ncp->nc_name); 1495 _cache_unlock(ncp); 1496 while ((ncp2 = track.resume_ncp) != NULL) { 1497 track.resume_ncp = NULL; 1498 _cache_lock(ncp2); 1499 _cache_inval_internal(ncp2, flags & ~CINV_DESTROY, 1500 &track); 1501 _cache_put(ncp2); 1502 } 1503 _cache_lock(ncp); 1504 } 1505 return(r); 1506 } 1507 1508 int 1509 cache_inval(struct nchandle *nch, int flags) 1510 { 1511 return(_cache_inval(nch->ncp, flags)); 1512 } 1513 1514 /* 1515 * Helper for _cache_inval(). The passed ncp is refd and locked and 1516 * remains that way on return, but may be unlocked/relocked multiple 1517 * times by the routine. 1518 */ 1519 static int 1520 _cache_inval_internal(struct namecache *ncp, int flags, struct cinvtrack *track) 1521 { 1522 struct namecache *kid; 1523 struct namecache *nextkid; 1524 int rcnt = 0; 1525 1526 KKASSERT(_cache_lockstatus(ncp) == LK_EXCLUSIVE); 1527 1528 _cache_setunresolved(ncp); 1529 if (flags & CINV_DESTROY) 1530 ncp->nc_flag |= NCF_DESTROYED; 1531 if ((flags & CINV_CHILDREN) && 1532 (kid = TAILQ_FIRST(&ncp->nc_list)) != NULL 1533 ) { 1534 _cache_hold(kid); 1535 if (++track->depth > MAX_RECURSION_DEPTH) { 1536 track->resume_ncp = ncp; 1537 _cache_hold(ncp); 1538 ++rcnt; 1539 } 1540 _cache_unlock(ncp); 1541 while (kid) { 1542 if (track->resume_ncp) { 1543 _cache_drop(kid); 1544 break; 1545 } 1546 if ((nextkid = TAILQ_NEXT(kid, nc_entry)) != NULL) 1547 _cache_hold(nextkid); 1548 if ((kid->nc_flag & NCF_UNRESOLVED) == 0 || 1549 TAILQ_FIRST(&kid->nc_list) 1550 ) { 1551 _cache_lock(kid); 1552 rcnt += _cache_inval_internal(kid, flags & ~CINV_DESTROY, track); 1553 _cache_unlock(kid); 1554 } 1555 _cache_drop(kid); 1556 kid = nextkid; 1557 } 1558 --track->depth; 1559 _cache_lock(ncp); 1560 } 1561 1562 /* 1563 * Someone could have gotten in there while ncp was unlocked, 1564 * retry if so. 1565 */ 1566 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) 1567 ++rcnt; 1568 return (rcnt); 1569 } 1570 1571 /* 1572 * Invalidate a vnode's namecache associations. To avoid races against 1573 * the resolver we do not invalidate a node which we previously invalidated 1574 * but which was then re-resolved while we were in the invalidation loop. 1575 * 1576 * Returns non-zero if any namecache entries remain after the invalidation 1577 * loop completed. 1578 * 1579 * NOTE: Unlike the namecache topology which guarentees that ncp's will not 1580 * be ripped out of the topology while held, the vnode's v_namecache 1581 * list has no such restriction. NCP's can be ripped out of the list 1582 * at virtually any time if not locked, even if held. 1583 * 1584 * In addition, the v_namecache list itself must be locked via 1585 * the vnode's spinlock. 1586 */ 1587 int 1588 cache_inval_vp(struct vnode *vp, int flags) 1589 { 1590 struct namecache *ncp; 1591 struct namecache *next; 1592 1593 restart: 1594 spin_lock(&vp->v_spin); 1595 ncp = TAILQ_FIRST(&vp->v_namecache); 1596 if (ncp) 1597 _cache_hold(ncp); 1598 while (ncp) { 1599 /* loop entered with ncp held and vp spin-locked */ 1600 if ((next = TAILQ_NEXT(ncp, nc_vnode)) != NULL) 1601 _cache_hold(next); 1602 spin_unlock(&vp->v_spin); 1603 _cache_lock(ncp); 1604 if (ncp->nc_vp != vp) { 1605 kprintf("Warning: cache_inval_vp: race-A detected on " 1606 "%s\n", ncp->nc_name); 1607 _cache_put(ncp); 1608 if (next) 1609 _cache_drop(next); 1610 goto restart; 1611 } 1612 _cache_inval(ncp, flags); 1613 _cache_put(ncp); /* also releases reference */ 1614 ncp = next; 1615 spin_lock(&vp->v_spin); 1616 if (ncp && ncp->nc_vp != vp) { 1617 spin_unlock(&vp->v_spin); 1618 kprintf("Warning: cache_inval_vp: race-B detected on " 1619 "%s\n", ncp->nc_name); 1620 _cache_drop(ncp); 1621 goto restart; 1622 } 1623 } 1624 spin_unlock(&vp->v_spin); 1625 return(TAILQ_FIRST(&vp->v_namecache) != NULL); 1626 } 1627 1628 /* 1629 * This routine is used instead of the normal cache_inval_vp() when we 1630 * are trying to recycle otherwise good vnodes. 1631 * 1632 * Return 0 on success, non-zero if not all namecache records could be 1633 * disassociated from the vnode (for various reasons). 1634 */ 1635 int 1636 cache_inval_vp_nonblock(struct vnode *vp) 1637 { 1638 struct namecache *ncp; 1639 struct namecache *next; 1640 1641 spin_lock(&vp->v_spin); 1642 ncp = TAILQ_FIRST(&vp->v_namecache); 1643 if (ncp) 1644 _cache_hold(ncp); 1645 while (ncp) { 1646 /* loop entered with ncp held */ 1647 if ((next = TAILQ_NEXT(ncp, nc_vnode)) != NULL) 1648 _cache_hold(next); 1649 spin_unlock(&vp->v_spin); 1650 if (_cache_lock_nonblock(ncp)) { 1651 _cache_drop(ncp); 1652 if (next) 1653 _cache_drop(next); 1654 goto done; 1655 } 1656 if (ncp->nc_vp != vp) { 1657 kprintf("Warning: cache_inval_vp: race-A detected on " 1658 "%s\n", ncp->nc_name); 1659 _cache_put(ncp); 1660 if (next) 1661 _cache_drop(next); 1662 goto done; 1663 } 1664 _cache_inval(ncp, 0); 1665 _cache_put(ncp); /* also releases reference */ 1666 ncp = next; 1667 spin_lock(&vp->v_spin); 1668 if (ncp && ncp->nc_vp != vp) { 1669 spin_unlock(&vp->v_spin); 1670 kprintf("Warning: cache_inval_vp: race-B detected on " 1671 "%s\n", ncp->nc_name); 1672 _cache_drop(ncp); 1673 goto done; 1674 } 1675 } 1676 spin_unlock(&vp->v_spin); 1677 done: 1678 return(TAILQ_FIRST(&vp->v_namecache) != NULL); 1679 } 1680 1681 /* 1682 * The source ncp has been renamed to the target ncp. Both fncp and tncp 1683 * must be locked. The target ncp is destroyed (as a normal rename-over 1684 * would destroy the target file or directory). 1685 * 1686 * Because there may be references to the source ncp we cannot copy its 1687 * contents to the target. Instead the source ncp is relinked as the target 1688 * and the target ncp is removed from the namecache topology. 1689 */ 1690 void 1691 cache_rename(struct nchandle *fnch, struct nchandle *tnch) 1692 { 1693 struct namecache *fncp = fnch->ncp; 1694 struct namecache *tncp = tnch->ncp; 1695 struct namecache *tncp_par; 1696 struct nchash_head *nchpp; 1697 u_int32_t hash; 1698 char *oname; 1699 char *nname; 1700 1701 if (tncp->nc_nlen) { 1702 nname = kmalloc(tncp->nc_nlen + 1, M_VFSCACHE, M_WAITOK); 1703 bcopy(tncp->nc_name, nname, tncp->nc_nlen); 1704 nname[tncp->nc_nlen] = 0; 1705 } else { 1706 nname = NULL; 1707 } 1708 1709 /* 1710 * Rename fncp (unlink) 1711 */ 1712 _cache_unlink_parent(fncp); 1713 oname = fncp->nc_name; 1714 fncp->nc_name = nname; 1715 fncp->nc_nlen = tncp->nc_nlen; 1716 if (oname) 1717 kfree(oname, M_VFSCACHE); 1718 1719 tncp_par = tncp->nc_parent; 1720 _cache_hold(tncp_par); 1721 _cache_lock(tncp_par); 1722 1723 /* 1724 * Rename fncp (relink) 1725 */ 1726 hash = fnv_32_buf(fncp->nc_name, fncp->nc_nlen, FNV1_32_INIT); 1727 hash = fnv_32_buf(&tncp_par, sizeof(tncp_par), hash); 1728 nchpp = NCHHASH(hash); 1729 1730 spin_lock(&nchpp->spin); 1731 _cache_link_parent(fncp, tncp_par, nchpp); 1732 spin_unlock(&nchpp->spin); 1733 1734 _cache_put(tncp_par); 1735 1736 /* 1737 * Get rid of the overwritten tncp (unlink) 1738 */ 1739 _cache_unlink(tncp); 1740 } 1741 1742 /* 1743 * Perform actions consistent with unlinking a file. The passed-in ncp 1744 * must be locked. 1745 * 1746 * The ncp is marked DESTROYED so it no longer shows up in searches, 1747 * and will be physically deleted when the vnode goes away. 1748 * 1749 * If the related vnode has no refs then we cycle it through vget()/vput() 1750 * to (possibly if we don't have a ref race) trigger a deactivation, 1751 * allowing the VFS to trivially detect and recycle the deleted vnode 1752 * via VOP_INACTIVE(). 1753 * 1754 * NOTE: _cache_rename() will automatically call _cache_unlink() on the 1755 * target ncp. 1756 */ 1757 void 1758 cache_unlink(struct nchandle *nch) 1759 { 1760 _cache_unlink(nch->ncp); 1761 } 1762 1763 static void 1764 _cache_unlink(struct namecache *ncp) 1765 { 1766 struct vnode *vp; 1767 1768 /* 1769 * Causes lookups to fail and allows another ncp with the same 1770 * name to be created under ncp->nc_parent. 1771 */ 1772 ncp->nc_flag |= NCF_DESTROYED; 1773 1774 /* 1775 * Attempt to trigger a deactivation. 1776 */ 1777 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 && 1778 (vp = ncp->nc_vp) != NULL && 1779 !sysref_isactive(&vp->v_sysref)) { 1780 if (vget(vp, LK_SHARED) == 0) 1781 vput(vp); 1782 } 1783 } 1784 1785 /* 1786 * vget the vnode associated with the namecache entry. Resolve the namecache 1787 * entry if necessary. The passed ncp must be referenced and locked. 1788 * 1789 * lk_type may be LK_SHARED, LK_EXCLUSIVE. A ref'd, possibly locked 1790 * (depending on the passed lk_type) will be returned in *vpp with an error 1791 * of 0, or NULL will be returned in *vpp with a non-0 error code. The 1792 * most typical error is ENOENT, meaning that the ncp represents a negative 1793 * cache hit and there is no vnode to retrieve, but other errors can occur 1794 * too. 1795 * 1796 * The vget() can race a reclaim. If this occurs we re-resolve the 1797 * namecache entry. 1798 * 1799 * There are numerous places in the kernel where vget() is called on a 1800 * vnode while one or more of its namecache entries is locked. Releasing 1801 * a vnode never deadlocks against locked namecache entries (the vnode 1802 * will not get recycled while referenced ncp's exist). This means we 1803 * can safely acquire the vnode. In fact, we MUST NOT release the ncp 1804 * lock when acquiring the vp lock or we might cause a deadlock. 1805 * 1806 * NOTE: The passed-in ncp must be locked exclusively if it is initially 1807 * unresolved. If a reclaim race occurs the passed-in ncp will be 1808 * relocked exclusively before being re-resolved. 1809 */ 1810 int 1811 cache_vget(struct nchandle *nch, struct ucred *cred, 1812 int lk_type, struct vnode **vpp) 1813 { 1814 struct namecache *ncp; 1815 struct vnode *vp; 1816 int error; 1817 1818 ncp = nch->ncp; 1819 again: 1820 vp = NULL; 1821 if (ncp->nc_flag & NCF_UNRESOLVED) 1822 error = cache_resolve(nch, cred); 1823 else 1824 error = 0; 1825 1826 if (error == 0 && (vp = ncp->nc_vp) != NULL) { 1827 error = vget(vp, lk_type); 1828 if (error) { 1829 /* 1830 * VRECLAIM race 1831 */ 1832 if (error == ENOENT) { 1833 kprintf("Warning: vnode reclaim race detected " 1834 "in cache_vget on %p (%s)\n", 1835 vp, ncp->nc_name); 1836 _cache_unlock(ncp); 1837 _cache_lock(ncp); 1838 _cache_setunresolved(ncp); 1839 goto again; 1840 } 1841 1842 /* 1843 * Not a reclaim race, some other error. 1844 */ 1845 KKASSERT(ncp->nc_vp == vp); 1846 vp = NULL; 1847 } else { 1848 KKASSERT(ncp->nc_vp == vp); 1849 KKASSERT((vp->v_flag & VRECLAIMED) == 0); 1850 } 1851 } 1852 if (error == 0 && vp == NULL) 1853 error = ENOENT; 1854 *vpp = vp; 1855 return(error); 1856 } 1857 1858 /* 1859 * Similar to cache_vget() but only acquires a ref on the vnode. 1860 * 1861 * NOTE: The passed-in ncp must be locked exclusively if it is initially 1862 * unresolved. If a reclaim race occurs the passed-in ncp will be 1863 * relocked exclusively before being re-resolved. 1864 */ 1865 int 1866 cache_vref(struct nchandle *nch, struct ucred *cred, struct vnode **vpp) 1867 { 1868 struct namecache *ncp; 1869 struct vnode *vp; 1870 int error; 1871 1872 ncp = nch->ncp; 1873 again: 1874 vp = NULL; 1875 if (ncp->nc_flag & NCF_UNRESOLVED) 1876 error = cache_resolve(nch, cred); 1877 else 1878 error = 0; 1879 1880 if (error == 0 && (vp = ncp->nc_vp) != NULL) { 1881 error = vget(vp, LK_SHARED); 1882 if (error) { 1883 /* 1884 * VRECLAIM race 1885 */ 1886 if (error == ENOENT) { 1887 kprintf("Warning: vnode reclaim race detected " 1888 "in cache_vget on %p (%s)\n", 1889 vp, ncp->nc_name); 1890 _cache_unlock(ncp); 1891 _cache_lock(ncp); 1892 _cache_setunresolved(ncp); 1893 goto again; 1894 } 1895 1896 /* 1897 * Not a reclaim race, some other error. 1898 */ 1899 KKASSERT(ncp->nc_vp == vp); 1900 vp = NULL; 1901 } else { 1902 KKASSERT(ncp->nc_vp == vp); 1903 KKASSERT((vp->v_flag & VRECLAIMED) == 0); 1904 /* caller does not want a lock */ 1905 vn_unlock(vp); 1906 } 1907 } 1908 if (error == 0 && vp == NULL) 1909 error = ENOENT; 1910 *vpp = vp; 1911 return(error); 1912 } 1913 1914 /* 1915 * Return a referenced vnode representing the parent directory of 1916 * ncp. 1917 * 1918 * Because the caller has locked the ncp it should not be possible for 1919 * the parent ncp to go away. However, the parent can unresolve its 1920 * dvp at any time so we must be able to acquire a lock on the parent 1921 * to safely access nc_vp. 1922 * 1923 * We have to leave par unlocked when vget()ing dvp to avoid a deadlock, 1924 * so use vhold()/vdrop() while holding the lock to prevent dvp from 1925 * getting destroyed. 1926 * 1927 * NOTE: vhold() is allowed when dvp has 0 refs if we hold a 1928 * lock on the ncp in question.. 1929 */ 1930 static struct vnode * 1931 cache_dvpref(struct namecache *ncp) 1932 { 1933 struct namecache *par; 1934 struct vnode *dvp; 1935 1936 dvp = NULL; 1937 if ((par = ncp->nc_parent) != NULL) { 1938 _cache_hold(par); 1939 _cache_lock(par); 1940 if ((par->nc_flag & NCF_UNRESOLVED) == 0) { 1941 if ((dvp = par->nc_vp) != NULL) 1942 vhold(dvp); 1943 } 1944 _cache_unlock(par); 1945 if (dvp) { 1946 if (vget(dvp, LK_SHARED) == 0) { 1947 vn_unlock(dvp); 1948 vdrop(dvp); 1949 /* return refd, unlocked dvp */ 1950 } else { 1951 vdrop(dvp); 1952 dvp = NULL; 1953 } 1954 } 1955 _cache_drop(par); 1956 } 1957 return(dvp); 1958 } 1959 1960 /* 1961 * Convert a directory vnode to a namecache record without any other 1962 * knowledge of the topology. This ONLY works with directory vnodes and 1963 * is ONLY used by the NFS server. dvp must be refd but unlocked, and the 1964 * returned ncp (if not NULL) will be held and unlocked. 1965 * 1966 * If 'makeit' is 0 and dvp has no existing namecache record, NULL is returned. 1967 * If 'makeit' is 1 we attempt to track-down and create the namecache topology 1968 * for dvp. This will fail only if the directory has been deleted out from 1969 * under the caller. 1970 * 1971 * Callers must always check for a NULL return no matter the value of 'makeit'. 1972 * 1973 * To avoid underflowing the kernel stack each recursive call increments 1974 * the makeit variable. 1975 */ 1976 1977 static int cache_inefficient_scan(struct nchandle *nch, struct ucred *cred, 1978 struct vnode *dvp, char *fakename); 1979 static int cache_fromdvp_try(struct vnode *dvp, struct ucred *cred, 1980 struct vnode **saved_dvp); 1981 1982 int 1983 cache_fromdvp(struct vnode *dvp, struct ucred *cred, int makeit, 1984 struct nchandle *nch) 1985 { 1986 struct vnode *saved_dvp; 1987 struct vnode *pvp; 1988 char *fakename; 1989 int error; 1990 1991 nch->ncp = NULL; 1992 nch->mount = dvp->v_mount; 1993 saved_dvp = NULL; 1994 fakename = NULL; 1995 1996 /* 1997 * Handle the makeit == 0 degenerate case 1998 */ 1999 if (makeit == 0) { 2000 spin_lock(&dvp->v_spin); 2001 nch->ncp = TAILQ_FIRST(&dvp->v_namecache); 2002 if (nch->ncp) 2003 cache_hold(nch); 2004 spin_unlock(&dvp->v_spin); 2005 } 2006 2007 /* 2008 * Loop until resolution, inside code will break out on error. 2009 */ 2010 while (makeit) { 2011 /* 2012 * Break out if we successfully acquire a working ncp. 2013 */ 2014 spin_lock(&dvp->v_spin); 2015 nch->ncp = TAILQ_FIRST(&dvp->v_namecache); 2016 if (nch->ncp) { 2017 cache_hold(nch); 2018 spin_unlock(&dvp->v_spin); 2019 break; 2020 } 2021 spin_unlock(&dvp->v_spin); 2022 2023 /* 2024 * If dvp is the root of its filesystem it should already 2025 * have a namecache pointer associated with it as a side 2026 * effect of the mount, but it may have been disassociated. 2027 */ 2028 if (dvp->v_flag & VROOT) { 2029 nch->ncp = _cache_get(nch->mount->mnt_ncmountpt.ncp); 2030 error = cache_resolve_mp(nch->mount); 2031 _cache_put(nch->ncp); 2032 if (ncvp_debug) { 2033 kprintf("cache_fromdvp: resolve root of mount %p error %d", 2034 dvp->v_mount, error); 2035 } 2036 if (error) { 2037 if (ncvp_debug) 2038 kprintf(" failed\n"); 2039 nch->ncp = NULL; 2040 break; 2041 } 2042 if (ncvp_debug) 2043 kprintf(" succeeded\n"); 2044 continue; 2045 } 2046 2047 /* 2048 * If we are recursed too deeply resort to an O(n^2) 2049 * algorithm to resolve the namecache topology. The 2050 * resolved pvp is left referenced in saved_dvp to 2051 * prevent the tree from being destroyed while we loop. 2052 */ 2053 if (makeit > 20) { 2054 error = cache_fromdvp_try(dvp, cred, &saved_dvp); 2055 if (error) { 2056 kprintf("lookupdotdot(longpath) failed %d " 2057 "dvp %p\n", error, dvp); 2058 nch->ncp = NULL; 2059 break; 2060 } 2061 continue; 2062 } 2063 2064 /* 2065 * Get the parent directory and resolve its ncp. 2066 */ 2067 if (fakename) { 2068 kfree(fakename, M_TEMP); 2069 fakename = NULL; 2070 } 2071 error = vop_nlookupdotdot(*dvp->v_ops, dvp, &pvp, cred, 2072 &fakename); 2073 if (error) { 2074 kprintf("lookupdotdot failed %d dvp %p\n", error, dvp); 2075 break; 2076 } 2077 vn_unlock(pvp); 2078 2079 /* 2080 * Reuse makeit as a recursion depth counter. On success 2081 * nch will be fully referenced. 2082 */ 2083 cache_fromdvp(pvp, cred, makeit + 1, nch); 2084 vrele(pvp); 2085 if (nch->ncp == NULL) 2086 break; 2087 2088 /* 2089 * Do an inefficient scan of pvp (embodied by ncp) to look 2090 * for dvp. This will create a namecache record for dvp on 2091 * success. We loop up to recheck on success. 2092 * 2093 * ncp and dvp are both held but not locked. 2094 */ 2095 error = cache_inefficient_scan(nch, cred, dvp, fakename); 2096 if (error) { 2097 kprintf("cache_fromdvp: scan %p (%s) failed on dvp=%p\n", 2098 pvp, nch->ncp->nc_name, dvp); 2099 cache_drop(nch); 2100 /* nch was NULLed out, reload mount */ 2101 nch->mount = dvp->v_mount; 2102 break; 2103 } 2104 if (ncvp_debug) { 2105 kprintf("cache_fromdvp: scan %p (%s) succeeded\n", 2106 pvp, nch->ncp->nc_name); 2107 } 2108 cache_drop(nch); 2109 /* nch was NULLed out, reload mount */ 2110 nch->mount = dvp->v_mount; 2111 } 2112 2113 /* 2114 * If nch->ncp is non-NULL it will have been held already. 2115 */ 2116 if (fakename) 2117 kfree(fakename, M_TEMP); 2118 if (saved_dvp) 2119 vrele(saved_dvp); 2120 if (nch->ncp) 2121 return (0); 2122 return (EINVAL); 2123 } 2124 2125 /* 2126 * Go up the chain of parent directories until we find something 2127 * we can resolve into the namecache. This is very inefficient. 2128 */ 2129 static 2130 int 2131 cache_fromdvp_try(struct vnode *dvp, struct ucred *cred, 2132 struct vnode **saved_dvp) 2133 { 2134 struct nchandle nch; 2135 struct vnode *pvp; 2136 int error; 2137 static time_t last_fromdvp_report; 2138 char *fakename; 2139 2140 /* 2141 * Loop getting the parent directory vnode until we get something we 2142 * can resolve in the namecache. 2143 */ 2144 vref(dvp); 2145 nch.mount = dvp->v_mount; 2146 nch.ncp = NULL; 2147 fakename = NULL; 2148 2149 for (;;) { 2150 if (fakename) { 2151 kfree(fakename, M_TEMP); 2152 fakename = NULL; 2153 } 2154 error = vop_nlookupdotdot(*dvp->v_ops, dvp, &pvp, cred, 2155 &fakename); 2156 if (error) { 2157 vrele(dvp); 2158 break; 2159 } 2160 vn_unlock(pvp); 2161 spin_lock(&pvp->v_spin); 2162 if ((nch.ncp = TAILQ_FIRST(&pvp->v_namecache)) != NULL) { 2163 _cache_hold(nch.ncp); 2164 spin_unlock(&pvp->v_spin); 2165 vrele(pvp); 2166 break; 2167 } 2168 spin_unlock(&pvp->v_spin); 2169 if (pvp->v_flag & VROOT) { 2170 nch.ncp = _cache_get(pvp->v_mount->mnt_ncmountpt.ncp); 2171 error = cache_resolve_mp(nch.mount); 2172 _cache_unlock(nch.ncp); 2173 vrele(pvp); 2174 if (error) { 2175 _cache_drop(nch.ncp); 2176 nch.ncp = NULL; 2177 vrele(dvp); 2178 } 2179 break; 2180 } 2181 vrele(dvp); 2182 dvp = pvp; 2183 } 2184 if (error == 0) { 2185 if (last_fromdvp_report != time_second) { 2186 last_fromdvp_report = time_second; 2187 kprintf("Warning: extremely inefficient path " 2188 "resolution on %s\n", 2189 nch.ncp->nc_name); 2190 } 2191 error = cache_inefficient_scan(&nch, cred, dvp, fakename); 2192 2193 /* 2194 * Hopefully dvp now has a namecache record associated with 2195 * it. Leave it referenced to prevent the kernel from 2196 * recycling the vnode. Otherwise extremely long directory 2197 * paths could result in endless recycling. 2198 */ 2199 if (*saved_dvp) 2200 vrele(*saved_dvp); 2201 *saved_dvp = dvp; 2202 _cache_drop(nch.ncp); 2203 } 2204 if (fakename) 2205 kfree(fakename, M_TEMP); 2206 return (error); 2207 } 2208 2209 /* 2210 * Do an inefficient scan of the directory represented by ncp looking for 2211 * the directory vnode dvp. ncp must be held but not locked on entry and 2212 * will be held on return. dvp must be refd but not locked on entry and 2213 * will remain refd on return. 2214 * 2215 * Why do this at all? Well, due to its stateless nature the NFS server 2216 * converts file handles directly to vnodes without necessarily going through 2217 * the namecache ops that would otherwise create the namecache topology 2218 * leading to the vnode. We could either (1) Change the namecache algorithms 2219 * to allow disconnect namecache records that are re-merged opportunistically, 2220 * or (2) Make the NFS server backtrack and scan to recover a connected 2221 * namecache topology in order to then be able to issue new API lookups. 2222 * 2223 * It turns out that (1) is a huge mess. It takes a nice clean set of 2224 * namecache algorithms and introduces a lot of complication in every subsystem 2225 * that calls into the namecache to deal with the re-merge case, especially 2226 * since we are using the namecache to placehold negative lookups and the 2227 * vnode might not be immediately assigned. (2) is certainly far less 2228 * efficient then (1), but since we are only talking about directories here 2229 * (which are likely to remain cached), the case does not actually run all 2230 * that often and has the supreme advantage of not polluting the namecache 2231 * algorithms. 2232 * 2233 * If a fakename is supplied just construct a namecache entry using the 2234 * fake name. 2235 */ 2236 static int 2237 cache_inefficient_scan(struct nchandle *nch, struct ucred *cred, 2238 struct vnode *dvp, char *fakename) 2239 { 2240 struct nlcomponent nlc; 2241 struct nchandle rncp; 2242 struct dirent *den; 2243 struct vnode *pvp; 2244 struct vattr vat; 2245 struct iovec iov; 2246 struct uio uio; 2247 int blksize; 2248 int eofflag; 2249 int bytes; 2250 char *rbuf; 2251 int error; 2252 2253 vat.va_blocksize = 0; 2254 if ((error = VOP_GETATTR(dvp, &vat)) != 0) 2255 return (error); 2256 cache_lock(nch); 2257 error = cache_vref(nch, cred, &pvp); 2258 cache_unlock(nch); 2259 if (error) 2260 return (error); 2261 if (ncvp_debug) { 2262 kprintf("inefficient_scan: directory iosize %ld " 2263 "vattr fileid = %lld\n", 2264 vat.va_blocksize, 2265 (long long)vat.va_fileid); 2266 } 2267 2268 /* 2269 * Use the supplied fakename if not NULL. Fake names are typically 2270 * not in the actual filesystem hierarchy. This is used by HAMMER 2271 * to glue @@timestamp recursions together. 2272 */ 2273 if (fakename) { 2274 nlc.nlc_nameptr = fakename; 2275 nlc.nlc_namelen = strlen(fakename); 2276 rncp = cache_nlookup(nch, &nlc); 2277 goto done; 2278 } 2279 2280 if ((blksize = vat.va_blocksize) == 0) 2281 blksize = DEV_BSIZE; 2282 rbuf = kmalloc(blksize, M_TEMP, M_WAITOK); 2283 rncp.ncp = NULL; 2284 2285 eofflag = 0; 2286 uio.uio_offset = 0; 2287 again: 2288 iov.iov_base = rbuf; 2289 iov.iov_len = blksize; 2290 uio.uio_iov = &iov; 2291 uio.uio_iovcnt = 1; 2292 uio.uio_resid = blksize; 2293 uio.uio_segflg = UIO_SYSSPACE; 2294 uio.uio_rw = UIO_READ; 2295 uio.uio_td = curthread; 2296 2297 if (ncvp_debug >= 2) 2298 kprintf("cache_inefficient_scan: readdir @ %08x\n", (int)uio.uio_offset); 2299 error = VOP_READDIR(pvp, &uio, cred, &eofflag, NULL, NULL); 2300 if (error == 0) { 2301 den = (struct dirent *)rbuf; 2302 bytes = blksize - uio.uio_resid; 2303 2304 while (bytes > 0) { 2305 if (ncvp_debug >= 2) { 2306 kprintf("cache_inefficient_scan: %*.*s\n", 2307 den->d_namlen, den->d_namlen, 2308 den->d_name); 2309 } 2310 if (den->d_type != DT_WHT && 2311 den->d_ino == vat.va_fileid) { 2312 if (ncvp_debug) { 2313 kprintf("cache_inefficient_scan: " 2314 "MATCHED inode %lld path %s/%*.*s\n", 2315 (long long)vat.va_fileid, 2316 nch->ncp->nc_name, 2317 den->d_namlen, den->d_namlen, 2318 den->d_name); 2319 } 2320 nlc.nlc_nameptr = den->d_name; 2321 nlc.nlc_namelen = den->d_namlen; 2322 rncp = cache_nlookup(nch, &nlc); 2323 KKASSERT(rncp.ncp != NULL); 2324 break; 2325 } 2326 bytes -= _DIRENT_DIRSIZ(den); 2327 den = _DIRENT_NEXT(den); 2328 } 2329 if (rncp.ncp == NULL && eofflag == 0 && uio.uio_resid != blksize) 2330 goto again; 2331 } 2332 kfree(rbuf, M_TEMP); 2333 done: 2334 vrele(pvp); 2335 if (rncp.ncp) { 2336 if (rncp.ncp->nc_flag & NCF_UNRESOLVED) { 2337 _cache_setvp(rncp.mount, rncp.ncp, dvp); 2338 if (ncvp_debug >= 2) { 2339 kprintf("cache_inefficient_scan: setvp %s/%s = %p\n", 2340 nch->ncp->nc_name, rncp.ncp->nc_name, dvp); 2341 } 2342 } else { 2343 if (ncvp_debug >= 2) { 2344 kprintf("cache_inefficient_scan: setvp %s/%s already set %p/%p\n", 2345 nch->ncp->nc_name, rncp.ncp->nc_name, dvp, 2346 rncp.ncp->nc_vp); 2347 } 2348 } 2349 if (rncp.ncp->nc_vp == NULL) 2350 error = rncp.ncp->nc_error; 2351 /* 2352 * Release rncp after a successful nlookup. rncp was fully 2353 * referenced. 2354 */ 2355 cache_put(&rncp); 2356 } else { 2357 kprintf("cache_inefficient_scan: dvp %p NOT FOUND in %s\n", 2358 dvp, nch->ncp->nc_name); 2359 error = ENOENT; 2360 } 2361 return (error); 2362 } 2363 2364 /* 2365 * Zap a namecache entry. The ncp is unconditionally set to an unresolved 2366 * state, which disassociates it from its vnode or ncneglist. 2367 * 2368 * Then, if there are no additional references to the ncp and no children, 2369 * the ncp is removed from the topology and destroyed. 2370 * 2371 * References and/or children may exist if the ncp is in the middle of the 2372 * topology, preventing the ncp from being destroyed. 2373 * 2374 * This function must be called with the ncp held and locked and will unlock 2375 * and drop it during zapping. 2376 * 2377 * If nonblock is non-zero and the parent ncp cannot be locked we give up. 2378 * This case can occur in the cache_drop() path. 2379 * 2380 * This function may returned a held (but NOT locked) parent node which the 2381 * caller must drop. We do this so _cache_drop() can loop, to avoid 2382 * blowing out the kernel stack. 2383 * 2384 * WARNING! For MPSAFE operation this routine must acquire up to three 2385 * spin locks to be able to safely test nc_refs. Lock order is 2386 * very important. 2387 * 2388 * hash spinlock if on hash list 2389 * parent spinlock if child of parent 2390 * (the ncp is unresolved so there is no vnode association) 2391 */ 2392 static struct namecache * 2393 cache_zap(struct namecache *ncp, int nonblock) 2394 { 2395 struct namecache *par; 2396 struct vnode *dropvp; 2397 int refs; 2398 2399 /* 2400 * Disassociate the vnode or negative cache ref and set NCF_UNRESOLVED. 2401 */ 2402 _cache_setunresolved(ncp); 2403 2404 /* 2405 * Try to scrap the entry and possibly tail-recurse on its parent. 2406 * We only scrap unref'd (other then our ref) unresolved entries, 2407 * we do not scrap 'live' entries. 2408 * 2409 * Note that once the spinlocks are acquired if nc_refs == 1 no 2410 * other references are possible. If it isn't, however, we have 2411 * to decrement but also be sure to avoid a 1->0 transition. 2412 */ 2413 KKASSERT(ncp->nc_flag & NCF_UNRESOLVED); 2414 KKASSERT(ncp->nc_refs > 0); 2415 2416 /* 2417 * Acquire locks. Note that the parent can't go away while we hold 2418 * a child locked. 2419 */ 2420 if ((par = ncp->nc_parent) != NULL) { 2421 if (nonblock) { 2422 for (;;) { 2423 if (_cache_lock_nonblock(par) == 0) 2424 break; 2425 refs = ncp->nc_refs; 2426 ncp->nc_flag |= NCF_DEFEREDZAP; 2427 ++numdefered; /* MP race ok */ 2428 if (atomic_cmpset_int(&ncp->nc_refs, 2429 refs, refs - 1)) { 2430 _cache_unlock(ncp); 2431 return(NULL); 2432 } 2433 cpu_pause(); 2434 } 2435 _cache_hold(par); 2436 } else { 2437 _cache_hold(par); 2438 _cache_lock(par); 2439 } 2440 spin_lock(&ncp->nc_head->spin); 2441 } 2442 2443 /* 2444 * If someone other then us has a ref or we have children 2445 * we cannot zap the entry. The 1->0 transition and any 2446 * further list operation is protected by the spinlocks 2447 * we have acquired but other transitions are not. 2448 */ 2449 for (;;) { 2450 refs = ncp->nc_refs; 2451 if (refs == 1 && TAILQ_EMPTY(&ncp->nc_list)) 2452 break; 2453 if (atomic_cmpset_int(&ncp->nc_refs, refs, refs - 1)) { 2454 if (par) { 2455 spin_unlock(&ncp->nc_head->spin); 2456 _cache_put(par); 2457 } 2458 _cache_unlock(ncp); 2459 return(NULL); 2460 } 2461 cpu_pause(); 2462 } 2463 2464 /* 2465 * We are the only ref and with the spinlocks held no further 2466 * refs can be acquired by others. 2467 * 2468 * Remove us from the hash list and parent list. We have to 2469 * drop a ref on the parent's vp if the parent's list becomes 2470 * empty. 2471 */ 2472 dropvp = NULL; 2473 if (par) { 2474 struct nchash_head *nchpp = ncp->nc_head; 2475 2476 KKASSERT(nchpp != NULL); 2477 LIST_REMOVE(ncp, nc_hash); 2478 TAILQ_REMOVE(&par->nc_list, ncp, nc_entry); 2479 if (par->nc_vp && TAILQ_EMPTY(&par->nc_list)) 2480 dropvp = par->nc_vp; 2481 ncp->nc_head = NULL; 2482 ncp->nc_parent = NULL; 2483 spin_unlock(&nchpp->spin); 2484 _cache_unlock(par); 2485 } else { 2486 KKASSERT(ncp->nc_head == NULL); 2487 } 2488 2489 /* 2490 * ncp should not have picked up any refs. Physically 2491 * destroy the ncp. 2492 */ 2493 KKASSERT(ncp->nc_refs == 1); 2494 /* _cache_unlock(ncp) not required */ 2495 ncp->nc_refs = -1; /* safety */ 2496 if (ncp->nc_name) 2497 kfree(ncp->nc_name, M_VFSCACHE); 2498 kfree(ncp, M_VFSCACHE); 2499 2500 /* 2501 * Delayed drop (we had to release our spinlocks) 2502 * 2503 * The refed parent (if not NULL) must be dropped. The 2504 * caller is responsible for looping. 2505 */ 2506 if (dropvp) 2507 vdrop(dropvp); 2508 return(par); 2509 } 2510 2511 /* 2512 * Clean up dangling negative cache and defered-drop entries in the 2513 * namecache. 2514 * 2515 * This routine is called in the critical path and also called from 2516 * vnlru(). When called from vnlru we use a lower limit to try to 2517 * deal with the negative cache before the critical path has to start 2518 * dealing with it. 2519 */ 2520 typedef enum { CHI_LOW, CHI_HIGH } cache_hs_t; 2521 2522 static cache_hs_t neg_cache_hysteresis_state[2] = { CHI_LOW, CHI_LOW }; 2523 static cache_hs_t pos_cache_hysteresis_state[2] = { CHI_LOW, CHI_LOW }; 2524 2525 void 2526 cache_hysteresis(int critpath) 2527 { 2528 int poslimit; 2529 int neglimit = desiredvnodes / ncnegfactor; 2530 int xnumcache = numcache; 2531 2532 if (critpath == 0) 2533 neglimit = neglimit * 8 / 10; 2534 2535 /* 2536 * Don't cache too many negative hits. We use hysteresis to reduce 2537 * the impact on the critical path. 2538 */ 2539 switch(neg_cache_hysteresis_state[critpath]) { 2540 case CHI_LOW: 2541 if (numneg > MINNEG && numneg > neglimit) { 2542 if (critpath) 2543 _cache_cleanneg(ncnegflush); 2544 else 2545 _cache_cleanneg(ncnegflush + 2546 numneg - neglimit); 2547 neg_cache_hysteresis_state[critpath] = CHI_HIGH; 2548 } 2549 break; 2550 case CHI_HIGH: 2551 if (numneg > MINNEG * 9 / 10 && 2552 numneg * 9 / 10 > neglimit 2553 ) { 2554 if (critpath) 2555 _cache_cleanneg(ncnegflush); 2556 else 2557 _cache_cleanneg(ncnegflush + 2558 numneg * 9 / 10 - neglimit); 2559 } else { 2560 neg_cache_hysteresis_state[critpath] = CHI_LOW; 2561 } 2562 break; 2563 } 2564 2565 /* 2566 * Don't cache too many positive hits. We use hysteresis to reduce 2567 * the impact on the critical path. 2568 * 2569 * Excessive positive hits can accumulate due to large numbers of 2570 * hardlinks (the vnode cache will not prevent hl ncps from growing 2571 * into infinity). 2572 */ 2573 if ((poslimit = ncposlimit) == 0) 2574 poslimit = desiredvnodes * 2; 2575 if (critpath == 0) 2576 poslimit = poslimit * 8 / 10; 2577 2578 switch(pos_cache_hysteresis_state[critpath]) { 2579 case CHI_LOW: 2580 if (xnumcache > poslimit && xnumcache > MINPOS) { 2581 if (critpath) 2582 _cache_cleanpos(ncposflush); 2583 else 2584 _cache_cleanpos(ncposflush + 2585 xnumcache - poslimit); 2586 pos_cache_hysteresis_state[critpath] = CHI_HIGH; 2587 } 2588 break; 2589 case CHI_HIGH: 2590 if (xnumcache > poslimit * 5 / 6 && xnumcache > MINPOS) { 2591 if (critpath) 2592 _cache_cleanpos(ncposflush); 2593 else 2594 _cache_cleanpos(ncposflush + 2595 xnumcache - poslimit * 5 / 6); 2596 } else { 2597 pos_cache_hysteresis_state[critpath] = CHI_LOW; 2598 } 2599 break; 2600 } 2601 2602 /* 2603 * Clean out dangling defered-zap ncps which could not 2604 * be cleanly dropped if too many build up. Note 2605 * that numdefered is not an exact number as such ncps 2606 * can be reused and the counter is not handled in a MP 2607 * safe manner by design. 2608 */ 2609 if (numdefered > neglimit) { 2610 _cache_cleandefered(); 2611 } 2612 } 2613 2614 /* 2615 * NEW NAMECACHE LOOKUP API 2616 * 2617 * Lookup an entry in the namecache. The passed par_nch must be referenced 2618 * and unlocked. A referenced and locked nchandle with a non-NULL nch.ncp 2619 * is ALWAYS returned, eve if the supplied component is illegal. 2620 * 2621 * The resulting namecache entry should be returned to the system with 2622 * cache_put() or cache_unlock() + cache_drop(). 2623 * 2624 * namecache locks are recursive but care must be taken to avoid lock order 2625 * reversals (hence why the passed par_nch must be unlocked). Locking 2626 * rules are to order for parent traversals, not for child traversals. 2627 * 2628 * Nobody else will be able to manipulate the associated namespace (e.g. 2629 * create, delete, rename, rename-target) until the caller unlocks the 2630 * entry. 2631 * 2632 * The returned entry will be in one of three states: positive hit (non-null 2633 * vnode), negative hit (null vnode), or unresolved (NCF_UNRESOLVED is set). 2634 * Unresolved entries must be resolved through the filesystem to associate the 2635 * vnode and/or determine whether a positive or negative hit has occured. 2636 * 2637 * It is not necessary to lock a directory in order to lock namespace under 2638 * that directory. In fact, it is explicitly not allowed to do that. A 2639 * directory is typically only locked when being created, renamed, or 2640 * destroyed. 2641 * 2642 * The directory (par) may be unresolved, in which case any returned child 2643 * will likely also be marked unresolved. Likely but not guarenteed. Since 2644 * the filesystem lookup requires a resolved directory vnode the caller is 2645 * responsible for resolving the namecache chain top-down. This API 2646 * specifically allows whole chains to be created in an unresolved state. 2647 */ 2648 struct nchandle 2649 cache_nlookup(struct nchandle *par_nch, struct nlcomponent *nlc) 2650 { 2651 struct nchandle nch; 2652 struct namecache *ncp; 2653 struct namecache *new_ncp; 2654 struct nchash_head *nchpp; 2655 struct mount *mp; 2656 u_int32_t hash; 2657 globaldata_t gd; 2658 int par_locked; 2659 2660 numcalls++; 2661 gd = mycpu; 2662 mp = par_nch->mount; 2663 par_locked = 0; 2664 2665 /* 2666 * This is a good time to call it, no ncp's are locked by 2667 * the caller or us. 2668 */ 2669 cache_hysteresis(1); 2670 2671 /* 2672 * Try to locate an existing entry 2673 */ 2674 hash = fnv_32_buf(nlc->nlc_nameptr, nlc->nlc_namelen, FNV1_32_INIT); 2675 hash = fnv_32_buf(&par_nch->ncp, sizeof(par_nch->ncp), hash); 2676 new_ncp = NULL; 2677 nchpp = NCHHASH(hash); 2678 restart: 2679 spin_lock(&nchpp->spin); 2680 LIST_FOREACH(ncp, &nchpp->list, nc_hash) { 2681 numchecks++; 2682 2683 /* 2684 * Break out if we find a matching entry. Note that 2685 * UNRESOLVED entries may match, but DESTROYED entries 2686 * do not. 2687 */ 2688 if (ncp->nc_parent == par_nch->ncp && 2689 ncp->nc_nlen == nlc->nlc_namelen && 2690 bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 && 2691 (ncp->nc_flag & NCF_DESTROYED) == 0 2692 ) { 2693 _cache_hold(ncp); 2694 spin_unlock(&nchpp->spin); 2695 if (par_locked) { 2696 _cache_unlock(par_nch->ncp); 2697 par_locked = 0; 2698 } 2699 if (_cache_lock_special(ncp) == 0) { 2700 _cache_auto_unresolve(mp, ncp); 2701 if (new_ncp) 2702 _cache_free(new_ncp); 2703 goto found; 2704 } 2705 _cache_get(ncp); 2706 _cache_put(ncp); 2707 _cache_drop(ncp); 2708 goto restart; 2709 } 2710 } 2711 2712 /* 2713 * We failed to locate an entry, create a new entry and add it to 2714 * the cache. The parent ncp must also be locked so we 2715 * can link into it. 2716 * 2717 * We have to relookup after possibly blocking in kmalloc or 2718 * when locking par_nch. 2719 * 2720 * NOTE: nlc_namelen can be 0 and nlc_nameptr NULL as a special 2721 * mount case, in which case nc_name will be NULL. 2722 */ 2723 if (new_ncp == NULL) { 2724 spin_unlock(&nchpp->spin); 2725 new_ncp = cache_alloc(nlc->nlc_namelen); 2726 if (nlc->nlc_namelen) { 2727 bcopy(nlc->nlc_nameptr, new_ncp->nc_name, 2728 nlc->nlc_namelen); 2729 new_ncp->nc_name[nlc->nlc_namelen] = 0; 2730 } 2731 goto restart; 2732 } 2733 if (par_locked == 0) { 2734 spin_unlock(&nchpp->spin); 2735 _cache_lock(par_nch->ncp); 2736 par_locked = 1; 2737 goto restart; 2738 } 2739 2740 /* 2741 * WARNING! We still hold the spinlock. We have to set the hash 2742 * table entry atomically. 2743 */ 2744 ncp = new_ncp; 2745 _cache_link_parent(ncp, par_nch->ncp, nchpp); 2746 spin_unlock(&nchpp->spin); 2747 _cache_unlock(par_nch->ncp); 2748 /* par_locked = 0 - not used */ 2749 found: 2750 /* 2751 * stats and namecache size management 2752 */ 2753 if (ncp->nc_flag & NCF_UNRESOLVED) 2754 ++gd->gd_nchstats->ncs_miss; 2755 else if (ncp->nc_vp) 2756 ++gd->gd_nchstats->ncs_goodhits; 2757 else 2758 ++gd->gd_nchstats->ncs_neghits; 2759 nch.mount = mp; 2760 nch.ncp = ncp; 2761 atomic_add_int(&nch.mount->mnt_refs, 1); 2762 return(nch); 2763 } 2764 2765 /* 2766 * Attempt to lookup a namecache entry and return with a shared namecache 2767 * lock. 2768 */ 2769 int 2770 cache_nlookup_maybe_shared(struct nchandle *par_nch, struct nlcomponent *nlc, 2771 int excl, struct nchandle *res_nch) 2772 { 2773 struct namecache *ncp; 2774 struct nchash_head *nchpp; 2775 struct mount *mp; 2776 u_int32_t hash; 2777 globaldata_t gd; 2778 2779 /* 2780 * If exclusive requested or shared namecache locks are disabled, 2781 * return failure. 2782 */ 2783 if (ncp_shared_lock_disable || excl) 2784 return(EWOULDBLOCK); 2785 2786 numcalls++; 2787 gd = mycpu; 2788 mp = par_nch->mount; 2789 2790 /* 2791 * This is a good time to call it, no ncp's are locked by 2792 * the caller or us. 2793 */ 2794 cache_hysteresis(1); 2795 2796 /* 2797 * Try to locate an existing entry 2798 */ 2799 hash = fnv_32_buf(nlc->nlc_nameptr, nlc->nlc_namelen, FNV1_32_INIT); 2800 hash = fnv_32_buf(&par_nch->ncp, sizeof(par_nch->ncp), hash); 2801 nchpp = NCHHASH(hash); 2802 2803 spin_lock(&nchpp->spin); 2804 2805 LIST_FOREACH(ncp, &nchpp->list, nc_hash) { 2806 numchecks++; 2807 2808 /* 2809 * Break out if we find a matching entry. Note that 2810 * UNRESOLVED entries may match, but DESTROYED entries 2811 * do not. 2812 */ 2813 if (ncp->nc_parent == par_nch->ncp && 2814 ncp->nc_nlen == nlc->nlc_namelen && 2815 bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 && 2816 (ncp->nc_flag & NCF_DESTROYED) == 0 2817 ) { 2818 _cache_hold(ncp); 2819 spin_unlock(&nchpp->spin); 2820 if (_cache_lock_shared_special(ncp) == 0) { 2821 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0 && 2822 (ncp->nc_flag & NCF_DESTROYED) == 0 && 2823 _cache_auto_unresolve_test(mp, ncp) == 0) { 2824 goto found; 2825 } 2826 _cache_unlock(ncp); 2827 } 2828 _cache_drop(ncp); 2829 spin_lock(&nchpp->spin); 2830 break; 2831 } 2832 } 2833 2834 /* 2835 * Failure 2836 */ 2837 spin_unlock(&nchpp->spin); 2838 return(EWOULDBLOCK); 2839 2840 /* 2841 * Success 2842 * 2843 * Note that nc_error might be non-zero (e.g ENOENT). 2844 */ 2845 found: 2846 res_nch->mount = mp; 2847 res_nch->ncp = ncp; 2848 ++gd->gd_nchstats->ncs_goodhits; 2849 atomic_add_int(&res_nch->mount->mnt_refs, 1); 2850 2851 KKASSERT(ncp->nc_error != EWOULDBLOCK); 2852 return(ncp->nc_error); 2853 } 2854 2855 /* 2856 * This is a non-blocking verison of cache_nlookup() used by 2857 * nfs_readdirplusrpc_uio(). It can fail for any reason and 2858 * will return nch.ncp == NULL in that case. 2859 */ 2860 struct nchandle 2861 cache_nlookup_nonblock(struct nchandle *par_nch, struct nlcomponent *nlc) 2862 { 2863 struct nchandle nch; 2864 struct namecache *ncp; 2865 struct namecache *new_ncp; 2866 struct nchash_head *nchpp; 2867 struct mount *mp; 2868 u_int32_t hash; 2869 globaldata_t gd; 2870 int par_locked; 2871 2872 numcalls++; 2873 gd = mycpu; 2874 mp = par_nch->mount; 2875 par_locked = 0; 2876 2877 /* 2878 * Try to locate an existing entry 2879 */ 2880 hash = fnv_32_buf(nlc->nlc_nameptr, nlc->nlc_namelen, FNV1_32_INIT); 2881 hash = fnv_32_buf(&par_nch->ncp, sizeof(par_nch->ncp), hash); 2882 new_ncp = NULL; 2883 nchpp = NCHHASH(hash); 2884 restart: 2885 spin_lock(&nchpp->spin); 2886 LIST_FOREACH(ncp, &nchpp->list, nc_hash) { 2887 numchecks++; 2888 2889 /* 2890 * Break out if we find a matching entry. Note that 2891 * UNRESOLVED entries may match, but DESTROYED entries 2892 * do not. 2893 */ 2894 if (ncp->nc_parent == par_nch->ncp && 2895 ncp->nc_nlen == nlc->nlc_namelen && 2896 bcmp(ncp->nc_name, nlc->nlc_nameptr, ncp->nc_nlen) == 0 && 2897 (ncp->nc_flag & NCF_DESTROYED) == 0 2898 ) { 2899 _cache_hold(ncp); 2900 spin_unlock(&nchpp->spin); 2901 if (par_locked) { 2902 _cache_unlock(par_nch->ncp); 2903 par_locked = 0; 2904 } 2905 if (_cache_lock_special(ncp) == 0) { 2906 _cache_auto_unresolve(mp, ncp); 2907 if (new_ncp) { 2908 _cache_free(new_ncp); 2909 new_ncp = NULL; 2910 } 2911 goto found; 2912 } 2913 _cache_drop(ncp); 2914 goto failed; 2915 } 2916 } 2917 2918 /* 2919 * We failed to locate an entry, create a new entry and add it to 2920 * the cache. The parent ncp must also be locked so we 2921 * can link into it. 2922 * 2923 * We have to relookup after possibly blocking in kmalloc or 2924 * when locking par_nch. 2925 * 2926 * NOTE: nlc_namelen can be 0 and nlc_nameptr NULL as a special 2927 * mount case, in which case nc_name will be NULL. 2928 */ 2929 if (new_ncp == NULL) { 2930 spin_unlock(&nchpp->spin); 2931 new_ncp = cache_alloc(nlc->nlc_namelen); 2932 if (nlc->nlc_namelen) { 2933 bcopy(nlc->nlc_nameptr, new_ncp->nc_name, 2934 nlc->nlc_namelen); 2935 new_ncp->nc_name[nlc->nlc_namelen] = 0; 2936 } 2937 goto restart; 2938 } 2939 if (par_locked == 0) { 2940 spin_unlock(&nchpp->spin); 2941 if (_cache_lock_nonblock(par_nch->ncp) == 0) { 2942 par_locked = 1; 2943 goto restart; 2944 } 2945 goto failed; 2946 } 2947 2948 /* 2949 * WARNING! We still hold the spinlock. We have to set the hash 2950 * table entry atomically. 2951 */ 2952 ncp = new_ncp; 2953 _cache_link_parent(ncp, par_nch->ncp, nchpp); 2954 spin_unlock(&nchpp->spin); 2955 _cache_unlock(par_nch->ncp); 2956 /* par_locked = 0 - not used */ 2957 found: 2958 /* 2959 * stats and namecache size management 2960 */ 2961 if (ncp->nc_flag & NCF_UNRESOLVED) 2962 ++gd->gd_nchstats->ncs_miss; 2963 else if (ncp->nc_vp) 2964 ++gd->gd_nchstats->ncs_goodhits; 2965 else 2966 ++gd->gd_nchstats->ncs_neghits; 2967 nch.mount = mp; 2968 nch.ncp = ncp; 2969 atomic_add_int(&nch.mount->mnt_refs, 1); 2970 return(nch); 2971 failed: 2972 if (new_ncp) { 2973 _cache_free(new_ncp); 2974 new_ncp = NULL; 2975 } 2976 nch.mount = NULL; 2977 nch.ncp = NULL; 2978 return(nch); 2979 } 2980 2981 /* 2982 * The namecache entry is marked as being used as a mount point. 2983 * Locate the mount if it is visible to the caller. The DragonFly 2984 * mount system allows arbitrary loops in the topology and disentangles 2985 * those loops by matching against (mp, ncp) rather than just (ncp). 2986 * This means any given ncp can dive any number of mounts, depending 2987 * on the relative mount (e.g. nullfs) the caller is at in the topology. 2988 * 2989 * We use a very simple frontend cache to reduce SMP conflicts, 2990 * which we have to do because the mountlist scan needs an exclusive 2991 * lock around its ripout info list. Not to mention that there might 2992 * be a lot of mounts. 2993 */ 2994 struct findmount_info { 2995 struct mount *result; 2996 struct mount *nch_mount; 2997 struct namecache *nch_ncp; 2998 }; 2999 3000 static 3001 struct ncmount_cache * 3002 ncmount_cache_lookup(struct mount *mp, struct namecache *ncp) 3003 { 3004 int hash; 3005 3006 hash = ((int)(intptr_t)mp / sizeof(*mp)) ^ 3007 ((int)(intptr_t)ncp / sizeof(*ncp)); 3008 hash = (hash & 0x7FFFFFFF) % NCMOUNT_NUMCACHE; 3009 return (&ncmount_cache[hash]); 3010 } 3011 3012 static 3013 int 3014 cache_findmount_callback(struct mount *mp, void *data) 3015 { 3016 struct findmount_info *info = data; 3017 3018 /* 3019 * Check the mount's mounted-on point against the passed nch. 3020 */ 3021 if (mp->mnt_ncmounton.mount == info->nch_mount && 3022 mp->mnt_ncmounton.ncp == info->nch_ncp 3023 ) { 3024 info->result = mp; 3025 atomic_add_int(&mp->mnt_refs, 1); 3026 return(-1); 3027 } 3028 return(0); 3029 } 3030 3031 struct mount * 3032 cache_findmount(struct nchandle *nch) 3033 { 3034 struct findmount_info info; 3035 struct ncmount_cache *ncc; 3036 struct mount *mp; 3037 3038 /* 3039 * Fast 3040 */ 3041 if (ncmount_cache_enable == 0) { 3042 ncc = NULL; 3043 goto skip; 3044 } 3045 ncc = ncmount_cache_lookup(nch->mount, nch->ncp); 3046 if (ncc->ncp == nch->ncp) { 3047 spin_lock_shared(&ncc->spin); 3048 if (ncc->isneg == 0 && 3049 ncc->ncp == nch->ncp && (mp = ncc->mp) != NULL) { 3050 if (mp->mnt_ncmounton.mount == nch->mount && 3051 mp->mnt_ncmounton.ncp == nch->ncp) { 3052 /* 3053 * Cache hit (positive) 3054 */ 3055 atomic_add_int(&mp->mnt_refs, 1); 3056 spin_unlock_shared(&ncc->spin); 3057 ++ncmount_cache_hit; 3058 return(mp); 3059 } 3060 /* else cache miss */ 3061 } 3062 if (ncc->isneg && 3063 ncc->ncp == nch->ncp && ncc->mp == nch->mount) { 3064 /* 3065 * Cache hit (negative) 3066 */ 3067 spin_unlock_shared(&ncc->spin); 3068 ++ncmount_cache_hit; 3069 return(NULL); 3070 } 3071 spin_unlock_shared(&ncc->spin); 3072 } 3073 skip: 3074 3075 /* 3076 * Slow 3077 */ 3078 info.result = NULL; 3079 info.nch_mount = nch->mount; 3080 info.nch_ncp = nch->ncp; 3081 mountlist_scan(cache_findmount_callback, &info, 3082 MNTSCAN_FORWARD|MNTSCAN_NOBUSY); 3083 3084 /* 3085 * Cache the result. 3086 * 3087 * Negative lookups: We cache the originating {ncp,mp}. (mp) is 3088 * only used for pointer comparisons and is not 3089 * referenced (otherwise there would be dangling 3090 * refs). 3091 * 3092 * Positive lookups: We cache the originating {ncp} and the target 3093 * (mp). (mp) is referenced. 3094 * 3095 * Indeterminant: If the match is undergoing an unmount we do 3096 * not cache it to avoid racing cache_unmounting(), 3097 * but still return the match. 3098 */ 3099 if (ncc) { 3100 spin_lock(&ncc->spin); 3101 if (info.result == NULL) { 3102 if (ncc->isneg == 0 && ncc->mp) 3103 atomic_add_int(&ncc->mp->mnt_refs, -1); 3104 ncc->ncp = nch->ncp; 3105 ncc->mp = nch->mount; 3106 ncc->isneg = 1; 3107 spin_unlock(&ncc->spin); 3108 ++ncmount_cache_overwrite; 3109 } else if ((info.result->mnt_kern_flag & MNTK_UNMOUNT) == 0) { 3110 if (ncc->isneg == 0 && ncc->mp) 3111 atomic_add_int(&ncc->mp->mnt_refs, -1); 3112 atomic_add_int(&info.result->mnt_refs, 1); 3113 ncc->ncp = nch->ncp; 3114 ncc->mp = info.result; 3115 ncc->isneg = 0; 3116 spin_unlock(&ncc->spin); 3117 ++ncmount_cache_overwrite; 3118 } else { 3119 spin_unlock(&ncc->spin); 3120 } 3121 ++ncmount_cache_miss; 3122 } 3123 return(info.result); 3124 } 3125 3126 void 3127 cache_dropmount(struct mount *mp) 3128 { 3129 atomic_add_int(&mp->mnt_refs, -1); 3130 } 3131 3132 void 3133 cache_ismounting(struct mount *mp) 3134 { 3135 struct nchandle *nch = &mp->mnt_ncmounton; 3136 struct ncmount_cache *ncc; 3137 3138 ncc = ncmount_cache_lookup(nch->mount, nch->ncp); 3139 if (ncc->isneg && 3140 ncc->ncp == nch->ncp && ncc->mp == nch->mount) { 3141 spin_lock(&ncc->spin); 3142 if (ncc->isneg && 3143 ncc->ncp == nch->ncp && ncc->mp == nch->mount) { 3144 ncc->ncp = NULL; 3145 ncc->mp = NULL; 3146 } 3147 spin_unlock(&ncc->spin); 3148 } 3149 } 3150 3151 void 3152 cache_unmounting(struct mount *mp) 3153 { 3154 struct nchandle *nch = &mp->mnt_ncmounton; 3155 struct ncmount_cache *ncc; 3156 3157 ncc = ncmount_cache_lookup(nch->mount, nch->ncp); 3158 if (ncc->isneg == 0 && 3159 ncc->ncp == nch->ncp && ncc->mp == mp) { 3160 spin_lock(&ncc->spin); 3161 if (ncc->isneg == 0 && 3162 ncc->ncp == nch->ncp && ncc->mp == mp) { 3163 atomic_add_int(&mp->mnt_refs, -1); 3164 ncc->ncp = NULL; 3165 ncc->mp = NULL; 3166 } 3167 spin_unlock(&ncc->spin); 3168 } 3169 } 3170 3171 /* 3172 * Resolve an unresolved namecache entry, generally by looking it up. 3173 * The passed ncp must be locked and refd. 3174 * 3175 * Theoretically since a vnode cannot be recycled while held, and since 3176 * the nc_parent chain holds its vnode as long as children exist, the 3177 * direct parent of the cache entry we are trying to resolve should 3178 * have a valid vnode. If not then generate an error that we can 3179 * determine is related to a resolver bug. 3180 * 3181 * However, if a vnode was in the middle of a recyclement when the NCP 3182 * got locked, ncp->nc_vp might point to a vnode that is about to become 3183 * invalid. cache_resolve() handles this case by unresolving the entry 3184 * and then re-resolving it. 3185 * 3186 * Note that successful resolution does not necessarily return an error 3187 * code of 0. If the ncp resolves to a negative cache hit then ENOENT 3188 * will be returned. 3189 */ 3190 int 3191 cache_resolve(struct nchandle *nch, struct ucred *cred) 3192 { 3193 struct namecache *par_tmp; 3194 struct namecache *par; 3195 struct namecache *ncp; 3196 struct nchandle nctmp; 3197 struct mount *mp; 3198 struct vnode *dvp; 3199 int error; 3200 3201 ncp = nch->ncp; 3202 mp = nch->mount; 3203 KKASSERT(_cache_lockstatus(ncp) == LK_EXCLUSIVE); 3204 restart: 3205 /* 3206 * If the ncp is already resolved we have nothing to do. However, 3207 * we do want to guarentee that a usable vnode is returned when 3208 * a vnode is present, so make sure it hasn't been reclaimed. 3209 */ 3210 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 3211 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED)) 3212 _cache_setunresolved(ncp); 3213 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) 3214 return (ncp->nc_error); 3215 } 3216 3217 /* 3218 * If the ncp was destroyed it will never resolve again. This 3219 * can basically only happen when someone is chdir'd into an 3220 * empty directory which is then rmdir'd. We want to catch this 3221 * here and not dive the VFS because the VFS might actually 3222 * have a way to re-resolve the disconnected ncp, which will 3223 * result in inconsistencies in the cdir/nch for proc->p_fd. 3224 */ 3225 if (ncp->nc_flag & NCF_DESTROYED) { 3226 kprintf("Warning: cache_resolve: ncp '%s' was unlinked\n", 3227 ncp->nc_name); 3228 return(EINVAL); 3229 } 3230 3231 /* 3232 * Mount points need special handling because the parent does not 3233 * belong to the same filesystem as the ncp. 3234 */ 3235 if (ncp == mp->mnt_ncmountpt.ncp) 3236 return (cache_resolve_mp(mp)); 3237 3238 /* 3239 * We expect an unbroken chain of ncps to at least the mount point, 3240 * and even all the way to root (but this code doesn't have to go 3241 * past the mount point). 3242 */ 3243 if (ncp->nc_parent == NULL) { 3244 kprintf("EXDEV case 1 %p %*.*s\n", ncp, 3245 ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name); 3246 ncp->nc_error = EXDEV; 3247 return(ncp->nc_error); 3248 } 3249 3250 /* 3251 * The vp's of the parent directories in the chain are held via vhold() 3252 * due to the existance of the child, and should not disappear. 3253 * However, there are cases where they can disappear: 3254 * 3255 * - due to filesystem I/O errors. 3256 * - due to NFS being stupid about tracking the namespace and 3257 * destroys the namespace for entire directories quite often. 3258 * - due to forced unmounts. 3259 * - due to an rmdir (parent will be marked DESTROYED) 3260 * 3261 * When this occurs we have to track the chain backwards and resolve 3262 * it, looping until the resolver catches up to the current node. We 3263 * could recurse here but we might run ourselves out of kernel stack 3264 * so we do it in a more painful manner. This situation really should 3265 * not occur all that often, or if it does not have to go back too 3266 * many nodes to resolve the ncp. 3267 */ 3268 while ((dvp = cache_dvpref(ncp)) == NULL) { 3269 /* 3270 * This case can occur if a process is CD'd into a 3271 * directory which is then rmdir'd. If the parent is marked 3272 * destroyed there is no point trying to resolve it. 3273 */ 3274 if (ncp->nc_parent->nc_flag & NCF_DESTROYED) 3275 return(ENOENT); 3276 par = ncp->nc_parent; 3277 _cache_hold(par); 3278 _cache_lock(par); 3279 while ((par_tmp = par->nc_parent) != NULL && 3280 par_tmp->nc_vp == NULL) { 3281 _cache_hold(par_tmp); 3282 _cache_lock(par_tmp); 3283 _cache_put(par); 3284 par = par_tmp; 3285 } 3286 if (par->nc_parent == NULL) { 3287 kprintf("EXDEV case 2 %*.*s\n", 3288 par->nc_nlen, par->nc_nlen, par->nc_name); 3289 _cache_put(par); 3290 return (EXDEV); 3291 } 3292 kprintf("[diagnostic] cache_resolve: had to recurse on %*.*s\n", 3293 par->nc_nlen, par->nc_nlen, par->nc_name); 3294 /* 3295 * The parent is not set in stone, ref and lock it to prevent 3296 * it from disappearing. Also note that due to renames it 3297 * is possible for our ncp to move and for par to no longer 3298 * be one of its parents. We resolve it anyway, the loop 3299 * will handle any moves. 3300 */ 3301 _cache_get(par); /* additional hold/lock */ 3302 _cache_put(par); /* from earlier hold/lock */ 3303 if (par == nch->mount->mnt_ncmountpt.ncp) { 3304 cache_resolve_mp(nch->mount); 3305 } else if ((dvp = cache_dvpref(par)) == NULL) { 3306 kprintf("[diagnostic] cache_resolve: raced on %*.*s\n", par->nc_nlen, par->nc_nlen, par->nc_name); 3307 _cache_put(par); 3308 continue; 3309 } else { 3310 if (par->nc_flag & NCF_UNRESOLVED) { 3311 nctmp.mount = mp; 3312 nctmp.ncp = par; 3313 par->nc_error = VOP_NRESOLVE(&nctmp, dvp, cred); 3314 } 3315 vrele(dvp); 3316 } 3317 if ((error = par->nc_error) != 0) { 3318 if (par->nc_error != EAGAIN) { 3319 kprintf("EXDEV case 3 %*.*s error %d\n", 3320 par->nc_nlen, par->nc_nlen, par->nc_name, 3321 par->nc_error); 3322 _cache_put(par); 3323 return(error); 3324 } 3325 kprintf("[diagnostic] cache_resolve: EAGAIN par %p %*.*s\n", 3326 par, par->nc_nlen, par->nc_nlen, par->nc_name); 3327 } 3328 _cache_put(par); 3329 /* loop */ 3330 } 3331 3332 /* 3333 * Call VOP_NRESOLVE() to get the vp, then scan for any disconnected 3334 * ncp's and reattach them. If this occurs the original ncp is marked 3335 * EAGAIN to force a relookup. 3336 * 3337 * NOTE: in order to call VOP_NRESOLVE(), the parent of the passed 3338 * ncp must already be resolved. 3339 */ 3340 if (dvp) { 3341 nctmp.mount = mp; 3342 nctmp.ncp = ncp; 3343 ncp->nc_error = VOP_NRESOLVE(&nctmp, dvp, cred); 3344 vrele(dvp); 3345 } else { 3346 ncp->nc_error = EPERM; 3347 } 3348 if (ncp->nc_error == EAGAIN) { 3349 kprintf("[diagnostic] cache_resolve: EAGAIN ncp %p %*.*s\n", 3350 ncp, ncp->nc_nlen, ncp->nc_nlen, ncp->nc_name); 3351 goto restart; 3352 } 3353 return(ncp->nc_error); 3354 } 3355 3356 /* 3357 * Resolve the ncp associated with a mount point. Such ncp's almost always 3358 * remain resolved and this routine is rarely called. NFS MPs tends to force 3359 * re-resolution more often due to its mac-truck-smash-the-namecache 3360 * method of tracking namespace changes. 3361 * 3362 * The semantics for this call is that the passed ncp must be locked on 3363 * entry and will be locked on return. However, if we actually have to 3364 * resolve the mount point we temporarily unlock the entry in order to 3365 * avoid race-to-root deadlocks due to e.g. dead NFS mounts. Because of 3366 * the unlock we have to recheck the flags after we relock. 3367 */ 3368 static int 3369 cache_resolve_mp(struct mount *mp) 3370 { 3371 struct namecache *ncp = mp->mnt_ncmountpt.ncp; 3372 struct vnode *vp; 3373 int error; 3374 3375 KKASSERT(mp != NULL); 3376 3377 /* 3378 * If the ncp is already resolved we have nothing to do. However, 3379 * we do want to guarentee that a usable vnode is returned when 3380 * a vnode is present, so make sure it hasn't been reclaimed. 3381 */ 3382 if ((ncp->nc_flag & NCF_UNRESOLVED) == 0) { 3383 if (ncp->nc_vp && (ncp->nc_vp->v_flag & VRECLAIMED)) 3384 _cache_setunresolved(ncp); 3385 } 3386 3387 if (ncp->nc_flag & NCF_UNRESOLVED) { 3388 _cache_unlock(ncp); 3389 while (vfs_busy(mp, 0)) 3390 ; 3391 error = VFS_ROOT(mp, &vp); 3392 _cache_lock(ncp); 3393 3394 /* 3395 * recheck the ncp state after relocking. 3396 */ 3397 if (ncp->nc_flag & NCF_UNRESOLVED) { 3398 ncp->nc_error = error; 3399 if (error == 0) { 3400 _cache_setvp(mp, ncp, vp); 3401 vput(vp); 3402 } else { 3403 kprintf("[diagnostic] cache_resolve_mp: failed" 3404 " to resolve mount %p err=%d ncp=%p\n", 3405 mp, error, ncp); 3406 _cache_setvp(mp, ncp, NULL); 3407 } 3408 } else if (error == 0) { 3409 vput(vp); 3410 } 3411 vfs_unbusy(mp); 3412 } 3413 return(ncp->nc_error); 3414 } 3415 3416 /* 3417 * Clean out negative cache entries when too many have accumulated. 3418 */ 3419 static void 3420 _cache_cleanneg(int count) 3421 { 3422 struct namecache *ncp; 3423 3424 /* 3425 * Attempt to clean out the specified number of negative cache 3426 * entries. 3427 */ 3428 while (count) { 3429 spin_lock(&ncspin); 3430 ncp = TAILQ_FIRST(&ncneglist); 3431 if (ncp == NULL) { 3432 spin_unlock(&ncspin); 3433 break; 3434 } 3435 TAILQ_REMOVE(&ncneglist, ncp, nc_vnode); 3436 TAILQ_INSERT_TAIL(&ncneglist, ncp, nc_vnode); 3437 _cache_hold(ncp); 3438 spin_unlock(&ncspin); 3439 3440 /* 3441 * This can race, so we must re-check that the ncp 3442 * is on the ncneglist after successfully locking it. 3443 */ 3444 if (_cache_lock_special(ncp) == 0) { 3445 if (ncp->nc_vp == NULL && 3446 (ncp->nc_flag & NCF_UNRESOLVED) == 0) { 3447 ncp = cache_zap(ncp, 1); 3448 if (ncp) 3449 _cache_drop(ncp); 3450 } else { 3451 kprintf("cache_cleanneg: race avoided\n"); 3452 _cache_unlock(ncp); 3453 } 3454 } else { 3455 _cache_drop(ncp); 3456 } 3457 --count; 3458 } 3459 } 3460 3461 /* 3462 * Clean out positive cache entries when too many have accumulated. 3463 */ 3464 static void 3465 _cache_cleanpos(int count) 3466 { 3467 static volatile int rover; 3468 struct nchash_head *nchpp; 3469 struct namecache *ncp; 3470 int rover_copy; 3471 3472 /* 3473 * Attempt to clean out the specified number of negative cache 3474 * entries. 3475 */ 3476 while (count) { 3477 rover_copy = ++rover; /* MPSAFEENOUGH */ 3478 cpu_ccfence(); 3479 nchpp = NCHHASH(rover_copy); 3480 3481 spin_lock(&nchpp->spin); 3482 ncp = LIST_FIRST(&nchpp->list); 3483 while (ncp && (ncp->nc_flag & NCF_DESTROYED)) 3484 ncp = LIST_NEXT(ncp, nc_hash); 3485 if (ncp) 3486 _cache_hold(ncp); 3487 spin_unlock(&nchpp->spin); 3488 3489 if (ncp) { 3490 if (_cache_lock_special(ncp) == 0) { 3491 ncp = cache_zap(ncp, 1); 3492 if (ncp) 3493 _cache_drop(ncp); 3494 } else { 3495 _cache_drop(ncp); 3496 } 3497 } 3498 --count; 3499 } 3500 } 3501 3502 /* 3503 * This is a kitchen sink function to clean out ncps which we 3504 * tried to zap from cache_drop() but failed because we were 3505 * unable to acquire the parent lock. 3506 * 3507 * Such entries can also be removed via cache_inval_vp(), such 3508 * as when unmounting. 3509 */ 3510 static void 3511 _cache_cleandefered(void) 3512 { 3513 struct nchash_head *nchpp; 3514 struct namecache *ncp; 3515 struct namecache dummy; 3516 int i; 3517 3518 numdefered = 0; 3519 bzero(&dummy, sizeof(dummy)); 3520 dummy.nc_flag = NCF_DESTROYED; 3521 dummy.nc_refs = 1; 3522 3523 for (i = 0; i <= nchash; ++i) { 3524 nchpp = &nchashtbl[i]; 3525 3526 spin_lock(&nchpp->spin); 3527 LIST_INSERT_HEAD(&nchpp->list, &dummy, nc_hash); 3528 ncp = &dummy; 3529 while ((ncp = LIST_NEXT(ncp, nc_hash)) != NULL) { 3530 if ((ncp->nc_flag & NCF_DEFEREDZAP) == 0) 3531 continue; 3532 LIST_REMOVE(&dummy, nc_hash); 3533 LIST_INSERT_AFTER(ncp, &dummy, nc_hash); 3534 _cache_hold(ncp); 3535 spin_unlock(&nchpp->spin); 3536 if (_cache_lock_nonblock(ncp) == 0) { 3537 ncp->nc_flag &= ~NCF_DEFEREDZAP; 3538 _cache_unlock(ncp); 3539 } 3540 _cache_drop(ncp); 3541 spin_lock(&nchpp->spin); 3542 ncp = &dummy; 3543 } 3544 LIST_REMOVE(&dummy, nc_hash); 3545 spin_unlock(&nchpp->spin); 3546 } 3547 } 3548 3549 /* 3550 * Name cache initialization, from vfsinit() when we are booting 3551 */ 3552 void 3553 nchinit(void) 3554 { 3555 int i; 3556 globaldata_t gd; 3557 3558 /* initialise per-cpu namecache effectiveness statistics. */ 3559 for (i = 0; i < ncpus; ++i) { 3560 gd = globaldata_find(i); 3561 gd->gd_nchstats = &nchstats[i]; 3562 } 3563 TAILQ_INIT(&ncneglist); 3564 spin_init(&ncspin); 3565 nchashtbl = hashinit_ext(desiredvnodes / 2, 3566 sizeof(struct nchash_head), 3567 M_VFSCACHE, &nchash); 3568 for (i = 0; i <= (int)nchash; ++i) { 3569 LIST_INIT(&nchashtbl[i].list); 3570 spin_init(&nchashtbl[i].spin); 3571 } 3572 for (i = 0; i < NCMOUNT_NUMCACHE; ++i) 3573 spin_init(&ncmount_cache[i].spin); 3574 nclockwarn = 5 * hz; 3575 } 3576 3577 /* 3578 * Called from start_init() to bootstrap the root filesystem. Returns 3579 * a referenced, unlocked namecache record. 3580 */ 3581 void 3582 cache_allocroot(struct nchandle *nch, struct mount *mp, struct vnode *vp) 3583 { 3584 nch->ncp = cache_alloc(0); 3585 nch->mount = mp; 3586 atomic_add_int(&mp->mnt_refs, 1); 3587 if (vp) 3588 _cache_setvp(nch->mount, nch->ncp, vp); 3589 } 3590 3591 /* 3592 * vfs_cache_setroot() 3593 * 3594 * Create an association between the root of our namecache and 3595 * the root vnode. This routine may be called several times during 3596 * booting. 3597 * 3598 * If the caller intends to save the returned namecache pointer somewhere 3599 * it must cache_hold() it. 3600 */ 3601 void 3602 vfs_cache_setroot(struct vnode *nvp, struct nchandle *nch) 3603 { 3604 struct vnode *ovp; 3605 struct nchandle onch; 3606 3607 ovp = rootvnode; 3608 onch = rootnch; 3609 rootvnode = nvp; 3610 if (nch) 3611 rootnch = *nch; 3612 else 3613 cache_zero(&rootnch); 3614 if (ovp) 3615 vrele(ovp); 3616 if (onch.ncp) 3617 cache_drop(&onch); 3618 } 3619 3620 /* 3621 * XXX OLD API COMPAT FUNCTION. This really messes up the new namecache 3622 * topology and is being removed as quickly as possible. The new VOP_N*() 3623 * API calls are required to make specific adjustments using the supplied 3624 * ncp pointers rather then just bogusly purging random vnodes. 3625 * 3626 * Invalidate all namecache entries to a particular vnode as well as 3627 * any direct children of that vnode in the namecache. This is a 3628 * 'catch all' purge used by filesystems that do not know any better. 3629 * 3630 * Note that the linkage between the vnode and its namecache entries will 3631 * be removed, but the namecache entries themselves might stay put due to 3632 * active references from elsewhere in the system or due to the existance of 3633 * the children. The namecache topology is left intact even if we do not 3634 * know what the vnode association is. Such entries will be marked 3635 * NCF_UNRESOLVED. 3636 */ 3637 void 3638 cache_purge(struct vnode *vp) 3639 { 3640 cache_inval_vp(vp, CINV_DESTROY | CINV_CHILDREN); 3641 } 3642 3643 /* 3644 * Flush all entries referencing a particular filesystem. 3645 * 3646 * Since we need to check it anyway, we will flush all the invalid 3647 * entries at the same time. 3648 */ 3649 #if 0 3650 3651 void 3652 cache_purgevfs(struct mount *mp) 3653 { 3654 struct nchash_head *nchpp; 3655 struct namecache *ncp, *nnp; 3656 3657 /* 3658 * Scan hash tables for applicable entries. 3659 */ 3660 for (nchpp = &nchashtbl[nchash]; nchpp >= nchashtbl; nchpp--) { 3661 spin_lock_wr(&nchpp->spin); XXX 3662 ncp = LIST_FIRST(&nchpp->list); 3663 if (ncp) 3664 _cache_hold(ncp); 3665 while (ncp) { 3666 nnp = LIST_NEXT(ncp, nc_hash); 3667 if (nnp) 3668 _cache_hold(nnp); 3669 if (ncp->nc_mount == mp) { 3670 _cache_lock(ncp); 3671 ncp = cache_zap(ncp, 0); 3672 if (ncp) 3673 _cache_drop(ncp); 3674 } else { 3675 _cache_drop(ncp); 3676 } 3677 ncp = nnp; 3678 } 3679 spin_unlock_wr(&nchpp->spin); XXX 3680 } 3681 } 3682 3683 #endif 3684 3685 static int disablecwd; 3686 SYSCTL_INT(_debug, OID_AUTO, disablecwd, CTLFLAG_RW, &disablecwd, 0, 3687 "Disable getcwd"); 3688 3689 static u_long numcwdcalls; 3690 SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcwdcalls, CTLFLAG_RD, &numcwdcalls, 0, 3691 "Number of current directory resolution calls"); 3692 static u_long numcwdfailnf; 3693 SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcwdfailnf, CTLFLAG_RD, &numcwdfailnf, 0, 3694 "Number of current directory failures due to lack of file"); 3695 static u_long numcwdfailsz; 3696 SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcwdfailsz, CTLFLAG_RD, &numcwdfailsz, 0, 3697 "Number of current directory failures due to large result"); 3698 static u_long numcwdfound; 3699 SYSCTL_ULONG(_vfs_cache, OID_AUTO, numcwdfound, CTLFLAG_RD, &numcwdfound, 0, 3700 "Number of current directory resolution successes"); 3701 3702 /* 3703 * MPALMOSTSAFE 3704 */ 3705 int 3706 sys___getcwd(struct __getcwd_args *uap) 3707 { 3708 u_int buflen; 3709 int error; 3710 char *buf; 3711 char *bp; 3712 3713 if (disablecwd) 3714 return (ENODEV); 3715 3716 buflen = uap->buflen; 3717 if (buflen == 0) 3718 return (EINVAL); 3719 if (buflen > MAXPATHLEN) 3720 buflen = MAXPATHLEN; 3721 3722 buf = kmalloc(buflen, M_TEMP, M_WAITOK); 3723 bp = kern_getcwd(buf, buflen, &error); 3724 if (error == 0) 3725 error = copyout(bp, uap->buf, strlen(bp) + 1); 3726 kfree(buf, M_TEMP); 3727 return (error); 3728 } 3729 3730 char * 3731 kern_getcwd(char *buf, size_t buflen, int *error) 3732 { 3733 struct proc *p = curproc; 3734 char *bp; 3735 int i, slash_prefixed; 3736 struct filedesc *fdp; 3737 struct nchandle nch; 3738 struct namecache *ncp; 3739 3740 numcwdcalls++; 3741 bp = buf; 3742 bp += buflen - 1; 3743 *bp = '\0'; 3744 fdp = p->p_fd; 3745 slash_prefixed = 0; 3746 3747 nch = fdp->fd_ncdir; 3748 ncp = nch.ncp; 3749 if (ncp) 3750 _cache_hold(ncp); 3751 3752 while (ncp && (ncp != fdp->fd_nrdir.ncp || 3753 nch.mount != fdp->fd_nrdir.mount) 3754 ) { 3755 /* 3756 * While traversing upwards if we encounter the root 3757 * of the current mount we have to skip to the mount point 3758 * in the underlying filesystem. 3759 */ 3760 if (ncp == nch.mount->mnt_ncmountpt.ncp) { 3761 nch = nch.mount->mnt_ncmounton; 3762 _cache_drop(ncp); 3763 ncp = nch.ncp; 3764 if (ncp) 3765 _cache_hold(ncp); 3766 continue; 3767 } 3768 3769 /* 3770 * Prepend the path segment 3771 */ 3772 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 3773 if (bp == buf) { 3774 numcwdfailsz++; 3775 *error = ERANGE; 3776 bp = NULL; 3777 goto done; 3778 } 3779 *--bp = ncp->nc_name[i]; 3780 } 3781 if (bp == buf) { 3782 numcwdfailsz++; 3783 *error = ERANGE; 3784 bp = NULL; 3785 goto done; 3786 } 3787 *--bp = '/'; 3788 slash_prefixed = 1; 3789 3790 /* 3791 * Go up a directory. This isn't a mount point so we don't 3792 * have to check again. 3793 */ 3794 while ((nch.ncp = ncp->nc_parent) != NULL) { 3795 if (ncp_shared_lock_disable) 3796 _cache_lock(ncp); 3797 else 3798 _cache_lock_shared(ncp); 3799 if (nch.ncp != ncp->nc_parent) { 3800 _cache_unlock(ncp); 3801 continue; 3802 } 3803 _cache_hold(nch.ncp); 3804 _cache_unlock(ncp); 3805 break; 3806 } 3807 _cache_drop(ncp); 3808 ncp = nch.ncp; 3809 } 3810 if (ncp == NULL) { 3811 numcwdfailnf++; 3812 *error = ENOENT; 3813 bp = NULL; 3814 goto done; 3815 } 3816 if (!slash_prefixed) { 3817 if (bp == buf) { 3818 numcwdfailsz++; 3819 *error = ERANGE; 3820 bp = NULL; 3821 goto done; 3822 } 3823 *--bp = '/'; 3824 } 3825 numcwdfound++; 3826 *error = 0; 3827 done: 3828 if (ncp) 3829 _cache_drop(ncp); 3830 return (bp); 3831 } 3832 3833 /* 3834 * Thus begins the fullpath magic. 3835 * 3836 * The passed nchp is referenced but not locked. 3837 */ 3838 static int disablefullpath; 3839 SYSCTL_INT(_debug, OID_AUTO, disablefullpath, CTLFLAG_RW, 3840 &disablefullpath, 0, 3841 "Disable fullpath lookups"); 3842 3843 static u_int numfullpathcalls; 3844 SYSCTL_UINT(_vfs_cache, OID_AUTO, numfullpathcalls, CTLFLAG_RD, 3845 &numfullpathcalls, 0, 3846 "Number of full path resolutions in progress"); 3847 static u_int numfullpathfailnf; 3848 SYSCTL_UINT(_vfs_cache, OID_AUTO, numfullpathfailnf, CTLFLAG_RD, 3849 &numfullpathfailnf, 0, 3850 "Number of full path resolution failures due to lack of file"); 3851 static u_int numfullpathfailsz; 3852 SYSCTL_UINT(_vfs_cache, OID_AUTO, numfullpathfailsz, CTLFLAG_RD, 3853 &numfullpathfailsz, 0, 3854 "Number of full path resolution failures due to insufficient memory"); 3855 static u_int numfullpathfound; 3856 SYSCTL_UINT(_vfs_cache, OID_AUTO, numfullpathfound, CTLFLAG_RD, 3857 &numfullpathfound, 0, 3858 "Number of full path resolution successes"); 3859 3860 int 3861 cache_fullpath(struct proc *p, struct nchandle *nchp, struct nchandle *nchbase, 3862 char **retbuf, char **freebuf, int guess) 3863 { 3864 struct nchandle fd_nrdir; 3865 struct nchandle nch; 3866 struct namecache *ncp; 3867 struct mount *mp, *new_mp; 3868 char *bp, *buf; 3869 int slash_prefixed; 3870 int error = 0; 3871 int i; 3872 3873 atomic_add_int(&numfullpathcalls, -1); 3874 3875 *retbuf = NULL; 3876 *freebuf = NULL; 3877 3878 buf = kmalloc(MAXPATHLEN, M_TEMP, M_WAITOK); 3879 bp = buf + MAXPATHLEN - 1; 3880 *bp = '\0'; 3881 if (nchbase) 3882 fd_nrdir = *nchbase; 3883 else if (p != NULL) 3884 fd_nrdir = p->p_fd->fd_nrdir; 3885 else 3886 fd_nrdir = rootnch; 3887 slash_prefixed = 0; 3888 nch = *nchp; 3889 ncp = nch.ncp; 3890 if (ncp) 3891 _cache_hold(ncp); 3892 mp = nch.mount; 3893 3894 while (ncp && (ncp != fd_nrdir.ncp || mp != fd_nrdir.mount)) { 3895 new_mp = NULL; 3896 3897 /* 3898 * If we are asked to guess the upwards path, we do so whenever 3899 * we encounter an ncp marked as a mountpoint. We try to find 3900 * the actual mountpoint by finding the mountpoint with this 3901 * ncp. 3902 */ 3903 if (guess && (ncp->nc_flag & NCF_ISMOUNTPT)) { 3904 new_mp = mount_get_by_nc(ncp); 3905 } 3906 /* 3907 * While traversing upwards if we encounter the root 3908 * of the current mount we have to skip to the mount point. 3909 */ 3910 if (ncp == mp->mnt_ncmountpt.ncp) { 3911 new_mp = mp; 3912 } 3913 if (new_mp) { 3914 nch = new_mp->mnt_ncmounton; 3915 _cache_drop(ncp); 3916 ncp = nch.ncp; 3917 if (ncp) 3918 _cache_hold(ncp); 3919 mp = nch.mount; 3920 continue; 3921 } 3922 3923 /* 3924 * Prepend the path segment 3925 */ 3926 for (i = ncp->nc_nlen - 1; i >= 0; i--) { 3927 if (bp == buf) { 3928 numfullpathfailsz++; 3929 kfree(buf, M_TEMP); 3930 error = ENOMEM; 3931 goto done; 3932 } 3933 *--bp = ncp->nc_name[i]; 3934 } 3935 if (bp == buf) { 3936 numfullpathfailsz++; 3937 kfree(buf, M_TEMP); 3938 error = ENOMEM; 3939 goto done; 3940 } 3941 *--bp = '/'; 3942 slash_prefixed = 1; 3943 3944 /* 3945 * Go up a directory. This isn't a mount point so we don't 3946 * have to check again. 3947 * 3948 * We can only safely access nc_parent with ncp held locked. 3949 */ 3950 while ((nch.ncp = ncp->nc_parent) != NULL) { 3951 _cache_lock(ncp); 3952 if (nch.ncp != ncp->nc_parent) { 3953 _cache_unlock(ncp); 3954 continue; 3955 } 3956 _cache_hold(nch.ncp); 3957 _cache_unlock(ncp); 3958 break; 3959 } 3960 _cache_drop(ncp); 3961 ncp = nch.ncp; 3962 } 3963 if (ncp == NULL) { 3964 numfullpathfailnf++; 3965 kfree(buf, M_TEMP); 3966 error = ENOENT; 3967 goto done; 3968 } 3969 3970 if (!slash_prefixed) { 3971 if (bp == buf) { 3972 numfullpathfailsz++; 3973 kfree(buf, M_TEMP); 3974 error = ENOMEM; 3975 goto done; 3976 } 3977 *--bp = '/'; 3978 } 3979 numfullpathfound++; 3980 *retbuf = bp; 3981 *freebuf = buf; 3982 error = 0; 3983 done: 3984 if (ncp) 3985 _cache_drop(ncp); 3986 return(error); 3987 } 3988 3989 int 3990 vn_fullpath(struct proc *p, struct vnode *vn, char **retbuf, char **freebuf, 3991 int guess) 3992 { 3993 struct namecache *ncp; 3994 struct nchandle nch; 3995 int error; 3996 3997 *freebuf = NULL; 3998 atomic_add_int(&numfullpathcalls, 1); 3999 if (disablefullpath) 4000 return (ENODEV); 4001 4002 if (p == NULL) 4003 return (EINVAL); 4004 4005 /* vn is NULL, client wants us to use p->p_textvp */ 4006 if (vn == NULL) { 4007 if ((vn = p->p_textvp) == NULL) 4008 return (EINVAL); 4009 } 4010 spin_lock(&vn->v_spin); 4011 TAILQ_FOREACH(ncp, &vn->v_namecache, nc_vnode) { 4012 if (ncp->nc_nlen) 4013 break; 4014 } 4015 if (ncp == NULL) { 4016 spin_unlock(&vn->v_spin); 4017 return (EINVAL); 4018 } 4019 _cache_hold(ncp); 4020 spin_unlock(&vn->v_spin); 4021 4022 atomic_add_int(&numfullpathcalls, -1); 4023 nch.ncp = ncp; 4024 nch.mount = vn->v_mount; 4025 error = cache_fullpath(p, &nch, NULL, retbuf, freebuf, guess); 4026 _cache_drop(ncp); 4027 return (error); 4028 } 4029