1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved. 5 * 6 * Redistribution and use in source and binary forms, with or without 7 * modification, are permitted provided that the following conditions 8 * are met: 9 * 1. Redistributions of source code must retain the above copyright 10 * notice, this list of conditions and the following disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 /* 29 * This implements a hash-based lookup scheme for UFS directories. 
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_ufs.h" 36 37 #ifdef UFS_DIRHASH 38 39 #include <sys/param.h> 40 #include <sys/systm.h> 41 #include <sys/kernel.h> 42 #include <sys/lock.h> 43 #include <sys/mutex.h> 44 #include <sys/malloc.h> 45 #include <sys/fnv_hash.h> 46 #include <sys/proc.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/vnode.h> 50 #include <sys/mount.h> 51 #include <sys/refcount.h> 52 #include <sys/sysctl.h> 53 #include <sys/sx.h> 54 #include <sys/eventhandler.h> 55 #include <sys/time.h> 56 #include <vm/uma.h> 57 58 #include <ufs/ufs/quota.h> 59 #include <ufs/ufs/inode.h> 60 #include <ufs/ufs/dir.h> 61 #include <ufs/ufs/dirhash.h> 62 #include <ufs/ufs/extattr.h> 63 #include <ufs/ufs/ufsmount.h> 64 #include <ufs/ufs/ufs_extern.h> 65 66 #define WRAPINCR(val, limit) (((val) + 1 == (limit)) ? 0 : ((val) + 1)) 67 #define WRAPDECR(val, limit) (((val) == 0) ? ((limit) - 1) : ((val) - 1)) 68 #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 69 #define BLKFREE2IDX(n) ((n) > DH_NFSTATS ? DH_NFSTATS : (n)) 70 71 static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables"); 72 73 static int ufs_mindirhashsize = DIRBLKSIZ * 5; 74 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW, 75 &ufs_mindirhashsize, 76 0, "minimum directory size in bytes for which to use hashed lookup"); 77 static int ufs_dirhashmaxmem = 2 * 1024 * 1024; /* NOTE: initial value. 
It is 78 tuned in ufsdirhash_init() */ 79 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem, 80 0, "maximum allowed dirhash memory usage"); 81 static int ufs_dirhashmem; 82 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem, 83 0, "current dirhash memory usage"); 84 static int ufs_dirhashcheck = 0; 85 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck, 86 0, "enable extra sanity tests"); 87 static int ufs_dirhashlowmemcount = 0; 88 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_lowmemcount, CTLFLAG_RD, 89 &ufs_dirhashlowmemcount, 0, "number of times low memory hook called"); 90 static int ufs_dirhashreclaimpercent = 10; 91 static int ufsdirhash_set_reclaimpercent(SYSCTL_HANDLER_ARGS); 92 SYSCTL_PROC(_vfs_ufs, OID_AUTO, dirhash_reclaimpercent, 93 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, 94 0, 0, ufsdirhash_set_reclaimpercent, "I", 95 "set percentage of dirhash cache to be removed in low VM events"); 96 97 98 static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen); 99 static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff); 100 static void ufsdirhash_delslot(struct dirhash *dh, int slot); 101 static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, 102 doff_t offset); 103 static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset); 104 static int ufsdirhash_recycle(int wanted); 105 static void ufsdirhash_lowmem(void); 106 static void ufsdirhash_free_locked(struct inode *ip); 107 108 static uma_zone_t ufsdirhash_zone; 109 110 #define DIRHASHLIST_LOCK() mtx_lock(&ufsdirhash_mtx) 111 #define DIRHASHLIST_UNLOCK() mtx_unlock(&ufsdirhash_mtx) 112 #define DIRHASH_BLKALLOC_WAITOK() uma_zalloc(ufsdirhash_zone, M_WAITOK) 113 #define DIRHASH_BLKFREE(ptr) uma_zfree(ufsdirhash_zone, (ptr)) 114 #define DIRHASH_ASSERT_LOCKED(dh) \ 115 sx_assert(&(dh)->dh_lock, SA_LOCKED) 116 117 /* Dirhash list; recently-used entries are near the tail. 
*/ 118 static TAILQ_HEAD(, dirhash) ufsdirhash_list; 119 120 /* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */ 121 static struct mtx ufsdirhash_mtx; 122 123 /* 124 * Locking: 125 * 126 * The relationship between inode and dirhash is protected either by an 127 * exclusive vnode lock or the vnode interlock where a shared vnode lock 128 * may be used. The dirhash_mtx is acquired after the dirhash lock. To 129 * handle teardown races, code wishing to lock the dirhash for an inode 130 * when using a shared vnode lock must obtain a private reference on the 131 * dirhash while holding the vnode interlock. They can drop it once they 132 * have obtained the dirhash lock and verified that the dirhash wasn't 133 * recycled while they waited for the dirhash lock. 134 * 135 * ufsdirhash_build() acquires a shared lock on the dirhash when it is 136 * successful. This lock is released after a call to ufsdirhash_lookup(). 137 * 138 * Functions requiring exclusive access use ufsdirhash_acquire() which may 139 * free a dirhash structure that was recycled by ufsdirhash_recycle(). 140 * 141 * The dirhash lock may be held across io operations. 142 * 143 * WITNESS reports a lock order reversal between the "bufwait" lock 144 * and the "dirhash" lock. However, this specific reversal will not 145 * cause a deadlock. To get a deadlock, one would have to lock a 146 * buffer followed by the dirhash while a second thread locked a 147 * buffer while holding the dirhash lock. The second order can happen 148 * under a shared or exclusive vnode lock for the associated directory 149 * in lookup(). The first order, however, can only happen under an 150 * exclusive vnode lock (e.g. unlink(), rename(), etc.). Thus, for 151 * a thread to be doing a "bufwait" -> "dirhash" order, it has to hold 152 * an exclusive vnode lock. That exclusive vnode lock will prevent 153 * any other threads from doing a "dirhash" -> "bufwait" order. 
 */

/*
 * Take a private reference on a dirhash structure.  The reference keeps
 * the structure (and its embedded sx lock) from being freed; it does not
 * imply that dh_lock is held.
 */
static void
ufsdirhash_hold(struct dirhash *dh)
{

	refcount_acquire(&dh->dh_refcount);
}

/*
 * Drop a reference taken with ufsdirhash_hold().  Releasing the final
 * reference destroys the lock and frees the structure.
 */
static void
ufsdirhash_drop(struct dirhash *dh)
{

	if (refcount_release(&dh->dh_refcount)) {
		sx_destroy(&dh->dh_lock);
		free(dh, M_DIRHASH);
	}
}

/*
 * Release the lock on a dirhash.
 */
static void
ufsdirhash_release(struct dirhash *dh)
{

	sx_unlock(&dh->dh_lock);
}

/*
 * Either acquire an existing hash locked shared or create a new hash and
 * return it exclusively locked.  May return NULL if the allocation fails.
 *
 * The vnode interlock is used to protect the i_dirhash pointer from
 * simultaneous access while only a shared vnode lock is held.
 */
static struct dirhash *
ufsdirhash_create(struct inode *ip)
{
	struct dirhash *ndh;
	struct dirhash *dh;
	struct vnode *vp;
	bool excl;

	ndh = dh = NULL;
	vp = ip->i_vnode;
	excl = false;
	for (;;) {
		/* Racy check for i_dirhash to prefetch a dirhash structure. */
		if (ip->i_dirhash == NULL && ndh == NULL) {
			/*
			 * M_NOWAIT: on allocation failure the caller simply
			 * falls back to a linear directory search.
			 */
			ndh = malloc(sizeof *dh, M_DIRHASH,
			    M_NOWAIT | M_ZERO);
			if (ndh == NULL)
				return (NULL);
			refcount_init(&ndh->dh_refcount, 1);

			/*
			 * The DUPOK is to prevent warnings from the
			 * sx_slock() a few lines down which is safe
			 * since the duplicate lock in that case is
			 * the one for this dirhash we are creating
			 * now which has no external references until
			 * after this function returns.
			 */
			sx_init_flags(&ndh->dh_lock, "dirhash", SX_DUPOK);
			sx_xlock(&ndh->dh_lock);
		}
		/*
		 * Check i_dirhash.  If it's NULL just try to use a
		 * preallocated structure.  If none exists loop and try again.
		 */
		VI_LOCK(vp);
		dh = ip->i_dirhash;
		if (dh == NULL) {
			ip->i_dirhash = ndh;
			VI_UNLOCK(vp);
			if (ndh == NULL)
				continue;
			return (ndh);
		}
		/* Hold a reference so the hash survives our sleep below. */
		ufsdirhash_hold(dh);
		VI_UNLOCK(vp);

		/* Acquire a lock on existing hashes. */
		if (excl)
			sx_xlock(&dh->dh_lock);
		else
			sx_slock(&dh->dh_lock);

		/* The hash could've been recycled while we were waiting. */
		VI_LOCK(vp);
		if (ip->i_dirhash != dh) {
			VI_UNLOCK(vp);
			ufsdirhash_release(dh);
			ufsdirhash_drop(dh);
			continue;
		}
		VI_UNLOCK(vp);
		/* Still attached to the inode; our temporary ref is done. */
		ufsdirhash_drop(dh);

		/* If the hash is still valid we've succeeded. */
		if (dh->dh_hash != NULL)
			break;
		/*
		 * If the hash is NULL it has been recycled.  Try to upgrade
		 * so we can recreate it.  If we fail the upgrade, drop our
		 * lock and try again.
		 */
		if (excl || sx_try_upgrade(&dh->dh_lock))
			break;
		sx_sunlock(&dh->dh_lock);
		excl = true;
	}
	/* Free the preallocated structure if it was not necessary. */
	if (ndh) {
		ufsdirhash_release(ndh);
		ufsdirhash_drop(ndh);
	}
	return (dh);
}

/*
 * Acquire an exclusive lock on an existing hash.  Requires an exclusive
 * vnode lock to protect the i_dirhash pointer.  hashes that have been
 * recycled are reclaimed here and NULL is returned.
 */
static struct dirhash *
ufsdirhash_acquire(struct inode *ip)
{
	struct dirhash *dh;

	ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);

	dh = ip->i_dirhash;
	if (dh == NULL)
		return (NULL);
	sx_xlock(&dh->dh_lock);
	/* A NULL dh_hash means the hash was recycled; reclaim it now. */
	if (dh->dh_hash != NULL)
		return (dh);
	ufsdirhash_free_locked(ip);
	return (NULL);
}

/*
 * Acquire exclusively and free the hash pointed to by ip.  Works with a
 * shared or exclusive vnode lock.
 */
void
ufsdirhash_free(struct inode *ip)
{
	struct dirhash *dh;
	struct vnode *vp;

	vp = ip->i_vnode;
	for (;;) {
		/* Grab a reference on this inode's dirhash if it has one. */
		VI_LOCK(vp);
		dh = ip->i_dirhash;
		if (dh == NULL) {
			VI_UNLOCK(vp);
			return;
		}
		ufsdirhash_hold(dh);
		VI_UNLOCK(vp);

		/* Exclusively lock the dirhash. */
		sx_xlock(&dh->dh_lock);

		/* If this dirhash still belongs to this inode, then free it. */
		VI_LOCK(vp);
		if (ip->i_dirhash == dh) {
			VI_UNLOCK(vp);
			ufsdirhash_drop(dh);
			break;
		}
		VI_UNLOCK(vp);

		/*
		 * This inode's dirhash has changed while we were
		 * waiting for the dirhash lock, so try again.
		 */
		ufsdirhash_release(dh);
		ufsdirhash_drop(dh);
	}
	/* Locked exclusively and verified; tear it down. */
	ufsdirhash_free_locked(ip);
}

/*
 * Attempt to build up a hash table for the directory contents in
 * inode 'ip'.  Returns 0 on success, or -1 if the operation failed.
 */
int
ufsdirhash_build(struct inode *ip)
{
	struct dirhash *dh;
	struct buf *bp = NULL;
	struct direct *ep;
	struct vnode *vp;
	doff_t bmask, pos;
	u_int dirblocks, i, narrays, nblocks, nslots;
	int j, memreqd, slot;

	/* Take care of a decreased sysctl value. */
	while (ufs_dirhashmem > ufs_dirhashmaxmem) {
		if (ufsdirhash_recycle(0) != 0)
			return (-1);
		/* Recycled enough memory, so unlock the list. */
		DIRHASHLIST_UNLOCK();
	}

	/* Check if we can/should use dirhash. */
	if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode) ||
	    ip->i_effnlink == 0) {
		if (ip->i_dirhash)
			ufsdirhash_free(ip);
		return (-1);
	}
	dh = ufsdirhash_create(ip);
	if (dh == NULL)
		return (-1);
	/* A non-NULL dh_hash means an existing, valid hash: nothing to do. */
	if (dh->dh_hash != NULL)
		return (0);

	vp = ip->i_vnode;
	/* Allocate 50% more entries than this dir size could ever need. */
	KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size"));
	nslots = ip->i_size / DIRECTSIZ(1);
	nslots = (nslots * 3 + 1) / 2;
	narrays = howmany(nslots, DH_NBLKOFF);
	nslots = narrays * DH_NBLKOFF;
	dirblocks = howmany(ip->i_size, DIRBLKSIZ);
	nblocks = (dirblocks * 3 + 1) / 2;
	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
	    nblocks * sizeof(*dh->dh_blkfree);
	/* Charge the global memory budget before building. */
	DIRHASHLIST_LOCK();
	if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
		DIRHASHLIST_UNLOCK();
		if (memreqd > ufs_dirhashmaxmem / 2)
			goto fail;
		/* Try to free some space. */
		if (ufsdirhash_recycle(memreqd) != 0)
			goto fail;
		/* Enough was freed, and list has been locked. */
	}
	ufs_dirhashmem += memreqd;
	DIRHASHLIST_UNLOCK();

	/* Initialise the hash table and block statistics. */
	dh->dh_memreq = memreqd;
	dh->dh_narrays = narrays;
	dh->dh_hlen = nslots;
	dh->dh_nblk = nblocks;
	dh->dh_dirblks = dirblocks;
	for (i = 0; i < DH_NFSTATS; i++)
		dh->dh_firstfree[i] = -1;
	dh->dh_firstfree[DH_NFSTATS] = 0;
	dh->dh_hused = 0;
	dh->dh_seqoff = -1;
	dh->dh_score = DH_SCOREINIT;
	dh->dh_lastused = time_second;

	/*
	 * Use non-blocking mallocs so that we will revert to a linear
	 * lookup on failure rather than potentially blocking forever.
	 */
	dh->dh_hash = malloc(narrays * sizeof(dh->dh_hash[0]),
	    M_DIRHASH, M_NOWAIT | M_ZERO);
	if (dh->dh_hash == NULL)
		goto fail;
	dh->dh_blkfree = malloc(nblocks * sizeof(dh->dh_blkfree[0]),
	    M_DIRHASH, M_NOWAIT);
	if (dh->dh_blkfree == NULL)
		goto fail;
	for (i = 0; i < narrays; i++) {
		if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
			goto fail;
		for (j = 0; j < DH_NBLKOFF; j++)
			dh->dh_hash[i][j] = DIRHASH_EMPTY;
	}
	/* Only the blocks the directory currently has start out free. */
	for (i = 0; i < dirblocks; i++)
		dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
	bmask = vp->v_mount->mnt_stat.f_iosize - 1;
	pos = 0;
	while (pos < ip->i_size) {
		/* If necessary, get the next directory block. */
		if ((pos & bmask) == 0) {
			if (bp != NULL)
				brelse(bp);
			if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0)
				goto fail;
		}

		/* Add this entry to the hash. */
		ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
		if (ep->d_reclen == 0 || ep->d_reclen >
		    DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
			/* Corrupted directory. */
			brelse(bp);
			goto fail;
		}
		if (ep->d_ino != 0) {
			/* Add the entry (simplified ufsdirhash_add). */
			slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
			while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
				slot = WRAPINCR(slot, dh->dh_hlen);
			dh->dh_hused++;
			DH_ENTRY(dh, slot) = pos;
			ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep));
		}
		pos += ep->d_reclen;
	}

	if (bp != NULL)
		brelse(bp);
	DIRHASHLIST_LOCK();
	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
	dh->dh_onlist = 1;
	DIRHASHLIST_UNLOCK();
	/* Hand the hash back shared-locked, as documented above. */
	sx_downgrade(&dh->dh_lock);
	return (0);

fail:
	/* Undoes the memory accounting and detaches the partial hash. */
	ufsdirhash_free_locked(ip);
	return (-1);
}

/*
 * Free any hash table associated with inode 'ip'.
 */
static void
ufsdirhash_free_locked(struct inode *ip)
{
	struct dirhash *dh;
	struct vnode *vp;
	int i;

	DIRHASH_ASSERT_LOCKED(ip->i_dirhash);

	/*
	 * Clear the pointer in the inode to prevent new threads from
	 * finding the dead structure.
	 */
	vp = ip->i_vnode;
	VI_LOCK(vp);
	dh = ip->i_dirhash;
	ip->i_dirhash = NULL;
	VI_UNLOCK(vp);

	/*
	 * Remove the hash from the list since we are going to free its
	 * memory.
	 */
	DIRHASHLIST_LOCK();
	if (dh->dh_onlist)
		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
	ufs_dirhashmem -= dh->dh_memreq;
	DIRHASHLIST_UNLOCK();

	/*
	 * At this point, any waiters for the lock should hold their
	 * own reference on the dirhash structure.  They will drop
	 * that reference once they grab the vnode interlock and see
	 * that ip->i_dirhash is NULL.
	 */
	sx_xunlock(&dh->dh_lock);

	/*
	 * Handle partially recycled as well as fully constructed hashes.
	 */
	if (dh->dh_hash != NULL) {
		for (i = 0; i < dh->dh_narrays; i++)
			if (dh->dh_hash[i] != NULL)
				DIRHASH_BLKFREE(dh->dh_hash[i]);
		free(dh->dh_hash, M_DIRHASH);
		if (dh->dh_blkfree != NULL)
			free(dh->dh_blkfree, M_DIRHASH);
	}

	/*
	 * Drop the inode's reference to the data structure.
	 */
	ufsdirhash_drop(dh);
}

/*
 * Find the offset of the specified name within the given inode.
 * Returns 0 on success, ENOENT if the entry does not exist, or
 * EJUSTRETURN if the caller should revert to a linear search.
 *
 * If successful, the directory offset is stored in *offp, and a
 * pointer to a struct buf containing the entry is stored in *bpp.  If
 * prevoffp is non-NULL, the offset of the previous entry within
 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
 * is the first in a block, the start of the block is used).
 *
 * Must be called with the hash locked.
 * Returns with the hash unlocked.
 */
int
ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
    struct buf **bpp, doff_t *prevoffp)
{
	struct dirhash *dh, *dh_next;
	struct direct *dp;
	struct vnode *vp;
	struct buf *bp;
	doff_t blkoff, bmask, offset, prevoff, seqoff;
	int i, slot;
	int error;

	dh = ip->i_dirhash;
	KASSERT(dh != NULL && dh->dh_hash != NULL,
	    ("ufsdirhash_lookup: Invalid dirhash %p\n", dh));
	DIRHASH_ASSERT_LOCKED(dh);
	/*
	 * Move this dirhash towards the end of the list if it has a
	 * score higher than the next entry, and acquire the dh_lock.
	 */
	DIRHASHLIST_LOCK();
	if (TAILQ_NEXT(dh, dh_list) != NULL) {
		/*
		 * If the new score will be greater than that of the next
		 * entry, then move this entry past it.  With both mutexes
		 * held, dh_next won't go away, but its dh_score could
		 * change; that's not important since it is just a hint.
		 */
		if ((dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
		    dh->dh_score >= dh_next->dh_score) {
			KASSERT(dh->dh_onlist, ("dirhash: not on list"));
			TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
			TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
			    dh_list);
		}
	}
	/* Update the score. */
	if (dh->dh_score < DH_SCOREMAX)
		dh->dh_score++;

	/* Update last used time. */
	dh->dh_lastused = time_second;
	DIRHASHLIST_UNLOCK();

	vp = ip->i_vnode;
	bmask = vp->v_mount->mnt_stat.f_iosize - 1;
	blkoff = -1;
	bp = NULL;
	seqoff = dh->dh_seqoff;
restart:
	slot = ufsdirhash_hash(dh, name, namelen);

	if (seqoff != -1) {
		/*
		 * Sequential access optimisation.  seqoff contains the
		 * offset of the directory entry immediately following
		 * the last entry that was looked up.  Check if this offset
		 * appears in the hash chain for the name we are looking for.
		 */
		for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
		    i = WRAPINCR(i, dh->dh_hlen))
			if (offset == seqoff)
				break;
		if (offset == seqoff) {
			/*
			 * We found an entry with the expected offset.  This
			 * is probably the entry we want, but if not, the
			 * code below will retry.
			 */
			slot = i;
		} else
			seqoff = -1;
	}

	/* Walk the probe chain starting at 'slot'. */
	for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
	    slot = WRAPINCR(slot, dh->dh_hlen)) {
		if (offset == DIRHASH_DEL)
			continue;
		if (offset < 0 || offset >= ip->i_size)
			panic("ufsdirhash_lookup: bad offset in hash array");
		/* Fetch the buffer holding this offset if not cached. */
		if ((offset & ~bmask) != blkoff) {
			if (bp != NULL)
				brelse(bp);
			blkoff = offset & ~bmask;
			if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) {
				error = EJUSTRETURN;
				goto fail;
			}
		}
		KASSERT(bp != NULL, ("no buffer allocated"));
		dp = (struct direct *)(bp->b_data + (offset & bmask));
		if (dp->d_reclen == 0 || dp->d_reclen >
		    DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
			/* Corrupted directory. */
			error = EJUSTRETURN;
			goto fail;
		}
		if (dp->d_namlen == namelen &&
		    bcmp(dp->d_name, name, namelen) == 0) {
			/* Found.  Get the prev offset if needed. */
			if (prevoffp != NULL) {
				if (offset & (DIRBLKSIZ - 1)) {
					prevoff = ufsdirhash_getprev(dp,
					    offset);
					if (prevoff == -1) {
						error = EJUSTRETURN;
						goto fail;
					}
				} else
					prevoff = offset;
				*prevoffp = prevoff;
			}

			/* Update offset for the next sequential lookup. */
			dh->dh_seqoff = offset + DIRSIZ(0, dp);
			*bpp = bp;
			*offp = offset;
			ufsdirhash_release(dh);
			return (0);
		}

		/*
		 * When the name doesn't match in the sequential
		 * optimization case, go back and search normally.
		 */
		if (seqoff != -1) {
			seqoff = -1;
			goto restart;
		}
	}
	error = ENOENT;
fail:
	ufsdirhash_release(dh);
	if (bp != NULL)
		brelse(bp);
	return (error);
}

/*
 * Find a directory block with room for 'slotneeded' bytes.  Returns
 * the offset of the directory entry that begins the free space.
 * This will either be the offset of an existing entry that has free
 * space at the end, or the offset of an entry with d_ino == 0 at
 * the start of a DIRBLKSIZ block.
 *
 * To use the space, the caller may need to compact existing entries in
 * the directory.  The total number of bytes in all of the entries involved
 * in the compaction is stored in *slotsize.  In other words, all of
 * the entries that must be compacted are exactly contained in the
 * region beginning at the returned offset and spanning *slotsize bytes.
 *
 * Returns -1 if no space was found, indicating that the directory
 * must be extended.
 */
doff_t
ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
{
	struct direct *dp;
	struct dirhash *dh;
	struct buf *bp;
	doff_t pos, slotstart;
	int dirblock, error, freebytes, i;

	dh = ip->i_dirhash;
	KASSERT(dh != NULL && dh->dh_hash != NULL,
	    ("ufsdirhash_findfree: Invalid dirhash %p\n", dh));
	DIRHASH_ASSERT_LOCKED(dh);

	/* Find a directory block with the desired free space. */
	dirblock = -1;
	for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
		if ((dirblock = dh->dh_firstfree[i]) != -1)
			break;
	if (dirblock == -1)
		return (-1);

	KASSERT(dirblock < dh->dh_nblk &&
	    dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
	    ("ufsdirhash_findfree: bad stats"));
	pos = dirblock * DIRBLKSIZ;
	error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp);
	if (error)
		return (-1);

	/* Find the first entry with free space. */
	for (i = 0; i < DIRBLKSIZ; ) {
		if (dp->d_reclen == 0) {
			brelse(bp);
			return (-1);
		}
		if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp))
			break;
		i += dp->d_reclen;
		dp = (struct direct *)((char *)dp + dp->d_reclen);
	}
	if (i > DIRBLKSIZ) {
		brelse(bp);
		return (-1);
	}
	slotstart = pos + i;

	/* Find the range of entries needed to get enough space */
	freebytes = 0;
	while (i < DIRBLKSIZ && freebytes < slotneeded) {
		freebytes += dp->d_reclen;
		if (dp->d_ino != 0)
			freebytes -= DIRSIZ(0, dp);
		if (dp->d_reclen == 0) {
			brelse(bp);
			return (-1);
		}
		i += dp->d_reclen;
		dp = (struct direct *)((char *)dp + dp->d_reclen);
	}
	if (i > DIRBLKSIZ) {
		brelse(bp);
		return (-1);
	}
	if (freebytes < slotneeded)
		panic("ufsdirhash_findfree: free mismatch");
	brelse(bp);
	*slotsize = pos + i - slotstart;
	return (slotstart);
}

/*
 * Return the start of the unused space at the end of a directory, or
 * -1 if there are no trailing unused blocks.
 */
doff_t
ufsdirhash_enduseful(struct inode *ip)
{

	struct dirhash *dh;
	int i;

	dh = ip->i_dirhash;
	DIRHASH_ASSERT_LOCKED(dh);
	KASSERT(dh != NULL && dh->dh_hash != NULL,
	    ("ufsdirhash_enduseful: Invalid dirhash %p\n", dh));

	/* Fast path: the last block is in use, so nothing trails. */
	if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN)
		return (-1);

	/* Scan backwards for the last block that is not entirely free. */
	for (i = dh->dh_dirblks - 1; i >= 0; i--)
		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
			break;

	return ((doff_t)(i + 1) * DIRBLKSIZ);
}

/*
 * Insert information into the hash about a new directory entry.  dirp
 * points to a struct direct containing the entry, and offset specifies
 * the offset of this entry.
 */
void
ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
{
	struct dirhash *dh;
	int slot;

	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_add: bad offset"));
	/*
	 * Normal hash usage is < 66%.  If the usage gets too high then
	 * remove the hash entirely and let it be rebuilt later.
	 */
	if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
		ufsdirhash_free_locked(ip);
		return;
	}

	/* Find a free hash slot (empty or deleted), and add the entry. */
	slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
	/* Valid offsets are >= 0; DIRHASH_EMPTY/DIRHASH_DEL are negative. */
	while (DH_ENTRY(dh, slot) >= 0)
		slot = WRAPINCR(slot, dh->dh_hlen);
	/* Reusing a deleted slot does not increase the usage count. */
	if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
		dh->dh_hused++;
	DH_ENTRY(dh, slot) = offset;

	/* Update last used time. */
	dh->dh_lastused = time_second;

	/* Update the per-block summary info. */
	ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp));
	ufsdirhash_release(dh);
}

/*
 * Remove the specified directory entry from the hash.  The entry to remove
 * is defined by the name in `dirp', which must exist at the specified
 * `offset' within the directory.
 */
void
ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
{
	struct dirhash *dh;
	int slot;

	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_remove: bad offset"));
	/* Find the entry */
	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);

	/* Remove the hash entry. */
	ufsdirhash_delslot(dh, slot);

	/* Update the per-block summary info. */
	ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp));
	ufsdirhash_release(dh);
}

/*
 * Change the offset associated with a directory entry in the hash.  Used
 * when compacting directory blocks.
 */
void
ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
    doff_t newoff)
{
	struct dirhash *dh;
	int slot;

	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
	    newoff < dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_move: bad offset"));
	/* Find the entry, and update the offset. */
	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
	DH_ENTRY(dh, slot) = newoff;
	ufsdirhash_release(dh);
}

/*
 * Inform dirhash that the directory has grown by one block that
 * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
 */
void
ufsdirhash_newblk(struct inode *ip, doff_t offset)
{
	struct dirhash *dh;
	int block;

	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_newblk: bad offset"));
	block = offset / DIRBLKSIZ;
	if (block >= dh->dh_nblk) {
		/* Out of space; must rebuild. */
		ufsdirhash_free_locked(ip);
		return;
	}
	dh->dh_dirblks = block + 1;

	/* Account for the new free block. */
	dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
	if (dh->dh_firstfree[DH_NFSTATS] == -1)
		dh->dh_firstfree[DH_NFSTATS] = block;
	ufsdirhash_release(dh);
}

/*
 * Inform dirhash that the directory is being truncated.
 */
void
ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
{
	struct dirhash *dh;
	int block, i;

	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
	    ("ufsdirhash_dirtrunc: bad offset"));
	block = howmany(offset, DIRBLKSIZ);
	/*
	 * If the directory shrinks to less than 1/8 of dh_nblk blocks
	 * (about 20% of its original size due to the 50% extra added in
	 * ufsdirhash_build) then free it, and let the caller rebuild
	 * if necessary.
	 */
	if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
		ufsdirhash_free_locked(ip);
		return;
	}

	/*
	 * Remove any `first free' information pertaining to the
	 * truncated blocks.  All blocks we're removing should be
	 * completely unused.
	 */
	if (dh->dh_firstfree[DH_NFSTATS] >= block)
		dh->dh_firstfree[DH_NFSTATS] = -1;
	for (i = block; i < dh->dh_dirblks; i++)
		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
			panic("ufsdirhash_dirtrunc: blocks in use");
	for (i = 0; i < DH_NFSTATS; i++)
		if (dh->dh_firstfree[i] >= block)
			panic("ufsdirhash_dirtrunc: first free corrupt");
	dh->dh_dirblks = block;
	ufsdirhash_release(dh);
}

/*
 * Debugging function to check that the dirhash information about
 * a directory block matches its actual contents.  Panics if a mismatch
 * is detected.
 *
 * On entry, `buf' should point to the start of an in-core
 * DIRBLKSIZ-sized directory block, and `offset' should contain the
 * offset from the start of the directory of that block.
 */
void
ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
{
	struct dirhash *dh;
	struct direct *dp;
	int block, ffslot, i, nfree;

	/* Gated by the vfs.ufs.dirhash_docheck sysctl. */
	if (!ufs_dirhashcheck)
		return;
	if ((dh = ufsdirhash_acquire(ip)) == NULL)
		return;

	block = offset / DIRBLKSIZ;
	if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
		panic("ufsdirhash_checkblock: bad offset");

	nfree = 0;
	for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
		dp = (struct direct *)(buf + i);
		if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
			panic("ufsdirhash_checkblock: bad dir");

		if (dp->d_ino == 0) {
#if 0
			/*
			 * XXX entries with d_ino == 0 should only occur
			 * at the start of a DIRBLKSIZ block.  However the
			 * ufs code is tolerant of such entries at other
			 * offsets, and fsck does not fix them.
			 */
			if (i != 0)
				panic("ufsdirhash_checkblock: bad dir inode");
#endif
			nfree += dp->d_reclen;
			continue;
		}

		/* Check that the entry exists (will panic if it doesn't). */
		ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);

		nfree += dp->d_reclen - DIRSIZ(0, dp);
	}
	if (i != DIRBLKSIZ)
		panic("ufsdirhash_checkblock: bad dir end");

	if (dh->dh_blkfree[block] * DIRALIGN != nfree)
		panic("ufsdirhash_checkblock: bad free count");

	ffslot = BLKFREE2IDX(nfree / DIRALIGN);
	for (i = 0; i <= DH_NFSTATS; i++)
		if (dh->dh_firstfree[i] == block && i != ffslot)
			panic("ufsdirhash_checkblock: bad first-free");
	if (dh->dh_firstfree[ffslot] == -1)
		panic("ufsdirhash_checkblock: missing first-free entry");
	ufsdirhash_release(dh);
}

/*
 * Hash the specified filename into a dirhash slot.
 */
static int
ufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
{
	u_int32_t hash;

	/*
	 * We hash the name and then some other bit of data that is
	 * invariant over the dirhash's lifetime.  Otherwise names
	 * differing only in the last byte are placed close to one
	 * another in the table, which is bad for linear probing.
	 */
	hash = fnv_32_buf(name, namelen, FNV1_32_INIT);
	/* The dirhash pointer itself serves as the per-table salt. */
	hash = fnv_32_buf(&dh, sizeof(dh), hash);
	return (hash % dh->dh_hlen);
}

/*
 * Adjust the number of free bytes in the block containing `offset'
 * by the value specified by `diff'.
 *
 * The caller must ensure we have exclusive access to `dh'; normally
 * that means that dh_lock should be held, but this is also called
 * from ufsdirhash_build() where exclusive access can be assumed.
 */
static void
ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
{
	int block, i, nfidx, ofidx;

	/* Update the per-block summary info. */
	block = offset / DIRBLKSIZ;
	KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
	    ("dirhash bad offset"));
	ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
	dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
	nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);

	/* Update the `first free' list if necessary. */
	if (ofidx != nfidx) {
		/* If removing, scan forward for the next block. */
		if (dh->dh_firstfree[ofidx] == block) {
			for (i = block + 1; i < dh->dh_dirblks; i++)
				if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
					break;
			dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
		}

		/* Make this the new `first free' if necessary */
		if (dh->dh_firstfree[nfidx] > block ||
		    dh->dh_firstfree[nfidx] == -1)
			dh->dh_firstfree[nfidx] = block;
	}
}

/*
 * Find the specified name which should have the specified offset.
 * Returns a slot number, and panics on failure.
 *
 * `dh' must be locked on entry and remains so on return.
 */
static int
ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
{
	int slot;

	DIRHASH_ASSERT_LOCKED(dh);

	/* Find the entry. */
	KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
	slot = ufsdirhash_hash(dh, name, namelen);
	while (DH_ENTRY(dh, slot) != offset &&
	    DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
		slot = WRAPINCR(slot, dh->dh_hlen);
	if (DH_ENTRY(dh, slot) != offset)
		panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);

	return (slot);
}

/*
 * Remove the entry corresponding to the specified slot from the hash array.
 *
 * `dh' must be locked on entry and remains so on return.
 */
static void
ufsdirhash_delslot(struct dirhash *dh, int slot)
{
	int i;

	DIRHASH_ASSERT_LOCKED(dh);

	/* Mark the entry as deleted. */
	DH_ENTRY(dh, slot) = DIRHASH_DEL;

	/* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
	for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
		i = WRAPINCR(i, dh->dh_hlen);
	if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
		/* Walk back, converting the trailing tombstones to EMPTY. */
		i = WRAPDECR(i, dh->dh_hlen);
		while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
			DH_ENTRY(dh, i) = DIRHASH_EMPTY;
			dh->dh_hused--;
			i = WRAPDECR(i, dh->dh_hlen);
		}
		KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
	}
}

/*
 * Given a directory entry and its offset, find the offset of the
 * previous entry in the same DIRBLKSIZ-sized block.  Returns an
 * offset, or -1 if there is no previous entry in the block or some
 * other problem occurred.
 */
static doff_t
ufsdirhash_getprev(struct direct *dirp, doff_t offset)
{
	struct direct *dp;
	char *blkbuf;
	doff_t blkoff, prevoff;
	int entrypos, i;

	blkoff = rounddown2(offset, DIRBLKSIZ);	/* offset of start of block */
	entrypos = offset & (DIRBLKSIZ - 1);	/* entry relative to block */
	blkbuf = (char *)dirp - entrypos;
	prevoff = blkoff;

	/* If `offset' is the start of a block, there is no previous entry. */
	if (entrypos == 0)
		return (-1);

	/* Scan from the start of the block until we get to the entry. */
	for (i = 0; i < entrypos; i += dp->d_reclen) {
		dp = (struct direct *)(blkbuf + i);
		if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
			return (-1);	/* Corrupted directory. */
		prevoff = blkoff + i;
	}
	return (prevoff);
}

/*
 * Delete the given dirhash and reclaim its memory.  Assumes that
 * ufsdirhash_list is locked, and leaves it locked.  Also assumes
 * that dh is locked.  Returns the amount of memory freed.
1181 */ 1182 static int 1183 ufsdirhash_destroy(struct dirhash *dh) 1184 { 1185 doff_t **hash; 1186 u_int8_t *blkfree; 1187 int i, mem, narrays; 1188 1189 KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); 1190 1191 /* Remove it from the list and detach its memory. */ 1192 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 1193 dh->dh_onlist = 0; 1194 hash = dh->dh_hash; 1195 dh->dh_hash = NULL; 1196 blkfree = dh->dh_blkfree; 1197 dh->dh_blkfree = NULL; 1198 narrays = dh->dh_narrays; 1199 mem = dh->dh_memreq; 1200 dh->dh_memreq = 0; 1201 1202 /* Unlock dirhash and free the detached memory. */ 1203 ufsdirhash_release(dh); 1204 for (i = 0; i < narrays; i++) 1205 DIRHASH_BLKFREE(hash[i]); 1206 free(hash, M_DIRHASH); 1207 free(blkfree, M_DIRHASH); 1208 1209 /* Account for the returned memory. */ 1210 ufs_dirhashmem -= mem; 1211 1212 return (mem); 1213 } 1214 1215 /* 1216 * Try to free up `wanted' bytes by stealing memory from existing 1217 * dirhashes. Returns zero with list locked if successful. 1218 */ 1219 static int 1220 ufsdirhash_recycle(int wanted) 1221 { 1222 struct dirhash *dh; 1223 1224 DIRHASHLIST_LOCK(); 1225 dh = TAILQ_FIRST(&ufsdirhash_list); 1226 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { 1227 /* Decrement the score; only recycle if it becomes zero. */ 1228 if (dh == NULL || --dh->dh_score > 0) { 1229 DIRHASHLIST_UNLOCK(); 1230 return (-1); 1231 } 1232 /* 1233 * If we can't lock it it's in use and we don't want to 1234 * recycle it anyway. 1235 */ 1236 if (!sx_try_xlock(&dh->dh_lock)) { 1237 dh = TAILQ_NEXT(dh, dh_list); 1238 continue; 1239 } 1240 1241 ufsdirhash_destroy(dh); 1242 1243 /* Repeat if necessary. */ 1244 dh = TAILQ_FIRST(&ufsdirhash_list); 1245 } 1246 /* Success; return with list locked. */ 1247 return (0); 1248 } 1249 1250 /* 1251 * Callback that frees some dirhashes when the system is low on virtual memory. 
1252 */ 1253 static void 1254 ufsdirhash_lowmem() 1255 { 1256 struct dirhash *dh, *dh_temp; 1257 int memfreed, memwanted; 1258 1259 ufs_dirhashlowmemcount++; 1260 memfreed = 0; 1261 memwanted = ufs_dirhashmem * ufs_dirhashreclaimpercent / 100; 1262 1263 DIRHASHLIST_LOCK(); 1264 1265 /* 1266 * Reclaim up to memwanted from the oldest dirhashes. This will allow 1267 * us to make some progress when the system is running out of memory 1268 * without compromising the dinamicity of maximum age. If the situation 1269 * does not improve lowmem will be eventually retriggered and free some 1270 * other entry in the cache. The entries on the head of the list should 1271 * be the oldest. If during list traversal we can't get a lock on the 1272 * dirhash, it will be skipped. 1273 */ 1274 TAILQ_FOREACH_SAFE(dh, &ufsdirhash_list, dh_list, dh_temp) { 1275 if (sx_try_xlock(&dh->dh_lock)) 1276 memfreed += ufsdirhash_destroy(dh); 1277 if (memfreed >= memwanted) 1278 break; 1279 } 1280 DIRHASHLIST_UNLOCK(); 1281 } 1282 1283 static int 1284 ufsdirhash_set_reclaimpercent(SYSCTL_HANDLER_ARGS) 1285 { 1286 int error, v; 1287 1288 v = ufs_dirhashreclaimpercent; 1289 error = sysctl_handle_int(oidp, &v, v, req); 1290 if (error) 1291 return (error); 1292 if (req->newptr == NULL) 1293 return (error); 1294 if (v == ufs_dirhashreclaimpercent) 1295 return (0); 1296 1297 /* Refuse invalid percentages */ 1298 if (v < 0 || v > 100) 1299 return (EINVAL); 1300 ufs_dirhashreclaimpercent = v; 1301 return (0); 1302 } 1303 1304 void 1305 ufsdirhash_init() 1306 { 1307 ufs_dirhashmaxmem = lmax(roundup(hibufspace / 64, PAGE_SIZE), 1308 2 * 1024 * 1024); 1309 1310 ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t), 1311 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 1312 mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF); 1313 TAILQ_INIT(&ufsdirhash_list); 1314 1315 /* Register a callback function to handle low memory signals */ 1316 EVENTHANDLER_REGISTER(vm_lowmem, ufsdirhash_lowmem, NULL, 
1317 EVENTHANDLER_PRI_FIRST); 1318 } 1319 1320 void 1321 ufsdirhash_uninit() 1322 { 1323 KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit")); 1324 uma_zdestroy(ufsdirhash_zone); 1325 mtx_destroy(&ufsdirhash_mtx); 1326 } 1327 1328 #endif /* UFS_DIRHASH */ 1329