1 /*- 2 * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 1. Redistributions of source code must retain the above copyright 8 * notice, this list of conditions and the following disclaimer. 9 * 2. Redistributions in binary form must reproduce the above copyright 10 * notice, this list of conditions and the following disclaimer in the 11 * documentation and/or other materials provided with the distribution. 12 * 13 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 14 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 16 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 17 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 18 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 19 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 20 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 21 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 22 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 23 * SUCH DAMAGE. 24 */ 25 26 /* 27 * This implements a hash-based lookup scheme for UFS directories. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_ufs.h"

#ifdef UFS_DIRHASH

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/fnv_hash.h>
#include <sys/proc.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/refcount.h>
#include <sys/sysctl.h>
#include <sys/sx.h>
#include <sys/eventhandler.h>
#include <sys/time.h>
#include <vm/uma.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#include <ufs/ufs/dirhash.h>
#include <ufs/ufs/extattr.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

/* Wrap a slot/block index forward or backward around `limit'. */
#define WRAPINCR(val, limit)	(((val) + 1 == (limit)) ? 0 : ((val) + 1))
#define WRAPDECR(val, limit)	(((val) == 0) ? ((limit) - 1) : ((val) - 1))
/* True if the mount uses the old (4.3BSD) directory format. */
#define OFSFMT(vp)		((vp)->v_mount->mnt_maxsymlinklen <= 0)
/* Clamp a free-space count (in DIRALIGN units) to a dh_firstfree index. */
#define BLKFREE2IDX(n)		((n) > DH_NFSTATS ? DH_NFSTATS : (n))

static MALLOC_DEFINE(M_DIRHASH, "ufs_dirhash", "UFS directory hash tables");

/* Tunables and statistics exported under vfs.ufs.*. */
static int ufs_mindirhashsize = DIRBLKSIZ * 5;
SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW,
    &ufs_mindirhashsize,
    0, "minimum directory size in bytes for which to use hashed lookup");
static int ufs_dirhashmaxmem = 2 * 1024 * 1024;	/* NOTE: initial value. It is
						   tuned in ufsdirhash_init() */
SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem,
    0, "maximum allowed dirhash memory usage");
static int ufs_dirhashmem;
SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem,
    0, "current dirhash memory usage");
static int ufs_dirhashcheck = 0;
SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck,
    0, "enable extra sanity tests");
static int ufs_dirhashlowmemcount = 0;
SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_lowmemcount, CTLFLAG_RD,
    &ufs_dirhashlowmemcount, 0, "number of times low memory hook called");
static int ufs_dirhashreclaimage = 5;
SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_reclaimage, CTLFLAG_RW,
    &ufs_dirhashreclaimage, 0,
    "max time in seconds of hash inactivity before deletion in low VM events");

/* Forward declarations for the static helpers defined below. */
static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
static void ufsdirhash_delslot(struct dirhash *dh, int slot);
static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
	   doff_t offset);
static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
static int ufsdirhash_recycle(int wanted);
static void ufsdirhash_lowmem(void);
static void ufsdirhash_free_locked(struct inode *ip);

/* UMA zone for the per-array hash blocks (dh_hash[i]). */
static uma_zone_t ufsdirhash_zone;

#define DIRHASHLIST_LOCK()		mtx_lock(&ufsdirhash_mtx)
#define DIRHASHLIST_UNLOCK()		mtx_unlock(&ufsdirhash_mtx)
#define DIRHASH_BLKALLOC_WAITOK()	uma_zalloc(ufsdirhash_zone, M_WAITOK)
#define DIRHASH_BLKFREE(ptr)		uma_zfree(ufsdirhash_zone, (ptr))
#define DIRHASH_ASSERT_LOCKED(dh)					\
	sx_assert(&(dh)->dh_lock, SA_LOCKED)

/* Dirhash list; recently-used entries are near the tail. */
static TAILQ_HEAD(, dirhash) ufsdirhash_list;

/* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
static struct mtx ufsdirhash_mtx;

/*
 * Locking:
 *
 * The relationship between inode and dirhash is protected either by an
 * exclusive vnode lock or the vnode interlock where a shared vnode lock
 * may be used.  The dirhash_mtx is acquired after the dirhash lock.  To
 * handle teardown races, code wishing to lock the dirhash for an inode
 * when using a shared vnode lock must obtain a private reference on the
 * dirhash while holding the vnode interlock.  They can drop it once they
 * have obtained the dirhash lock and verified that the dirhash wasn't
 * recycled while they waited for the dirhash lock.
 *
 * ufsdirhash_build() acquires a shared lock on the dirhash when it is
 * successful.  This lock is released after a call to ufsdirhash_lookup().
 *
 * Functions requiring exclusive access use ufsdirhash_acquire() which may
 * free a dirhash structure that was recycled by ufsdirhash_recycle().
 *
 * The dirhash lock may be held across io operations.
 *
 * WITNESS reports a lock order reversal between the "bufwait" lock
 * and the "dirhash" lock.  However, this specific reversal will not
 * cause a deadlock.  To get a deadlock, one would have to lock a
 * buffer followed by the dirhash while a second thread locked a
 * buffer while holding the dirhash lock.  The second order can happen
 * under a shared or exclusive vnode lock for the associated directory
 * in lookup().  The first order, however, can only happen under an
 * exclusive vnode lock (e.g. unlink(), rename(), etc.).  Thus, for
 * a thread to be doing a "bufwait" -> "dirhash" order, it has to hold
 * an exclusive vnode lock.  That exclusive vnode lock will prevent
 * any other threads from doing a "dirhash" -> "bufwait" order.
 */

/*
 * Take an additional reference on `dh'.  Callers use a private reference
 * to keep the structure alive across a sleep for its sx lock.
 */
static void
ufsdirhash_hold(struct dirhash *dh)
{

	refcount_acquire(&dh->dh_refcount);
}

/*
 * Drop one reference on `dh'; the final release destroys the lock and
 * frees the structure itself (the hash arrays are freed separately in
 * ufsdirhash_free_locked()).
 */
static void
ufsdirhash_drop(struct dirhash *dh)
{

	if (refcount_release(&dh->dh_refcount)) {
		sx_destroy(&dh->dh_lock);
		free(dh, M_DIRHASH);
	}
}

/*
 * Release the lock on a dirhash.
 */
static void
ufsdirhash_release(struct dirhash *dh)
{

	sx_unlock(&dh->dh_lock);
}

/*
 * Either acquire an existing hash locked shared or create a new hash and
 * return it exclusively locked.  May return NULL if the allocation fails.
 *
 * The vnode interlock is used to protect the i_dirhash pointer from
 * simultaneous access while only a shared vnode lock is held.
 */
static struct dirhash *
ufsdirhash_create(struct inode *ip)
{
	struct dirhash *ndh;
	struct dirhash *dh;
	struct vnode *vp;
	int error;		/* NOTE(review): set but never used below */

	error = 0;
	ndh = dh = NULL;
	vp = ip->i_vnode;
	for (;;) {
		/* Racy check for i_dirhash to prefetch a dirhash structure. */
		if (ip->i_dirhash == NULL && ndh == NULL) {
			/* M_NOWAIT: a failed build falls back to linear scan. */
			ndh = malloc(sizeof *dh, M_DIRHASH,
			    M_NOWAIT | M_ZERO);
			if (ndh == NULL)
				return (NULL);
			refcount_init(&ndh->dh_refcount, 1);

			/*
			 * The DUPOK is to prevent warnings from the
			 * sx_slock() a few lines down which is safe
			 * since the duplicate lock in that case is
			 * the one for this dirhash we are creating
			 * now which has no external references until
			 * after this function returns.
			 */
			sx_init_flags(&ndh->dh_lock, "dirhash", SX_DUPOK);
			sx_xlock(&ndh->dh_lock);
		}
		/*
		 * Check i_dirhash.  If it's NULL just try to use a
		 * preallocated structure.  If none exists loop and try again.
		 */
		VI_LOCK(vp);
		dh = ip->i_dirhash;
		if (dh == NULL) {
			/* Install our preallocated hash (may be NULL). */
			ip->i_dirhash = ndh;
			VI_UNLOCK(vp);
			if (ndh == NULL)
				continue;
			/* Returned exclusively locked, refcount 1. */
			return (ndh);
		}
		/* Pin the existing hash before sleeping for its lock. */
		ufsdirhash_hold(dh);
		VI_UNLOCK(vp);

		/* Acquire a shared lock on existing hashes. */
		sx_slock(&dh->dh_lock);

		/* The hash could've been recycled while we were waiting. */
		VI_LOCK(vp);
		if (ip->i_dirhash != dh) {
			VI_UNLOCK(vp);
			ufsdirhash_release(dh);
			ufsdirhash_drop(dh);
			continue;
		}
		VI_UNLOCK(vp);
		/* Still attached: the inode's own reference keeps it alive. */
		ufsdirhash_drop(dh);

		/* If the hash is still valid we've succeeded. */
		if (dh->dh_hash != NULL)
			break;
		/*
		 * If the hash is NULL it has been recycled.  Try to upgrade
		 * so we can recreate it.  If we fail the upgrade, drop our
		 * lock and try again.
		 */
		if (sx_try_upgrade(&dh->dh_lock))
			break;
		sx_sunlock(&dh->dh_lock);
	}
	/* Free the preallocated structure if it was not necessary. */
	if (ndh) {
		ufsdirhash_release(ndh);
		ufsdirhash_drop(ndh);
	}
	return (dh);
}

/*
 * Acquire an exclusive lock on an existing hash.  Requires an exclusive
 * vnode lock to protect the i_dirhash pointer.  hashes that have been
 * recycled are reclaimed here and NULL is returned.
 */
static struct dirhash *
ufsdirhash_acquire(struct inode *ip)
{
	struct dirhash *dh;
	struct vnode *vp;	/* NOTE(review): assigned but never used */

	ASSERT_VOP_ELOCKED(ip->i_vnode, __FUNCTION__);

	vp = ip->i_vnode;
	dh = ip->i_dirhash;
	if (dh == NULL)
		return (NULL);
	sx_xlock(&dh->dh_lock);
	if (dh->dh_hash != NULL)
		return (dh);
	/* Recycled hash: tear down the husk; lock is released inside. */
	ufsdirhash_free_locked(ip);
	return (NULL);
}

/*
 * Acquire exclusively and free the hash pointed to by ip.  Works with a
 * shared or exclusive vnode lock.
294 */ 295 void 296 ufsdirhash_free(struct inode *ip) 297 { 298 struct dirhash *dh; 299 struct vnode *vp; 300 301 vp = ip->i_vnode; 302 for (;;) { 303 /* Grab a reference on this inode's dirhash if it has one. */ 304 VI_LOCK(vp); 305 dh = ip->i_dirhash; 306 if (dh == NULL) { 307 VI_UNLOCK(vp); 308 return; 309 } 310 ufsdirhash_hold(dh); 311 VI_UNLOCK(vp); 312 313 /* Exclusively lock the dirhash. */ 314 sx_xlock(&dh->dh_lock); 315 316 /* If this dirhash still belongs to this inode, then free it. */ 317 VI_LOCK(vp); 318 if (ip->i_dirhash == dh) { 319 VI_UNLOCK(vp); 320 ufsdirhash_drop(dh); 321 break; 322 } 323 VI_UNLOCK(vp); 324 325 /* 326 * This inode's dirhash has changed while we were 327 * waiting for the dirhash lock, so try again. 328 */ 329 ufsdirhash_release(dh); 330 ufsdirhash_drop(dh); 331 } 332 ufsdirhash_free_locked(ip); 333 } 334 335 /* 336 * Attempt to build up a hash table for the directory contents in 337 * inode 'ip'. Returns 0 on success, or -1 of the operation failed. 338 */ 339 int 340 ufsdirhash_build(struct inode *ip) 341 { 342 struct dirhash *dh; 343 struct buf *bp = NULL; 344 struct direct *ep; 345 struct vnode *vp; 346 doff_t bmask, pos; 347 int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot; 348 349 /* Take care of a decreased sysctl value. */ 350 while (ufs_dirhashmem > ufs_dirhashmaxmem) { 351 if (ufsdirhash_recycle(0) != 0) 352 return (-1); 353 /* Recycled enough memory, so unlock the list. */ 354 DIRHASHLIST_UNLOCK(); 355 } 356 357 /* Check if we can/should use dirhash. */ 358 if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode) || 359 ip->i_effnlink == 0) { 360 if (ip->i_dirhash) 361 ufsdirhash_free(ip); 362 return (-1); 363 } 364 dh = ufsdirhash_create(ip); 365 if (dh == NULL) 366 return (-1); 367 if (dh->dh_hash != NULL) 368 return (0); 369 370 vp = ip->i_vnode; 371 /* Allocate 50% more entries than this dir size could ever need. 
*/ 372 KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size")); 373 nslots = ip->i_size / DIRECTSIZ(1); 374 nslots = (nslots * 3 + 1) / 2; 375 narrays = howmany(nslots, DH_NBLKOFF); 376 nslots = narrays * DH_NBLKOFF; 377 dirblocks = howmany(ip->i_size, DIRBLKSIZ); 378 nblocks = (dirblocks * 3 + 1) / 2; 379 memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) + 380 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) + 381 nblocks * sizeof(*dh->dh_blkfree); 382 DIRHASHLIST_LOCK(); 383 if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) { 384 DIRHASHLIST_UNLOCK(); 385 if (memreqd > ufs_dirhashmaxmem / 2) 386 goto fail; 387 /* Try to free some space. */ 388 if (ufsdirhash_recycle(memreqd) != 0) 389 goto fail; 390 /* Enough was freed, and list has been locked. */ 391 } 392 ufs_dirhashmem += memreqd; 393 DIRHASHLIST_UNLOCK(); 394 395 /* Initialise the hash table and block statistics. */ 396 dh->dh_memreq = memreqd; 397 dh->dh_narrays = narrays; 398 dh->dh_hlen = nslots; 399 dh->dh_nblk = nblocks; 400 dh->dh_dirblks = dirblocks; 401 for (i = 0; i < DH_NFSTATS; i++) 402 dh->dh_firstfree[i] = -1; 403 dh->dh_firstfree[DH_NFSTATS] = 0; 404 dh->dh_hused = 0; 405 dh->dh_seqoff = -1; 406 dh->dh_score = DH_SCOREINIT; 407 dh->dh_lastused = time_second; 408 409 /* 410 * Use non-blocking mallocs so that we will revert to a linear 411 * lookup on failure rather than potentially blocking forever. 
412 */ 413 dh->dh_hash = malloc(narrays * sizeof(dh->dh_hash[0]), 414 M_DIRHASH, M_NOWAIT | M_ZERO); 415 if (dh->dh_hash == NULL) 416 goto fail; 417 dh->dh_blkfree = malloc(nblocks * sizeof(dh->dh_blkfree[0]), 418 M_DIRHASH, M_NOWAIT); 419 if (dh->dh_blkfree == NULL) 420 goto fail; 421 for (i = 0; i < narrays; i++) { 422 if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL) 423 goto fail; 424 for (j = 0; j < DH_NBLKOFF; j++) 425 dh->dh_hash[i][j] = DIRHASH_EMPTY; 426 } 427 for (i = 0; i < dirblocks; i++) 428 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN; 429 bmask = vp->v_mount->mnt_stat.f_iosize - 1; 430 pos = 0; 431 while (pos < ip->i_size) { 432 /* If necessary, get the next directory block. */ 433 if ((pos & bmask) == 0) { 434 if (bp != NULL) 435 brelse(bp); 436 if (UFS_BLKATOFF(vp, (off_t)pos, NULL, &bp) != 0) 437 goto fail; 438 } 439 440 /* Add this entry to the hash. */ 441 ep = (struct direct *)((char *)bp->b_data + (pos & bmask)); 442 if (ep->d_reclen == 0 || ep->d_reclen > 443 DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) { 444 /* Corrupted directory. */ 445 brelse(bp); 446 goto fail; 447 } 448 if (ep->d_ino != 0) { 449 /* Add the entry (simplified ufsdirhash_add). */ 450 slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen); 451 while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY) 452 slot = WRAPINCR(slot, dh->dh_hlen); 453 dh->dh_hused++; 454 DH_ENTRY(dh, slot) = pos; 455 ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep)); 456 } 457 pos += ep->d_reclen; 458 } 459 460 if (bp != NULL) 461 brelse(bp); 462 DIRHASHLIST_LOCK(); 463 TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list); 464 dh->dh_onlist = 1; 465 DIRHASHLIST_UNLOCK(); 466 sx_downgrade(&dh->dh_lock); 467 return (0); 468 469 fail: 470 ufsdirhash_free_locked(ip); 471 return (-1); 472 } 473 474 /* 475 * Free any hash table associated with inode 'ip'. 
476 */ 477 static void 478 ufsdirhash_free_locked(struct inode *ip) 479 { 480 struct dirhash *dh; 481 struct vnode *vp; 482 int i; 483 484 DIRHASH_ASSERT_LOCKED(ip->i_dirhash); 485 486 /* 487 * Clear the pointer in the inode to prevent new threads from 488 * finding the dead structure. 489 */ 490 vp = ip->i_vnode; 491 VI_LOCK(vp); 492 dh = ip->i_dirhash; 493 ip->i_dirhash = NULL; 494 VI_UNLOCK(vp); 495 496 /* 497 * Remove the hash from the list since we are going to free its 498 * memory. 499 */ 500 DIRHASHLIST_LOCK(); 501 if (dh->dh_onlist) 502 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 503 ufs_dirhashmem -= dh->dh_memreq; 504 DIRHASHLIST_UNLOCK(); 505 506 /* 507 * At this point, any waiters for the lock should hold their 508 * own reference on the dirhash structure. They will drop 509 * that reference once they grab the vnode interlock and see 510 * that ip->i_dirhash is NULL. 511 */ 512 sx_xunlock(&dh->dh_lock); 513 514 /* 515 * Handle partially recycled as well as fully constructed hashes. 516 */ 517 if (dh->dh_hash != NULL) { 518 for (i = 0; i < dh->dh_narrays; i++) 519 if (dh->dh_hash[i] != NULL) 520 DIRHASH_BLKFREE(dh->dh_hash[i]); 521 free(dh->dh_hash, M_DIRHASH); 522 if (dh->dh_blkfree != NULL) 523 free(dh->dh_blkfree, M_DIRHASH); 524 } 525 526 /* 527 * Drop the inode's reference to the data structure. 528 */ 529 ufsdirhash_drop(dh); 530 } 531 532 /* 533 * Find the offset of the specified name within the given inode. 534 * Returns 0 on success, ENOENT if the entry does not exist, or 535 * EJUSTRETURN if the caller should revert to a linear search. 536 * 537 * If successful, the directory offset is stored in *offp, and a 538 * pointer to a struct buf containing the entry is stored in *bpp. If 539 * prevoffp is non-NULL, the offset of the previous entry within 540 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry 541 * is the first in a block, the start of the block is used). 542 * 543 * Must be called with the hash locked. 
Returns with the hash unlocked. 544 */ 545 int 546 ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp, 547 struct buf **bpp, doff_t *prevoffp) 548 { 549 struct dirhash *dh, *dh_next; 550 struct direct *dp; 551 struct vnode *vp; 552 struct buf *bp; 553 doff_t blkoff, bmask, offset, prevoff, seqoff; 554 int i, slot; 555 int error; 556 557 dh = ip->i_dirhash; 558 KASSERT(dh != NULL && dh->dh_hash != NULL, 559 ("ufsdirhash_lookup: Invalid dirhash %p\n", dh)); 560 DIRHASH_ASSERT_LOCKED(dh); 561 /* 562 * Move this dirhash towards the end of the list if it has a 563 * score higher than the next entry, and acquire the dh_lock. 564 */ 565 DIRHASHLIST_LOCK(); 566 if (TAILQ_NEXT(dh, dh_list) != NULL) { 567 /* 568 * If the new score will be greater than that of the next 569 * entry, then move this entry past it. With both mutexes 570 * held, dh_next won't go away, but its dh_score could 571 * change; that's not important since it is just a hint. 572 */ 573 if ((dh_next = TAILQ_NEXT(dh, dh_list)) != NULL && 574 dh->dh_score >= dh_next->dh_score) { 575 KASSERT(dh->dh_onlist, ("dirhash: not on list")); 576 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 577 TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh, 578 dh_list); 579 } 580 } 581 /* Update the score. */ 582 if (dh->dh_score < DH_SCOREMAX) 583 dh->dh_score++; 584 585 /* Update last used time. */ 586 dh->dh_lastused = time_second; 587 DIRHASHLIST_UNLOCK(); 588 589 vp = ip->i_vnode; 590 bmask = vp->v_mount->mnt_stat.f_iosize - 1; 591 blkoff = -1; 592 bp = NULL; 593 seqoff = dh->dh_seqoff; 594 restart: 595 slot = ufsdirhash_hash(dh, name, namelen); 596 597 if (seqoff != -1) { 598 /* 599 * Sequential access optimisation. seqoff contains the 600 * offset of the directory entry immediately following 601 * the last entry that was looked up. Check if this offset 602 * appears in the hash chain for the name we are looking for. 
603 */ 604 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY; 605 i = WRAPINCR(i, dh->dh_hlen)) 606 if (offset == seqoff) 607 break; 608 if (offset == seqoff) { 609 /* 610 * We found an entry with the expected offset. This 611 * is probably the entry we want, but if not, the 612 * code below will retry. 613 */ 614 slot = i; 615 } else 616 seqoff = -1; 617 } 618 619 for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY; 620 slot = WRAPINCR(slot, dh->dh_hlen)) { 621 if (offset == DIRHASH_DEL) 622 continue; 623 if (offset < 0 || offset >= ip->i_size) 624 panic("ufsdirhash_lookup: bad offset in hash array"); 625 if ((offset & ~bmask) != blkoff) { 626 if (bp != NULL) 627 brelse(bp); 628 blkoff = offset & ~bmask; 629 if (UFS_BLKATOFF(vp, (off_t)blkoff, NULL, &bp) != 0) { 630 error = EJUSTRETURN; 631 goto fail; 632 } 633 } 634 KASSERT(bp != NULL, ("no buffer allocated")); 635 dp = (struct direct *)(bp->b_data + (offset & bmask)); 636 if (dp->d_reclen == 0 || dp->d_reclen > 637 DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) { 638 /* Corrupted directory. */ 639 error = EJUSTRETURN; 640 goto fail; 641 } 642 if (dp->d_namlen == namelen && 643 bcmp(dp->d_name, name, namelen) == 0) { 644 /* Found. Get the prev offset if needed. */ 645 if (prevoffp != NULL) { 646 if (offset & (DIRBLKSIZ - 1)) { 647 prevoff = ufsdirhash_getprev(dp, 648 offset); 649 if (prevoff == -1) { 650 error = EJUSTRETURN; 651 goto fail; 652 } 653 } else 654 prevoff = offset; 655 *prevoffp = prevoff; 656 } 657 658 /* Update offset. */ 659 dh->dh_seqoff = offset + DIRSIZ(0, dp); 660 *bpp = bp; 661 *offp = offset; 662 ufsdirhash_release(dh); 663 return (0); 664 } 665 666 /* 667 * When the name doesn't match in the sequential 668 * optimization case, go back and search normally. 
669 */ 670 if (seqoff != -1) { 671 seqoff = -1; 672 goto restart; 673 } 674 } 675 error = ENOENT; 676 fail: 677 ufsdirhash_release(dh); 678 if (bp != NULL) 679 brelse(bp); 680 return (error); 681 } 682 683 /* 684 * Find a directory block with room for 'slotneeded' bytes. Returns 685 * the offset of the directory entry that begins the free space. 686 * This will either be the offset of an existing entry that has free 687 * space at the end, or the offset of an entry with d_ino == 0 at 688 * the start of a DIRBLKSIZ block. 689 * 690 * To use the space, the caller may need to compact existing entries in 691 * the directory. The total number of bytes in all of the entries involved 692 * in the compaction is stored in *slotsize. In other words, all of 693 * the entries that must be compacted are exactly contained in the 694 * region beginning at the returned offset and spanning *slotsize bytes. 695 * 696 * Returns -1 if no space was found, indicating that the directory 697 * must be extended. 698 */ 699 doff_t 700 ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize) 701 { 702 struct direct *dp; 703 struct dirhash *dh; 704 struct buf *bp; 705 doff_t pos, slotstart; 706 int dirblock, error, freebytes, i; 707 708 dh = ip->i_dirhash; 709 KASSERT(dh != NULL && dh->dh_hash != NULL, 710 ("ufsdirhash_findfree: Invalid dirhash %p\n", dh)); 711 DIRHASH_ASSERT_LOCKED(dh); 712 713 /* Find a directory block with the desired free space. */ 714 dirblock = -1; 715 for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++) 716 if ((dirblock = dh->dh_firstfree[i]) != -1) 717 break; 718 if (dirblock == -1) 719 return (-1); 720 721 KASSERT(dirblock < dh->dh_nblk && 722 dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN), 723 ("ufsdirhash_findfree: bad stats")); 724 pos = dirblock * DIRBLKSIZ; 725 error = UFS_BLKATOFF(ip->i_vnode, (off_t)pos, (char **)&dp, &bp); 726 if (error) 727 return (-1); 728 729 /* Find the first entry with free space. 
*/ 730 for (i = 0; i < DIRBLKSIZ; ) { 731 if (dp->d_reclen == 0) { 732 brelse(bp); 733 return (-1); 734 } 735 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp)) 736 break; 737 i += dp->d_reclen; 738 dp = (struct direct *)((char *)dp + dp->d_reclen); 739 } 740 if (i > DIRBLKSIZ) { 741 brelse(bp); 742 return (-1); 743 } 744 slotstart = pos + i; 745 746 /* Find the range of entries needed to get enough space */ 747 freebytes = 0; 748 while (i < DIRBLKSIZ && freebytes < slotneeded) { 749 freebytes += dp->d_reclen; 750 if (dp->d_ino != 0) 751 freebytes -= DIRSIZ(0, dp); 752 if (dp->d_reclen == 0) { 753 brelse(bp); 754 return (-1); 755 } 756 i += dp->d_reclen; 757 dp = (struct direct *)((char *)dp + dp->d_reclen); 758 } 759 if (i > DIRBLKSIZ) { 760 brelse(bp); 761 return (-1); 762 } 763 if (freebytes < slotneeded) 764 panic("ufsdirhash_findfree: free mismatch"); 765 brelse(bp); 766 *slotsize = pos + i - slotstart; 767 return (slotstart); 768 } 769 770 /* 771 * Return the start of the unused space at the end of a directory, or 772 * -1 if there are no trailing unused blocks. 773 */ 774 doff_t 775 ufsdirhash_enduseful(struct inode *ip) 776 { 777 778 struct dirhash *dh; 779 int i; 780 781 dh = ip->i_dirhash; 782 DIRHASH_ASSERT_LOCKED(dh); 783 KASSERT(dh != NULL && dh->dh_hash != NULL, 784 ("ufsdirhash_enduseful: Invalid dirhash %p\n", dh)); 785 786 if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) 787 return (-1); 788 789 for (i = dh->dh_dirblks - 1; i >= 0; i--) 790 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 791 break; 792 793 return ((doff_t)(i + 1) * DIRBLKSIZ); 794 } 795 796 /* 797 * Insert information into the hash about a new directory entry. dirp 798 * points to a struct direct containing the entry, and offset specifies 799 * the offset of this entry. 
800 */ 801 void 802 ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset) 803 { 804 struct dirhash *dh; 805 int slot; 806 807 if ((dh = ufsdirhash_acquire(ip)) == NULL) 808 return; 809 810 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 811 ("ufsdirhash_add: bad offset")); 812 /* 813 * Normal hash usage is < 66%. If the usage gets too high then 814 * remove the hash entirely and let it be rebuilt later. 815 */ 816 if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) { 817 ufsdirhash_free_locked(ip); 818 return; 819 } 820 821 /* Find a free hash slot (empty or deleted), and add the entry. */ 822 slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen); 823 while (DH_ENTRY(dh, slot) >= 0) 824 slot = WRAPINCR(slot, dh->dh_hlen); 825 if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY) 826 dh->dh_hused++; 827 DH_ENTRY(dh, slot) = offset; 828 829 /* Update last used time. */ 830 dh->dh_lastused = time_second; 831 832 /* Update the per-block summary info. */ 833 ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp)); 834 ufsdirhash_release(dh); 835 } 836 837 /* 838 * Remove the specified directory entry from the hash. The entry to remove 839 * is defined by the name in `dirp', which must exist at the specified 840 * `offset' within the directory. 841 */ 842 void 843 ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset) 844 { 845 struct dirhash *dh; 846 int slot; 847 848 if ((dh = ufsdirhash_acquire(ip)) == NULL) 849 return; 850 851 KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ, 852 ("ufsdirhash_remove: bad offset")); 853 /* Find the entry */ 854 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset); 855 856 /* Remove the hash entry. */ 857 ufsdirhash_delslot(dh, slot); 858 859 /* Update the per-block summary info. */ 860 ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp)); 861 ufsdirhash_release(dh); 862 } 863 864 /* 865 * Change the offset associated with a directory entry in the hash. Used 866 * when compacting directory blocks. 
867 */ 868 void 869 ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff, 870 doff_t newoff) 871 { 872 struct dirhash *dh; 873 int slot; 874 875 if ((dh = ufsdirhash_acquire(ip)) == NULL) 876 return; 877 878 KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ && 879 newoff < dh->dh_dirblks * DIRBLKSIZ, 880 ("ufsdirhash_move: bad offset")); 881 /* Find the entry, and update the offset. */ 882 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff); 883 DH_ENTRY(dh, slot) = newoff; 884 ufsdirhash_release(dh); 885 } 886 887 /* 888 * Inform dirhash that the directory has grown by one block that 889 * begins at offset (i.e. the new length is offset + DIRBLKSIZ). 890 */ 891 void 892 ufsdirhash_newblk(struct inode *ip, doff_t offset) 893 { 894 struct dirhash *dh; 895 int block; 896 897 if ((dh = ufsdirhash_acquire(ip)) == NULL) 898 return; 899 900 KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ, 901 ("ufsdirhash_newblk: bad offset")); 902 block = offset / DIRBLKSIZ; 903 if (block >= dh->dh_nblk) { 904 /* Out of space; must rebuild. */ 905 ufsdirhash_free_locked(ip); 906 return; 907 } 908 dh->dh_dirblks = block + 1; 909 910 /* Account for the new free block. */ 911 dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN; 912 if (dh->dh_firstfree[DH_NFSTATS] == -1) 913 dh->dh_firstfree[DH_NFSTATS] = block; 914 ufsdirhash_release(dh); 915 } 916 917 /* 918 * Inform dirhash that the directory is being truncated. 919 */ 920 void 921 ufsdirhash_dirtrunc(struct inode *ip, doff_t offset) 922 { 923 struct dirhash *dh; 924 int block, i; 925 926 if ((dh = ufsdirhash_acquire(ip)) == NULL) 927 return; 928 929 KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ, 930 ("ufsdirhash_dirtrunc: bad offset")); 931 block = howmany(offset, DIRBLKSIZ); 932 /* 933 * If the directory shrinks to less than 1/8 of dh_nblk blocks 934 * (about 20% of its original size due to the 50% extra added in 935 * ufsdirhash_build) then free it, and let the caller rebuild 936 * if necessary. 
937 */ 938 if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) { 939 ufsdirhash_free_locked(ip); 940 return; 941 } 942 943 /* 944 * Remove any `first free' information pertaining to the 945 * truncated blocks. All blocks we're removing should be 946 * completely unused. 947 */ 948 if (dh->dh_firstfree[DH_NFSTATS] >= block) 949 dh->dh_firstfree[DH_NFSTATS] = -1; 950 for (i = block; i < dh->dh_dirblks; i++) 951 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN) 952 panic("ufsdirhash_dirtrunc: blocks in use"); 953 for (i = 0; i < DH_NFSTATS; i++) 954 if (dh->dh_firstfree[i] >= block) 955 panic("ufsdirhash_dirtrunc: first free corrupt"); 956 dh->dh_dirblks = block; 957 ufsdirhash_release(dh); 958 } 959 960 /* 961 * Debugging function to check that the dirhash information about 962 * a directory block matches its actual contents. Panics if a mismatch 963 * is detected. 964 * 965 * On entry, `buf' should point to the start of an in-core 966 * DIRBLKSIZ-sized directory block, and `offset' should contain the 967 * offset from the start of the directory of that block. 968 */ 969 void 970 ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset) 971 { 972 struct dirhash *dh; 973 struct direct *dp; 974 int block, ffslot, i, nfree; 975 976 if (!ufs_dirhashcheck) 977 return; 978 if ((dh = ufsdirhash_acquire(ip)) == NULL) 979 return; 980 981 block = offset / DIRBLKSIZ; 982 if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks) 983 panic("ufsdirhash_checkblock: bad offset"); 984 985 nfree = 0; 986 for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) { 987 dp = (struct direct *)(buf + i); 988 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ) 989 panic("ufsdirhash_checkblock: bad dir"); 990 991 if (dp->d_ino == 0) { 992 #if 0 993 /* 994 * XXX entries with d_ino == 0 should only occur 995 * at the start of a DIRBLKSIZ block. However the 996 * ufs code is tolerant of such entries at other 997 * offsets, and fsck does not fix them. 
998 */ 999 if (i != 0) 1000 panic("ufsdirhash_checkblock: bad dir inode"); 1001 #endif 1002 nfree += dp->d_reclen; 1003 continue; 1004 } 1005 1006 /* Check that the entry exists (will panic if it doesn't). */ 1007 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i); 1008 1009 nfree += dp->d_reclen - DIRSIZ(0, dp); 1010 } 1011 if (i != DIRBLKSIZ) 1012 panic("ufsdirhash_checkblock: bad dir end"); 1013 1014 if (dh->dh_blkfree[block] * DIRALIGN != nfree) 1015 panic("ufsdirhash_checkblock: bad free count"); 1016 1017 ffslot = BLKFREE2IDX(nfree / DIRALIGN); 1018 for (i = 0; i <= DH_NFSTATS; i++) 1019 if (dh->dh_firstfree[i] == block && i != ffslot) 1020 panic("ufsdirhash_checkblock: bad first-free"); 1021 if (dh->dh_firstfree[ffslot] == -1) 1022 panic("ufsdirhash_checkblock: missing first-free entry"); 1023 ufsdirhash_release(dh); 1024 } 1025 1026 /* 1027 * Hash the specified filename into a dirhash slot. 1028 */ 1029 static int 1030 ufsdirhash_hash(struct dirhash *dh, char *name, int namelen) 1031 { 1032 u_int32_t hash; 1033 1034 /* 1035 * We hash the name and then some other bit of data that is 1036 * invariant over the dirhash's lifetime. Otherwise names 1037 * differing only in the last byte are placed close to one 1038 * another in the table, which is bad for linear probing. 1039 */ 1040 hash = fnv_32_buf(name, namelen, FNV1_32_INIT); 1041 hash = fnv_32_buf(&dh, sizeof(dh), hash); 1042 return (hash % dh->dh_hlen); 1043 } 1044 1045 /* 1046 * Adjust the number of free bytes in the block containing `offset' 1047 * by the value specified by `diff'. 1048 * 1049 * The caller must ensure we have exclusive access to `dh'; normally 1050 * that means that dh_lock should be held, but this is also called 1051 * from ufsdirhash_build() where exclusive access can be assumed. 1052 */ 1053 static void 1054 ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff) 1055 { 1056 int block, i, nfidx, ofidx; 1057 1058 /* Update the per-block summary info. 
	 */
	block = offset / DIRBLKSIZ;
	KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
	    ("dirhash bad offset"));
	/* Record the old and new first-free index for this block. */
	ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
	dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
	nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);

	/* Update the `first free' list if necessary. */
	if (ofidx != nfidx) {
		/* If removing, scan forward for the next block. */
		if (dh->dh_firstfree[ofidx] == block) {
			for (i = block + 1; i < dh->dh_dirblks; i++)
				if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
					break;
			dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
		}

		/* Make this the new `first free' if necessary */
		if (dh->dh_firstfree[nfidx] > block ||
		    dh->dh_firstfree[nfidx] == -1)
			dh->dh_firstfree[nfidx] = block;
	}
}

/*
 * Find the specified name which should have the specified offset.
 * Returns a slot number, and panics on failure.
 *
 * `dh' must be locked on entry and remains so on return.
 */
static int
ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
{
	int slot;

	DIRHASH_ASSERT_LOCKED(dh);

	/*
	 * Find the entry by linear-probing from the name's home slot.
	 * The table is never full (dh_hused < dh_hlen), so the probe
	 * terminates at either the entry or an empty slot.
	 */
	KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
	slot = ufsdirhash_hash(dh, name, namelen);
	while (DH_ENTRY(dh, slot) != offset &&
	    DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
		slot = WRAPINCR(slot, dh->dh_hlen);
	if (DH_ENTRY(dh, slot) != offset)
		panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);

	return (slot);
}

/*
 * Remove the entry corresponding to the specified slot from the hash array.
 *
 * `dh' must be locked on entry and remains so on return.
 */
static void
ufsdirhash_delslot(struct dirhash *dh, int slot)
{
	int i;

	DIRHASH_ASSERT_LOCKED(dh);

	/* Mark the entry as deleted.
	 */
	DH_ENTRY(dh, slot) = DIRHASH_DEL;

	/* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
	for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
		i = WRAPINCR(i, dh->dh_hlen);
	if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
		/*
		 * Walk backwards turning the trailing run of DIRHASH_DEL
		 * tombstones (including the slot just deleted) back into
		 * DIRHASH_EMPTY: a probe that would cross them stops at
		 * the following empty slot anyway.
		 */
		i = WRAPDECR(i, dh->dh_hlen);
		while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
			DH_ENTRY(dh, i) = DIRHASH_EMPTY;
			dh->dh_hused--;
			i = WRAPDECR(i, dh->dh_hlen);
		}
		KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
	}
}

/*
 * Given a directory entry and its offset, find the offset of the
 * previous entry in the same DIRBLKSIZ-sized block. Returns an
 * offset, or -1 if there is no previous entry in the block or some
 * other problem occurred.
 */
static doff_t
ufsdirhash_getprev(struct direct *dirp, doff_t offset)
{
	struct direct *dp;
	char *blkbuf;
	doff_t blkoff, prevoff;
	int entrypos, i;

	blkoff = offset & ~(DIRBLKSIZ - 1);	/* offset of start of block */
	entrypos = offset & (DIRBLKSIZ - 1);	/* entry relative to block */
	blkbuf = (char *)dirp - entrypos;	/* start of the in-core block */
	prevoff = blkoff;

	/* If `offset' is the start of a block, there is no previous entry. */
	if (entrypos == 0)
		return (-1);

	/* Scan from the start of the block until we get to the entry. */
	for (i = 0; i < entrypos; i += dp->d_reclen) {
		dp = (struct direct *)(blkbuf + i);
		if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
			return (-1);	/* Corrupted directory. */
		prevoff = blkoff + i;
	}
	return (prevoff);
}

/*
 * Delete the given dirhash and reclaim its memory. Assumes that
 * ufsdirhash_list is locked, and leaves it locked. Also assumes
 * that dh is locked. Returns the amount of memory freed.
1174 */ 1175 static int 1176 ufsdirhash_destroy(struct dirhash *dh) 1177 { 1178 doff_t **hash; 1179 u_int8_t *blkfree; 1180 int i, mem, narrays; 1181 1182 KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list")); 1183 1184 /* Remove it from the list and detach its memory. */ 1185 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list); 1186 dh->dh_onlist = 0; 1187 hash = dh->dh_hash; 1188 dh->dh_hash = NULL; 1189 blkfree = dh->dh_blkfree; 1190 dh->dh_blkfree = NULL; 1191 narrays = dh->dh_narrays; 1192 mem = dh->dh_memreq; 1193 dh->dh_memreq = 0; 1194 1195 /* Unlock dirhash and free the detached memory. */ 1196 ufsdirhash_release(dh); 1197 for (i = 0; i < narrays; i++) 1198 DIRHASH_BLKFREE(hash[i]); 1199 free(hash, M_DIRHASH); 1200 free(blkfree, M_DIRHASH); 1201 1202 /* Account for the returned memory. */ 1203 ufs_dirhashmem -= mem; 1204 1205 return (mem); 1206 } 1207 1208 /* 1209 * Try to free up `wanted' bytes by stealing memory from existing 1210 * dirhashes. Returns zero with list locked if successful. 1211 */ 1212 static int 1213 ufsdirhash_recycle(int wanted) 1214 { 1215 struct dirhash *dh; 1216 1217 DIRHASHLIST_LOCK(); 1218 dh = TAILQ_FIRST(&ufsdirhash_list); 1219 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) { 1220 /* Decrement the score; only recycle if it becomes zero. */ 1221 if (dh == NULL || --dh->dh_score > 0) { 1222 DIRHASHLIST_UNLOCK(); 1223 return (-1); 1224 } 1225 /* 1226 * If we can't lock it it's in use and we don't want to 1227 * recycle it anyway. 1228 */ 1229 if (!sx_try_xlock(&dh->dh_lock)) { 1230 dh = TAILQ_NEXT(dh, dh_list); 1231 continue; 1232 } 1233 1234 ufsdirhash_destroy(dh); 1235 1236 /* Repeat if necessary. */ 1237 dh = TAILQ_FIRST(&ufsdirhash_list); 1238 } 1239 /* Success; return with list locked. */ 1240 return (0); 1241 } 1242 1243 /* 1244 * Callback that frees some dirhashes when the system is low on virtual memory. 
1245 */ 1246 static void 1247 ufsdirhash_lowmem() 1248 { 1249 struct dirhash *dh, *dh_temp; 1250 int memfreed = 0; 1251 /* 1252 * Will free a *minimum* of 10% of the dirhash, but possibly much 1253 * more (depending on dirhashreclaimage). System with large dirhashes 1254 * probably also need a much larger dirhashreclaimage. 1255 * XXX: this percentage may need to be adjusted. 1256 */ 1257 int memwanted = ufs_dirhashmem / 10; 1258 1259 ufs_dirhashlowmemcount++; 1260 1261 DIRHASHLIST_LOCK(); 1262 /* 1263 * Delete dirhashes not used for more than ufs_dirhashreclaimage 1264 * seconds. If we can't get a lock on the dirhash, it will be skipped. 1265 */ 1266 TAILQ_FOREACH_SAFE(dh, &ufsdirhash_list, dh_list, dh_temp) { 1267 if (!sx_try_xlock(&dh->dh_lock)) 1268 continue; 1269 if (time_second - dh->dh_lastused > ufs_dirhashreclaimage) 1270 memfreed += ufsdirhash_destroy(dh); 1271 /* Unlock if we didn't delete the dirhash */ 1272 else 1273 ufsdirhash_release(dh); 1274 } 1275 1276 /* 1277 * If not enough memory was freed, keep deleting hashes from the head 1278 * of the dirhash list. The ones closest to the head should be the 1279 * oldest. 
1280 */ 1281 if (memfreed < memwanted) { 1282 TAILQ_FOREACH_SAFE(dh, &ufsdirhash_list, dh_list, dh_temp) { 1283 if (!sx_try_xlock(&dh->dh_lock)) 1284 continue; 1285 memfreed += ufsdirhash_destroy(dh); 1286 if (memfreed >= memwanted) 1287 break; 1288 } 1289 } 1290 DIRHASHLIST_UNLOCK(); 1291 } 1292 1293 1294 void 1295 ufsdirhash_init() 1296 { 1297 ufs_dirhashmaxmem = lmax(roundup(hibufspace / 64, PAGE_SIZE), 1298 2 * 1024 * 1024); 1299 1300 ufsdirhash_zone = uma_zcreate("DIRHASH", DH_NBLKOFF * sizeof(doff_t), 1301 NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0); 1302 mtx_init(&ufsdirhash_mtx, "dirhash list", NULL, MTX_DEF); 1303 TAILQ_INIT(&ufsdirhash_list); 1304 1305 /* Register a callback function to handle low memory signals */ 1306 EVENTHANDLER_REGISTER(vm_lowmem, ufsdirhash_lowmem, NULL, 1307 EVENTHANDLER_PRI_FIRST); 1308 } 1309 1310 void 1311 ufsdirhash_uninit() 1312 { 1313 KASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit")); 1314 uma_zdestroy(ufsdirhash_zone); 1315 mtx_destroy(&ufsdirhash_mtx); 1316 } 1317 1318 #endif /* UFS_DIRHASH */ 1319