xref: /netbsd/sys/ufs/ufs/ufs_dirhash.c (revision 15f14dd7)
1 /*	$NetBSD: ufs_dirhash.c,v 1.41 2022/08/07 02:33:47 simonb Exp $	*/
2 
3 /*
4  * Copyright (c) 2001, 2002 Ian Dowse.  All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  * $FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.3.2.8 2004/12/08 11:54:13 dwmalone Exp $
28  */
29 
30 #include <sys/cdefs.h>
31 __KERNEL_RCSID(0, "$NetBSD: ufs_dirhash.c,v 1.41 2022/08/07 02:33:47 simonb Exp $");
32 
33 /*
34  * This implements a hash-based lookup scheme for UFS directories.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/kmem.h>
41 #include <sys/types.h>
42 #include <sys/hash.h>
43 #include <sys/proc.h>
44 #include <sys/buf.h>
45 #include <sys/vnode.h>
46 #include <sys/mount.h>
47 #include <sys/pool.h>
48 #include <sys/sysctl.h>
49 #include <sys/atomic.h>
50 
51 #include <ufs/ufs/inode.h>
52 #include <ufs/ufs/dir.h>
53 #include <ufs/ufs/dirhash.h>
54 #include <ufs/ufs/ufsmount.h>
55 #include <ufs/ufs/ufs_bswap.h>
56 #include <ufs/ufs/ufs_extern.h>
57 
58 
59 /*
60  * Defaults for dirhash cache sizes:
61  *  - use up to 1/64th of system memory.
62  *  - disable dirhash (set the cache size to 0 bytes) if the
63  *    calculated value of hash is less than 2MB.
64  *  - cap maximum size of the dirhash cache at 32MB.
65  */
66 #define	DIRHASH_DEFAULT_DIVIDER	64
67 #define	MIN_DEFAULT_DIRHASH_MEM	(2 * 1024 * 1024)
68 #define	MAX_DEFAULT_DIRHASH_MEM	(32 * 1024 * 1024)
69 
70 
71 #define WRAPINCR(val, limit)	(((val) + 1 == (limit)) ? 0 : ((val) + 1))
72 #define WRAPDECR(val, limit)	(((val) == 0) ? ((limit) - 1) : ((val) - 1))
73 #define OFSFMT(ip)		((ip)->i_ump->um_maxsymlinklen <= 0)
74 #define BLKFREE2IDX(n)		((n) > DH_NFSTATS ? DH_NFSTATS : (n))
75 
76 static u_int ufs_dirhashminblks = 5;
77 static u_int ufs_dirhashmaxmem = 0;
78 static u_int ufs_dirhashmem;
79 static u_int ufs_dirhashcheck = 0;
80 
81 static int ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen);
82 static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff,
83 	   int dirblksiz);
84 static void ufsdirhash_delslot(struct dirhash *dh, int slot);
85 static int ufsdirhash_findslot(struct dirhash *dh, const char *name,
86 	   int namelen, doff_t offset);
87 static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset,
88 	   int dirblksiz);
89 static int ufsdirhash_recycle(int wanted);
90 
91 static pool_cache_t ufsdirhashblk_cache;
92 static pool_cache_t ufsdirhash_cache;
93 
94 #define DIRHASHLIST_LOCK()		mutex_enter(&ufsdirhash_lock)
95 #define DIRHASHLIST_UNLOCK()		mutex_exit(&ufsdirhash_lock)
96 #define DIRHASH_LOCK(dh)		mutex_enter(&(dh)->dh_lock)
97 #define DIRHASH_UNLOCK(dh)		mutex_exit(&(dh)->dh_lock)
98 #define DIRHASH_BLKALLOC()		\
99     pool_cache_get(ufsdirhashblk_cache, PR_NOWAIT)
100 #define DIRHASH_BLKFREE(ptr)		\
101     pool_cache_put(ufsdirhashblk_cache, ptr)
102 
103 /* Dirhash list; recently-used entries are near the tail. */
104 static TAILQ_HEAD(, dirhash) ufsdirhash_list;
105 
106 /* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
107 static kmutex_t ufsdirhash_lock;
108 
109 /*
110  * Locking order:
111  *	ufsdirhash_lock
112  *	dh_lock
113  *
114  * The dh_lock mutex should be acquired either via the inode lock, or via
115  * ufsdirhash_lock. Only the owner of the inode may free the associated
116  * dirhash, but anything can steal its memory and set dh_hash to NULL.
117  */
118 
119 /*
120  * Attempt to build up a hash table for the directory contents in
121  * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
122  */
123 int
ufsdirhash_build(struct inode * ip)124 ufsdirhash_build(struct inode *ip)
125 {
126 	struct dirhash *dh;
127 	struct buf *bp = NULL;
128 	struct direct *ep;
129 	struct vnode *vp;
130 	doff_t bmask, pos;
131 	int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
132 	const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
133 	int dirblksiz = ip->i_ump->um_dirblksiz;
134 
135 	/* Check if we can/should use dirhash. */
136 	if (ip->i_dirhash == NULL) {
137 		if (ufs_dirhashmaxmem == 0 ||
138 		    ip->i_size < (ufs_dirhashminblks * dirblksiz) ||
139 		    OFSFMT(ip))
140 			return (-1);
141 	} else {
142 		/* Hash exists, but sysctls could have changed. */
143 		if (ip->i_size < (ufs_dirhashminblks * dirblksiz) ||
144 		    ufs_dirhashmem > ufs_dirhashmaxmem) {
145 			ufsdirhash_free(ip);
146 			return (-1);
147 		}
148 		/* Check if hash exists and is intact (note: unlocked read). */
149 		if (ip->i_dirhash->dh_hash != NULL)
150 			return (0);
151 		/* Free the old, recycled hash and build a new one. */
152 		ufsdirhash_free(ip);
153 	}
154 
155 	/* Don't hash removed directories. */
156 	if (ip->i_nlink == 0)
157 		return (-1);
158 
159 	vp = ip->i_vnode;
160 	/* Allocate 50% more entries than this dir size could ever need. */
161 	KASSERT(ip->i_size >= dirblksiz);
162 	nslots = ip->i_size / UFS_DIRECTSIZ(1);
163 	nslots = (nslots * 3 + 1) / 2;
164 	narrays = howmany(nslots, DH_NBLKOFF);
165 	nslots = narrays * DH_NBLKOFF;
166 	dirblocks = howmany(ip->i_size, dirblksiz);
167 	nblocks = (dirblocks * 3 + 1) / 2;
168 
169 	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
170 	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
171 	    nblocks * sizeof(*dh->dh_blkfree);
172 
173 	while (atomic_add_int_nv(&ufs_dirhashmem, memreqd) >
174 	    ufs_dirhashmaxmem) {
175 		atomic_add_int(&ufs_dirhashmem, -memreqd);
176 		if (memreqd > ufs_dirhashmaxmem / 2)
177 			return (-1);
178 		/* Try to free some space. */
179 		if (ufsdirhash_recycle(memreqd) != 0)
180 			return (-1);
181 	        else
182 		    	DIRHASHLIST_UNLOCK();
183 	}
184 
185 	/*
186 	 * Use non-blocking mallocs so that we will revert to a linear
187 	 * lookup on failure rather than potentially blocking forever.
188 	 */
189 	dh = pool_cache_get(ufsdirhash_cache, PR_NOWAIT);
190 	if (dh == NULL) {
191 		atomic_add_int(&ufs_dirhashmem, -memreqd);
192 		return (-1);
193 	}
194 	memset(dh, 0, sizeof(*dh));
195 	mutex_init(&dh->dh_lock, MUTEX_DEFAULT, IPL_NONE);
196 	DIRHASH_LOCK(dh);
197 	dh->dh_hashsz = narrays * sizeof(dh->dh_hash[0]);
198 	dh->dh_hash = kmem_zalloc(dh->dh_hashsz, KM_NOSLEEP);
199 	dh->dh_blkfreesz = nblocks * sizeof(dh->dh_blkfree[0]);
200 	dh->dh_blkfree = kmem_zalloc(dh->dh_blkfreesz, KM_NOSLEEP);
201 	if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
202 		goto fail;
203 	for (i = 0; i < narrays; i++) {
204 		if ((dh->dh_hash[i] = DIRHASH_BLKALLOC()) == NULL)
205 			goto fail;
206 		for (j = 0; j < DH_NBLKOFF; j++)
207 			dh->dh_hash[i][j] = DIRHASH_EMPTY;
208 	}
209 
210 	/* Initialise the hash table and block statistics. */
211 	dh->dh_narrays = narrays;
212 	dh->dh_hlen = nslots;
213 	dh->dh_nblk = nblocks;
214 	dh->dh_dirblks = dirblocks;
215 	for (i = 0; i < dirblocks; i++)
216 		dh->dh_blkfree[i] = dirblksiz / DIRALIGN;
217 	for (i = 0; i < DH_NFSTATS; i++)
218 		dh->dh_firstfree[i] = -1;
219 	dh->dh_firstfree[DH_NFSTATS] = 0;
220 	dh->dh_seqopt = 0;
221 	dh->dh_seqoff = 0;
222 	dh->dh_score = DH_SCOREINIT;
223 	ip->i_dirhash = dh;
224 
225 	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
226 	pos = 0;
227 	while (pos < ip->i_size) {
228 		preempt_point();
229 
230 		/* If necessary, get the next directory block. */
231 		if ((pos & bmask) == 0) {
232 			if (bp != NULL)
233 				brelse(bp, 0);
234 			if (ufs_blkatoff(vp, (off_t)pos, NULL, &bp, false) != 0)
235 				goto fail;
236 		}
237 
238 		/* Add this entry to the hash. */
239 		ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
240 		if (ep->d_reclen == 0 || ep->d_reclen >
241 		    dirblksiz - (pos & (dirblksiz - 1))) {
242 			/* Corrupted directory. */
243 			brelse(bp, 0);
244 			goto fail;
245 		}
246 		if (ep->d_ino != 0) {
247 			/* Add the entry (simplified ufsdirhash_add). */
248 			slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
249 			while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
250 				slot = WRAPINCR(slot, dh->dh_hlen);
251 			dh->dh_hused++;
252 			DH_ENTRY(dh, slot) = pos;
253 			ufsdirhash_adjfree(dh, pos, -UFS_DIRSIZ(0, ep, needswap),
254 			    dirblksiz);
255 		}
256 		pos += ep->d_reclen;
257 	}
258 
259 	if (bp != NULL)
260 		brelse(bp, 0);
261 	DIRHASHLIST_LOCK();
262 	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
263 	dh->dh_onlist = 1;
264 	DIRHASH_UNLOCK(dh);
265 	DIRHASHLIST_UNLOCK();
266 	return (0);
267 
268 fail:
269 	ip->i_dirhash = NULL;
270 	DIRHASH_UNLOCK(dh);
271 	if (dh->dh_hash != NULL) {
272 		for (i = 0; i < narrays; i++)
273 			if (dh->dh_hash[i] != NULL)
274 				DIRHASH_BLKFREE(dh->dh_hash[i]);
275 		kmem_free(dh->dh_hash, dh->dh_hashsz);
276 	}
277 	if (dh->dh_blkfree != NULL)
278 		kmem_free(dh->dh_blkfree, dh->dh_blkfreesz);
279 	mutex_destroy(&dh->dh_lock);
280 	pool_cache_put(ufsdirhash_cache, dh);
281 	atomic_add_int(&ufs_dirhashmem, -memreqd);
282 	return (-1);
283 }
284 
285 /*
286  * Free any hash table associated with inode 'ip'.
287  */
288 void
ufsdirhash_free(struct inode * ip)289 ufsdirhash_free(struct inode *ip)
290 {
291 	struct dirhash *dh;
292 	int i, mem;
293 
294 	if ((dh = ip->i_dirhash) == NULL)
295 		return;
296 
297 	ip->i_dirhash = NULL;
298 
299 	DIRHASHLIST_LOCK();
300 	if (dh->dh_onlist)
301 		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
302 	DIRHASHLIST_UNLOCK();
303 
304 	/* The dirhash pointed to by 'dh' is exclusively ours now. */
305 	mem = sizeof(*dh);
306 	if (dh->dh_hash != NULL) {
307 		for (i = 0; i < dh->dh_narrays; i++)
308 			DIRHASH_BLKFREE(dh->dh_hash[i]);
309 		kmem_free(dh->dh_hash, dh->dh_hashsz);
310 		kmem_free(dh->dh_blkfree, dh->dh_blkfreesz);
311 		mem += dh->dh_hashsz;
312 		mem += dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash);
313 		mem += dh->dh_nblk * sizeof(*dh->dh_blkfree);
314 	}
315 	mutex_destroy(&dh->dh_lock);
316 	pool_cache_put(ufsdirhash_cache, dh);
317 
318 	atomic_add_int(&ufs_dirhashmem, -mem);
319 }
320 
321 /*
322  * Find the offset of the specified name within the given inode.
323  * Returns 0 on success, ENOENT if the entry does not exist, or
324  * EJUSTRETURN if the caller should revert to a linear search.
325  *
326  * If successful, the directory offset is stored in *offp, and a
327  * pointer to a struct buf containing the entry is stored in *bpp. If
328  * prevoffp is non-NULL, the offset of the previous entry within
329  * the UFS_DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
330  * is the first in a block, the start of the block is used).
331  */
332 int
ufsdirhash_lookup(struct inode * ip,const char * name,int namelen,doff_t * offp,struct buf ** bpp,doff_t * prevoffp)333 ufsdirhash_lookup(struct inode *ip, const char *name, int namelen, doff_t *offp,
334     struct buf **bpp, doff_t *prevoffp)
335 {
336 	struct dirhash *dh, *dh_next;
337 	struct direct *dp;
338 	struct vnode *vp;
339 	struct buf *bp;
340 	doff_t blkoff, bmask, offset, prevoff;
341 	int i, slot;
342 	const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
343 	int dirblksiz = ip->i_ump->um_dirblksiz;
344 
345 	if ((dh = ip->i_dirhash) == NULL)
346 		return (EJUSTRETURN);
347 
348 	/*
349 	 * Move this dirhash towards the end of the list if it has a
350 	 * score higher than the next entry, and acquire the dh_lock.
351 	 * Optimise the case where it's already the last by performing
352 	 * an unlocked read of the TAILQ_NEXT pointer.
353 	 *
354 	 * In both cases, end up holding just dh_lock.
355 	 */
356 	if (TAILQ_NEXT(dh, dh_list) != NULL) {
357 		DIRHASHLIST_LOCK();
358 		DIRHASH_LOCK(dh);
359 		/*
360 		 * If the new score will be greater than that of the next
361 		 * entry, then move this entry past it. With both mutexes
362 		 * held, dh_next won't go away, but its dh_score could
363 		 * change; that's not important since it is just a hint.
364 		 */
365 		if (dh->dh_hash != NULL &&
366 		    (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
367 		    dh->dh_score >= dh_next->dh_score) {
368 			KASSERT(dh->dh_onlist);
369 			TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
370 			TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
371 			    dh_list);
372 		}
373 		DIRHASHLIST_UNLOCK();
374 	} else {
375 		/* Already the last, though that could change as we wait. */
376 		DIRHASH_LOCK(dh);
377 	}
378 	if (dh->dh_hash == NULL) {
379 		DIRHASH_UNLOCK(dh);
380 		ufsdirhash_free(ip);
381 		return (EJUSTRETURN);
382 	}
383 
384 	/* Update the score. */
385 	if (dh->dh_score < DH_SCOREMAX)
386 		dh->dh_score++;
387 
388 	vp = ip->i_vnode;
389 	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
390 	blkoff = -1;
391 	bp = NULL;
392 restart:
393 	slot = ufsdirhash_hash(dh, name, namelen);
394 
395 	if (dh->dh_seqopt) {
396 		/*
397 		 * Sequential access optimisation. dh_seqoff contains the
398 		 * offset of the directory entry immediately following
399 		 * the last entry that was looked up. Check if this offset
400 		 * appears in the hash chain for the name we are looking for.
401 		 */
402 		for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
403 		    i = WRAPINCR(i, dh->dh_hlen))
404 			if (offset == dh->dh_seqoff)
405 				break;
406 		if (offset == dh->dh_seqoff) {
407 			/*
408 			 * We found an entry with the expected offset. This
409 			 * is probably the entry we want, but if not, the
410 			 * code below will turn off seqoff and retry.
411 			 */
412 			slot = i;
413 		} else
414 			dh->dh_seqopt = 0;
415 	}
416 
417 	for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
418 	    slot = WRAPINCR(slot, dh->dh_hlen)) {
419 		if (offset == DIRHASH_DEL)
420 			continue;
421 
422 		if (offset < 0 || offset >= ip->i_size)
423 			panic("ufsdirhash_lookup: bad offset in hash array");
424 		if ((offset & ~bmask) != blkoff) {
425 			if (bp != NULL)
426 				brelse(bp, 0);
427 			blkoff = offset & ~bmask;
428 			if (ufs_blkatoff(vp, (off_t)blkoff,
429 			    NULL, &bp, false) != 0) {
430 				DIRHASH_UNLOCK(dh);
431 				return (EJUSTRETURN);
432 			}
433 		}
434 		dp = (struct direct *)((char *)bp->b_data + (offset & bmask));
435 		if (dp->d_reclen == 0 || dp->d_reclen >
436 		    dirblksiz - (offset & (dirblksiz - 1))) {
437 			/* Corrupted directory. */
438 			DIRHASH_UNLOCK(dh);
439 			brelse(bp, 0);
440 			return (EJUSTRETURN);
441 		}
442 		if (dp->d_namlen == namelen &&
443 		    memcmp(dp->d_name, name, namelen) == 0) {
444 			/* Found. Get the prev offset if needed. */
445 			if (prevoffp != NULL) {
446 				if (offset & (dirblksiz - 1)) {
447 					prevoff = ufsdirhash_getprev(dp,
448 					    offset, dirblksiz);
449 					if (prevoff == -1) {
450 						brelse(bp, 0);
451 						return (EJUSTRETURN);
452 					}
453 				} else
454 					prevoff = offset;
455 				*prevoffp = prevoff;
456 			}
457 
458 			/* Check for sequential access, and update offset. */
459 			if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
460 				dh->dh_seqopt = 1;
461 			dh->dh_seqoff = offset + UFS_DIRSIZ(0, dp, needswap);
462 			DIRHASH_UNLOCK(dh);
463 
464 			*bpp = bp;
465 			*offp = offset;
466 			return (0);
467 		}
468 
469 		if (dh->dh_hash == NULL) {
470 			DIRHASH_UNLOCK(dh);
471 			if (bp != NULL)
472 				brelse(bp, 0);
473 			ufsdirhash_free(ip);
474 			return (EJUSTRETURN);
475 		}
476 		/*
477 		 * When the name doesn't match in the seqopt case, go back
478 		 * and search normally.
479 		 */
480 		if (dh->dh_seqopt) {
481 			dh->dh_seqopt = 0;
482 			goto restart;
483 		}
484 	}
485 	DIRHASH_UNLOCK(dh);
486 	if (bp != NULL)
487 		brelse(bp, 0);
488 	return (ENOENT);
489 }
490 
491 /*
492  * Find a directory block with room for 'slotneeded' bytes. Returns
493  * the offset of the directory entry that begins the free space.
494  * This will either be the offset of an existing entry that has free
495  * space at the end, or the offset of an entry with d_ino == 0 at
496  * the start of a UFS_DIRBLKSIZ block.
497  *
498  * To use the space, the caller may need to compact existing entries in
499  * the directory. The total number of bytes in all of the entries involved
500  * in the compaction is stored in *slotsize. In other words, all of
501  * the entries that must be compacted are exactly contained in the
502  * region beginning at the returned offset and spanning *slotsize bytes.
503  *
504  * Returns -1 if no space was found, indicating that the directory
505  * must be extended.
506  */
507 doff_t
ufsdirhash_findfree(struct inode * ip,int slotneeded,int * slotsize)508 ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
509 {
510 	struct direct *dp;
511 	struct dirhash *dh;
512 	struct buf *bp;
513 	doff_t pos, slotstart;
514 	int dirblock, error, freebytes, i;
515 	const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
516 	int dirblksiz = ip->i_ump->um_dirblksiz;
517 
518 	if ((dh = ip->i_dirhash) == NULL)
519 		return (-1);
520 
521 	DIRHASH_LOCK(dh);
522 	if (dh->dh_hash == NULL) {
523 		DIRHASH_UNLOCK(dh);
524 		ufsdirhash_free(ip);
525 		return (-1);
526 	}
527 
528 	/* Find a directory block with the desired free space. */
529 	dirblock = -1;
530 	for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
531 		if ((dirblock = dh->dh_firstfree[i]) != -1)
532 			break;
533 	if (dirblock == -1) {
534 		DIRHASH_UNLOCK(dh);
535 		return (-1);
536 	}
537 
538 	KASSERT(dirblock < dh->dh_nblk &&
539 	    dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN));
540 	pos = dirblock * dirblksiz;
541 	error = ufs_blkatoff(ip->i_vnode, (off_t)pos, (void *)&dp, &bp, false);
542 	if (error) {
543 		DIRHASH_UNLOCK(dh);
544 		return (-1);
545 	}
546 	/* Find the first entry with free space. */
547 	for (i = 0; i < dirblksiz; ) {
548 		if (dp->d_reclen == 0) {
549 			DIRHASH_UNLOCK(dh);
550 			brelse(bp, 0);
551 			return (-1);
552 		}
553 		if (dp->d_ino == 0 || dp->d_reclen > UFS_DIRSIZ(0, dp, needswap))
554 			break;
555 		i += dp->d_reclen;
556 		dp = (struct direct *)((char *)dp + dp->d_reclen);
557 	}
558 	if (i > dirblksiz) {
559 		DIRHASH_UNLOCK(dh);
560 		brelse(bp, 0);
561 		return (-1);
562 	}
563 	slotstart = pos + i;
564 
565 	/* Find the range of entries needed to get enough space */
566 	freebytes = 0;
567 	while (i < dirblksiz && freebytes < slotneeded) {
568 		freebytes += dp->d_reclen;
569 		if (dp->d_ino != 0)
570 			freebytes -= UFS_DIRSIZ(0, dp, needswap);
571 		if (dp->d_reclen == 0) {
572 			DIRHASH_UNLOCK(dh);
573 			brelse(bp, 0);
574 			return (-1);
575 		}
576 		i += dp->d_reclen;
577 		dp = (struct direct *)((char *)dp + dp->d_reclen);
578 	}
579 	if (i > dirblksiz) {
580 		DIRHASH_UNLOCK(dh);
581 		brelse(bp, 0);
582 		return (-1);
583 	}
584 	if (freebytes < slotneeded)
585 		panic("ufsdirhash_findfree: free mismatch");
586 	DIRHASH_UNLOCK(dh);
587 	brelse(bp, 0);
588 	*slotsize = pos + i - slotstart;
589 	return (slotstart);
590 }
591 
592 /*
593  * Return the start of the unused space at the end of a directory, or
594  * -1 if there are no trailing unused blocks.
595  */
596 doff_t
ufsdirhash_enduseful(struct inode * ip)597 ufsdirhash_enduseful(struct inode *ip)
598 {
599 	struct dirhash *dh;
600 	int i;
601 	int dirblksiz = ip->i_ump->um_dirblksiz;
602 
603 	if ((dh = ip->i_dirhash) == NULL)
604 		return (-1);
605 
606 	DIRHASH_LOCK(dh);
607 	if (dh->dh_hash == NULL) {
608 		DIRHASH_UNLOCK(dh);
609 		ufsdirhash_free(ip);
610 		return (-1);
611 	}
612 
613 	if (dh->dh_blkfree[dh->dh_dirblks - 1] != dirblksiz / DIRALIGN) {
614 		DIRHASH_UNLOCK(dh);
615 		return (-1);
616 	}
617 
618 	for (i = dh->dh_dirblks - 1; i >= 0; i--)
619 		if (dh->dh_blkfree[i] != dirblksiz / DIRALIGN)
620 			break;
621 	DIRHASH_UNLOCK(dh);
622 	return ((doff_t)(i + 1) * dirblksiz);
623 }
624 
625 /*
626  * Insert information into the hash about a new directory entry. dirp
627  * points to a struct direct containing the entry, and offset specifies
628  * the offset of this entry.
629  */
630 void
ufsdirhash_add(struct inode * ip,struct direct * dirp,doff_t offset)631 ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
632 {
633 	struct dirhash *dh;
634 	int slot;
635 	const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
636 	int dirblksiz = ip->i_ump->um_dirblksiz;
637 
638 	if ((dh = ip->i_dirhash) == NULL)
639 		return;
640 
641 	DIRHASH_LOCK(dh);
642 	if (dh->dh_hash == NULL) {
643 		DIRHASH_UNLOCK(dh);
644 		ufsdirhash_free(ip);
645 		return;
646 	}
647 
648 	KASSERT(offset < dh->dh_dirblks * dirblksiz);
649 	/*
650 	 * Normal hash usage is < 66%. If the usage gets too high then
651 	 * remove the hash entirely and let it be rebuilt later.
652 	 */
653 	if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
654 		DIRHASH_UNLOCK(dh);
655 		ufsdirhash_free(ip);
656 		return;
657 	}
658 
659 	/* Find a free hash slot (empty or deleted), and add the entry. */
660 	slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
661 	while (DH_ENTRY(dh, slot) >= 0)
662 		slot = WRAPINCR(slot, dh->dh_hlen);
663 	if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
664 		dh->dh_hused++;
665 	DH_ENTRY(dh, slot) = offset;
666 
667 	/* Update the per-block summary info. */
668 	ufsdirhash_adjfree(dh, offset, -UFS_DIRSIZ(0, dirp, needswap), dirblksiz);
669 	DIRHASH_UNLOCK(dh);
670 }
671 
672 /*
673  * Remove the specified directory entry from the hash. The entry to remove
674  * is defined by the name in `dirp', which must exist at the specified
675  * `offset' within the directory.
676  */
677 void
ufsdirhash_remove(struct inode * ip,struct direct * dirp,doff_t offset)678 ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
679 {
680 	struct dirhash *dh;
681 	int slot;
682 	const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
683 	int dirblksiz = ip->i_ump->um_dirblksiz;
684 
685 	if ((dh = ip->i_dirhash) == NULL)
686 		return;
687 
688 	DIRHASH_LOCK(dh);
689 	if (dh->dh_hash == NULL) {
690 		DIRHASH_UNLOCK(dh);
691 		ufsdirhash_free(ip);
692 		return;
693 	}
694 
695 	KASSERT(offset < dh->dh_dirblks * dirblksiz);
696 	/* Find the entry */
697 	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
698 
699 	/* Remove the hash entry. */
700 	ufsdirhash_delslot(dh, slot);
701 
702 	/* Update the per-block summary info. */
703 	ufsdirhash_adjfree(dh, offset, UFS_DIRSIZ(0, dirp, needswap), dirblksiz);
704 	DIRHASH_UNLOCK(dh);
705 }
706 
707 /*
708  * Change the offset associated with a directory entry in the hash. Used
709  * when compacting directory blocks.
710  */
711 void
ufsdirhash_move(struct inode * ip,struct direct * dirp,doff_t oldoff,doff_t newoff)712 ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
713     doff_t newoff)
714 {
715 	struct dirhash *dh;
716 	int slot;
717 
718 	if ((dh = ip->i_dirhash) == NULL)
719 		return;
720 	DIRHASH_LOCK(dh);
721 	if (dh->dh_hash == NULL) {
722 		DIRHASH_UNLOCK(dh);
723 		ufsdirhash_free(ip);
724 		return;
725 	}
726 
727 	KASSERT(oldoff < dh->dh_dirblks * ip->i_ump->um_dirblksiz &&
728 	    newoff < dh->dh_dirblks * ip->i_ump->um_dirblksiz);
729 	/* Find the entry, and update the offset. */
730 	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
731 	DH_ENTRY(dh, slot) = newoff;
732 	DIRHASH_UNLOCK(dh);
733 }
734 
735 /*
736  * Inform dirhash that the directory has grown by one block that
737  * begins at offset (i.e. the new length is offset + UFS_DIRBLKSIZ).
738  */
739 void
ufsdirhash_newblk(struct inode * ip,doff_t offset)740 ufsdirhash_newblk(struct inode *ip, doff_t offset)
741 {
742 	struct dirhash *dh;
743 	int block;
744 	int dirblksiz = ip->i_ump->um_dirblksiz;
745 
746 	if ((dh = ip->i_dirhash) == NULL)
747 		return;
748 	DIRHASH_LOCK(dh);
749 	if (dh->dh_hash == NULL) {
750 		DIRHASH_UNLOCK(dh);
751 		ufsdirhash_free(ip);
752 		return;
753 	}
754 
755 	KASSERT(offset == dh->dh_dirblks * dirblksiz);
756 	block = offset / dirblksiz;
757 	if (block >= dh->dh_nblk) {
758 		/* Out of space; must rebuild. */
759 		DIRHASH_UNLOCK(dh);
760 		ufsdirhash_free(ip);
761 		return;
762 	}
763 	dh->dh_dirblks = block + 1;
764 
765 	/* Account for the new free block. */
766 	dh->dh_blkfree[block] = dirblksiz / DIRALIGN;
767 	if (dh->dh_firstfree[DH_NFSTATS] == -1)
768 		dh->dh_firstfree[DH_NFSTATS] = block;
769 	DIRHASH_UNLOCK(dh);
770 }
771 
772 /*
773  * Inform dirhash that the directory is being truncated.
774  */
775 void
ufsdirhash_dirtrunc(struct inode * ip,doff_t offset)776 ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
777 {
778 	struct dirhash *dh;
779 	int block, i;
780 	int dirblksiz = ip->i_ump->um_dirblksiz;
781 
782 	if ((dh = ip->i_dirhash) == NULL)
783 		return;
784 
785 	DIRHASH_LOCK(dh);
786 	if (dh->dh_hash == NULL) {
787 		DIRHASH_UNLOCK(dh);
788 		ufsdirhash_free(ip);
789 		return;
790 	}
791 
792 	KASSERT(offset <= dh->dh_dirblks * dirblksiz);
793 	block = howmany(offset, dirblksiz);
794 	/*
795 	 * If the directory shrinks to less than 1/8 of dh_nblk blocks
796 	 * (about 20% of its original size due to the 50% extra added in
797 	 * ufsdirhash_build) then free it, and let the caller rebuild
798 	 * if necessary.
799 	 */
800 	if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
801 		DIRHASH_UNLOCK(dh);
802 		ufsdirhash_free(ip);
803 		return;
804 	}
805 
806 	/*
807 	 * Remove any `first free' information pertaining to the
808 	 * truncated blocks. All blocks we're removing should be
809 	 * completely unused.
810 	 */
811 	if (dh->dh_firstfree[DH_NFSTATS] >= block)
812 		dh->dh_firstfree[DH_NFSTATS] = -1;
813 	for (i = block; i < dh->dh_dirblks; i++)
814 		if (dh->dh_blkfree[i] != dirblksiz / DIRALIGN)
815 			panic("ufsdirhash_dirtrunc: blocks in use");
816 	for (i = 0; i < DH_NFSTATS; i++)
817 		if (dh->dh_firstfree[i] >= block)
818 			panic("ufsdirhash_dirtrunc: first free corrupt");
819 	dh->dh_dirblks = block;
820 	DIRHASH_UNLOCK(dh);
821 }
822 
823 /*
824  * Debugging function to check that the dirhash information about
825  * a directory block matches its actual contents. Panics if a mismatch
826  * is detected.
827  *
828  * On entry, `sbuf' should point to the start of an in-core
829  * DIRBLKSIZ-sized directory block, and `offset' should contain the
830  * offset from the start of the directory of that block.
831  */
832 void
ufsdirhash_checkblock(struct inode * ip,char * sbuf,doff_t offset)833 ufsdirhash_checkblock(struct inode *ip, char *sbuf, doff_t offset)
834 {
835 	struct dirhash *dh;
836 	struct direct *dp;
837 	int block, ffslot, i, nfree;
838 	const int needswap = UFS_MPNEEDSWAP(ip->i_ump);
839 	int dirblksiz = ip->i_ump->um_dirblksiz;
840 
841 	if (!ufs_dirhashcheck)
842 		return;
843 	if ((dh = ip->i_dirhash) == NULL)
844 		return;
845 
846 	DIRHASH_LOCK(dh);
847 	if (dh->dh_hash == NULL) {
848 		DIRHASH_UNLOCK(dh);
849 		ufsdirhash_free(ip);
850 		return;
851 	}
852 
853 	block = offset / dirblksiz;
854 	if ((offset & (dirblksiz - 1)) != 0 || block >= dh->dh_dirblks)
855 		panic("ufsdirhash_checkblock: bad offset");
856 
857 	nfree = 0;
858 	for (i = 0; i < dirblksiz; i += dp->d_reclen) {
859 		dp = (struct direct *)(sbuf + i);
860 		if (dp->d_reclen == 0 || i + dp->d_reclen > dirblksiz)
861 			panic("ufsdirhash_checkblock: bad dir");
862 
863 		if (dp->d_ino == 0) {
864 #if 0
865 			/*
866 			 * XXX entries with d_ino == 0 should only occur
867 			 * at the start of a DIRBLKSIZ block. However the
868 			 * ufs code is tolerant of such entries at other
869 			 * offsets, and fsck does not fix them.
870 			 */
871 			if (i != 0)
872 				panic("ufsdirhash_checkblock: bad dir inode");
873 #endif
874 			nfree += dp->d_reclen;
875 			continue;
876 		}
877 
878 		/* Check that the entry	exists (will panic if it doesn't). */
879 		ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
880 
881 		nfree += dp->d_reclen - UFS_DIRSIZ(0, dp, needswap);
882 	}
883 	if (i != dirblksiz)
884 		panic("ufsdirhash_checkblock: bad dir end");
885 
886 	if (dh->dh_blkfree[block] * DIRALIGN != nfree)
887 		panic("ufsdirhash_checkblock: bad free count");
888 
889 	ffslot = BLKFREE2IDX(nfree / DIRALIGN);
890 	for (i = 0; i <= DH_NFSTATS; i++)
891 		if (dh->dh_firstfree[i] == block && i != ffslot)
892 			panic("ufsdirhash_checkblock: bad first-free");
893 	if (dh->dh_firstfree[ffslot] == -1)
894 		panic("ufsdirhash_checkblock: missing first-free entry");
895 	DIRHASH_UNLOCK(dh);
896 }
897 
898 /*
899  * Hash the specified filename into a dirhash slot.
900  */
901 static int
ufsdirhash_hash(struct dirhash * dh,const char * name,int namelen)902 ufsdirhash_hash(struct dirhash *dh, const char *name, int namelen)
903 {
904 	u_int32_t hash;
905 
906 	/*
907 	 * We hash the name and then some other bit of data that is
908 	 * invariant over the dirhash's lifetime. Otherwise names
909 	 * differing only in the last byte are placed close to one
910 	 * another in the table, which is bad for linear probing.
911 	 */
912 	hash = hash32_buf(name, namelen, HASH32_BUF_INIT);
913 	hash = hash32_buf(&dh, sizeof(dh), hash);
914 	return (hash % dh->dh_hlen);
915 }
916 
917 /*
918  * Adjust the number of free bytes in the block containing `offset'
919  * by the value specified by `diff'.
920  *
921  * The caller must ensure we have exclusive access to `dh'; normally
922  * that means that dh_lock should be held, but this is also called
923  * from ufsdirhash_build() where exclusive access can be assumed.
924  */
925 static void
ufsdirhash_adjfree(struct dirhash * dh,doff_t offset,int diff,int dirblksiz)926 ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff, int dirblksiz)
927 {
928 	int block, i, nfidx, ofidx;
929 
930 	KASSERT(mutex_owned(&dh->dh_lock));
931 
932 	/* Update the per-block summary info. */
933 	block = offset / dirblksiz;
934 	KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks);
935 	ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
936 	dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
937 	nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
938 
939 	/* Update the `first free' list if necessary. */
940 	if (ofidx != nfidx) {
941 		/* If removing, scan forward for the next block. */
942 		if (dh->dh_firstfree[ofidx] == block) {
943 			for (i = block + 1; i < dh->dh_dirblks; i++)
944 				if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
945 					break;
946 			dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
947 		}
948 
949 		/* Make this the new `first free' if necessary */
950 		if (dh->dh_firstfree[nfidx] > block ||
951 		    dh->dh_firstfree[nfidx] == -1)
952 			dh->dh_firstfree[nfidx] = block;
953 	}
954 }
955 
956 /*
957  * Find the specified name which should have the specified offset.
958  * Returns a slot number, and panics on failure.
959  *
960  * `dh' must be locked on entry and remains so on return.
961  */
962 static int
ufsdirhash_findslot(struct dirhash * dh,const char * name,int namelen,doff_t offset)963 ufsdirhash_findslot(struct dirhash *dh, const char *name, int namelen,
964     doff_t offset)
965 {
966 	int slot;
967 
968 	KASSERT(mutex_owned(&dh->dh_lock));
969 
970 	/* Find the entry. */
971 	KASSERT(dh->dh_hused < dh->dh_hlen);
972 	slot = ufsdirhash_hash(dh, name, namelen);
973 	while (DH_ENTRY(dh, slot) != offset &&
974 	    DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
975 		slot = WRAPINCR(slot, dh->dh_hlen);
976 	if (DH_ENTRY(dh, slot) != offset)
977 		panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
978 
979 	return (slot);
980 }
981 
982 /*
983  * Remove the entry corresponding to the specified slot from the hash array.
984  *
985  * `dh' must be locked on entry and remains so on return.
986  */
987 static void
ufsdirhash_delslot(struct dirhash * dh,int slot)988 ufsdirhash_delslot(struct dirhash *dh, int slot)
989 {
990 	int i;
991 
992 	KASSERT(mutex_owned(&dh->dh_lock));
993 
994 	/* Mark the entry as deleted. */
995 	DH_ENTRY(dh, slot) = DIRHASH_DEL;
996 
997 	/* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
998 	for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
999 		i = WRAPINCR(i, dh->dh_hlen);
1000 	if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
1001 		i = WRAPDECR(i, dh->dh_hlen);
1002 		while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
1003 			DH_ENTRY(dh, i) = DIRHASH_EMPTY;
1004 			dh->dh_hused--;
1005 			i = WRAPDECR(i, dh->dh_hlen);
1006 		}
1007 		KASSERT(dh->dh_hused >= 0);
1008 	}
1009 }
1010 
1011 /*
1012  * Given a directory entry and its offset, find the offset of the
1013  * previous entry in the same UFS_DIRBLKSIZ-sized block. Returns an
1014  * offset, or -1 if there is no previous entry in the block or some
1015  * other problem occurred.
1016  */
1017 static doff_t
ufsdirhash_getprev(struct direct * dirp,doff_t offset,int dirblksiz)1018 ufsdirhash_getprev(struct direct *dirp, doff_t offset, int dirblksiz)
1019 {
1020 	struct direct *dp;
1021 	char *blkbuf;
1022 	doff_t blkoff, prevoff;
1023 	int entrypos, i;
1024 
1025 	blkoff = offset & ~(dirblksiz - 1);	/* offset of start of block */
1026 	entrypos = offset & (dirblksiz - 1);	/* entry relative to block */
1027 	blkbuf = (char *)dirp - entrypos;
1028 	prevoff = blkoff;
1029 
1030 	/* If `offset' is the start of a block, there is no previous entry. */
1031 	if (entrypos == 0)
1032 		return (-1);
1033 
1034 	/* Scan from the start of the block until we get to the entry. */
1035 	for (i = 0; i < entrypos; i += dp->d_reclen) {
1036 		dp = (struct direct *)(blkbuf + i);
1037 		if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
1038 			return (-1);	/* Corrupted directory. */
1039 		prevoff = blkoff + i;
1040 	}
1041 	return (prevoff);
1042 }
1043 
1044 /*
1045  * Try to free up `wanted' bytes by stealing memory from existing
1046  * dirhashes. Returns zero with list locked if successful.
1047  */
1048 static int
ufsdirhash_recycle(int wanted)1049 ufsdirhash_recycle(int wanted)
1050 {
1051 	struct dirhash *dh;
1052 	doff_t **hash;
1053 	u_int8_t *blkfree;
1054 	int i, mem, narrays;
1055 	size_t hashsz, blkfreesz;
1056 
1057 	DIRHASHLIST_LOCK();
1058 	while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
1059 		/* Find a dirhash, and lock it. */
1060 		if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) {
1061 			DIRHASHLIST_UNLOCK();
1062 			return (-1);
1063 		}
1064 		DIRHASH_LOCK(dh);
1065 		KASSERT(dh->dh_hash != NULL);
1066 
1067 		/* Decrement the score; only recycle if it becomes zero. */
1068 		if (--dh->dh_score > 0) {
1069 			DIRHASH_UNLOCK(dh);
1070 			DIRHASHLIST_UNLOCK();
1071 			return (-1);
1072 		}
1073 
1074 		/* Remove it from the list and detach its memory. */
1075 		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
1076 		dh->dh_onlist = 0;
1077 		hash = dh->dh_hash;
1078 		hashsz = dh->dh_hashsz;
1079 		dh->dh_hash = NULL;
1080 		blkfree = dh->dh_blkfree;
1081 		blkfreesz = dh->dh_blkfreesz;
1082 		dh->dh_blkfree = NULL;
1083 		narrays = dh->dh_narrays;
1084 		mem = narrays * sizeof(*dh->dh_hash) +
1085 		    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
1086 		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
1087 
1088 		/* Unlock everything, free the detached memory. */
1089 		DIRHASH_UNLOCK(dh);
1090 		DIRHASHLIST_UNLOCK();
1091 
1092 		for (i = 0; i < narrays; i++)
1093 			DIRHASH_BLKFREE(hash[i]);
1094 		kmem_free(hash, hashsz);
1095 		kmem_free(blkfree, blkfreesz);
1096 
1097 		/* Account for the returned memory, and repeat if necessary. */
1098 		DIRHASHLIST_LOCK();
1099 		atomic_add_int(&ufs_dirhashmem, -mem);
1100 	}
1101 	/* Success. */
1102 	return (0);
1103 }
1104 
1105 SYSCTL_SETUP(ufsdirhash_sysctl_init, "ufs_dirhash sysctl")
1106 {
1107 	const struct sysctlnode *rnode, *cnode;
1108 
1109 	sysctl_createv(clog, 0, NULL, &rnode,
1110 		       CTLFLAG_PERMANENT,
1111 		       CTLTYPE_NODE, "ufs",
1112 		       SYSCTL_DESCR("ufs"),
1113 		       NULL, 0, NULL, 0,
1114 		       CTL_VFS, CTL_CREATE, CTL_EOL);
1115 
1116 	sysctl_createv(clog, 0, &rnode, &rnode,
1117 		       CTLFLAG_PERMANENT,
1118 		       CTLTYPE_NODE, "dirhash",
1119 		       SYSCTL_DESCR("dirhash"),
1120 		       NULL, 0, NULL, 0,
1121 		       CTL_CREATE, CTL_EOL);
1122 
1123 	sysctl_createv(clog, 0, &rnode, &cnode,
1124 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1125 		       CTLTYPE_INT, "minblocks",
1126 		       SYSCTL_DESCR("minimum hashed directory size in blocks"),
1127 		       NULL, 0, &ufs_dirhashminblks, 0,
1128 		       CTL_CREATE, CTL_EOL);
1129 
1130 	sysctl_createv(clog, 0, &rnode, &cnode,
1131 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1132 		       CTLTYPE_INT, "maxmem",
1133 		       SYSCTL_DESCR("maximum dirhash memory usage"),
1134 		       NULL, 0, &ufs_dirhashmaxmem, 0,
1135 		       CTL_CREATE, CTL_EOL);
1136 
1137 	sysctl_createv(clog, 0, &rnode, &cnode,
1138 		       CTLFLAG_PERMANENT|CTLFLAG_READONLY,
1139 		       CTLTYPE_INT, "memused",
1140 		       SYSCTL_DESCR("current dirhash memory usage"),
1141 		       NULL, 0, &ufs_dirhashmem, 0,
1142 		       CTL_CREATE, CTL_EOL);
1143 
1144 	sysctl_createv(clog, 0, &rnode, &cnode,
1145 		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
1146 		       CTLTYPE_INT, "docheck",
1147 		       SYSCTL_DESCR("enable extra sanity checks"),
1148 		       NULL, 0, &ufs_dirhashcheck, 0,
1149 		       CTL_CREATE, CTL_EOL);
1150 }
1151 
1152 void
ufsdirhash_init(void)1153 ufsdirhash_init(void)
1154 {
1155 
1156 	/*
1157 	 * Only initialise defaults for the dirhash size if it hasn't
1158 	 * hasn't been set.
1159 	 */
1160 	if (ufs_dirhashmaxmem == 0) {
1161 		/* Use 64-bit math to avoid overflows. */
1162 		uint64_t physmem_bytes, hash_bytes;
1163 
1164 		physmem_bytes = ctob((uint64_t)physmem);
1165 		hash_bytes = physmem_bytes / DIRHASH_DEFAULT_DIVIDER;
1166 
1167 		if (hash_bytes < MIN_DEFAULT_DIRHASH_MEM)
1168 			hash_bytes = 0;
1169 
1170 		if (hash_bytes > MAX_DEFAULT_DIRHASH_MEM)
1171 			hash_bytes = MAX_DEFAULT_DIRHASH_MEM;
1172 
1173 		ufs_dirhashmaxmem = (u_int)hash_bytes;
1174 	}
1175 
1176 	mutex_init(&ufsdirhash_lock, MUTEX_DEFAULT, IPL_NONE);
1177 	ufsdirhashblk_cache = pool_cache_init(DH_NBLKOFF * sizeof(daddr_t), 0,
1178 	    0, 0, "dirhashblk", NULL, IPL_NONE, NULL, NULL, NULL);
1179 	ufsdirhash_cache = pool_cache_init(sizeof(struct dirhash), 0,
1180 	    0, 0, "dirhash", NULL, IPL_NONE, NULL, NULL, NULL);
1181 	TAILQ_INIT(&ufsdirhash_list);
1182 }
1183 
1184 void
ufsdirhash_done(void)1185 ufsdirhash_done(void)
1186 {
1187 
1188 	KASSERT(TAILQ_EMPTY(&ufsdirhash_list));
1189 	pool_cache_destroy(ufsdirhashblk_cache);
1190 	pool_cache_destroy(ufsdirhash_cache);
1191 	mutex_destroy(&ufsdirhash_lock);
1192 }
1193