xref: /dragonfly/sys/vfs/ufs/ufs_dirhash.c (revision 0ca59c34)
1 /*-
2  * Copyright (c) 2001 Ian Dowse.  All rights reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
14  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
16  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
17  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
18  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
19  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
20  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
21  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
22  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
23  * SUCH DAMAGE.
24  *
25  * $FreeBSD: src/sys/ufs/ufs/ufs_dirhash.c,v 1.3.2.6 2002/04/10 21:41:14 dwmalone Exp $
26  */
27 /*
28  * This implements a hash-based lookup scheme for UFS directories.
29  */
30 
31 #include "opt_ufs.h"
32 
33 #ifdef UFS_DIRHASH
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/fnv_hash.h>
40 #include <sys/proc.h>
41 #include <sys/buf.h>
42 #include <sys/vnode.h>
43 #include <sys/mount.h>
44 #include <sys/sysctl.h>
45 #include <sys/objcache.h>
46 
47 #include "quota.h"
48 #include "inode.h"
49 #include "dir.h"
50 #include "dirhash.h"
51 #include "ufsmount.h"
52 #include "ufs_extern.h"
53 #include "ffs_extern.h"
54 
55 #define WRAPINCR(val, limit)	(((val) + 1 == (limit)) ? 0 : ((val) + 1))
56 #define WRAPDECR(val, limit)	(((val) == 0) ? ((limit) - 1) : ((val) - 1))
57 #define OFSFMT(vp)		((vp)->v_mount->mnt_maxsymlinklen <= 0)
58 #define BLKFREE2IDX(n)		((n) > DH_NFSTATS ? DH_NFSTATS : (n))
59 
60 static MALLOC_DEFINE(M_DIRHASH, "UFS dirhash", "UFS directory hash tables");
61 
62 SYSCTL_NODE(_vfs, OID_AUTO, ufs, CTLFLAG_RD, 0, "UFS filesystem");
63 
64 static int ufs_mindirhashsize = DIRBLKSIZ * 5;
65 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_minsize, CTLFLAG_RW,
66     &ufs_mindirhashsize,
67     0, "minimum directory size in bytes for which to use hashed lookup");
68 static int ufs_dirhashmaxmem = 2 * 1024 * 1024;
69 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_maxmem, CTLFLAG_RW, &ufs_dirhashmaxmem,
70     0, "maximum allowed dirhash memory usage");
71 static int ufs_dirhashmem;
72 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_mem, CTLFLAG_RD, &ufs_dirhashmem,
73     0, "current dirhash memory usage");
74 static int ufs_dirhashcheck = 0;
75 SYSCTL_INT(_vfs_ufs, OID_AUTO, dirhash_docheck, CTLFLAG_RW, &ufs_dirhashcheck,
76     0, "enable extra sanity tests");
77 
78 
79 static int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
80 static void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
81 static void ufsdirhash_delslot(struct dirhash *dh, int slot);
82 static int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
83 	   doff_t offset);
84 static doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
85 static void ufsdirhash_init(void);
86 static int ufsdirhash_recycle(int wanted);
87 
88 static struct objcache  *ufsdirhash_oc;
89 
90 /* Dirhash list; recently-used entries are near the tail. */
91 static TAILQ_HEAD(, dirhash) ufsdirhash_list;
92 
93 /*
94  * Attempt to build up a hash table for the directory contents in
95  * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
96  */
97 int
98 ufsdirhash_build(struct inode *ip)
99 {
100 	struct dirhash *dh;
101 	struct buf *bp = NULL;
102 	struct direct *ep;
103 	struct vnode *vp;
104 	doff_t bmask, pos;
105 	int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
106 
107 	/* Check if we can/should use dirhash. */
108 	if (ip->i_dirhash == NULL) {
109 		if (ip->i_size < ufs_mindirhashsize || OFSFMT(ip->i_vnode))
110 			return (-1);
111 	} else {
112 		/* Hash exists, but sysctls could have changed. */
113 		if (ip->i_size < ufs_mindirhashsize ||
114 		    ufs_dirhashmem > ufs_dirhashmaxmem) {
115 			ufsdirhash_free(ip);
116 			return (-1);
117 		}
118 		/* Check if hash exists and is intact (note: unlocked read). */
119 		if (ip->i_dirhash->dh_hash != NULL)
120 			return (0);
121 		/* Free the old, recycled hash and build a new one. */
122 		ufsdirhash_free(ip);
123 	}
124 
125 	/* Don't hash removed directories. */
126 	if (ip->i_effnlink == 0)
127 		return (-1);
128 
129 	vp = ip->i_vnode;
130 	/* Allocate 50% more entries than this dir size could ever need. */
131 	KASSERT(ip->i_size >= DIRBLKSIZ, ("ufsdirhash_build size"));
132 	nslots = ip->i_size / DIRECTSIZ(1);
133 	nslots = (nslots * 3 + 1) / 2;
134 	narrays = howmany(nslots, DH_NBLKOFF);
135 	nslots = narrays * DH_NBLKOFF;
136 	dirblocks = howmany(ip->i_size, DIRBLKSIZ);
137 	nblocks = (dirblocks * 3 + 1) / 2;
138 
139 	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
140 	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
141 	    nblocks * sizeof(*dh->dh_blkfree);
142 	if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
143 		if (memreqd > ufs_dirhashmaxmem / 2)
144 			return (-1);
145 
146 		/* Try to free some space. */
147 		if (ufsdirhash_recycle(memreqd) != 0)
148 			return (-1);
149 		/* Enough was freed. */
150 	}
151 	ufs_dirhashmem += memreqd;
152 
153 	/*
154 	 * Use non-blocking mallocs so that we will revert to a linear
155 	 * lookup on failure rather than potentially blocking forever.
156 	 */
157 	dh = kmalloc(sizeof *dh, M_DIRHASH, M_NOWAIT | M_ZERO);
158 	if (dh == NULL)
159 		return (-1);
160 	dh->dh_hash = kmalloc(narrays * sizeof(dh->dh_hash[0]), M_DIRHASH,
161 			      M_NOWAIT | M_ZERO);
162 	dh->dh_blkfree = kmalloc(nblocks * sizeof(dh->dh_blkfree[0]),
163 				 M_DIRHASH, M_NOWAIT);
164 	if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
165 		goto fail;
166 	for (i = 0; i < narrays; i++) {
167 		if ((dh->dh_hash[i] = objcache_get(ufsdirhash_oc, M_WAITOK)) == NULL)
168 			goto fail;
169 		for (j = 0; j < DH_NBLKOFF; j++)
170 			dh->dh_hash[i][j] = DIRHASH_EMPTY;
171 	}
172 
173 	/* Initialise the hash table and block statistics. */
174 	dh->dh_narrays = narrays;
175 	dh->dh_hlen = nslots;
176 	dh->dh_nblk = nblocks;
177 	dh->dh_dirblks = dirblocks;
178 	for (i = 0; i < dirblocks; i++)
179 		dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
180 	for (i = 0; i < DH_NFSTATS; i++)
181 		dh->dh_firstfree[i] = -1;
182 	dh->dh_firstfree[DH_NFSTATS] = 0;
183 	dh->dh_seqopt = 0;
184 	dh->dh_seqoff = 0;
185 	dh->dh_score = DH_SCOREINIT;
186 	ip->i_dirhash = dh;
187 
188 	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
189 	pos = 0;
190 	while (pos < ip->i_size) {
191 		/* If necessary, get the next directory block. */
192 		if ((pos & bmask) == 0) {
193 			if (bp != NULL)
194 				brelse(bp);
195 			if (ffs_blkatoff(vp, (off_t)pos, NULL, &bp) != 0)
196 				goto fail;
197 		}
198 
199 		/* Add this entry to the hash. */
200 		ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
201 		if (ep->d_reclen == 0 || ep->d_reclen >
202 		    DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
203 			/* Corrupted directory. */
204 			brelse(bp);
205 			goto fail;
206 		}
207 		if (ep->d_ino != 0) {
208 			/* Add the entry (simplified ufsdirhash_add). */
209 			slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
210 			while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
211 				slot = WRAPINCR(slot, dh->dh_hlen);
212 			dh->dh_hused++;
213 			DH_ENTRY(dh, slot) = pos;
214 			ufsdirhash_adjfree(dh, pos, -DIRSIZ(0, ep));
215 		}
216 		pos += ep->d_reclen;
217 	}
218 
219 	if (bp != NULL)
220 		brelse(bp);
221 	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
222 	dh->dh_onlist = 1;
223 	return (0);
224 
225 fail:
226 	if (dh->dh_hash != NULL) {
227 		for (i = 0; i < narrays; i++)
228 			if (dh->dh_hash[i] != NULL)
229 				objcache_put(ufsdirhash_oc, dh->dh_hash[i]);
230 		kfree(dh->dh_hash, M_DIRHASH);
231 	}
232 	if (dh->dh_blkfree != NULL)
233 		kfree(dh->dh_blkfree, M_DIRHASH);
234 	kfree(dh, M_DIRHASH);
235 	ip->i_dirhash = NULL;
236 	ufs_dirhashmem -= memreqd;
237 	return (-1);
238 }
239 
240 /*
241  * Free any hash table associated with inode 'ip'.
242  */
243 void
244 ufsdirhash_free(struct inode *ip)
245 {
246 	struct dirhash *dh;
247 	int i, mem;
248 
249 	if ((dh = ip->i_dirhash) == NULL)
250 		return;
251 	if (dh->dh_onlist)
252 		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
253 
254 	/* The dirhash pointed to by 'dh' is exclusively ours now. */
255 
256 	mem = sizeof(*dh);
257 	if (dh->dh_hash != NULL) {
258 		for (i = 0; i < dh->dh_narrays; i++)
259 			objcache_put(ufsdirhash_oc, dh->dh_hash[i]);
260 		kfree(dh->dh_hash, M_DIRHASH);
261 		kfree(dh->dh_blkfree, M_DIRHASH);
262 		mem += dh->dh_narrays * sizeof(*dh->dh_hash) +
263 		    dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
264 		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
265 	}
266 	kfree(dh, M_DIRHASH);
267 	ip->i_dirhash = NULL;
268 
269 	ufs_dirhashmem -= mem;
270 }
271 
272 /*
273  * Find the offset of the specified name within the given inode.
274  * Returns 0 on success, ENOENT if the entry does not exist, or
275  * EJUSTRETURN if the caller should revert to a linear search.
276  *
277  * If successful, the directory offset is stored in *offp, and a
278  * pointer to a struct buf containing the entry is stored in *bpp. If
279  * prevoffp is non-NULL, the offset of the previous entry within
280  * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
281  * is the first in a block, the start of the block is used).
282  */
283 int
284 ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
285 		  struct buf **bpp, doff_t *prevoffp)
286 {
287 	struct dirhash *dh, *dh_next;
288 	struct direct *dp;
289 	struct vnode *vp;
290 	struct buf *bp;
291 	doff_t blkoff, bmask, offset, prevoff;
292 	int i, slot;
293 
294 	if ((dh = ip->i_dirhash) == NULL)
295 		return (EJUSTRETURN);
296 	/*
297 	 * Move this dirhash towards the end of the list if it has a
298 	 * score higher than the next entry.
299 	 * Optimise the case where it's already the last by performing
300 	 * an unlocked read of the TAILQ_NEXT pointer.
301 	 */
302 	if (TAILQ_NEXT(dh, dh_list) != NULL) {
303 		/*
304 		 * If the new score will be greater than that of the next
305 		 * entry, then move this entry past it. With both mutexes
306 		 * held, dh_next won't go away, but its dh_score could
307 		 * change; that's not important since it is just a hint.
308 		 */
309 		if (dh->dh_hash != NULL &&
310 		    (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
311 		    dh->dh_score >= dh_next->dh_score) {
312 			KASSERT(dh->dh_onlist, ("dirhash: not on list"));
313 			TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
314 			TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
315 			    dh_list);
316 		}
317 	}
318 	if (dh->dh_hash == NULL) {
319 		ufsdirhash_free(ip);
320 		return (EJUSTRETURN);
321 	}
322 
323 	/* Update the score. */
324 	if (dh->dh_score < DH_SCOREMAX)
325 		dh->dh_score++;
326 
327 	vp = ip->i_vnode;
328 	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
329 	blkoff = -1;
330 	bp = NULL;
331 restart:
332 	slot = ufsdirhash_hash(dh, name, namelen);
333 
334 	if (dh->dh_seqopt) {
335 		/*
336 		 * Sequential access optimisation. dh_seqoff contains the
337 		 * offset of the directory entry immediately following
338 		 * the last entry that was looked up. Check if this offset
339 		 * appears in the hash chain for the name we are looking for.
340 		 */
341 		for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
342 		    i = WRAPINCR(i, dh->dh_hlen))
343 			if (offset == dh->dh_seqoff)
344 				break;
345 		if (offset == dh->dh_seqoff) {
346 			/*
347 			 * We found an entry with the expected offset. This
348 			 * is probably the entry we want, but if not, the
349 			 * code below will turn off seqoff and retry.
350 			 */
351 			slot = i;
352 		} else
353 			dh->dh_seqopt = 0;
354 	}
355 
356 	for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
357 	    slot = WRAPINCR(slot, dh->dh_hlen)) {
358 		if (offset == DIRHASH_DEL)
359 			continue;
360 
361 		if (offset < 0 || offset >= ip->i_size)
362 			panic("ufsdirhash_lookup: bad offset in hash array");
363 		if ((offset & ~bmask) != blkoff) {
364 			if (bp != NULL)
365 				brelse(bp);
366 			blkoff = offset & ~bmask;
367 			if (ffs_blkatoff(vp, (off_t)blkoff, NULL, &bp) != 0)
368 				return (EJUSTRETURN);
369 		}
370 		dp = (struct direct *)(bp->b_data + (offset & bmask));
371 		if (dp->d_reclen == 0 || dp->d_reclen >
372 		    DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
373 			/* Corrupted directory. */
374 			brelse(bp);
375 			return (EJUSTRETURN);
376 		}
377 		if (dp->d_namlen == namelen &&
378 		    bcmp(dp->d_name, name, namelen) == 0) {
379 			/* Found. Get the prev offset if needed. */
380 			if (prevoffp != NULL) {
381 				if (offset & (DIRBLKSIZ - 1)) {
382 					prevoff = ufsdirhash_getprev(dp,
383 					    offset);
384 					if (prevoff == -1) {
385 						brelse(bp);
386 						return (EJUSTRETURN);
387 					}
388 				} else
389 					prevoff = offset;
390 				*prevoffp = prevoff;
391 			}
392 
393 			/* Check for sequential access, and update offset. */
394 			if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
395 				dh->dh_seqopt = 1;
396 			dh->dh_seqoff = offset + DIRSIZ(0, dp);
397 
398 			*bpp = bp;
399 			*offp = offset;
400 			return (0);
401 		}
402 
403 		if (dh->dh_hash == NULL) {
404 			if (bp != NULL)
405 				brelse(bp);
406 			ufsdirhash_free(ip);
407 			return (EJUSTRETURN);
408 		}
409 		/*
410 		 * When the name doesn't match in the seqopt case, go back
411 		 * and search normally.
412 		 */
413 		if (dh->dh_seqopt) {
414 			dh->dh_seqopt = 0;
415 			goto restart;
416 		}
417 	}
418 	if (bp != NULL)
419 		brelse(bp);
420 	return (ENOENT);
421 }
422 
423 /*
424  * Find a directory block with room for 'slotneeded' bytes. Returns
425  * the offset of the directory entry that begins the free space.
426  * This will either be the offset of an existing entry that has free
427  * space at the end, or the offset of an entry with d_ino == 0 at
428  * the start of a DIRBLKSIZ block.
429  *
430  * To use the space, the caller may need to compact existing entries in
431  * the directory. The total number of bytes in all of the entries involved
432  * in the compaction is stored in *slotsize. In other words, all of
433  * the entries that must be compacted are exactly contained in the
434  * region beginning at the returned offset and spanning *slotsize bytes.
435  *
436  * Returns -1 if no space was found, indicating that the directory
437  * must be extended.
438  */
439 doff_t
440 ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
441 {
442 	struct direct *dp;
443 	struct dirhash *dh;
444 	struct buf *bp;
445 	doff_t pos, slotstart;
446 	int dirblock, error, freebytes, i;
447 
448 	if ((dh = ip->i_dirhash) == NULL)
449 		return (-1);
450 	if (dh->dh_hash == NULL) {
451 		ufsdirhash_free(ip);
452 		return (-1);
453 	}
454 
455 	/* Find a directory block with the desired free space. */
456 	dirblock = -1;
457 	for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
458 		if ((dirblock = dh->dh_firstfree[i]) != -1)
459 			break;
460 	if (dirblock == -1) {
461 		return (-1);
462 	}
463 
464 	KASSERT(dirblock < dh->dh_nblk &&
465 	    dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
466 	    ("ufsdirhash_findfree: bad stats"));
467 	pos = dirblock * DIRBLKSIZ;
468 	error = ffs_blkatoff(ip->i_vnode, (off_t)pos, (char **)&dp, &bp);
469 	if (error)
470 		return (-1);
471 
472 	/* Find the first entry with free space. */
473 	for (i = 0; i < DIRBLKSIZ; ) {
474 		if (dp->d_reclen == 0) {
475 			brelse(bp);
476 			return (-1);
477 		}
478 		if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(0, dp))
479 			break;
480 		i += dp->d_reclen;
481 		dp = (struct direct *)((char *)dp + dp->d_reclen);
482 	}
483 	if (i > DIRBLKSIZ) {
484 		brelse(bp);
485 		return (-1);
486 	}
487 	slotstart = pos + i;
488 
489 	/* Find the range of entries needed to get enough space */
490 	freebytes = 0;
491 	while (i < DIRBLKSIZ && freebytes < slotneeded) {
492 		freebytes += dp->d_reclen;
493 		if (dp->d_ino != 0)
494 			freebytes -= DIRSIZ(0, dp);
495 		if (dp->d_reclen == 0) {
496 			brelse(bp);
497 			return (-1);
498 		}
499 		i += dp->d_reclen;
500 		dp = (struct direct *)((char *)dp + dp->d_reclen);
501 	}
502 	if (i > DIRBLKSIZ) {
503 		brelse(bp);
504 		return (-1);
505 	}
506 	if (freebytes < slotneeded)
507 		panic("ufsdirhash_findfree: free mismatch");
508 	brelse(bp);
509 	*slotsize = pos + i - slotstart;
510 	return (slotstart);
511 }
512 
513 /*
514  * Return the start of the unused space at the end of a directory, or
515  * -1 if there are no trailing unused blocks.
516  */
517 doff_t
518 ufsdirhash_enduseful(struct inode *ip)
519 {
520 	struct dirhash *dh;
521 	int i;
522 
523 	if ((dh = ip->i_dirhash) == NULL)
524 		return (-1);
525 	if (dh->dh_hash == NULL) {
526 		ufsdirhash_free(ip);
527 		return (-1);
528 	}
529 
530 	if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) {
531 		return (-1);
532 	}
533 
534 	for (i = dh->dh_dirblks - 1; i >= 0; i--)
535 		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
536 			break;
537 	return ((doff_t)(i + 1) * DIRBLKSIZ);
538 }
539 
540 /*
541  * Insert information into the hash about a new directory entry. dirp
542  * points to a struct direct containing the entry, and offset specifies
543  * the offset of this entry.
544  */
545 void
546 ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
547 {
548 	struct dirhash *dh;
549 	int slot;
550 
551 	if ((dh = ip->i_dirhash) == NULL)
552 		return;
553 	if (dh->dh_hash == NULL) {
554 		ufsdirhash_free(ip);
555 		return;
556 	}
557 
558 	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
559 	    ("ufsdirhash_add: bad offset"));
560 	/*
561 	 * Normal hash usage is < 66%. If the usage gets too high then
562 	 * remove the hash entirely and let it be rebuilt later.
563 	 */
564 	if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
565 		ufsdirhash_free(ip);
566 		return;
567 	}
568 
569 	/* Find a free hash slot (empty or deleted), and add the entry. */
570 	slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
571 	while (DH_ENTRY(dh, slot) >= 0)
572 		slot = WRAPINCR(slot, dh->dh_hlen);
573 	if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
574 		dh->dh_hused++;
575 	DH_ENTRY(dh, slot) = offset;
576 
577 	/* Update the per-block summary info. */
578 	ufsdirhash_adjfree(dh, offset, -DIRSIZ(0, dirp));
579 }
580 
581 /*
582  * Remove the specified directory entry from the hash. The entry to remove
583  * is defined by the name in `dirp', which must exist at the specified
584  * `offset' within the directory.
585  */
586 void
587 ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
588 {
589 	struct dirhash *dh;
590 	int slot;
591 
592 	if ((dh = ip->i_dirhash) == NULL)
593 		return;
594 	if (dh->dh_hash == NULL) {
595 		ufsdirhash_free(ip);
596 		return;
597 	}
598 
599 	KASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
600 	    ("ufsdirhash_remove: bad offset"));
601 	/* Find the entry */
602 	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
603 
604 	/* Remove the hash entry. */
605 	ufsdirhash_delslot(dh, slot);
606 
607 	/* Update the per-block summary info. */
608 	ufsdirhash_adjfree(dh, offset, DIRSIZ(0, dirp));
609 }
610 
611 /*
612  * Change the offset associated with a directory entry in the hash. Used
613  * when compacting directory blocks.
614  */
615 void
616 ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
617 		doff_t newoff)
618 {
619 	struct dirhash *dh;
620 	int slot;
621 
622 	if ((dh = ip->i_dirhash) == NULL)
623 		return;
624 	if (dh->dh_hash == NULL) {
625 		ufsdirhash_free(ip);
626 		return;
627 	}
628 
629 	KASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
630 	    newoff < dh->dh_dirblks * DIRBLKSIZ,
631 	    ("ufsdirhash_move: bad offset"));
632 	/* Find the entry, and update the offset. */
633 	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
634 	DH_ENTRY(dh, slot) = newoff;
635 }
636 
637 /*
638  * Inform dirhash that the directory has grown by one block that
639  * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
640  */
641 void
642 ufsdirhash_newblk(struct inode *ip, doff_t offset)
643 {
644 	struct dirhash *dh;
645 	int block;
646 
647 	if ((dh = ip->i_dirhash) == NULL)
648 		return;
649 	if (dh->dh_hash == NULL) {
650 		ufsdirhash_free(ip);
651 		return;
652 	}
653 
654 	KASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
655 	    ("ufsdirhash_newblk: bad offset"));
656 	block = offset / DIRBLKSIZ;
657 	if (block >= dh->dh_nblk) {
658 		/* Out of space; must rebuild. */
659 		ufsdirhash_free(ip);
660 		return;
661 	}
662 	dh->dh_dirblks = block + 1;
663 
664 	/* Account for the new free block. */
665 	dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
666 	if (dh->dh_firstfree[DH_NFSTATS] == -1)
667 		dh->dh_firstfree[DH_NFSTATS] = block;
668 }
669 
670 /*
671  * Inform dirhash that the directory is being truncated.
672  */
673 void
674 ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
675 {
676 	struct dirhash *dh;
677 	int block, i;
678 
679 	if ((dh = ip->i_dirhash) == NULL)
680 		return;
681 	if (dh->dh_hash == NULL) {
682 		ufsdirhash_free(ip);
683 		return;
684 	}
685 
686 	KASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
687 	    ("ufsdirhash_dirtrunc: bad offset"));
688 	block = howmany(offset, DIRBLKSIZ);
689 	/*
690 	 * If the directory shrinks to less than 1/8 of dh_nblk blocks
691 	 * (about 20% of its original size due to the 50% extra added in
692 	 * ufsdirhash_build) then free it, and let the caller rebuild
693 	 * if necessary.
694 	 */
695 	if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
696 		ufsdirhash_free(ip);
697 		return;
698 	}
699 
700 	/*
701 	 * Remove any `first free' information pertaining to the
702 	 * truncated blocks. All blocks we're removing should be
703 	 * completely unused.
704 	 */
705 	if (dh->dh_firstfree[DH_NFSTATS] >= block)
706 		dh->dh_firstfree[DH_NFSTATS] = -1;
707 	for (i = block; i < dh->dh_dirblks; i++)
708 		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
709 			panic("ufsdirhash_dirtrunc: blocks in use");
710 	for (i = 0; i < DH_NFSTATS; i++)
711 		if (dh->dh_firstfree[i] >= block)
712 			panic("ufsdirhash_dirtrunc: first free corrupt");
713 	dh->dh_dirblks = block;
714 }
715 
716 /*
717  * Debugging function to check that the dirhash information about
718  * a directory block matches its actual contents. Panics if a mismatch
719  * is detected.
720  *
721  * On entry, `buf' should point to the start of an in-core
722  * DIRBLKSIZ-sized directory block, and `offset' should contain the
723  * offset from the start of the directory of that block.
724  */
725 void
726 ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
727 {
728 	struct dirhash *dh;
729 	struct direct *dp;
730 	int block, ffslot, i, nfree;
731 
732 	if (!ufs_dirhashcheck)
733 		return;
734 	if ((dh = ip->i_dirhash) == NULL)
735 		return;
736 	if (dh->dh_hash == NULL) {
737 		ufsdirhash_free(ip);
738 		return;
739 	}
740 
741 	block = offset / DIRBLKSIZ;
742 	if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
743 		panic("ufsdirhash_checkblock: bad offset");
744 
745 	nfree = 0;
746 	for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
747 		dp = (struct direct *)(buf + i);
748 		if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
749 			panic("ufsdirhash_checkblock: bad dir");
750 
751 		if (dp->d_ino == 0) {
752 #if 0
753 			/*
754 			 * XXX entries with d_ino == 0 should only occur
755 			 * at the start of a DIRBLKSIZ block. However the
756 			 * ufs code is tolerant of such entries at other
757 			 * offsets, and fsck does not fix them.
758 			 */
759 			if (i != 0)
760 				panic("ufsdirhash_checkblock: bad dir inode");
761 #endif
762 			nfree += dp->d_reclen;
763 			continue;
764 		}
765 
766 		/* Check that the entry	exists (will panic if it doesn't). */
767 		ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
768 
769 		nfree += dp->d_reclen - DIRSIZ(0, dp);
770 	}
771 	if (i != DIRBLKSIZ)
772 		panic("ufsdirhash_checkblock: bad dir end");
773 
774 	if (dh->dh_blkfree[block] * DIRALIGN != nfree)
775 		panic("ufsdirhash_checkblock: bad free count");
776 
777 	ffslot = BLKFREE2IDX(nfree / DIRALIGN);
778 	for (i = 0; i <= DH_NFSTATS; i++)
779 		if (dh->dh_firstfree[i] == block && i != ffslot)
780 			panic("ufsdirhash_checkblock: bad first-free");
781 	if (dh->dh_firstfree[ffslot] == -1)
782 		panic("ufsdirhash_checkblock: missing first-free entry");
783 }
784 
785 /*
786  * Hash the specified filename into a dirhash slot.
787  */
788 static int
789 ufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
790 {
791 	uint32_t hash;
792 
793 	/*
794 	 * We hash the name and then some other bit of data that is
795 	 * invariant over the dirhash's lifetime. Otherwise names
796 	 * differing only in the last byte are placed close to one
797 	 * another in the table, which is bad for linear probing.
798 	 */
799 	hash = fnv_32_buf(name, namelen, FNV1_32_INIT);
800 	hash = fnv_32_buf(&dh, sizeof(dh), hash);
801 	return (hash % dh->dh_hlen);
802 }
803 
804 /*
805  * Adjust the number of free bytes in the block containing `offset'
806  * by the value specified by `diff'.
807  *
808  * The caller must ensure we have exclusive access to `dh'.
809  */
810 static void
811 ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
812 {
813 	int block, i, nfidx, ofidx;
814 
815 	/* Update the per-block summary info. */
816 	block = offset / DIRBLKSIZ;
817 	KASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
818 	     ("dirhash bad offset"));
819 	ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
820 	dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
821 	nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
822 
823 	/* Update the `first free' list if necessary. */
824 	if (ofidx != nfidx) {
825 		/* If removing, scan forward for the next block. */
826 		if (dh->dh_firstfree[ofidx] == block) {
827 			for (i = block + 1; i < dh->dh_dirblks; i++)
828 				if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
829 					break;
830 			dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
831 		}
832 
833 		/* Make this the new `first free' if necessary */
834 		if (dh->dh_firstfree[nfidx] > block ||
835 		    dh->dh_firstfree[nfidx] == -1)
836 			dh->dh_firstfree[nfidx] = block;
837 	}
838 }
839 
840 /*
841  * Find the specified name which should have the specified offset.
842  * Returns a slot number, and panics on failure.
843  *
844  * `dh' must be locked on entry and remains so on return.
845  */
846 static int
847 ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
848 {
849 	int slot;
850 
851 	/* Find the entry. */
852 	KASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
853 	slot = ufsdirhash_hash(dh, name, namelen);
854 	while (DH_ENTRY(dh, slot) != offset &&
855 	    DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
856 		slot = WRAPINCR(slot, dh->dh_hlen);
857 	if (DH_ENTRY(dh, slot) != offset)
858 		panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
859 
860 	return (slot);
861 }
862 
863 /*
864  * Remove the entry corresponding to the specified slot from the hash array.
865  *
866  * `dh' must be locked on entry and remains so on return.
867  */
868 static void
869 ufsdirhash_delslot(struct dirhash *dh, int slot)
870 {
871 	int i;
872 
873 	/* Mark the entry as deleted. */
874 	DH_ENTRY(dh, slot) = DIRHASH_DEL;
875 
876 	/* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
877 	for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
878 		i = WRAPINCR(i, dh->dh_hlen);
879 	if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
880 		i = WRAPDECR(i, dh->dh_hlen);
881 		while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
882 			DH_ENTRY(dh, i) = DIRHASH_EMPTY;
883 			dh->dh_hused--;
884 			i = WRAPDECR(i, dh->dh_hlen);
885 		}
886 		KASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
887 	}
888 }
889 
890 /*
891  * Given a directory entry and its offset, find the offset of the
892  * previous entry in the same DIRBLKSIZ-sized block. Returns an
893  * offset, or -1 if there is no previous entry in the block or some
894  * other problem occurred.
895  */
896 static doff_t
897 ufsdirhash_getprev(struct direct *dirp, doff_t offset)
898 {
899 	struct direct *dp;
900 	char *blkbuf;
901 	doff_t blkoff, prevoff;
902 	int entrypos, i;
903 
904 	blkoff = offset & ~(DIRBLKSIZ - 1);	/* offset of start of block */
905 	entrypos = offset & (DIRBLKSIZ - 1);	/* entry relative to block */
906 	blkbuf = (char *)dirp - entrypos;
907 	prevoff = blkoff;
908 
909 	/* If `offset' is the start of a block, there is no previous entry. */
910 	if (entrypos == 0)
911 		return (-1);
912 
913 	/* Scan from the start of the block until we get to the entry. */
914 	for (i = 0; i < entrypos; i += dp->d_reclen) {
915 		dp = (struct direct *)(blkbuf + i);
916 		if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
917 			return (-1);	/* Corrupted directory. */
918 		prevoff = blkoff + i;
919 	}
920 	return (prevoff);
921 }
922 
923 /*
924  * Try to free up `wanted' bytes by stealing memory from existing
925  * dirhashes. Returns zero if successful.
926  */
927 static int
928 ufsdirhash_recycle(int wanted)
929 {
930 	struct dirhash *dh;
931 	doff_t **hash;
932 	uint8_t *blkfree;
933 	int i, mem, narrays;
934 
935 	while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
936 		/* Find a dirhash, and lock it. */
937 		if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) {
938 			return (-1);
939 		}
940 		KASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
941 
942 		/* Decrement the score; only recycle if it becomes zero. */
943 		if (--dh->dh_score > 0) {
944 			return (-1);
945 		}
946 
947 		/* Remove it from the list and detach its memory. */
948 		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
949 		dh->dh_onlist = 0;
950 		hash = dh->dh_hash;
951 		dh->dh_hash = NULL;
952 		blkfree = dh->dh_blkfree;
953 		dh->dh_blkfree = NULL;
954 		narrays = dh->dh_narrays;
955 		mem = narrays * sizeof(*dh->dh_hash) +
956 		    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
957 		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
958 
959 		/* Free the detached memory. */
960 		for (i = 0; i < narrays; i++)
961 			objcache_put(ufsdirhash_oc, hash[i]);
962 		kfree(hash, M_DIRHASH);
963 		kfree(blkfree, M_DIRHASH);
964 
965 		/* Account for the returned memory, and repeat if necessary. */
966 		ufs_dirhashmem -= mem;
967 	}
968 	/* Success. */
969 	return (0);
970 }
971 
972 
973 static void
974 ufsdirhash_init(void)
975 {
976 	ufsdirhash_oc = objcache_create_simple(M_DIRHASH,
977 	    DH_NBLKOFF * sizeof(daddr_t));
978 	TAILQ_INIT(&ufsdirhash_list);
979 }
980 SYSINIT(ufsdirhash, SI_SUB_PSEUDO, SI_ORDER_ANY, ufsdirhash_init, NULL);
981 
982 
983 #endif /* UFS_DIRHASH */
984