xref: /openbsd/sys/ufs/ufs/ufs_dirhash.c (revision da5362d5)
1 /* $OpenBSD: ufs_dirhash.c,v 1.43 2024/01/09 03:15:59 guenther Exp $	*/
2 /*
3  * Copyright (c) 2001, 2002 Ian Dowse.  All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 /*
28  * This implements a hash-based lookup scheme for UFS directories.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/pool.h>
36 #include <sys/buf.h>
37 #include <sys/vnode.h>
38 #include <sys/mount.h>
39 #include <sys/sysctl.h>
40 #include <sys/mutex.h>
41 
42 #include <crypto/siphash.h>
43 
44 #include <ufs/ufs/quota.h>
45 #include <ufs/ufs/inode.h>
46 #include <ufs/ufs/dir.h>
47 #include <ufs/ufs/dirhash.h>
48 #include <ufs/ufs/ufsmount.h>
49 #include <ufs/ufs/ufs_extern.h>
50 
51 #define WRAPINCR(val, limit)	(((val) + 1 == (limit)) ? 0 : ((val) + 1))
52 #define WRAPDECR(val, limit)	(((val) == 0) ? ((limit) - 1) : ((val) - 1))
53 #define BLKFREE2IDX(n)		((n) > DH_NFSTATS ? DH_NFSTATS : (n))
54 
55 int ufs_mindirhashsize;
56 int ufs_dirhashmaxmem;
57 int ufs_dirhashmem;
58 int ufs_dirhashcheck;
59 
60 SIPHASH_KEY ufsdirhash_key;
61 
62 int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
63 void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
64 void ufsdirhash_delslot(struct dirhash *dh, int slot);
65 int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
66    doff_t offset);
67 doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
68 int ufsdirhash_recycle(int wanted);
69 
70 struct pool		ufsdirhash_pool;
71 
72 #define	DIRHASHLIST_LOCK()	rw_enter_write(&ufsdirhash_mtx)
73 #define	DIRHASHLIST_UNLOCK()	rw_exit_write(&ufsdirhash_mtx)
74 #define	DIRHASH_LOCK(dh)	rw_enter_write(&(dh)->dh_mtx)
75 #define	DIRHASH_UNLOCK(dh)	rw_exit_write(&(dh)->dh_mtx)
76 #define	DIRHASH_BLKALLOC_WAITOK()	pool_get(&ufsdirhash_pool, PR_WAITOK)
77 #define	DIRHASH_BLKFREE(v)		pool_put(&ufsdirhash_pool, v)
78 
79 #define	mtx_assert(l, f)	/* nothing */
80 #define DIRHASH_ASSERT(e, m)	KASSERT((e))
81 
82 /* Dirhash list; recently-used entries are near the tail. */
83 TAILQ_HEAD(, dirhash) ufsdirhash_list;
84 
85 /* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
86 struct rwlock		ufsdirhash_mtx;
87 
88 /*
89  * Locking order:
90  *	ufsdirhash_mtx
91  *	dh_mtx
92  *
93  * The dh_mtx mutex should be acquired either via the inode lock, or via
94  * ufsdirhash_mtx. Only the owner of the inode may free the associated
95  * dirhash, but anything can steal its memory and set dh_hash to NULL.
96  */
97 
98 /*
99  * Attempt to build up a hash table for the directory contents in
100  * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
101  */
102 int
ufsdirhash_build(struct inode * ip)103 ufsdirhash_build(struct inode *ip)
104 {
105 	struct dirhash *dh;
106 	struct buf *bp = NULL;
107 	struct direct *ep;
108 	struct vnode *vp;
109 	doff_t bmask, pos;
110 	int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
111 
112 	/* Check if we can/should use dirhash. */
113 	if (ip->i_dirhash == NULL) {
114 		if (DIP(ip, size) < ufs_mindirhashsize)
115 			return (-1);
116 	} else {
117 		/* Hash exists, but sysctls could have changed. */
118 		if (DIP(ip, size) < ufs_mindirhashsize ||
119 		    ufs_dirhashmem > ufs_dirhashmaxmem) {
120 			ufsdirhash_free(ip);
121 			return (-1);
122 		}
123 		/* Check if hash exists and is intact (note: unlocked read). */
124 		if (ip->i_dirhash->dh_hash != NULL)
125 			return (0);
126 		/* Free the old, recycled hash and build a new one. */
127 		ufsdirhash_free(ip);
128 	}
129 
130 	/* Don't hash removed directories. */
131 	if (ip->i_effnlink == 0)
132 		return (-1);
133 
134 	vp = ip->i_vnode;
135 	/* Allocate 50% more entries than this dir size could ever need. */
136 	DIRHASH_ASSERT(DIP(ip, size) >= DIRBLKSIZ, ("ufsdirhash_build size"));
137 	nslots = DIP(ip, size) / DIRECTSIZ(1);
138 	nslots = (nslots * 3 + 1) / 2;
139 	narrays = howmany(nslots, DH_NBLKOFF);
140 	nslots = narrays * DH_NBLKOFF;
141 	dirblocks = howmany(DIP(ip, size), DIRBLKSIZ);
142 	nblocks = (dirblocks * 3 + 1) / 2;
143 
144 	memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
145 	    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
146 	    nblocks * sizeof(*dh->dh_blkfree);
147 	DIRHASHLIST_LOCK();
148 	if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
149 		DIRHASHLIST_UNLOCK();
150 		if (memreqd > ufs_dirhashmaxmem / 2)
151 			return (-1);
152 
153 		/* Try to free some space. */
154 		if (ufsdirhash_recycle(memreqd) != 0)
155 			return (-1);
156 		/* Enough was freed, and list has been locked. */
157 	}
158 	ufs_dirhashmem += memreqd;
159 	DIRHASHLIST_UNLOCK();
160 
161 	/*
162 	 * Use non-blocking mallocs so that we will revert to a linear
163 	 * lookup on failure rather than potentially blocking forever.
164 	 */
165 	dh = malloc(sizeof(*dh), M_DIRHASH, M_NOWAIT|M_ZERO);
166 	if (dh == NULL) {
167 		DIRHASHLIST_LOCK();
168 		ufs_dirhashmem -= memreqd;
169 		DIRHASHLIST_UNLOCK();
170 		return (-1);
171 	}
172 	dh->dh_hash = mallocarray(narrays, sizeof(dh->dh_hash[0]),
173 	    M_DIRHASH, M_NOWAIT|M_ZERO);
174 	dh->dh_blkfree = mallocarray(nblocks, sizeof(dh->dh_blkfree[0]),
175 	    M_DIRHASH, M_NOWAIT | M_ZERO);
176 	if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
177 		goto fail;
178 	for (i = 0; i < narrays; i++) {
179 		if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
180 			goto fail;
181 		for (j = 0; j < DH_NBLKOFF; j++)
182 			dh->dh_hash[i][j] = DIRHASH_EMPTY;
183 	}
184 
185 	/* Initialise the hash table and block statistics. */
186 	rw_init(&dh->dh_mtx, "dirhash");
187 	dh->dh_narrays = narrays;
188 	dh->dh_hlen = nslots;
189 	dh->dh_nblk = nblocks;
190 	dh->dh_dirblks = dirblocks;
191 	for (i = 0; i < dirblocks; i++)
192 		dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
193 	for (i = 0; i < DH_NFSTATS; i++)
194 		dh->dh_firstfree[i] = -1;
195 	dh->dh_firstfree[DH_NFSTATS] = 0;
196 	dh->dh_seqopt = 0;
197 	dh->dh_seqoff = 0;
198 	dh->dh_score = DH_SCOREINIT;
199 	ip->i_dirhash = dh;
200 
201 	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
202 	pos = 0;
203 	while (pos < DIP(ip, size)) {
204 		/* If necessary, get the next directory block. */
205 		if ((pos & bmask) == 0) {
206 			if (bp != NULL)
207 				brelse(bp);
208 			if (UFS_BUFATOFF(ip, (off_t)pos, NULL, &bp) != 0)
209 				goto fail;
210 		}
211 		/* Add this entry to the hash. */
212 		ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
213 		if (ep->d_reclen == 0 || ep->d_reclen >
214 		    DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
215 			/* Corrupted directory. */
216 			brelse(bp);
217 			goto fail;
218 		}
219 		if (ep->d_ino != 0) {
220 			/* Add the entry (simplified ufsdirhash_add). */
221 			slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
222 			while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
223 				slot = WRAPINCR(slot, dh->dh_hlen);
224 			dh->dh_hused++;
225 			DH_ENTRY(dh, slot) = pos;
226 			ufsdirhash_adjfree(dh, pos, -DIRSIZ(ep));
227 		}
228 		pos += ep->d_reclen;
229 	}
230 
231 	if (bp != NULL)
232 		brelse(bp);
233 	DIRHASHLIST_LOCK();
234 	TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
235 	dh->dh_onlist = 1;
236 	DIRHASHLIST_UNLOCK();
237 	return (0);
238 
239 fail:
240 	if (dh->dh_hash != NULL) {
241 		for (i = 0; i < narrays; i++)
242 			if (dh->dh_hash[i] != NULL)
243 				DIRHASH_BLKFREE(dh->dh_hash[i]);
244 		free(dh->dh_hash, M_DIRHASH,
245 		    narrays * sizeof(dh->dh_hash[0]));
246 	}
247 	if (dh->dh_blkfree != NULL)
248 		free(dh->dh_blkfree, M_DIRHASH,
249 		    nblocks * sizeof(dh->dh_blkfree[0]));
250 	free(dh, M_DIRHASH, sizeof(*dh));
251 	ip->i_dirhash = NULL;
252 	DIRHASHLIST_LOCK();
253 	ufs_dirhashmem -= memreqd;
254 	DIRHASHLIST_UNLOCK();
255 	return (-1);
256 }
257 
258 /*
259  * Free any hash table associated with inode 'ip'.
260  */
261 void
ufsdirhash_free(struct inode * ip)262 ufsdirhash_free(struct inode *ip)
263 {
264 	struct dirhash *dh;
265 	int i, mem;
266 
267 	if ((dh = ip->i_dirhash) == NULL)
268 		return;
269 	DIRHASHLIST_LOCK();
270 	DIRHASH_LOCK(dh);
271 	if (dh->dh_onlist)
272 		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
273 	DIRHASH_UNLOCK(dh);
274 	DIRHASHLIST_UNLOCK();
275 
276 	/* The dirhash pointed to by 'dh' is exclusively ours now. */
277 
278 	mem = sizeof(*dh);
279 	if (dh->dh_hash != NULL) {
280 		for (i = 0; i < dh->dh_narrays; i++)
281 			DIRHASH_BLKFREE(dh->dh_hash[i]);
282 		free(dh->dh_hash, M_DIRHASH,
283 		    dh->dh_narrays * sizeof(dh->dh_hash[0]));
284 		free(dh->dh_blkfree, M_DIRHASH,
285 		    dh->dh_nblk * sizeof(dh->dh_blkfree[0]));
286 		mem += dh->dh_narrays * sizeof(*dh->dh_hash) +
287 		    dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
288 		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
289 	}
290 	free(dh, M_DIRHASH, sizeof(*dh));
291 	ip->i_dirhash = NULL;
292 
293 	DIRHASHLIST_LOCK();
294 	ufs_dirhashmem -= mem;
295 	DIRHASHLIST_UNLOCK();
296 }
297 
298 /*
299  * Find the offset of the specified name within the given inode.
300  * Returns 0 on success, ENOENT if the entry does not exist, or
301  * EJUSTRETURN if the caller should revert to a linear search.
302  *
303  * If successful, the directory offset is stored in *offp, and a
304  * pointer to a struct buf containing the entry is stored in *bpp. If
305  * prevoffp is non-NULL, the offset of the previous entry within
306  * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
307  * is the first in a block, the start of the block is used).
308  */
309 int
ufsdirhash_lookup(struct inode * ip,char * name,int namelen,doff_t * offp,struct buf ** bpp,doff_t * prevoffp)310 ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
311     struct buf **bpp, doff_t *prevoffp)
312 {
313 	struct dirhash *dh, *dh_next;
314 	struct direct *dp;
315 	struct vnode *vp;
316 	struct buf *bp;
317 	doff_t blkoff, bmask, offset, prevoff;
318 	int i, slot;
319 
320 	if ((dh = ip->i_dirhash) == NULL)
321 		return (EJUSTRETURN);
322 	/*
323 	 * Move this dirhash towards the end of the list if it has a
324 	 * score higher than the next entry, and acquire the dh_mtx.
325 	 * Optimise the case where it's already the last by performing
326 	 * an unlocked read of the TAILQ_NEXT pointer.
327 	 *
328 	 * In both cases, end up holding just dh_mtx.
329 	 */
330 	if (TAILQ_NEXT(dh, dh_list) != NULL) {
331 		DIRHASHLIST_LOCK();
332 		DIRHASH_LOCK(dh);
333 		/*
334 		 * If the new score will be greater than that of the next
335 		 * entry, then move this entry past it. With both mutexes
336 		 * held, dh_next won't go away, but its dh_score could
337 		 * change; that's not important since it is just a hint.
338 		 */
339 		if (dh->dh_hash != NULL &&
340 		    (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
341 		    dh->dh_score >= dh_next->dh_score) {
342 			DIRHASH_ASSERT(dh->dh_onlist, ("dirhash: not on list"));
343 			TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
344 			TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
345 			    dh_list);
346 		}
347 		DIRHASHLIST_UNLOCK();
348 	} else {
349 		/* Already the last, though that could change as we wait. */
350 		DIRHASH_LOCK(dh);
351 	}
352 	if (dh->dh_hash == NULL) {
353 		DIRHASH_UNLOCK(dh);
354 		ufsdirhash_free(ip);
355 		return (EJUSTRETURN);
356 	}
357 
358 	/* Update the score. */
359 	if (dh->dh_score < DH_SCOREMAX)
360 		dh->dh_score++;
361 
362 	vp = ip->i_vnode;
363 	bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
364 	blkoff = -1;
365 	bp = NULL;
366 restart:
367 	slot = ufsdirhash_hash(dh, name, namelen);
368 
369 	if (dh->dh_seqopt) {
370 		/*
371 		 * Sequential access optimisation. dh_seqoff contains the
372 		 * offset of the directory entry immediately following
373 		 * the last entry that was looked up. Check if this offset
374 		 * appears in the hash chain for the name we are looking for.
375 		 */
376 		for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
377 		    i = WRAPINCR(i, dh->dh_hlen))
378 			if (offset == dh->dh_seqoff)
379 				break;
380 		if (offset == dh->dh_seqoff) {
381 			/*
382 			 * We found an entry with the expected offset. This
383 			 * is probably the entry we want, but if not, the
384 			 * code below will turn off seqopt and retry.
385 			 */
386 			slot = i;
387 		} else
388 			dh->dh_seqopt = 0;
389 	}
390 
391 	for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
392 	    slot = WRAPINCR(slot, dh->dh_hlen)) {
393 		if (offset == DIRHASH_DEL)
394 			continue;
395 		DIRHASH_UNLOCK(dh);
396 
397 		if (offset < 0 || offset >= DIP(ip, size))
398 			panic("ufsdirhash_lookup: bad offset in hash array");
399 		if ((offset & ~bmask) != blkoff) {
400 			if (bp != NULL)
401 				brelse(bp);
402 			blkoff = offset & ~bmask;
403 			if (UFS_BUFATOFF(ip, (off_t)blkoff, NULL, &bp) != 0)
404 				return (EJUSTRETURN);
405 		}
406 		dp = (struct direct *)(bp->b_data + (offset & bmask));
407 		if (dp->d_reclen == 0 || dp->d_reclen >
408 		    DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
409 			/* Corrupted directory. */
410 			brelse(bp);
411 			return (EJUSTRETURN);
412 		}
413 		if (dp->d_namlen == namelen &&
414 		    memcmp(dp->d_name, name, namelen) == 0) {
415 			/* Found. Get the prev offset if needed. */
416 			if (prevoffp != NULL) {
417 				if (offset & (DIRBLKSIZ - 1)) {
418 					prevoff = ufsdirhash_getprev(dp,
419 					    offset);
420 					if (prevoff == -1) {
421 						brelse(bp);
422 						return (EJUSTRETURN);
423 					}
424 				} else
425 					prevoff = offset;
426 				*prevoffp = prevoff;
427 			}
428 
429 			/* Check for sequential access, and update offset. */
430 			if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
431 				dh->dh_seqopt = 1;
432 			dh->dh_seqoff = offset + DIRSIZ(dp);
433 
434 			*bpp = bp;
435 			*offp = offset;
436 			return (0);
437 		}
438 
439 		DIRHASH_LOCK(dh);
440 		if (dh->dh_hash == NULL) {
441 			DIRHASH_UNLOCK(dh);
442 			if (bp != NULL)
443 				brelse(bp);
444 			ufsdirhash_free(ip);
445 			return (EJUSTRETURN);
446 		}
447 		/*
448 		 * When the name doesn't match in the seqopt case, go back
449 		 * and search normally.
450 		 */
451 		if (dh->dh_seqopt) {
452 			dh->dh_seqopt = 0;
453 			goto restart;
454 		}
455 	}
456 	DIRHASH_UNLOCK(dh);
457 	if (bp != NULL)
458 		brelse(bp);
459 	return (ENOENT);
460 }
461 
462 /*
463  * Find a directory block with room for 'slotneeded' bytes. Returns
464  * the offset of the directory entry that begins the free space.
465  * This will either be the offset of an existing entry that has free
466  * space at the end, or the offset of an entry with d_ino == 0 at
467  * the start of a DIRBLKSIZ block.
468  *
469  * To use the space, the caller may need to compact existing entries in
470  * the directory. The total number of bytes in all of the entries involved
471  * in the compaction is stored in *slotsize. In other words, all of
472  * the entries that must be compacted are exactly contained in the
473  * region beginning at the returned offset and spanning *slotsize bytes.
474  *
475  * Returns -1 if no space was found, indicating that the directory
476  * must be extended.
477  */
478 doff_t
ufsdirhash_findfree(struct inode * ip,int slotneeded,int * slotsize)479 ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
480 {
481 	struct direct *dp;
482 	struct dirhash *dh;
483 	struct buf *bp;
484 	doff_t pos, slotstart;
485 	int dirblock, error, freebytes, i;
486 
487 	if ((dh = ip->i_dirhash) == NULL)
488 		return (-1);
489 	DIRHASH_LOCK(dh);
490 	if (dh->dh_hash == NULL) {
491 		DIRHASH_UNLOCK(dh);
492 		ufsdirhash_free(ip);
493 		return (-1);
494 	}
495 
496 	/* Find a directory block with the desired free space. */
497 	dirblock = -1;
498 	for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
499 		if ((dirblock = dh->dh_firstfree[i]) != -1)
500 			break;
501 	if (dirblock == -1) {
502 		DIRHASH_UNLOCK(dh);
503 		return (-1);
504 	}
505 
506 	DIRHASH_ASSERT(dirblock < dh->dh_nblk &&
507 	    dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
508 	    ("ufsdirhash_findfree: bad stats"));
509 	DIRHASH_UNLOCK(dh);
510 	pos = dirblock * DIRBLKSIZ;
511 	error = UFS_BUFATOFF(ip, (off_t)pos, (char **)&dp, &bp);
512 	if (error)
513 		return (-1);
514 
515 	/* Find the first entry with free space. */
516 	for (i = 0; i < DIRBLKSIZ; ) {
517 		if (dp->d_reclen == 0) {
518 			brelse(bp);
519 			return (-1);
520 		}
521 		if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(dp))
522 			break;
523 		i += dp->d_reclen;
524 		dp = (struct direct *)((char *)dp + dp->d_reclen);
525 	}
526 	if (i > DIRBLKSIZ) {
527 		brelse(bp);
528 		return (-1);
529 	}
530 	slotstart = pos + i;
531 
532 	/* Find the range of entries needed to get enough space */
533 	freebytes = 0;
534 	while (i < DIRBLKSIZ && freebytes < slotneeded) {
535 		freebytes += dp->d_reclen;
536 		if (dp->d_ino != 0)
537 			freebytes -= DIRSIZ(dp);
538 		if (dp->d_reclen == 0) {
539 			brelse(bp);
540 			return (-1);
541 		}
542 		i += dp->d_reclen;
543 		dp = (struct direct *)((char *)dp + dp->d_reclen);
544 	}
545 	if (i > DIRBLKSIZ) {
546 		brelse(bp);
547 		return (-1);
548 	}
549 	if (freebytes < slotneeded)
550 		panic("ufsdirhash_findfree: free mismatch");
551 	brelse(bp);
552 	*slotsize = pos + i - slotstart;
553 	return (slotstart);
554 }
555 
556 /*
557  * Return the start of the unused space at the end of a directory, or
558  * -1 if there are no trailing unused blocks.
559  */
560 doff_t
ufsdirhash_enduseful(struct inode * ip)561 ufsdirhash_enduseful(struct inode *ip)
562 {
563 
564 	struct dirhash *dh;
565 	int i;
566 
567 	if ((dh = ip->i_dirhash) == NULL)
568 		return (-1);
569 	DIRHASH_LOCK(dh);
570 	if (dh->dh_hash == NULL) {
571 		DIRHASH_UNLOCK(dh);
572 		ufsdirhash_free(ip);
573 		return (-1);
574 	}
575 
576 	if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) {
577 		DIRHASH_UNLOCK(dh);
578 		return (-1);
579 	}
580 
581 	for (i = dh->dh_dirblks - 1; i >= 0; i--)
582 		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
583 			break;
584 	DIRHASH_UNLOCK(dh);
585 	return ((doff_t)(i + 1) * DIRBLKSIZ);
586 }
587 
588 /*
589  * Insert information into the hash about a new directory entry. dirp
590  * points to a struct direct containing the entry, and offset specifies
591  * the offset of this entry.
592  */
593 void
ufsdirhash_add(struct inode * ip,struct direct * dirp,doff_t offset)594 ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
595 {
596 	struct dirhash *dh;
597 	int slot;
598 
599 	if ((dh = ip->i_dirhash) == NULL)
600 		return;
601 	DIRHASH_LOCK(dh);
602 	if (dh->dh_hash == NULL) {
603 		DIRHASH_UNLOCK(dh);
604 		ufsdirhash_free(ip);
605 		return;
606 	}
607 
608 	DIRHASH_ASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
609 	    ("ufsdirhash_add: bad offset"));
610 	/*
611 	 * Normal hash usage is < 66%. If the usage gets too high then
612 	 * remove the hash entirely and let it be rebuilt later.
613 	 */
614 	if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
615 		DIRHASH_UNLOCK(dh);
616 		ufsdirhash_free(ip);
617 		return;
618 	}
619 
620 	/* Find a free hash slot (empty or deleted), and add the entry. */
621 	slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
622 	while (DH_ENTRY(dh, slot) >= 0)
623 		slot = WRAPINCR(slot, dh->dh_hlen);
624 	if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
625 		dh->dh_hused++;
626 	DH_ENTRY(dh, slot) = offset;
627 
628 	/* Update the per-block summary info. */
629 	ufsdirhash_adjfree(dh, offset, -DIRSIZ(dirp));
630 	DIRHASH_UNLOCK(dh);
631 }
632 
633 /*
634  * Remove the specified directory entry from the hash. The entry to remove
635  * is defined by the name in `dirp', which must exist at the specified
636  * `offset' within the directory.
637  */
638 void
ufsdirhash_remove(struct inode * ip,struct direct * dirp,doff_t offset)639 ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
640 {
641 	struct dirhash *dh;
642 	int slot;
643 
644 	if ((dh = ip->i_dirhash) == NULL)
645 		return;
646 	DIRHASH_LOCK(dh);
647 	if (dh->dh_hash == NULL) {
648 		DIRHASH_UNLOCK(dh);
649 		ufsdirhash_free(ip);
650 		return;
651 	}
652 
653 	DIRHASH_ASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
654 	    ("ufsdirhash_remove: bad offset"));
655 	/* Find the entry */
656 	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
657 
658 	/* Remove the hash entry. */
659 	ufsdirhash_delslot(dh, slot);
660 
661 	/* Update the per-block summary info. */
662 	ufsdirhash_adjfree(dh, offset, DIRSIZ(dirp));
663 	DIRHASH_UNLOCK(dh);
664 }
665 
666 /*
667  * Change the offset associated with a directory entry in the hash. Used
668  * when compacting directory blocks.
669  */
670 void
ufsdirhash_move(struct inode * ip,struct direct * dirp,doff_t oldoff,doff_t newoff)671 ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
672     doff_t newoff)
673 {
674 	struct dirhash *dh;
675 	int slot;
676 
677 	if ((dh = ip->i_dirhash) == NULL)
678 		return;
679 	DIRHASH_LOCK(dh);
680 	if (dh->dh_hash == NULL) {
681 		DIRHASH_UNLOCK(dh);
682 		ufsdirhash_free(ip);
683 		return;
684 	}
685 
686 	DIRHASH_ASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
687 	    newoff < dh->dh_dirblks * DIRBLKSIZ,
688 	    ("ufsdirhash_move: bad offset"));
689 	/* Find the entry, and update the offset. */
690 	slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
691 	DH_ENTRY(dh, slot) = newoff;
692 	DIRHASH_UNLOCK(dh);
693 }
694 
695 /*
696  * Inform dirhash that the directory has grown by one block that
697  * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
698  */
699 void
ufsdirhash_newblk(struct inode * ip,doff_t offset)700 ufsdirhash_newblk(struct inode *ip, doff_t offset)
701 {
702 	struct dirhash *dh;
703 	int block;
704 
705 	if ((dh = ip->i_dirhash) == NULL)
706 		return;
707 	DIRHASH_LOCK(dh);
708 	if (dh->dh_hash == NULL) {
709 		DIRHASH_UNLOCK(dh);
710 		ufsdirhash_free(ip);
711 		return;
712 	}
713 
714 	DIRHASH_ASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
715 	    ("ufsdirhash_newblk: bad offset"));
716 	block = offset / DIRBLKSIZ;
717 	if (block >= dh->dh_nblk) {
718 		/* Out of space; must rebuild. */
719 		DIRHASH_UNLOCK(dh);
720 		ufsdirhash_free(ip);
721 		return;
722 	}
723 	dh->dh_dirblks = block + 1;
724 
725 	/* Account for the new free block. */
726 	dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
727 	if (dh->dh_firstfree[DH_NFSTATS] == -1)
728 		dh->dh_firstfree[DH_NFSTATS] = block;
729 	DIRHASH_UNLOCK(dh);
730 }
731 
732 /*
733  * Inform dirhash that the directory is being truncated.
734  */
735 void
ufsdirhash_dirtrunc(struct inode * ip,doff_t offset)736 ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
737 {
738 	struct dirhash *dh;
739 	int block, i;
740 
741 	if ((dh = ip->i_dirhash) == NULL)
742 		return;
743 	DIRHASH_LOCK(dh);
744 	if (dh->dh_hash == NULL) {
745 		DIRHASH_UNLOCK(dh);
746 		ufsdirhash_free(ip);
747 		return;
748 	}
749 
750 	DIRHASH_ASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
751 	    ("ufsdirhash_dirtrunc: bad offset"));
752 	block = howmany(offset, DIRBLKSIZ);
753 	/*
754 	 * If the directory shrinks to less than 1/8 of dh_nblk blocks
755 	 * (about 20% of its original size due to the 50% extra added in
756 	 * ufsdirhash_build) then free it, and let the caller rebuild
757 	 * if necessary.
758 	 */
759 	if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
760 		DIRHASH_UNLOCK(dh);
761 		ufsdirhash_free(ip);
762 		return;
763 	}
764 
765 	/*
766 	 * Remove any `first free' information pertaining to the
767 	 * truncated blocks. All blocks we're removing should be
768 	 * completely unused.
769 	 */
770 	if (dh->dh_firstfree[DH_NFSTATS] >= block)
771 		dh->dh_firstfree[DH_NFSTATS] = -1;
772 	for (i = block; i < dh->dh_dirblks; i++)
773 		if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
774 			panic("ufsdirhash_dirtrunc: blocks in use");
775 	for (i = 0; i < DH_NFSTATS; i++)
776 		if (dh->dh_firstfree[i] >= block)
777 			panic("ufsdirhash_dirtrunc: first free corrupt");
778 	dh->dh_dirblks = block;
779 	DIRHASH_UNLOCK(dh);
780 }
781 
782 /*
783  * Debugging function to check that the dirhash information about
784  * a directory block matches its actual contents. Panics if a mismatch
785  * is detected.
786  *
787  * On entry, `buf' should point to the start of an in-core
788  * DIRBLKSIZ-sized directory block, and `offset' should contain the
789  * offset from the start of the directory of that block.
790  */
791 void
ufsdirhash_checkblock(struct inode * ip,char * buf,doff_t offset)792 ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
793 {
794 	struct dirhash *dh;
795 	struct direct *dp;
796 	int block, ffslot, i, nfree;
797 
798 	if (!ufs_dirhashcheck)
799 		return;
800 	if ((dh = ip->i_dirhash) == NULL)
801 		return;
802 	DIRHASH_LOCK(dh);
803 	if (dh->dh_hash == NULL) {
804 		DIRHASH_UNLOCK(dh);
805 		ufsdirhash_free(ip);
806 		return;
807 	}
808 
809 	block = offset / DIRBLKSIZ;
810 	if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
811 		panic("ufsdirhash_checkblock: bad offset");
812 
813 	nfree = 0;
814 	for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
815 		dp = (struct direct *)(buf + i);
816 		if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
817 			panic("ufsdirhash_checkblock: bad dir");
818 
819 		if (dp->d_ino == 0) {
820 #if 0
821 			/*
822 			 * XXX entries with d_ino == 0 should only occur
823 			 * at the start of a DIRBLKSIZ block. However the
824 			 * ufs code is tolerant of such entries at other
825 			 * offsets, and fsck does not fix them.
826 			 */
827 			if (i != 0)
828 				panic("ufsdirhash_checkblock: bad dir inode");
829 #endif
830 			nfree += dp->d_reclen;
831 			continue;
832 		}
833 
834 		/* Check that the entry	exists (will panic if it doesn't). */
835 		ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
836 
837 		nfree += dp->d_reclen - DIRSIZ(dp);
838 	}
839 	if (i != DIRBLKSIZ)
840 		panic("ufsdirhash_checkblock: bad dir end");
841 
842 	if (dh->dh_blkfree[block] * DIRALIGN != nfree)
843 		panic("ufsdirhash_checkblock: bad free count");
844 
845 	ffslot = BLKFREE2IDX(nfree / DIRALIGN);
846 	for (i = 0; i <= DH_NFSTATS; i++)
847 		if (dh->dh_firstfree[i] == block && i != ffslot)
848 			panic("ufsdirhash_checkblock: bad first-free");
849 	if (dh->dh_firstfree[ffslot] == -1)
850 		panic("ufsdirhash_checkblock: missing first-free entry");
851 	DIRHASH_UNLOCK(dh);
852 }
853 
854 /*
855  * Hash the specified filename into a dirhash slot.
856  */
857 int
ufsdirhash_hash(struct dirhash * dh,char * name,int namelen)858 ufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
859 {
860 	return SipHash24(&ufsdirhash_key, name, namelen) % dh->dh_hlen;
861 }
862 
863 /*
864  * Adjust the number of free bytes in the block containing `offset'
865  * by the value specified by `diff'.
866  *
867  * The caller must ensure we have exclusive access to `dh'; normally
868  * that means that dh_mtx should be held, but this is also called
869  * from ufsdirhash_build() where exclusive access can be assumed.
870  */
871 void
ufsdirhash_adjfree(struct dirhash * dh,doff_t offset,int diff)872 ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
873 {
874 	int block, i, nfidx, ofidx;
875 
876 	/* Update the per-block summary info. */
877 	block = offset / DIRBLKSIZ;
878 	DIRHASH_ASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
879 	     ("dirhash bad offset"));
880 	ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
881 	dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
882 	nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
883 
884 	/* Update the `first free' list if necessary. */
885 	if (ofidx != nfidx) {
886 		/* If removing, scan forward for the next block. */
887 		if (dh->dh_firstfree[ofidx] == block) {
888 			for (i = block + 1; i < dh->dh_dirblks; i++)
889 				if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
890 					break;
891 			dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
892 		}
893 
894 		/* Make this the new `first free' if necessary */
895 		if (dh->dh_firstfree[nfidx] > block ||
896 		    dh->dh_firstfree[nfidx] == -1)
897 			dh->dh_firstfree[nfidx] = block;
898 	}
899 }
900 
901 /*
902  * Find the specified name which should have the specified offset.
903  * Returns a slot number, and panics on failure.
904  *
905  * `dh' must be locked on entry and remains so on return.
906  */
907 int
ufsdirhash_findslot(struct dirhash * dh,char * name,int namelen,doff_t offset)908 ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
909 {
910 	int slot;
911 
912 	mtx_assert(&dh->dh_mtx, MA_OWNED);
913 
914 	/* Find the entry. */
915 	DIRHASH_ASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
916 	slot = ufsdirhash_hash(dh, name, namelen);
917 	while (DH_ENTRY(dh, slot) != offset &&
918 	    DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
919 		slot = WRAPINCR(slot, dh->dh_hlen);
920 	if (DH_ENTRY(dh, slot) != offset)
921 		panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
922 
923 	return (slot);
924 }
925 
926 /*
927  * Remove the entry corresponding to the specified slot from the hash array.
928  *
929  * `dh' must be locked on entry and remains so on return.
930  */
931 void
ufsdirhash_delslot(struct dirhash * dh,int slot)932 ufsdirhash_delslot(struct dirhash *dh, int slot)
933 {
934 	int i;
935 
936 	mtx_assert(&dh->dh_mtx, MA_OWNED);
937 
938 	/* Mark the entry as deleted. */
939 	DH_ENTRY(dh, slot) = DIRHASH_DEL;
940 
941 	/* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
942 	for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
943 		i = WRAPINCR(i, dh->dh_hlen);
944 	if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
945 		i = WRAPDECR(i, dh->dh_hlen);
946 		while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
947 			DH_ENTRY(dh, i) = DIRHASH_EMPTY;
948 			dh->dh_hused--;
949 			i = WRAPDECR(i, dh->dh_hlen);
950 		}
951 		DIRHASH_ASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
952 	}
953 }
954 
955 /*
956  * Given a directory entry and its offset, find the offset of the
957  * previous entry in the same DIRBLKSIZ-sized block. Returns an
958  * offset, or -1 if there is no previous entry in the block or some
959  * other problem occurred.
960  */
961 doff_t
ufsdirhash_getprev(struct direct * dirp,doff_t offset)962 ufsdirhash_getprev(struct direct *dirp, doff_t offset)
963 {
964 	struct direct *dp;
965 	char *blkbuf;
966 	doff_t blkoff, prevoff;
967 	int entrypos, i;
968 
969 	blkoff = offset & ~(DIRBLKSIZ - 1);	/* offset of start of block */
970 	entrypos = offset & (DIRBLKSIZ - 1);	/* entry relative to block */
971 	blkbuf = (char *)dirp - entrypos;
972 	prevoff = blkoff;
973 
974 	/* If `offset' is the start of a block, there is no previous entry. */
975 	if (entrypos == 0)
976 		return (-1);
977 
978 	/* Scan from the start of the block until we get to the entry. */
979 	for (i = 0; i < entrypos; i += dp->d_reclen) {
980 		dp = (struct direct *)(blkbuf + i);
981 		if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
982 			return (-1);	/* Corrupted directory. */
983 		prevoff = blkoff + i;
984 	}
985 	return (prevoff);
986 }
987 
988 /*
989  * Try to free up `wanted' bytes by stealing memory from existing
990  * dirhashes. Returns zero with list locked if successful.
991  */
992 int
ufsdirhash_recycle(int wanted)993 ufsdirhash_recycle(int wanted)
994 {
995 	struct dirhash *dh;
996 	doff_t **hash;
997 	u_int8_t *blkfree;
998 	int i, mem, narrays, nblk;
999 
1000 	DIRHASHLIST_LOCK();
1001 	while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
1002 		/* Find a dirhash, and lock it. */
1003 		if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) {
1004 			DIRHASHLIST_UNLOCK();
1005 			return (-1);
1006 		}
1007 		DIRHASH_LOCK(dh);
1008 		DIRHASH_ASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
1009 
1010 		/* Decrement the score; only recycle if it becomes zero. */
1011 		if (--dh->dh_score > 0) {
1012 			DIRHASH_UNLOCK(dh);
1013 			DIRHASHLIST_UNLOCK();
1014 			return (-1);
1015 		}
1016 
1017 		/* Remove it from the list and detach its memory. */
1018 		TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
1019 		dh->dh_onlist = 0;
1020 		hash = dh->dh_hash;
1021 		dh->dh_hash = NULL;
1022 		blkfree = dh->dh_blkfree;
1023 		dh->dh_blkfree = NULL;
1024 		narrays = dh->dh_narrays;
1025 		nblk = dh->dh_nblk;
1026 		mem = narrays * sizeof(*dh->dh_hash) +
1027 		    narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
1028 		    dh->dh_nblk * sizeof(*dh->dh_blkfree);
1029 
1030 		/* Unlock everything, free the detached memory. */
1031 		DIRHASH_UNLOCK(dh);
1032 		DIRHASHLIST_UNLOCK();
1033 		for (i = 0; i < narrays; i++)
1034 			DIRHASH_BLKFREE(hash[i]);
1035 		free(hash, M_DIRHASH, narrays * sizeof(hash[0]));
1036 		free(blkfree, M_DIRHASH, nblk * sizeof(blkfree[0]));
1037 
1038 		/* Account for the returned memory, and repeat if necessary. */
1039 		DIRHASHLIST_LOCK();
1040 		ufs_dirhashmem -= mem;
1041 	}
1042 	/* Success; return with list locked. */
1043 	return (0);
1044 }
1045 
1046 
1047 void
ufsdirhash_init(void)1048 ufsdirhash_init(void)
1049 {
1050 	pool_init(&ufsdirhash_pool, DH_NBLKOFF * sizeof(doff_t), 0, IPL_NONE,
1051 	    PR_WAITOK, "dirhash", NULL);
1052 	rw_init(&ufsdirhash_mtx, "dirhash_list");
1053 	arc4random_buf(&ufsdirhash_key, sizeof(ufsdirhash_key));
1054 	TAILQ_INIT(&ufsdirhash_list);
1055 	ufs_dirhashmaxmem = 5 * 1024 * 1024;
1056 	ufs_mindirhashsize = 5 * DIRBLKSIZ;
1057 }
1058 
1059 void
ufsdirhash_uninit(void)1060 ufsdirhash_uninit(void)
1061 {
1062 	DIRHASH_ASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit"));
1063 	pool_destroy(&ufsdirhash_pool);
1064 }
1065