1 /* $OpenBSD: ufs_dirhash.c,v 1.43 2024/01/09 03:15:59 guenther Exp $ */
2 /*
3 * Copyright (c) 2001, 2002 Ian Dowse. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 /*
28 * This implements a hash-based lookup scheme for UFS directories.
29 */
30
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/lock.h>
34 #include <sys/malloc.h>
35 #include <sys/pool.h>
36 #include <sys/buf.h>
37 #include <sys/vnode.h>
38 #include <sys/mount.h>
39 #include <sys/sysctl.h>
40 #include <sys/mutex.h>
41
42 #include <crypto/siphash.h>
43
44 #include <ufs/ufs/quota.h>
45 #include <ufs/ufs/inode.h>
46 #include <ufs/ufs/dir.h>
47 #include <ufs/ufs/dirhash.h>
48 #include <ufs/ufs/ufsmount.h>
49 #include <ufs/ufs/ufs_extern.h>
50
/* Step an index forwards/backwards around a ring of `limit' slots. */
#define WRAPINCR(val, limit)	(((val) + 1 != (limit)) ? ((val) + 1) : 0)
#define WRAPDECR(val, limit)	(((val) != 0) ? ((val) - 1) : ((limit) - 1))
/* Clamp a free-space count to DH_NFSTATS, the last dh_firstfree[] index. */
#define BLKFREE2IDX(n)		((n) <= DH_NFSTATS ? (n) : DH_NFSTATS)
54
55 int ufs_mindirhashsize;
56 int ufs_dirhashmaxmem;
57 int ufs_dirhashmem;
58 int ufs_dirhashcheck;
59
60 SIPHASH_KEY ufsdirhash_key;
61
62 int ufsdirhash_hash(struct dirhash *dh, char *name, int namelen);
63 void ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff);
64 void ufsdirhash_delslot(struct dirhash *dh, int slot);
65 int ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen,
66 doff_t offset);
67 doff_t ufsdirhash_getprev(struct direct *dp, doff_t offset);
68 int ufsdirhash_recycle(int wanted);
69
70 struct pool ufsdirhash_pool;
71
72 #define DIRHASHLIST_LOCK() rw_enter_write(&ufsdirhash_mtx)
73 #define DIRHASHLIST_UNLOCK() rw_exit_write(&ufsdirhash_mtx)
74 #define DIRHASH_LOCK(dh) rw_enter_write(&(dh)->dh_mtx)
75 #define DIRHASH_UNLOCK(dh) rw_exit_write(&(dh)->dh_mtx)
76 #define DIRHASH_BLKALLOC_WAITOK() pool_get(&ufsdirhash_pool, PR_WAITOK)
77 #define DIRHASH_BLKFREE(v) pool_put(&ufsdirhash_pool, v)
78
79 #define mtx_assert(l, f) /* nothing */
80 #define DIRHASH_ASSERT(e, m) KASSERT((e))
81
82 /* Dirhash list; recently-used entries are near the tail. */
83 TAILQ_HEAD(, dirhash) ufsdirhash_list;
84
85 /* Protects: ufsdirhash_list, `dh_list' field, ufs_dirhashmem. */
86 struct rwlock ufsdirhash_mtx;
87
/*
 * Locking order:
 *	ufsdirhash_mtx
 *	dh_mtx
 *
 * The dh_mtx lock (an rwlock, despite its name) should be acquired either
 * via the inode lock, or via ufsdirhash_mtx. Only the owner of the inode
 * may free the associated dirhash, but anything can steal its memory and
 * set dh_hash to NULL.
 */
97
98 /*
99 * Attempt to build up a hash table for the directory contents in
100 * inode 'ip'. Returns 0 on success, or -1 of the operation failed.
101 */
102 int
ufsdirhash_build(struct inode * ip)103 ufsdirhash_build(struct inode *ip)
104 {
105 struct dirhash *dh;
106 struct buf *bp = NULL;
107 struct direct *ep;
108 struct vnode *vp;
109 doff_t bmask, pos;
110 int dirblocks, i, j, memreqd, nblocks, narrays, nslots, slot;
111
112 /* Check if we can/should use dirhash. */
113 if (ip->i_dirhash == NULL) {
114 if (DIP(ip, size) < ufs_mindirhashsize)
115 return (-1);
116 } else {
117 /* Hash exists, but sysctls could have changed. */
118 if (DIP(ip, size) < ufs_mindirhashsize ||
119 ufs_dirhashmem > ufs_dirhashmaxmem) {
120 ufsdirhash_free(ip);
121 return (-1);
122 }
123 /* Check if hash exists and is intact (note: unlocked read). */
124 if (ip->i_dirhash->dh_hash != NULL)
125 return (0);
126 /* Free the old, recycled hash and build a new one. */
127 ufsdirhash_free(ip);
128 }
129
130 /* Don't hash removed directories. */
131 if (ip->i_effnlink == 0)
132 return (-1);
133
134 vp = ip->i_vnode;
135 /* Allocate 50% more entries than this dir size could ever need. */
136 DIRHASH_ASSERT(DIP(ip, size) >= DIRBLKSIZ, ("ufsdirhash_build size"));
137 nslots = DIP(ip, size) / DIRECTSIZ(1);
138 nslots = (nslots * 3 + 1) / 2;
139 narrays = howmany(nslots, DH_NBLKOFF);
140 nslots = narrays * DH_NBLKOFF;
141 dirblocks = howmany(DIP(ip, size), DIRBLKSIZ);
142 nblocks = (dirblocks * 3 + 1) / 2;
143
144 memreqd = sizeof(*dh) + narrays * sizeof(*dh->dh_hash) +
145 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
146 nblocks * sizeof(*dh->dh_blkfree);
147 DIRHASHLIST_LOCK();
148 if (memreqd + ufs_dirhashmem > ufs_dirhashmaxmem) {
149 DIRHASHLIST_UNLOCK();
150 if (memreqd > ufs_dirhashmaxmem / 2)
151 return (-1);
152
153 /* Try to free some space. */
154 if (ufsdirhash_recycle(memreqd) != 0)
155 return (-1);
156 /* Enough was freed, and list has been locked. */
157 }
158 ufs_dirhashmem += memreqd;
159 DIRHASHLIST_UNLOCK();
160
161 /*
162 * Use non-blocking mallocs so that we will revert to a linear
163 * lookup on failure rather than potentially blocking forever.
164 */
165 dh = malloc(sizeof(*dh), M_DIRHASH, M_NOWAIT|M_ZERO);
166 if (dh == NULL) {
167 DIRHASHLIST_LOCK();
168 ufs_dirhashmem -= memreqd;
169 DIRHASHLIST_UNLOCK();
170 return (-1);
171 }
172 dh->dh_hash = mallocarray(narrays, sizeof(dh->dh_hash[0]),
173 M_DIRHASH, M_NOWAIT|M_ZERO);
174 dh->dh_blkfree = mallocarray(nblocks, sizeof(dh->dh_blkfree[0]),
175 M_DIRHASH, M_NOWAIT | M_ZERO);
176 if (dh->dh_hash == NULL || dh->dh_blkfree == NULL)
177 goto fail;
178 for (i = 0; i < narrays; i++) {
179 if ((dh->dh_hash[i] = DIRHASH_BLKALLOC_WAITOK()) == NULL)
180 goto fail;
181 for (j = 0; j < DH_NBLKOFF; j++)
182 dh->dh_hash[i][j] = DIRHASH_EMPTY;
183 }
184
185 /* Initialise the hash table and block statistics. */
186 rw_init(&dh->dh_mtx, "dirhash");
187 dh->dh_narrays = narrays;
188 dh->dh_hlen = nslots;
189 dh->dh_nblk = nblocks;
190 dh->dh_dirblks = dirblocks;
191 for (i = 0; i < dirblocks; i++)
192 dh->dh_blkfree[i] = DIRBLKSIZ / DIRALIGN;
193 for (i = 0; i < DH_NFSTATS; i++)
194 dh->dh_firstfree[i] = -1;
195 dh->dh_firstfree[DH_NFSTATS] = 0;
196 dh->dh_seqopt = 0;
197 dh->dh_seqoff = 0;
198 dh->dh_score = DH_SCOREINIT;
199 ip->i_dirhash = dh;
200
201 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
202 pos = 0;
203 while (pos < DIP(ip, size)) {
204 /* If necessary, get the next directory block. */
205 if ((pos & bmask) == 0) {
206 if (bp != NULL)
207 brelse(bp);
208 if (UFS_BUFATOFF(ip, (off_t)pos, NULL, &bp) != 0)
209 goto fail;
210 }
211 /* Add this entry to the hash. */
212 ep = (struct direct *)((char *)bp->b_data + (pos & bmask));
213 if (ep->d_reclen == 0 || ep->d_reclen >
214 DIRBLKSIZ - (pos & (DIRBLKSIZ - 1))) {
215 /* Corrupted directory. */
216 brelse(bp);
217 goto fail;
218 }
219 if (ep->d_ino != 0) {
220 /* Add the entry (simplified ufsdirhash_add). */
221 slot = ufsdirhash_hash(dh, ep->d_name, ep->d_namlen);
222 while (DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
223 slot = WRAPINCR(slot, dh->dh_hlen);
224 dh->dh_hused++;
225 DH_ENTRY(dh, slot) = pos;
226 ufsdirhash_adjfree(dh, pos, -DIRSIZ(ep));
227 }
228 pos += ep->d_reclen;
229 }
230
231 if (bp != NULL)
232 brelse(bp);
233 DIRHASHLIST_LOCK();
234 TAILQ_INSERT_TAIL(&ufsdirhash_list, dh, dh_list);
235 dh->dh_onlist = 1;
236 DIRHASHLIST_UNLOCK();
237 return (0);
238
239 fail:
240 if (dh->dh_hash != NULL) {
241 for (i = 0; i < narrays; i++)
242 if (dh->dh_hash[i] != NULL)
243 DIRHASH_BLKFREE(dh->dh_hash[i]);
244 free(dh->dh_hash, M_DIRHASH,
245 narrays * sizeof(dh->dh_hash[0]));
246 }
247 if (dh->dh_blkfree != NULL)
248 free(dh->dh_blkfree, M_DIRHASH,
249 nblocks * sizeof(dh->dh_blkfree[0]));
250 free(dh, M_DIRHASH, sizeof(*dh));
251 ip->i_dirhash = NULL;
252 DIRHASHLIST_LOCK();
253 ufs_dirhashmem -= memreqd;
254 DIRHASHLIST_UNLOCK();
255 return (-1);
256 }
257
258 /*
259 * Free any hash table associated with inode 'ip'.
260 */
261 void
ufsdirhash_free(struct inode * ip)262 ufsdirhash_free(struct inode *ip)
263 {
264 struct dirhash *dh;
265 int i, mem;
266
267 if ((dh = ip->i_dirhash) == NULL)
268 return;
269 DIRHASHLIST_LOCK();
270 DIRHASH_LOCK(dh);
271 if (dh->dh_onlist)
272 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
273 DIRHASH_UNLOCK(dh);
274 DIRHASHLIST_UNLOCK();
275
276 /* The dirhash pointed to by 'dh' is exclusively ours now. */
277
278 mem = sizeof(*dh);
279 if (dh->dh_hash != NULL) {
280 for (i = 0; i < dh->dh_narrays; i++)
281 DIRHASH_BLKFREE(dh->dh_hash[i]);
282 free(dh->dh_hash, M_DIRHASH,
283 dh->dh_narrays * sizeof(dh->dh_hash[0]));
284 free(dh->dh_blkfree, M_DIRHASH,
285 dh->dh_nblk * sizeof(dh->dh_blkfree[0]));
286 mem += dh->dh_narrays * sizeof(*dh->dh_hash) +
287 dh->dh_narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
288 dh->dh_nblk * sizeof(*dh->dh_blkfree);
289 }
290 free(dh, M_DIRHASH, sizeof(*dh));
291 ip->i_dirhash = NULL;
292
293 DIRHASHLIST_LOCK();
294 ufs_dirhashmem -= mem;
295 DIRHASHLIST_UNLOCK();
296 }
297
298 /*
299 * Find the offset of the specified name within the given inode.
300 * Returns 0 on success, ENOENT if the entry does not exist, or
301 * EJUSTRETURN if the caller should revert to a linear search.
302 *
303 * If successful, the directory offset is stored in *offp, and a
304 * pointer to a struct buf containing the entry is stored in *bpp. If
305 * prevoffp is non-NULL, the offset of the previous entry within
306 * the DIRBLKSIZ-sized block is stored in *prevoffp (if the entry
307 * is the first in a block, the start of the block is used).
308 */
309 int
ufsdirhash_lookup(struct inode * ip,char * name,int namelen,doff_t * offp,struct buf ** bpp,doff_t * prevoffp)310 ufsdirhash_lookup(struct inode *ip, char *name, int namelen, doff_t *offp,
311 struct buf **bpp, doff_t *prevoffp)
312 {
313 struct dirhash *dh, *dh_next;
314 struct direct *dp;
315 struct vnode *vp;
316 struct buf *bp;
317 doff_t blkoff, bmask, offset, prevoff;
318 int i, slot;
319
320 if ((dh = ip->i_dirhash) == NULL)
321 return (EJUSTRETURN);
322 /*
323 * Move this dirhash towards the end of the list if it has a
324 * score higher than the next entry, and acquire the dh_mtx.
325 * Optimise the case where it's already the last by performing
326 * an unlocked read of the TAILQ_NEXT pointer.
327 *
328 * In both cases, end up holding just dh_mtx.
329 */
330 if (TAILQ_NEXT(dh, dh_list) != NULL) {
331 DIRHASHLIST_LOCK();
332 DIRHASH_LOCK(dh);
333 /*
334 * If the new score will be greater than that of the next
335 * entry, then move this entry past it. With both mutexes
336 * held, dh_next won't go away, but its dh_score could
337 * change; that's not important since it is just a hint.
338 */
339 if (dh->dh_hash != NULL &&
340 (dh_next = TAILQ_NEXT(dh, dh_list)) != NULL &&
341 dh->dh_score >= dh_next->dh_score) {
342 DIRHASH_ASSERT(dh->dh_onlist, ("dirhash: not on list"));
343 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
344 TAILQ_INSERT_AFTER(&ufsdirhash_list, dh_next, dh,
345 dh_list);
346 }
347 DIRHASHLIST_UNLOCK();
348 } else {
349 /* Already the last, though that could change as we wait. */
350 DIRHASH_LOCK(dh);
351 }
352 if (dh->dh_hash == NULL) {
353 DIRHASH_UNLOCK(dh);
354 ufsdirhash_free(ip);
355 return (EJUSTRETURN);
356 }
357
358 /* Update the score. */
359 if (dh->dh_score < DH_SCOREMAX)
360 dh->dh_score++;
361
362 vp = ip->i_vnode;
363 bmask = VFSTOUFS(vp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;
364 blkoff = -1;
365 bp = NULL;
366 restart:
367 slot = ufsdirhash_hash(dh, name, namelen);
368
369 if (dh->dh_seqopt) {
370 /*
371 * Sequential access optimisation. dh_seqoff contains the
372 * offset of the directory entry immediately following
373 * the last entry that was looked up. Check if this offset
374 * appears in the hash chain for the name we are looking for.
375 */
376 for (i = slot; (offset = DH_ENTRY(dh, i)) != DIRHASH_EMPTY;
377 i = WRAPINCR(i, dh->dh_hlen))
378 if (offset == dh->dh_seqoff)
379 break;
380 if (offset == dh->dh_seqoff) {
381 /*
382 * We found an entry with the expected offset. This
383 * is probably the entry we want, but if not, the
384 * code below will turn off seqopt and retry.
385 */
386 slot = i;
387 } else
388 dh->dh_seqopt = 0;
389 }
390
391 for (; (offset = DH_ENTRY(dh, slot)) != DIRHASH_EMPTY;
392 slot = WRAPINCR(slot, dh->dh_hlen)) {
393 if (offset == DIRHASH_DEL)
394 continue;
395 DIRHASH_UNLOCK(dh);
396
397 if (offset < 0 || offset >= DIP(ip, size))
398 panic("ufsdirhash_lookup: bad offset in hash array");
399 if ((offset & ~bmask) != blkoff) {
400 if (bp != NULL)
401 brelse(bp);
402 blkoff = offset & ~bmask;
403 if (UFS_BUFATOFF(ip, (off_t)blkoff, NULL, &bp) != 0)
404 return (EJUSTRETURN);
405 }
406 dp = (struct direct *)(bp->b_data + (offset & bmask));
407 if (dp->d_reclen == 0 || dp->d_reclen >
408 DIRBLKSIZ - (offset & (DIRBLKSIZ - 1))) {
409 /* Corrupted directory. */
410 brelse(bp);
411 return (EJUSTRETURN);
412 }
413 if (dp->d_namlen == namelen &&
414 memcmp(dp->d_name, name, namelen) == 0) {
415 /* Found. Get the prev offset if needed. */
416 if (prevoffp != NULL) {
417 if (offset & (DIRBLKSIZ - 1)) {
418 prevoff = ufsdirhash_getprev(dp,
419 offset);
420 if (prevoff == -1) {
421 brelse(bp);
422 return (EJUSTRETURN);
423 }
424 } else
425 prevoff = offset;
426 *prevoffp = prevoff;
427 }
428
429 /* Check for sequential access, and update offset. */
430 if (dh->dh_seqopt == 0 && dh->dh_seqoff == offset)
431 dh->dh_seqopt = 1;
432 dh->dh_seqoff = offset + DIRSIZ(dp);
433
434 *bpp = bp;
435 *offp = offset;
436 return (0);
437 }
438
439 DIRHASH_LOCK(dh);
440 if (dh->dh_hash == NULL) {
441 DIRHASH_UNLOCK(dh);
442 if (bp != NULL)
443 brelse(bp);
444 ufsdirhash_free(ip);
445 return (EJUSTRETURN);
446 }
447 /*
448 * When the name doesn't match in the seqopt case, go back
449 * and search normally.
450 */
451 if (dh->dh_seqopt) {
452 dh->dh_seqopt = 0;
453 goto restart;
454 }
455 }
456 DIRHASH_UNLOCK(dh);
457 if (bp != NULL)
458 brelse(bp);
459 return (ENOENT);
460 }
461
462 /*
463 * Find a directory block with room for 'slotneeded' bytes. Returns
464 * the offset of the directory entry that begins the free space.
465 * This will either be the offset of an existing entry that has free
466 * space at the end, or the offset of an entry with d_ino == 0 at
467 * the start of a DIRBLKSIZ block.
468 *
469 * To use the space, the caller may need to compact existing entries in
470 * the directory. The total number of bytes in all of the entries involved
471 * in the compaction is stored in *slotsize. In other words, all of
472 * the entries that must be compacted are exactly contained in the
473 * region beginning at the returned offset and spanning *slotsize bytes.
474 *
475 * Returns -1 if no space was found, indicating that the directory
476 * must be extended.
477 */
478 doff_t
ufsdirhash_findfree(struct inode * ip,int slotneeded,int * slotsize)479 ufsdirhash_findfree(struct inode *ip, int slotneeded, int *slotsize)
480 {
481 struct direct *dp;
482 struct dirhash *dh;
483 struct buf *bp;
484 doff_t pos, slotstart;
485 int dirblock, error, freebytes, i;
486
487 if ((dh = ip->i_dirhash) == NULL)
488 return (-1);
489 DIRHASH_LOCK(dh);
490 if (dh->dh_hash == NULL) {
491 DIRHASH_UNLOCK(dh);
492 ufsdirhash_free(ip);
493 return (-1);
494 }
495
496 /* Find a directory block with the desired free space. */
497 dirblock = -1;
498 for (i = howmany(slotneeded, DIRALIGN); i <= DH_NFSTATS; i++)
499 if ((dirblock = dh->dh_firstfree[i]) != -1)
500 break;
501 if (dirblock == -1) {
502 DIRHASH_UNLOCK(dh);
503 return (-1);
504 }
505
506 DIRHASH_ASSERT(dirblock < dh->dh_nblk &&
507 dh->dh_blkfree[dirblock] >= howmany(slotneeded, DIRALIGN),
508 ("ufsdirhash_findfree: bad stats"));
509 DIRHASH_UNLOCK(dh);
510 pos = dirblock * DIRBLKSIZ;
511 error = UFS_BUFATOFF(ip, (off_t)pos, (char **)&dp, &bp);
512 if (error)
513 return (-1);
514
515 /* Find the first entry with free space. */
516 for (i = 0; i < DIRBLKSIZ; ) {
517 if (dp->d_reclen == 0) {
518 brelse(bp);
519 return (-1);
520 }
521 if (dp->d_ino == 0 || dp->d_reclen > DIRSIZ(dp))
522 break;
523 i += dp->d_reclen;
524 dp = (struct direct *)((char *)dp + dp->d_reclen);
525 }
526 if (i > DIRBLKSIZ) {
527 brelse(bp);
528 return (-1);
529 }
530 slotstart = pos + i;
531
532 /* Find the range of entries needed to get enough space */
533 freebytes = 0;
534 while (i < DIRBLKSIZ && freebytes < slotneeded) {
535 freebytes += dp->d_reclen;
536 if (dp->d_ino != 0)
537 freebytes -= DIRSIZ(dp);
538 if (dp->d_reclen == 0) {
539 brelse(bp);
540 return (-1);
541 }
542 i += dp->d_reclen;
543 dp = (struct direct *)((char *)dp + dp->d_reclen);
544 }
545 if (i > DIRBLKSIZ) {
546 brelse(bp);
547 return (-1);
548 }
549 if (freebytes < slotneeded)
550 panic("ufsdirhash_findfree: free mismatch");
551 brelse(bp);
552 *slotsize = pos + i - slotstart;
553 return (slotstart);
554 }
555
556 /*
557 * Return the start of the unused space at the end of a directory, or
558 * -1 if there are no trailing unused blocks.
559 */
560 doff_t
ufsdirhash_enduseful(struct inode * ip)561 ufsdirhash_enduseful(struct inode *ip)
562 {
563
564 struct dirhash *dh;
565 int i;
566
567 if ((dh = ip->i_dirhash) == NULL)
568 return (-1);
569 DIRHASH_LOCK(dh);
570 if (dh->dh_hash == NULL) {
571 DIRHASH_UNLOCK(dh);
572 ufsdirhash_free(ip);
573 return (-1);
574 }
575
576 if (dh->dh_blkfree[dh->dh_dirblks - 1] != DIRBLKSIZ / DIRALIGN) {
577 DIRHASH_UNLOCK(dh);
578 return (-1);
579 }
580
581 for (i = dh->dh_dirblks - 1; i >= 0; i--)
582 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
583 break;
584 DIRHASH_UNLOCK(dh);
585 return ((doff_t)(i + 1) * DIRBLKSIZ);
586 }
587
588 /*
589 * Insert information into the hash about a new directory entry. dirp
590 * points to a struct direct containing the entry, and offset specifies
591 * the offset of this entry.
592 */
593 void
ufsdirhash_add(struct inode * ip,struct direct * dirp,doff_t offset)594 ufsdirhash_add(struct inode *ip, struct direct *dirp, doff_t offset)
595 {
596 struct dirhash *dh;
597 int slot;
598
599 if ((dh = ip->i_dirhash) == NULL)
600 return;
601 DIRHASH_LOCK(dh);
602 if (dh->dh_hash == NULL) {
603 DIRHASH_UNLOCK(dh);
604 ufsdirhash_free(ip);
605 return;
606 }
607
608 DIRHASH_ASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
609 ("ufsdirhash_add: bad offset"));
610 /*
611 * Normal hash usage is < 66%. If the usage gets too high then
612 * remove the hash entirely and let it be rebuilt later.
613 */
614 if (dh->dh_hused >= (dh->dh_hlen * 3) / 4) {
615 DIRHASH_UNLOCK(dh);
616 ufsdirhash_free(ip);
617 return;
618 }
619
620 /* Find a free hash slot (empty or deleted), and add the entry. */
621 slot = ufsdirhash_hash(dh, dirp->d_name, dirp->d_namlen);
622 while (DH_ENTRY(dh, slot) >= 0)
623 slot = WRAPINCR(slot, dh->dh_hlen);
624 if (DH_ENTRY(dh, slot) == DIRHASH_EMPTY)
625 dh->dh_hused++;
626 DH_ENTRY(dh, slot) = offset;
627
628 /* Update the per-block summary info. */
629 ufsdirhash_adjfree(dh, offset, -DIRSIZ(dirp));
630 DIRHASH_UNLOCK(dh);
631 }
632
633 /*
634 * Remove the specified directory entry from the hash. The entry to remove
635 * is defined by the name in `dirp', which must exist at the specified
636 * `offset' within the directory.
637 */
638 void
ufsdirhash_remove(struct inode * ip,struct direct * dirp,doff_t offset)639 ufsdirhash_remove(struct inode *ip, struct direct *dirp, doff_t offset)
640 {
641 struct dirhash *dh;
642 int slot;
643
644 if ((dh = ip->i_dirhash) == NULL)
645 return;
646 DIRHASH_LOCK(dh);
647 if (dh->dh_hash == NULL) {
648 DIRHASH_UNLOCK(dh);
649 ufsdirhash_free(ip);
650 return;
651 }
652
653 DIRHASH_ASSERT(offset < dh->dh_dirblks * DIRBLKSIZ,
654 ("ufsdirhash_remove: bad offset"));
655 /* Find the entry */
656 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, offset);
657
658 /* Remove the hash entry. */
659 ufsdirhash_delslot(dh, slot);
660
661 /* Update the per-block summary info. */
662 ufsdirhash_adjfree(dh, offset, DIRSIZ(dirp));
663 DIRHASH_UNLOCK(dh);
664 }
665
666 /*
667 * Change the offset associated with a directory entry in the hash. Used
668 * when compacting directory blocks.
669 */
670 void
ufsdirhash_move(struct inode * ip,struct direct * dirp,doff_t oldoff,doff_t newoff)671 ufsdirhash_move(struct inode *ip, struct direct *dirp, doff_t oldoff,
672 doff_t newoff)
673 {
674 struct dirhash *dh;
675 int slot;
676
677 if ((dh = ip->i_dirhash) == NULL)
678 return;
679 DIRHASH_LOCK(dh);
680 if (dh->dh_hash == NULL) {
681 DIRHASH_UNLOCK(dh);
682 ufsdirhash_free(ip);
683 return;
684 }
685
686 DIRHASH_ASSERT(oldoff < dh->dh_dirblks * DIRBLKSIZ &&
687 newoff < dh->dh_dirblks * DIRBLKSIZ,
688 ("ufsdirhash_move: bad offset"));
689 /* Find the entry, and update the offset. */
690 slot = ufsdirhash_findslot(dh, dirp->d_name, dirp->d_namlen, oldoff);
691 DH_ENTRY(dh, slot) = newoff;
692 DIRHASH_UNLOCK(dh);
693 }
694
695 /*
696 * Inform dirhash that the directory has grown by one block that
697 * begins at offset (i.e. the new length is offset + DIRBLKSIZ).
698 */
699 void
ufsdirhash_newblk(struct inode * ip,doff_t offset)700 ufsdirhash_newblk(struct inode *ip, doff_t offset)
701 {
702 struct dirhash *dh;
703 int block;
704
705 if ((dh = ip->i_dirhash) == NULL)
706 return;
707 DIRHASH_LOCK(dh);
708 if (dh->dh_hash == NULL) {
709 DIRHASH_UNLOCK(dh);
710 ufsdirhash_free(ip);
711 return;
712 }
713
714 DIRHASH_ASSERT(offset == dh->dh_dirblks * DIRBLKSIZ,
715 ("ufsdirhash_newblk: bad offset"));
716 block = offset / DIRBLKSIZ;
717 if (block >= dh->dh_nblk) {
718 /* Out of space; must rebuild. */
719 DIRHASH_UNLOCK(dh);
720 ufsdirhash_free(ip);
721 return;
722 }
723 dh->dh_dirblks = block + 1;
724
725 /* Account for the new free block. */
726 dh->dh_blkfree[block] = DIRBLKSIZ / DIRALIGN;
727 if (dh->dh_firstfree[DH_NFSTATS] == -1)
728 dh->dh_firstfree[DH_NFSTATS] = block;
729 DIRHASH_UNLOCK(dh);
730 }
731
732 /*
733 * Inform dirhash that the directory is being truncated.
734 */
735 void
ufsdirhash_dirtrunc(struct inode * ip,doff_t offset)736 ufsdirhash_dirtrunc(struct inode *ip, doff_t offset)
737 {
738 struct dirhash *dh;
739 int block, i;
740
741 if ((dh = ip->i_dirhash) == NULL)
742 return;
743 DIRHASH_LOCK(dh);
744 if (dh->dh_hash == NULL) {
745 DIRHASH_UNLOCK(dh);
746 ufsdirhash_free(ip);
747 return;
748 }
749
750 DIRHASH_ASSERT(offset <= dh->dh_dirblks * DIRBLKSIZ,
751 ("ufsdirhash_dirtrunc: bad offset"));
752 block = howmany(offset, DIRBLKSIZ);
753 /*
754 * If the directory shrinks to less than 1/8 of dh_nblk blocks
755 * (about 20% of its original size due to the 50% extra added in
756 * ufsdirhash_build) then free it, and let the caller rebuild
757 * if necessary.
758 */
759 if (block < dh->dh_nblk / 8 && dh->dh_narrays > 1) {
760 DIRHASH_UNLOCK(dh);
761 ufsdirhash_free(ip);
762 return;
763 }
764
765 /*
766 * Remove any `first free' information pertaining to the
767 * truncated blocks. All blocks we're removing should be
768 * completely unused.
769 */
770 if (dh->dh_firstfree[DH_NFSTATS] >= block)
771 dh->dh_firstfree[DH_NFSTATS] = -1;
772 for (i = block; i < dh->dh_dirblks; i++)
773 if (dh->dh_blkfree[i] != DIRBLKSIZ / DIRALIGN)
774 panic("ufsdirhash_dirtrunc: blocks in use");
775 for (i = 0; i < DH_NFSTATS; i++)
776 if (dh->dh_firstfree[i] >= block)
777 panic("ufsdirhash_dirtrunc: first free corrupt");
778 dh->dh_dirblks = block;
779 DIRHASH_UNLOCK(dh);
780 }
781
782 /*
783 * Debugging function to check that the dirhash information about
784 * a directory block matches its actual contents. Panics if a mismatch
785 * is detected.
786 *
787 * On entry, `buf' should point to the start of an in-core
788 * DIRBLKSIZ-sized directory block, and `offset' should contain the
789 * offset from the start of the directory of that block.
790 */
791 void
ufsdirhash_checkblock(struct inode * ip,char * buf,doff_t offset)792 ufsdirhash_checkblock(struct inode *ip, char *buf, doff_t offset)
793 {
794 struct dirhash *dh;
795 struct direct *dp;
796 int block, ffslot, i, nfree;
797
798 if (!ufs_dirhashcheck)
799 return;
800 if ((dh = ip->i_dirhash) == NULL)
801 return;
802 DIRHASH_LOCK(dh);
803 if (dh->dh_hash == NULL) {
804 DIRHASH_UNLOCK(dh);
805 ufsdirhash_free(ip);
806 return;
807 }
808
809 block = offset / DIRBLKSIZ;
810 if ((offset & (DIRBLKSIZ - 1)) != 0 || block >= dh->dh_dirblks)
811 panic("ufsdirhash_checkblock: bad offset");
812
813 nfree = 0;
814 for (i = 0; i < DIRBLKSIZ; i += dp->d_reclen) {
815 dp = (struct direct *)(buf + i);
816 if (dp->d_reclen == 0 || i + dp->d_reclen > DIRBLKSIZ)
817 panic("ufsdirhash_checkblock: bad dir");
818
819 if (dp->d_ino == 0) {
820 #if 0
821 /*
822 * XXX entries with d_ino == 0 should only occur
823 * at the start of a DIRBLKSIZ block. However the
824 * ufs code is tolerant of such entries at other
825 * offsets, and fsck does not fix them.
826 */
827 if (i != 0)
828 panic("ufsdirhash_checkblock: bad dir inode");
829 #endif
830 nfree += dp->d_reclen;
831 continue;
832 }
833
834 /* Check that the entry exists (will panic if it doesn't). */
835 ufsdirhash_findslot(dh, dp->d_name, dp->d_namlen, offset + i);
836
837 nfree += dp->d_reclen - DIRSIZ(dp);
838 }
839 if (i != DIRBLKSIZ)
840 panic("ufsdirhash_checkblock: bad dir end");
841
842 if (dh->dh_blkfree[block] * DIRALIGN != nfree)
843 panic("ufsdirhash_checkblock: bad free count");
844
845 ffslot = BLKFREE2IDX(nfree / DIRALIGN);
846 for (i = 0; i <= DH_NFSTATS; i++)
847 if (dh->dh_firstfree[i] == block && i != ffslot)
848 panic("ufsdirhash_checkblock: bad first-free");
849 if (dh->dh_firstfree[ffslot] == -1)
850 panic("ufsdirhash_checkblock: missing first-free entry");
851 DIRHASH_UNLOCK(dh);
852 }
853
854 /*
855 * Hash the specified filename into a dirhash slot.
856 */
857 int
ufsdirhash_hash(struct dirhash * dh,char * name,int namelen)858 ufsdirhash_hash(struct dirhash *dh, char *name, int namelen)
859 {
860 return SipHash24(&ufsdirhash_key, name, namelen) % dh->dh_hlen;
861 }
862
863 /*
864 * Adjust the number of free bytes in the block containing `offset'
865 * by the value specified by `diff'.
866 *
867 * The caller must ensure we have exclusive access to `dh'; normally
868 * that means that dh_mtx should be held, but this is also called
869 * from ufsdirhash_build() where exclusive access can be assumed.
870 */
871 void
ufsdirhash_adjfree(struct dirhash * dh,doff_t offset,int diff)872 ufsdirhash_adjfree(struct dirhash *dh, doff_t offset, int diff)
873 {
874 int block, i, nfidx, ofidx;
875
876 /* Update the per-block summary info. */
877 block = offset / DIRBLKSIZ;
878 DIRHASH_ASSERT(block < dh->dh_nblk && block < dh->dh_dirblks,
879 ("dirhash bad offset"));
880 ofidx = BLKFREE2IDX(dh->dh_blkfree[block]);
881 dh->dh_blkfree[block] = (int)dh->dh_blkfree[block] + (diff / DIRALIGN);
882 nfidx = BLKFREE2IDX(dh->dh_blkfree[block]);
883
884 /* Update the `first free' list if necessary. */
885 if (ofidx != nfidx) {
886 /* If removing, scan forward for the next block. */
887 if (dh->dh_firstfree[ofidx] == block) {
888 for (i = block + 1; i < dh->dh_dirblks; i++)
889 if (BLKFREE2IDX(dh->dh_blkfree[i]) == ofidx)
890 break;
891 dh->dh_firstfree[ofidx] = (i < dh->dh_dirblks) ? i : -1;
892 }
893
894 /* Make this the new `first free' if necessary */
895 if (dh->dh_firstfree[nfidx] > block ||
896 dh->dh_firstfree[nfidx] == -1)
897 dh->dh_firstfree[nfidx] = block;
898 }
899 }
900
901 /*
902 * Find the specified name which should have the specified offset.
903 * Returns a slot number, and panics on failure.
904 *
905 * `dh' must be locked on entry and remains so on return.
906 */
907 int
ufsdirhash_findslot(struct dirhash * dh,char * name,int namelen,doff_t offset)908 ufsdirhash_findslot(struct dirhash *dh, char *name, int namelen, doff_t offset)
909 {
910 int slot;
911
912 mtx_assert(&dh->dh_mtx, MA_OWNED);
913
914 /* Find the entry. */
915 DIRHASH_ASSERT(dh->dh_hused < dh->dh_hlen, ("dirhash find full"));
916 slot = ufsdirhash_hash(dh, name, namelen);
917 while (DH_ENTRY(dh, slot) != offset &&
918 DH_ENTRY(dh, slot) != DIRHASH_EMPTY)
919 slot = WRAPINCR(slot, dh->dh_hlen);
920 if (DH_ENTRY(dh, slot) != offset)
921 panic("ufsdirhash_findslot: '%.*s' not found", namelen, name);
922
923 return (slot);
924 }
925
926 /*
927 * Remove the entry corresponding to the specified slot from the hash array.
928 *
929 * `dh' must be locked on entry and remains so on return.
930 */
931 void
ufsdirhash_delslot(struct dirhash * dh,int slot)932 ufsdirhash_delslot(struct dirhash *dh, int slot)
933 {
934 int i;
935
936 mtx_assert(&dh->dh_mtx, MA_OWNED);
937
938 /* Mark the entry as deleted. */
939 DH_ENTRY(dh, slot) = DIRHASH_DEL;
940
941 /* If this is the end of a chain of DIRHASH_DEL slots, remove them. */
942 for (i = slot; DH_ENTRY(dh, i) == DIRHASH_DEL; )
943 i = WRAPINCR(i, dh->dh_hlen);
944 if (DH_ENTRY(dh, i) == DIRHASH_EMPTY) {
945 i = WRAPDECR(i, dh->dh_hlen);
946 while (DH_ENTRY(dh, i) == DIRHASH_DEL) {
947 DH_ENTRY(dh, i) = DIRHASH_EMPTY;
948 dh->dh_hused--;
949 i = WRAPDECR(i, dh->dh_hlen);
950 }
951 DIRHASH_ASSERT(dh->dh_hused >= 0, ("ufsdirhash_delslot neg hlen"));
952 }
953 }
954
955 /*
956 * Given a directory entry and its offset, find the offset of the
957 * previous entry in the same DIRBLKSIZ-sized block. Returns an
958 * offset, or -1 if there is no previous entry in the block or some
959 * other problem occurred.
960 */
961 doff_t
ufsdirhash_getprev(struct direct * dirp,doff_t offset)962 ufsdirhash_getprev(struct direct *dirp, doff_t offset)
963 {
964 struct direct *dp;
965 char *blkbuf;
966 doff_t blkoff, prevoff;
967 int entrypos, i;
968
969 blkoff = offset & ~(DIRBLKSIZ - 1); /* offset of start of block */
970 entrypos = offset & (DIRBLKSIZ - 1); /* entry relative to block */
971 blkbuf = (char *)dirp - entrypos;
972 prevoff = blkoff;
973
974 /* If `offset' is the start of a block, there is no previous entry. */
975 if (entrypos == 0)
976 return (-1);
977
978 /* Scan from the start of the block until we get to the entry. */
979 for (i = 0; i < entrypos; i += dp->d_reclen) {
980 dp = (struct direct *)(blkbuf + i);
981 if (dp->d_reclen == 0 || i + dp->d_reclen > entrypos)
982 return (-1); /* Corrupted directory. */
983 prevoff = blkoff + i;
984 }
985 return (prevoff);
986 }
987
988 /*
989 * Try to free up `wanted' bytes by stealing memory from existing
990 * dirhashes. Returns zero with list locked if successful.
991 */
992 int
ufsdirhash_recycle(int wanted)993 ufsdirhash_recycle(int wanted)
994 {
995 struct dirhash *dh;
996 doff_t **hash;
997 u_int8_t *blkfree;
998 int i, mem, narrays, nblk;
999
1000 DIRHASHLIST_LOCK();
1001 while (wanted + ufs_dirhashmem > ufs_dirhashmaxmem) {
1002 /* Find a dirhash, and lock it. */
1003 if ((dh = TAILQ_FIRST(&ufsdirhash_list)) == NULL) {
1004 DIRHASHLIST_UNLOCK();
1005 return (-1);
1006 }
1007 DIRHASH_LOCK(dh);
1008 DIRHASH_ASSERT(dh->dh_hash != NULL, ("dirhash: NULL hash on list"));
1009
1010 /* Decrement the score; only recycle if it becomes zero. */
1011 if (--dh->dh_score > 0) {
1012 DIRHASH_UNLOCK(dh);
1013 DIRHASHLIST_UNLOCK();
1014 return (-1);
1015 }
1016
1017 /* Remove it from the list and detach its memory. */
1018 TAILQ_REMOVE(&ufsdirhash_list, dh, dh_list);
1019 dh->dh_onlist = 0;
1020 hash = dh->dh_hash;
1021 dh->dh_hash = NULL;
1022 blkfree = dh->dh_blkfree;
1023 dh->dh_blkfree = NULL;
1024 narrays = dh->dh_narrays;
1025 nblk = dh->dh_nblk;
1026 mem = narrays * sizeof(*dh->dh_hash) +
1027 narrays * DH_NBLKOFF * sizeof(**dh->dh_hash) +
1028 dh->dh_nblk * sizeof(*dh->dh_blkfree);
1029
1030 /* Unlock everything, free the detached memory. */
1031 DIRHASH_UNLOCK(dh);
1032 DIRHASHLIST_UNLOCK();
1033 for (i = 0; i < narrays; i++)
1034 DIRHASH_BLKFREE(hash[i]);
1035 free(hash, M_DIRHASH, narrays * sizeof(hash[0]));
1036 free(blkfree, M_DIRHASH, nblk * sizeof(blkfree[0]));
1037
1038 /* Account for the returned memory, and repeat if necessary. */
1039 DIRHASHLIST_LOCK();
1040 ufs_dirhashmem -= mem;
1041 }
1042 /* Success; return with list locked. */
1043 return (0);
1044 }
1045
1046
1047 void
ufsdirhash_init(void)1048 ufsdirhash_init(void)
1049 {
1050 pool_init(&ufsdirhash_pool, DH_NBLKOFF * sizeof(doff_t), 0, IPL_NONE,
1051 PR_WAITOK, "dirhash", NULL);
1052 rw_init(&ufsdirhash_mtx, "dirhash_list");
1053 arc4random_buf(&ufsdirhash_key, sizeof(ufsdirhash_key));
1054 TAILQ_INIT(&ufsdirhash_list);
1055 ufs_dirhashmaxmem = 5 * 1024 * 1024;
1056 ufs_mindirhashsize = 5 * DIRBLKSIZ;
1057 }
1058
1059 void
ufsdirhash_uninit(void)1060 ufsdirhash_uninit(void)
1061 {
1062 DIRHASH_ASSERT(TAILQ_EMPTY(&ufsdirhash_list), ("ufsdirhash_uninit"));
1063 pool_destroy(&ufsdirhash_pool);
1064 }
1065