xref: /dragonfly/sys/vfs/hammer/hammer_ondisk.c (revision 70705abf)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.4 2007/11/19 00:53:40 dillon Exp $
35  */
36 /*
37  * Manage HAMMER's on-disk structures.  These routines are primarily
38  * responsible for interfacing with the kernel's I/O subsystem and for
39  * managing in-memory structures.
40  */
41 
42 #include "hammer.h"
43 #include <sys/fcntl.h>
44 #include <sys/nlookup.h>
45 #include <sys/buf.h>
46 #include <sys/buf2.h>
47 
48 static void hammer_free_volume(hammer_volume_t volume);
49 static int hammer_load_volume(hammer_volume_t volume);
50 static int hammer_load_supercl(hammer_supercl_t supercl, int isnew);
51 static int hammer_load_cluster(hammer_cluster_t cluster, int isnew);
52 static int hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type);
53 static void hammer_remove_node_clist(hammer_buffer_t buffer,
54 			hammer_node_t node);
55 static void initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head,
56 			u_int64_t type);
57 static void alloc_new_buffer(hammer_cluster_t cluster,
58 			hammer_alist_t live, u_int64_t type, int32_t nelements,
59 			int32_t start,
60 			int *errorp, struct hammer_buffer **bufferp);
61 #if 0
62 static void readhammerbuf(hammer_volume_t vol, void *data,
63 			int64_t offset);
64 static void writehammerbuf(hammer_volume_t vol, const void *data,
65 			int64_t offset);
66 #endif
67 static int64_t calculate_cluster_offset(hammer_volume_t vol, int32_t clu_no);
68 static int64_t calculate_supercl_offset(hammer_volume_t vol, int32_t scl_no);
69 
70 struct hammer_alist_config Buf_alist_config;
71 struct hammer_alist_config Vol_normal_alist_config;
72 struct hammer_alist_config Vol_super_alist_config;
73 struct hammer_alist_config Supercl_alist_config;
74 struct hammer_alist_config Clu_master_alist_config;
75 struct hammer_alist_config Clu_slave_alist_config;
76 
77 /*
78  * Red-Black tree support for various structures
79  */
80 static int
81 hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
82 {
83 	if (ip1->obj_id < ip2->obj_id)
84 		return(-1);
85 	if (ip1->obj_id > ip2->obj_id)
86 		return(1);
87 	if (ip1->obj_asof < ip2->obj_asof)
88 		return(-1);
89 	if (ip1->obj_asof > ip2->obj_asof)
90 		return(1);
91 	return(0);
92 }
93 
94 static int
95 hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
96 {
97 	if (info->obj_id < ip->obj_id)
98 		return(-1);
99 	if (info->obj_id > ip->obj_id)
100 		return(1);
101 	if (info->obj_asof < ip->obj_asof)
102 		return(-1);
103 	if (info->obj_asof > ip->obj_asof)
104 		return(1);
105 	return(0);
106 }
107 
108 static int
109 hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
110 {
111 	if (vol1->vol_no < vol2->vol_no)
112 		return(-1);
113 	if (vol1->vol_no > vol2->vol_no)
114 		return(1);
115 	return(0);
116 }
117 
118 static int
119 hammer_scl_rb_compare(hammer_supercl_t cl1, hammer_supercl_t cl2)
120 {
121 	if (cl1->scl_no < cl2->scl_no)
122 		return(-1);
123 	if (cl1->scl_no > cl2->scl_no)
124 		return(1);
125 	return(0);
126 }
127 
128 static int
129 hammer_clu_rb_compare(hammer_cluster_t cl1, hammer_cluster_t cl2)
130 {
131 	if (cl1->clu_no < cl2->clu_no)
132 		return(-1);
133 	if (cl1->clu_no > cl2->clu_no)
134 		return(1);
135 	return(0);
136 }
137 
138 static int
139 hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
140 {
141 	if (buf1->buf_no < buf2->buf_no)
142 		return(-1);
143 	if (buf1->buf_no > buf2->buf_no)
144 		return(1);
145 	return(0);
146 }
147 
148 static int
149 hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
150 {
151 	if (node1->node_offset < node2->node_offset)
152 		return(-1);
153 	if (node1->node_offset > node2->node_offset)
154 		return(1);
155 	return(0);
156 }
157 
158 /*
159  * Note: The lookup function for hammer_ino_rb_tree winds up being named
160  * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
161  * functions are normal, e.g. hammer_clu_rb_tree_RB_LOOKUP(root, clu_no).
162  */
163 RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
164 RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
165 		hammer_inode_info_cmp, hammer_inode_info_t);
166 RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
167 	     hammer_vol_rb_compare, int32_t, vol_no);
168 RB_GENERATE2(hammer_scl_rb_tree, hammer_supercl, rb_node,
169 	     hammer_scl_rb_compare, int32_t, scl_no);
170 RB_GENERATE2(hammer_clu_rb_tree, hammer_cluster, rb_node,
171 	     hammer_clu_rb_compare, int32_t, clu_no);
172 RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
173 	     hammer_buf_rb_compare, int32_t, buf_no);
174 RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
175 	     hammer_nod_rb_compare, int32_t, node_offset);
176 
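/*
 * Illustrative sketch (not compiled): how the generated lookup functions
 * named in the note above might be invoked.  hmp->rb_inos_root and the
 * hammer_inode_info layout are assumed to match hammer.h; the helper
 * names are hypothetical.
 */
#if 0
static hammer_inode_t
example_lookup_inode(struct hammer_mount *hmp, struct hammer_inode_info *info)
{
	/* special-cased lookup generated by RB_GENERATE_XLOOKUP() above */
	return(hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, info));
}

static hammer_cluster_t
example_lookup_cluster(hammer_volume_t volume, int32_t clu_no)
{
	/* normal keyed lookup generated by RB_GENERATE2() above */
	return(hammer_clu_rb_tree_RB_LOOKUP(&volume->rb_clus_root, clu_no));
}
#endif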
177 /************************************************************************
178  *				VOLUMES					*
179  ************************************************************************
180  *
181  * Load a HAMMER volume by name.  Returns 0 on success or a positive error
182  * code on failure.  Volumes must be loaded at mount time; get_volume() will
183  * not load a new volume.
184  *
185  * Calls made to hammer_load_volume() are single-threaded.
186  */
187 int
188 hammer_install_volume(struct hammer_mount *hmp, const char *volname)
189 {
190 	struct mount *mp;
191 	hammer_volume_t volume;
192 	struct hammer_volume_ondisk *ondisk;
193 	struct nlookupdata nd;
194 	struct buf *bp = NULL;
195 	int error;
196 	int ronly;
197 
198 	mp = hmp->mp;
199 	ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
200 
201 	/*
202 	 * Allocate a volume structure
203 	 */
204 	volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO);
205 	volume->vol_name = kstrdup(volname, M_HAMMER);
206 	volume->hmp = hmp;
207 	volume->io.type = HAMMER_STRUCTURE_VOLUME;
208 	volume->io.offset = 0LL;
209 
210 	/*
211 	 * Get the device vnode
212 	 */
213 	error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW);
214 	if (error == 0)
215 		error = nlookup(&nd);
216 	if (error == 0)
217 		error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp);
218 	nlookup_done(&nd);
219 	if (error == 0) {
220 		vn_isdisk(volume->devvp, &error);
221 	}
222 	if (error == 0) {
223 		vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY);
224 		error = VOP_OPEN(volume->devvp, (ronly ? FREAD : FREAD|FWRITE),
225 				 FSCRED, NULL);
226 		vn_unlock(volume->devvp);
227 	}
228 	if (error) {
229 		hammer_free_volume(volume);
230 		return(error);
231 	}
232 
233 	/*
234 	 * Extract the volume number from the volume header and do various
235 	 * sanity checks.
236 	 */
237 	error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp);
238 	if (error)
239 		goto late_failure;
240 	ondisk = (void *)bp->b_data;
241 	if (ondisk->head.buf_type != HAMMER_FSBUF_VOLUME) {
242 		kprintf("hammer_mount: volume %s has an invalid header\n",
243 			volume->vol_name);
244 		error = EFTYPE;
245 		goto late_failure;
246 	}
247 	volume->vol_no = ondisk->vol_no;
248 	volume->cluster_base = ondisk->vol_beg;
249 	volume->vol_clsize = ondisk->vol_clsize;
250 	volume->vol_flags = ondisk->vol_flags;
251 	RB_INIT(&volume->rb_clus_root);
252 	RB_INIT(&volume->rb_scls_root);
253 
254 	if (RB_EMPTY(&hmp->rb_vols_root)) {
255 		hmp->fsid = ondisk->vol_fsid;
256 	} else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) {
257 		kprintf("hammer_mount: volume %s's fsid does not match "
258 			"other volumes\n", volume->vol_name);
259 		error = EFTYPE;
260 		goto late_failure;
261 	}
262 
263 	/*
264 	 * Insert the volume structure into the red-black tree.
265 	 */
266 	if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) {
267 		kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n",
268 			volume->vol_name, volume->vol_no);
269 		error = EEXIST;
270 	}
271 
272 	/*
273 	 * Set the root volume and load the root cluster.  HAMMER special
274 	 * cases rootvol and rootcl and will not deallocate the structures.
275 	 * We do not hold a ref because this would prevent related I/O
276 	 * from being flushed.
277 	 */
278 	if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) {
279 		hmp->rootvol = volume;
280 		hmp->rootcl = hammer_get_cluster(volume,
281 						 ondisk->vol0_root_clu_no,
282 						 &error, 0);
283 		if (hmp->rootcl != NULL)
			hammer_rel_cluster(hmp->rootcl, 0);
284 		hmp->fsid_udev = dev2udev(vn_todev(volume->devvp));
285 	}
286 late_failure:
287 	if (bp)
288 		brelse(bp);
289 	if (error) {
290 		/*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/
291 		VOP_CLOSE(volume->devvp, ronly ? FREAD : FREAD|FWRITE);
292 		hammer_free_volume(volume);
293 	}
294 	return (error);
295 }
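/*
 * Illustrative sketch (not compiled): hammer_install_volume() is meant to
 * be called once per volume during mount, roughly as below.  The 'volnames'
 * array and the helper name are hypothetical; the real caller is expected
 * to be the VFS mount code.
 */
#if 0
static int
example_install_volumes(struct hammer_mount *hmp, char **volnames, int count)
{
	int error = 0;
	int i;

	for (i = 0; error == 0 && i < count; ++i)
		error = hammer_install_volume(hmp, volnames[i]);
	/*
	 * On failure the caller is expected to unwind by unloading any
	 * volumes already installed (see hammer_unload_volume() below).
	 */
	return(error);
}
#endif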
296 
297 /*
298  * Unload and free a HAMMER volume.  This is a scan callback; it must
299  * return >= 0 for the scan to continue (-1 would abort it on failure).
300  */
301 int
302 hammer_unload_volume(hammer_volume_t volume, void *data __unused)
303 {
304 	struct hammer_mount *hmp = volume->hmp;
305 	hammer_cluster_t rootcl;
306 	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);
307 
308 	/*
309 	 * Sync clusters, sync volume
310 	 */
311 
312 	/*
313 	 * Clean up the root cluster, which is held unlocked in the root
314 	 * volume.
315 	 */
316 	hammer_ref(&volume->io.lock);
317 	if (hmp->rootvol == volume) {
318 		if ((rootcl = hmp->rootcl) != NULL)
319 			hmp->rootcl = NULL;
320 		hmp->rootvol = NULL;
321 	}
322 
323 	/*
324 	 * Flush the volume
325 	 */
326 	KKASSERT(volume->io.lock.refs == 1);
327 	hammer_io_release(&volume->io, 1);
328 	volume->ondisk = NULL;
329 	if (volume->devvp) {
330 		if (ronly) {
331 			vinvalbuf(volume->devvp, 0, 0, 0);
332 			VOP_CLOSE(volume->devvp, FREAD);
333 		} else {
334 			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
335 			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
336 		}
337 	}
338 
339 	/*
340 	 * Destroy the structure
341 	 */
342 	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
343 	hammer_free_volume(volume);
344 	return(0);
345 }
346 
347 static
348 void
349 hammer_free_volume(hammer_volume_t volume)
350 {
351 	if (volume->vol_name) {
352 		kfree(volume->vol_name, M_HAMMER);
353 		volume->vol_name = NULL;
354 	}
355 	if (volume->devvp) {
356 		vrele(volume->devvp);
357 		volume->devvp = NULL;
358 	}
359 	kfree(volume, M_HAMMER);
360 }
361 
362 /*
363  * Get a HAMMER volume.  The volume must already exist.
364  */
365 hammer_volume_t
366 hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
367 {
368 	struct hammer_volume *volume;
369 
370 	/*
371 	 * Locate the volume structure
372 	 */
373 	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
374 	if (volume == NULL) {
375 		*errorp = ENOENT;
376 		return(NULL);
377 	}
378 	hammer_ref(&volume->io.lock);
379 
380 	/*
381 	 * Deal with on-disk info
382 	 */
383 	if (volume->ondisk == NULL) {
384 		*errorp = hammer_load_volume(volume);
385 		if (*errorp) {
386 			hammer_rel_volume(volume, 1);
387 			volume = NULL;
388 		}
389 	} else {
390 		*errorp = 0;
391 	}
392 	return(volume);
393 }
394 
395 hammer_volume_t
396 hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
397 {
398 	hammer_volume_t volume;
399 
400 	volume = hmp->rootvol;
401 	KKASSERT(volume != NULL);
402 	hammer_ref(&volume->io.lock);
403 
404 	/*
405 	 * Deal with on-disk info
406 	 */
407 	if (volume->ondisk == NULL) {
408 		*errorp = hammer_load_volume(volume);
409 		if (*errorp) {
410 			hammer_rel_volume(volume, 1);
411 			volume = NULL;
412 		}
413 	} else {
414 		*errorp = 0;
415 	}
416 	return (volume);
417 }
418 
419 /*
420  * Load a volume's on-disk information.  The volume must be referenced and
421  * not locked.  We temporarily acquire an exclusive lock to interlock
422  * against releases or multiple get's.
423  */
424 static int
425 hammer_load_volume(hammer_volume_t volume)
426 {
427 	struct hammer_volume_ondisk *ondisk;
428 	int error;
429 
430 	hammer_lock_ex(&volume->io.lock);
431 	if (volume->ondisk == NULL) {
432 		error = hammer_io_read(volume->devvp, &volume->io);
433 		if (error) {
434 			hammer_unlock(&volume->io.lock);
435 			return (error);
436 		}
437 		volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
438 
439 		/*
440 		 * Configure the volume's A-lists.  These are used to
441 		 * allocate clusters.
442 		 */
443 		if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
444 			volume->alist.config = &Vol_super_alist_config;
445 			volume->alist.meta = ondisk->vol_almeta.super;
446 			volume->alist.info = volume;
447 		} else {
448 			volume->alist.config = &Vol_normal_alist_config;
449 			volume->alist.meta = ondisk->vol_almeta.normal;
450 			volume->alist.info = NULL;
451 		}
452 		hammer_alist_init(&volume->alist);
453 	} else {
454 		error = 0;
455 	}
456 	hammer_unlock(&volume->io.lock);
457 	return(0);
458 }
459 
460 /*
461  * Release a volume.  Call hammer_io_release on the last reference.  We have
462  * to acquire an exclusive lock to interlock against volume->ondisk tests
463  * in hammer_load_volume().
464  */
465 void
466 hammer_rel_volume(hammer_volume_t volume, int flush)
467 {
468 	if (hammer_islastref(&volume->io.lock)) {
469 		hammer_lock_ex(&volume->io.lock);
470 		if (hammer_islastref(&volume->io.lock)) {
471 			volume->ondisk = NULL;
472 			hammer_io_release(&volume->io, flush);
473 		}
474 		hammer_unlock(&volume->io.lock);
475 	}
476 	hammer_unref(&volume->io.lock);
477 }
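/*
 * Illustrative sketch (not compiled): the reference/release pattern the
 * functions above implement.  A successful hammer_get_volume() returns a
 * referenced volume with volume->ondisk mapped; the caller must pair it
 * with hammer_rel_volume().  The helper name is hypothetical.
 */
#if 0
static void
example_print_volume(struct hammer_mount *hmp, int32_t vol_no)
{
	hammer_volume_t volume;
	int error;

	volume = hammer_get_volume(hmp, vol_no, &error);
	if (volume == NULL) {
		kprintf("volume %d not loaded (error %d)\n", vol_no, error);
		return;
	}
	kprintf("volume %d: cluster size %d\n",
		volume->ondisk->vol_no, (int)volume->ondisk->vol_clsize);
	hammer_rel_volume(volume, 0);
}
#endif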
478 
479 /************************************************************************
480  *				SUPER-CLUSTERS				*
481  ************************************************************************
482  *
483  * Manage super-clusters.  Note that a supercl holds a reference to its
484  * associated volume.
485  */
486 hammer_supercl_t
487 hammer_get_supercl(hammer_volume_t volume, int32_t scl_no,
488 		   int *errorp, int isnew)
489 {
490 	hammer_supercl_t supercl;
491 
492 	/*
493 	 * Locate and lock the super-cluster structure, creating one
494 	 * if necessary.
495 	 */
496 again:
497 	supercl = RB_LOOKUP(hammer_scl_rb_tree, &volume->rb_scls_root, scl_no);
498 	if (supercl == NULL) {
499 		supercl = kmalloc(sizeof(*supercl), M_HAMMER, M_WAITOK|M_ZERO);
500 		supercl->scl_no = scl_no;
501 		supercl->volume = volume;
502 		supercl->io.offset = calculate_supercl_offset(volume, scl_no);
503 		supercl->io.type = HAMMER_STRUCTURE_SUPERCL;
504 		hammer_ref(&supercl->io.lock);
505 
506 		/*
507 		 * Insert the super-cluster into the RB tree and handle late
508 		 * collisions.
509 		 */
510 		if (RB_INSERT(hammer_scl_rb_tree, &volume->rb_scls_root, supercl)) {
511 			hammer_unref(&supercl->io.lock);
512 			kfree(supercl, M_HAMMER);
513 			goto again;
514 		}
515 		hammer_ref(&volume->io.lock);
516 	} else {
517 		hammer_ref(&supercl->io.lock);
518 	}
519 
520 	/*
521 	 * Deal with on-disk info
522 	 */
523 	if (supercl->ondisk == NULL || isnew) {
524 		*errorp = hammer_load_supercl(supercl, isnew);
525 		if (*errorp) {
526 			hammer_rel_supercl(supercl, 1);
527 			supercl = NULL;
528 		}
529 	} else {
530 		*errorp = 0;
531 	}
532 	return(supercl);
533 }
534 
535 static int
536 hammer_load_supercl(hammer_supercl_t supercl, int isnew)
537 {
538 	struct hammer_supercl_ondisk *ondisk;
539 	hammer_volume_t volume = supercl->volume;
540 	int error;
541 
542 	hammer_lock_ex(&supercl->io.lock);
543 	if (supercl->ondisk == NULL) {
544 		if (isnew)
545 			error = hammer_io_new(volume->devvp, &supercl->io);
546 		else
547 			error = hammer_io_read(volume->devvp, &supercl->io);
548 		if (error) {
549 			hammer_unlock(&supercl->io.lock);
550 			return (error);
551 		}
552 		supercl->ondisk = ondisk = (void *)supercl->io.bp->b_data;
553 
554 		supercl->alist.config = &Supercl_alist_config;
555 		supercl->alist.meta = ondisk->scl_meta;
556 		supercl->alist.info = NULL;
557 
558 		/*
559 		 * If this is a new super-cluster we have to initialize
560 		 * various ondisk structural elements.  The caller is
561 		 * responsible for the remainder.
562 		 */
563 		if (isnew) {
564 			struct hammer_alist_live dummy;
565 
566 			dummy.config = &Buf_alist_config;
567 			dummy.meta = ondisk->head.buf_almeta;
568 			dummy.info = NULL;
569 			initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_SUPERCL);
570 			hammer_alist_init(&supercl->alist);
571 		}
572 	} else if (isnew) {
573 		error = hammer_io_new(volume->devvp, &supercl->io);
574 	} else {
575 		error = 0;
576 	}
577 	hammer_unlock(&supercl->io.lock);
578 	return (error);
579 }
580 
581 /*
582  * Release a super-cluster.  We have to deal with several places where
583  * another thread can ref the super-cluster.
584  *
585  * Only destroy the structure itself if the related buffer cache buffer
586  * was disassociated from it.  This ties the management of the structure
587  * to the buffer cache subsystem.
588  */
589 void
590 hammer_rel_supercl(hammer_supercl_t supercl, int flush)
591 {
592 	hammer_volume_t volume;
593 
594 	if (hammer_islastref(&supercl->io.lock)) {
595 		hammer_lock_ex(&supercl->io.lock);
596 		if (hammer_islastref(&supercl->io.lock)) {
597 			supercl->ondisk = NULL;
598 			hammer_io_release(&supercl->io, flush);
599 			if (supercl->io.bp == NULL &&
600 			    hammer_islastref(&supercl->io.lock)) {
601 				volume = supercl->volume;
602 				RB_REMOVE(hammer_scl_rb_tree,
603 					  &volume->rb_scls_root, supercl);
604 				supercl->volume = NULL;	/* sanity */
605 				kfree(supercl, M_HAMMER);
606 				hammer_rel_volume(volume, 0);
607 				return;
608 			}
609 		}
610 		hammer_unlock(&supercl->io.lock);
611 	}
612 	hammer_unref(&supercl->io.lock);
613 }
614 
615 /************************************************************************
616  *				CLUSTERS				*
617  ************************************************************************
618  *
619  * Manage clusters.  Note that a cluster holds a reference to its
620  * associated volume.
621  */
622 hammer_cluster_t
623 hammer_get_cluster(hammer_volume_t volume, int32_t clu_no,
624 		   int *errorp, int isnew)
625 {
626 	hammer_cluster_t cluster;
627 
628 again:
629 	cluster = RB_LOOKUP(hammer_clu_rb_tree, &volume->rb_clus_root, clu_no);
630 	if (cluster == NULL) {
631 		cluster = kmalloc(sizeof(*cluster), M_HAMMER, M_WAITOK|M_ZERO);
632 		cluster->clu_no = clu_no;
633 		cluster->volume = volume;
634 		cluster->io.offset = calculate_cluster_offset(volume, clu_no);
635 		RB_INIT(&cluster->rb_bufs_root);
636 		RB_INIT(&cluster->rb_nods_root);
637 		cluster->io.type = HAMMER_STRUCTURE_CLUSTER;
638 		hammer_ref(&cluster->io.lock);
639 
640 		/*
641 		 * Insert the cluster into the RB tree and handle late
642 		 * collisions.
643 		 */
644 		if (RB_INSERT(hammer_clu_rb_tree, &volume->rb_clus_root, cluster)) {
645 			hammer_unref(&cluster->io.lock);
646 			kfree(cluster, M_HAMMER);
647 			goto again;
648 		}
649 		hammer_ref(&volume->io.lock);
650 	} else {
651 		hammer_ref(&cluster->io.lock);
652 	}
653 
654 	/*
655 	 * Deal with on-disk info
656 	 */
657 	if (cluster->ondisk == NULL || isnew) {
658 		*errorp = hammer_load_cluster(cluster, isnew);
659 		if (*errorp) {
660 			hammer_rel_cluster(cluster, 1);
661 			cluster = NULL;
662 		}
663 	} else {
664 		*errorp = 0;
665 	}
666 	return (cluster);
667 }
668 
669 hammer_cluster_t
670 hammer_get_root_cluster(struct hammer_mount *hmp, int *errorp)
671 {
672 	hammer_cluster_t cluster;
673 
674 	cluster = hmp->rootcl;
675 	KKASSERT(cluster != NULL);
676 	hammer_ref(&cluster->io.lock);
677 
678 	/*
679 	 * Deal with on-disk info
680 	 */
681 	if (cluster->ondisk == NULL) {
682 		*errorp = hammer_load_cluster(cluster, 0);
683 		if (*errorp) {
684 			hammer_rel_cluster(cluster, 1);
685 			cluster = NULL;
686 		}
687 	} else {
688 		*errorp = 0;
689 	}
690 	return (cluster);
691 }
692 
693 static
694 int
695 hammer_load_cluster(hammer_cluster_t cluster, int isnew)
696 {
697 	hammer_volume_t volume = cluster->volume;
698 	struct hammer_cluster_ondisk *ondisk;
699 	int error;
700 
701 	/*
702 	 * Load the cluster's on-disk info
703 	 */
704 	hammer_lock_ex(&cluster->io.lock);
705 	if (cluster->ondisk == NULL) {
706 		if (isnew)
707 			error = hammer_io_new(volume->devvp, &cluster->io);
708 		else
709 			error = hammer_io_read(volume->devvp, &cluster->io);
710 		if (error) {
711 			hammer_unlock(&cluster->io.lock);
712 			return (error);
713 		}
714 		cluster->ondisk = ondisk = (void *)cluster->io.bp->b_data;
715 
716 		cluster->alist_master.config = &Clu_master_alist_config;
717 		cluster->alist_master.meta = ondisk->clu_master_meta;
718 		cluster->alist_btree.config = &Clu_slave_alist_config;
719 		cluster->alist_btree.meta = ondisk->clu_btree_meta;
720 		cluster->alist_btree.info = cluster;
721 		cluster->alist_record.config = &Clu_slave_alist_config;
722 		cluster->alist_record.meta = ondisk->clu_record_meta;
723 		cluster->alist_record.info = cluster;
724 		cluster->alist_mdata.config = &Clu_slave_alist_config;
725 		cluster->alist_mdata.meta = ondisk->clu_mdata_meta;
726 		cluster->alist_mdata.info = cluster;
727 
728 		cluster->clu_btree_beg = ondisk->clu_btree_beg;
729 		cluster->clu_btree_end = ondisk->clu_btree_end;
730 
731 		/*
732 		 * If this is a new cluster we have to initialize
733 		 * various ondisk structural elements.  The caller is
734 		 * responsible for the remainder.
735 		 */
736 		if (isnew) {
737 			struct hammer_alist_live dummy;
738 
739 			dummy.config = &Buf_alist_config;
740 			dummy.meta = ondisk->head.buf_almeta;
741 			dummy.info = NULL;
742 			initbuffer(&dummy, &ondisk->head, HAMMER_FSBUF_CLUSTER);
743 
744 			hammer_alist_init(&cluster->alist_master);
745 			hammer_alist_init(&cluster->alist_btree);
746 			hammer_alist_init(&cluster->alist_record);
747 			hammer_alist_init(&cluster->alist_mdata);
748 		}
749 	} else if (isnew) {
750 		error = hammer_io_new(volume->devvp, &cluster->io);
751 	} else {
752 		error = 0;
753 	}
754 	hammer_unlock(&cluster->io.lock);
755 	return (error);
756 }
757 
758 /*
759  * Reference a cluster that is either already referenced or via a specially
760  * handled pointer (aka rootcl).
761  */
762 int
763 hammer_ref_cluster(hammer_cluster_t cluster)
764 {
765 	int error;
766 
767 	KKASSERT(cluster != NULL);
768 	hammer_ref(&cluster->io.lock);
769 
770 	/*
771 	 * Deal with on-disk info
772 	 */
773 	if (cluster->ondisk == NULL) {
774 		error = hammer_load_cluster(cluster, 0);
775 		if (error)
776 			hammer_rel_cluster(cluster, 1);
777 	} else {
778 		error = 0;
779 	}
780 	return(error);
781 }
782 
783 /*
784  * Release a cluster.  We have to deal with several places where
785  * another thread can ref the cluster.
786  *
787  * Only destroy the structure itself if the related buffer cache buffer
788  * was disassociated from it.  This ties the management of the structure
789  * to the buffer cache subsystem.
790  */
791 void
792 hammer_rel_cluster(hammer_cluster_t cluster, int flush)
793 {
794 	hammer_node_t node;
795 	hammer_volume_t volume;
796 
797 	if (hammer_islastref(&cluster->io.lock)) {
798 		hammer_lock_ex(&cluster->io.lock);
799 		if (hammer_islastref(&cluster->io.lock)) {
800 			cluster->ondisk = NULL;
801 			hammer_io_release(&cluster->io, flush);
802 
803 			/*
804 			 * Clean out the B-Tree node cache, if any, then
805 			 * clean up the volume ref and free the cluster.
806 			 *
807 			 * If the cluster acquires a new reference while we
808 			 * are trying to clean it out, abort the cleaning.
809 			 *
810 			 * There really shouldn't be any nodes at this point
811 			 * but we allow a node with no buffer association
812 			 * so handle the case.
813 			 */
814 			while (cluster->io.bp == NULL &&
815 			       hammer_islastref(&cluster->io.lock) &&
816 			       (node = RB_ROOT(&cluster->rb_nods_root)) != NULL
817 			) {
818 				KKASSERT(node->lock.refs == 0);
819 				hammer_flush_node(node);
820 			}
821 			if (cluster->io.bp == NULL &&
822 			    hammer_islastref(&cluster->io.lock)) {
823 				volume = cluster->volume;
824 				RB_REMOVE(hammer_clu_rb_tree,
825 					  &volume->rb_clus_root, cluster);
826 				cluster->volume = NULL;	/* sanity */
827 				kfree(cluster, M_HAMMER);
828 				hammer_rel_volume(volume, 0);
829 				return;
830 			}
831 		}
832 		hammer_unlock(&cluster->io.lock);
833 	}
834 	hammer_unref(&cluster->io.lock);
835 }
836 
837 /************************************************************************
838  *				BUFFERS					*
839  ************************************************************************
840  *
841  * Manage buffers.  Note that a buffer holds a reference to its associated
842  * cluster, and its cluster will hold a reference to the cluster's volume.
843  *
844  * A non-zero buf_type indicates that a new buffer should be created and
845  * zero'd.
846  */
847 hammer_buffer_t
848 hammer_get_buffer(hammer_cluster_t cluster, int32_t buf_no,
849 		  u_int64_t buf_type, int *errorp)
850 {
851 	hammer_buffer_t buffer;
852 
853 	/*
854 	 * Find the buffer.  Note that buffer 0 corresponds to the cluster
855 	 * header and should never be requested.
856 	 */
857 	KKASSERT(buf_no != 0);
858 
859 	/*
860 	 * Locate and lock the buffer structure, creating one if necessary.
861 	 */
862 again:
863 	buffer = RB_LOOKUP(hammer_buf_rb_tree, &cluster->rb_bufs_root, buf_no);
864 	if (buffer == NULL) {
865 		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
866 		buffer->buf_no = buf_no;
867 		buffer->cluster = cluster;
868 		buffer->volume = cluster->volume;
869 		buffer->io.offset = cluster->io.offset +
870 				    (buf_no * HAMMER_BUFSIZE);
871 		buffer->io.type = HAMMER_STRUCTURE_BUFFER;
872 		TAILQ_INIT(&buffer->clist);
873 		hammer_ref(&buffer->io.lock);
874 
875 		/*
876 		 * Insert the buffer into the RB tree and handle late
877 		 * collisions.
878 		 */
879 		if (RB_INSERT(hammer_buf_rb_tree, &cluster->rb_bufs_root, buffer)) {
880 			hammer_unref(&buffer->io.lock);
881 			kfree(buffer, M_HAMMER);
882 			goto again;
883 		}
884 		hammer_ref(&cluster->io.lock);
885 	} else {
886 		hammer_ref(&buffer->io.lock);
887 	}
888 
889 	/*
890 	 * Deal with on-disk info
891 	 */
892 	if (buffer->ondisk == NULL || buf_type) {
893 		*errorp = hammer_load_buffer(buffer, buf_type);
894 		if (*errorp) {
895 			hammer_rel_buffer(buffer, 1);
896 			buffer = NULL;
897 		}
898 	} else {
899 		*errorp = 0;
900 	}
901 	return(buffer);
902 }
903 
904 static int
905 hammer_load_buffer(hammer_buffer_t buffer, u_int64_t buf_type)
906 {
907 	hammer_volume_t volume;
908 	hammer_fsbuf_ondisk_t ondisk;
909 	int error;
910 
911 	/*
912 	 * Load the buffer's on-disk info
913 	 */
914 	volume = buffer->volume;
915 	hammer_lock_ex(&buffer->io.lock);
916 	if (buffer->ondisk == NULL) {
917 		if (buf_type) {
918 			error = hammer_io_new(volume->devvp, &buffer->io);
919 		} else {
920 			error = hammer_io_read(volume->devvp, &buffer->io);
921 		}
922 		if (error) {
923 			hammer_unlock(&buffer->io.lock);
924 			return (error);
925 		}
926 		buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
927 		buffer->alist.config = &Buf_alist_config;
928 		buffer->alist.meta = ondisk->head.buf_almeta;
929 
930 		if (buf_type) {
931 			initbuffer(&buffer->alist, &ondisk->head, buf_type);
932 		}
933 		buffer->buf_type = ondisk->head.buf_type;
934 	} else if (buf_type) {
935 		error = hammer_io_new(volume->devvp, &buffer->io);
936 	} else {
937 		error = 0;
938 	}
939 	hammer_unlock(&buffer->io.lock);
940 	return (error);
941 }
942 
943 /*
944  * Reference a buffer that is either already referenced or via a specially
945  * handled pointer (aka cursor->buffer).
946  */
947 int
948 hammer_ref_buffer(hammer_buffer_t buffer)
949 {
950 	int error;
951 
952 	hammer_ref(&buffer->io.lock);
953 	if (buffer->ondisk == NULL) {
954 		error = hammer_load_buffer(buffer, 0);
955 		if (error) {
956 			hammer_rel_buffer(buffer, 1);
957 			/*
958 			 * NOTE: buffer pointer can become stale after
959 			 * the above release.
960 			 */
961 		} else {
962 			KKASSERT(buffer->buf_type ==
963 				 buffer->ondisk->head.buf_type);
964 		}
965 	} else {
966 		error = 0;
967 	}
968 	return(error);
969 }
970 
971 /*
972  * Release a buffer.  We have to deal with several places where
973  * another thread can ref the buffer.
974  *
975  * Only destroy the structure itself if the related buffer cache buffer
976  * was disassociated from it.  This ties the management of the structure
977  * to the buffer cache subsystem.  buffer->ondisk determines whether the
978  * embedded io is referenced or not.
979  */
980 void
981 hammer_rel_buffer(hammer_buffer_t buffer, int flush)
982 {
983 	hammer_cluster_t cluster;
984 	hammer_node_t node;
985 
986 	if (hammer_islastref(&buffer->io.lock)) {
987 		hammer_lock_ex(&buffer->io.lock);
988 		if (hammer_islastref(&buffer->io.lock)) {
989 			buffer->ondisk = NULL;
990 			hammer_io_release(&buffer->io, flush);
991 
992 			/*
993 			 * Clean out the B-Tree node cache, if any, then
994 			 * clean up the cluster ref and free the buffer.
995 			 *
996 			 * If the buffer acquires a new reference while we
997 			 * are trying to clean it out, abort the cleaning.
998 			 */
999 			while (buffer->io.bp == NULL &&
1000 			       hammer_islastref(&buffer->io.lock) &&
1001 			       (node = TAILQ_FIRST(&buffer->clist)) != NULL
1002 			) {
1003 				KKASSERT(node->lock.refs == 0);
1004 				hammer_flush_node(node);
1005 			}
1006 			if (buffer->io.bp == NULL &&
1007 			    hammer_islastref(&buffer->io.lock)) {
1008 				cluster = buffer->cluster;
1009 				RB_REMOVE(hammer_buf_rb_tree,
1010 					  &cluster->rb_bufs_root, buffer);
1011 				buffer->cluster = NULL; /* sanity */
1012 				kfree(buffer, M_HAMMER);
1013 				hammer_rel_cluster(cluster, 0);
1014 				return;
1015 			}
1016 		}
1017 		hammer_unlock(&buffer->io.lock);
1018 	}
1019 	hammer_unref(&buffer->io.lock);
1020 }
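/*
 * Illustrative sketch (not compiled): mapping a cluster-relative byte
 * offset to its filesystem buffer with hammer_get_buffer() and releasing
 * the buffer again.  Buffer 0 is the cluster header and is never requested
 * this way.  The helper name is hypothetical.
 */
#if 0
static u_int64_t
example_buffer_type(hammer_cluster_t cluster, int32_t bclu_offset, int *errorp)
{
	hammer_buffer_t buffer;
	int32_t buf_no = bclu_offset / HAMMER_BUFSIZE;
	u_int64_t type = 0;

	buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
	if (buffer) {
		type = buffer->ondisk->head.buf_type;
		hammer_rel_buffer(buffer, 0);
	}
	return(type);
}
#endif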
1021 
1022 /*
1023  * Flush passively cached B-Tree nodes associated with this buffer.
1024  *
1025  * NOTE: The buffer is referenced and locked.
1026  */
1027 void
1028 hammer_flush_buffer_nodes(hammer_buffer_t buffer)
1029 {
1030 	hammer_node_t node;
1031 
1032 	node = TAILQ_FIRST(&buffer->clist);
1033 	while (node) {
1034 		buffer->save_scan = TAILQ_NEXT(node, entry);
1035 		if (node->lock.refs == 0)
1036 			hammer_flush_node(node);
1037 		node = buffer->save_scan;
1038 	}
1039 }
1040 
1041 /************************************************************************
1042  *				NODES					*
1043  ************************************************************************
1044  *
1045  * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
1046  * method used by the HAMMER filesystem.
1047  *
1048  * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
1049  * associated with its buffer.  It can have an active buffer reference
1050  * even when the node itself has no references.  The node also passively
1051  * associates itself with its cluster without holding any cluster refs.
1052  * The cluster ref is indirectly maintained by the active buffer ref when
1053  * a node is acquired.
1054  *
1055  * A hammer_node can also be passively associated with other HAMMER
1056  * structures, such as inodes, while retaining 0 references.  These
1057  * associations can be cleared backwards using a pointer-to-pointer in
1058  * the hammer_node.
1059  *
1060  * This allows the HAMMER implementation to cache hammer_node's long-term
1061  * and short-cut a great deal of the infrastructure's complexity.  In
1062  * most cases a cached node can be reacquired without having to dip into
1063  * either the buffer or cluster management code.
1064  *
1065  * The caller must pass a referenced cluster on call and will retain
1066  * ownership of the reference on return.  The node will acquire its own
1067  * additional references, if necessary.
1068  */
1069 hammer_node_t
1070 hammer_get_node(hammer_cluster_t cluster, int32_t node_offset, int *errorp)
1071 {
1072 	hammer_node_t node;
1073 
1074 	/*
1075 	 * Locate the structure, allocating one if necessary.
1076 	 */
1077 again:
1078 	node = RB_LOOKUP(hammer_nod_rb_tree, &cluster->rb_nods_root,
1079 			 node_offset);
1080 	if (node == NULL) {
1081 		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
1082 		node->node_offset = node_offset;
1083 		node->cluster = cluster;
1084 		if (RB_INSERT(hammer_nod_rb_tree, &cluster->rb_nods_root,
1085 			      node)) {
1086 			kfree(node, M_HAMMER);
1087 			goto again;
1088 		}
1089 	}
1090 	*errorp = hammer_ref_node(node);
1091 	if (*errorp) {
1092 		/*
1093 		 * NOTE: The node pointer may be stale on error return.
1094 		 * In fact, it's probably been destroyed.
1095 		 */
1096 		node = NULL;
1097 	}
1098 	return(node);
1099 }
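/*
 * Illustrative sketch (not compiled): typical node access.  The caller
 * holds a referenced cluster across the call; the node returned by
 * hammer_get_node() is referenced and has its ondisk data mapped until
 * hammer_rel_node().  The 'count' field is assumed to be the B-Tree
 * element count in hammer_node_ondisk; the helper name is hypothetical.
 */
#if 0
static int
example_node_elm_count(hammer_cluster_t cluster, int32_t node_offset)
{
	hammer_node_t node;
	int error;
	int count = -1;

	node = hammer_get_node(cluster, node_offset, &error);
	if (node) {
		count = node->ondisk->count;	/* assumed field name */
		hammer_rel_node(node);
	}
	return(count);
}
#endif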
1100 
1101 /*
1102  * Reference the node to prevent disassociations, then associate and
1103  * load the related buffer.  This routine can also be called to reference
1104  * a node from a cache pointer.
1105  *
1106  * NOTE: Because the caller does not have a ref on the node, the caller's
1107  * node pointer will be stale if an error is returned.  We may also wind
1108  * up clearing the related cache pointers.
1109  *
1110  * NOTE: The cluster is indirectly referenced by our buffer ref.
1111  */
1112 int
1113 hammer_ref_node(hammer_node_t node)
1114 {
1115 	hammer_buffer_t buffer;
1116 	int32_t buf_no;
1117 	int error;
1118 	int error = 0;
1119 	hammer_ref(&node->lock);
1120 	if (node->ondisk == NULL) {
1121 		hammer_lock_ex(&node->lock);
1122 		if (node->ondisk == NULL) {
1123 			/*
1124 			 * This is a little confusing, but the gist is that
1125 			 * node->buffer determines whether the node is on
1126 			 * the buffer's clist and node->ondisk determines
1127 			 * whether the buffer is referenced.
1128 			 */
1129 			if ((buffer = node->buffer) != NULL) {
1130 				error = hammer_ref_buffer(buffer);
1131 			} else {
1132 				buf_no = node->node_offset / HAMMER_BUFSIZE;
1133 				buffer = hammer_get_buffer(node->cluster,
1134 							   buf_no, 0, &error);
1135 				if (buffer) {
1136 					KKASSERT(error == 0);
1137 					TAILQ_INSERT_TAIL(&buffer->clist,
1138 							  node, entry);
1139 					node->buffer = buffer;
1140 				}
1141 			}
1142 			if (error == 0) {
1143 				node->ondisk = (void *)((char *)buffer->ondisk +
1144 				       (node->node_offset & HAMMER_BUFMASK));
1145 			}
1146 		}
1147 		hammer_unlock(&node->lock);
1148 	}
1149 	if (error)
1150 		hammer_rel_node(node);
1151 	return (error);
1152 }
1153 
1154 /*
1155  * Release a hammer_node.  The node retains a passive association with
1156  * its cluster, buffer and caches.
1157  *
1158  * However, to avoid cluttering up kernel memory with tons of B-Tree
1159  * node cache structures we destroy the node if no passive cache or
1160  * (instantiated) buffer references exist.
1161  */
1162 void
1163 hammer_rel_node(hammer_node_t node)
1164 {
1165 	hammer_cluster_t cluster;
1166 	hammer_buffer_t buffer;
1167 
1168 	if (hammer_islastref(&node->lock)) {
1169 		cluster = node->cluster;
1170 		/*
1171 		 * Clutter control, this case only occurs after a failed
1172 		 * load since otherwise ondisk will be non-NULL.
1173 		 */
1174 		if (node->cache1 == NULL && node->cache2 == NULL &&
1175 		    node->ondisk == NULL) {
1176 			RB_REMOVE(hammer_nod_rb_tree, &cluster->rb_nods_root,
1177 				  node);
1178 			if ((buffer = node->buffer) != NULL) {
1179 				node->buffer = NULL;
1180 				hammer_remove_node_clist(buffer, node);
1181 			}
1182 			kfree(node, M_HAMMER);
1183 			return;
1184 		}
1185 
1186 		/*
1187 		 * node->ondisk determines whether we have a buffer reference
1188 		 * to get rid of or not.  Only get rid of the reference if
1189 		 * the kernel tried to flush the buffer.
1190 		 *
1191 		 * NOTE: Once unref'd the node can be physically destroyed,
1192 			 * so our node is stale afterwards.
1193 		 *
1194 		 * This case occurs if the node still has cache references.
1195 		 * We could remove the references and free the structure
1196 		 * but for now we allow them (and the node structure) to
1197 		 * remain intact.
1198 		 */
1199 		if (node->ondisk && hammer_io_checkflush(&node->buffer->io)) {
1200 			buffer = node->buffer;
1201 			node->buffer = NULL;
1202 			node->ondisk = NULL;
1203 			hammer_remove_node_clist(buffer, node);
1204 			hammer_unref(&node->lock);
1205 			hammer_rel_buffer(buffer, 0);
1206 		} else {
1207 			hammer_unref(&node->lock);
1208 		}
1209 	} else {
1210 		hammer_unref(&node->lock);
1211 	}
1212 }
1213 
1214 /*
1215  * Cache-and-release a hammer_node.  Kinda like catching and releasing a
1216  * fish, but keeping an eye on him.  The node is passively cached in *cache.
1217  *
1218  * NOTE!  HAMMER may NULL *cache at any time, even after you have
1219  * referenced the node!
1220  */
1221 void
1222 hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
1223 {
1224 	if (node->cache1 != cache) {
1225 		if (node->cache2 == cache) {
1226 			struct hammer_node **tmp;
1227 			tmp = node->cache1;
1228 			node->cache1 = node->cache2;
1229 			node->cache2 = tmp;
1230 		} else {
1231 			if (node->cache2)
1232 				*node->cache2 = NULL;
1233 			node->cache2 = node->cache1;
1234 			node->cache1 = cache;
1235 			*cache = node;
1236 		}
1237 	}
1238 	hammer_rel_node(node);
1239 }
1240 
1241 void
1242 hammer_uncache_node(struct hammer_node **cache)
1243 {
1244 	hammer_node_t node;
1245 
1246 	if ((node = *cache) != NULL) {
1247 		*cache = NULL;
1248 		if (node->cache1 == cache) {
1249 			node->cache1 = node->cache2;
1250 			node->cache2 = NULL;
1251 		} else if (node->cache2 == cache) {
1252 			node->cache2 = NULL;
1253 		} else {
1254 			panic("hammer_uncache_node: missing cache linkage");
1255 		}
1256 		if (node->cache1 == NULL && node->cache2 == NULL &&
1257 		    node->lock.refs == 0) {
1258 			hammer_flush_node(node);
1259 		}
1260 	}
1261 }
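/*
 * Illustrative sketch (not compiled) of the cache-and-release pattern
 * described above hammer_cache_node().  'my_cache' stands in for a passive
 * cache pointer embedded in some longer-lived structure; the field and
 * helper names are hypothetical.
 */
#if 0
static void
example_cache_usage(hammer_cluster_t cluster, int32_t node_offset,
		    struct hammer_node **my_cache)
{
	hammer_node_t node;
	int error;

	node = hammer_get_node(cluster, node_offset, &error);
	if (node)
		hammer_cache_node(node, my_cache);	/* also drops our ref */

	/*
	 * Much later, before the owning structure goes away, the passive
	 * pointer must be cleared.  Remember that HAMMER may NULL *my_cache
	 * at any time on its own.
	 */
	hammer_uncache_node(my_cache);
}
#endif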
1262 
1263 /*
1264  * Remove a node's cache references and destroy the node if it has no
1265  * references.  This is typically called from the buffer handling code.
1266  *
1267  * The node may have an active buffer reference (ondisk != NULL) even
1268  * if the node itself has no references.
1269  *
1270  * Note that a caller iterating through nodes via a buffer must have its
1271  * own reference on the buffer or our hammer_rel_buffer() call below may
1272  * rip it out from under the caller.
1273  */
1274 void
1275 hammer_flush_node(hammer_node_t node)
1276 {
1277 	hammer_buffer_t buffer;
1278 
1279 	if (node->cache1)
1280 		*node->cache1 = NULL;
1281 	if (node->cache2)
1282 		*node->cache2 = NULL;
1283 	if (node->lock.refs == 0) {
1284 		RB_REMOVE(hammer_nod_rb_tree, &node->cluster->rb_nods_root,
1285 			  node);
1286 		if ((buffer = node->buffer) != NULL) {
1287 			node->buffer = NULL;
1288 			hammer_remove_node_clist(buffer, node);
1289 			if (node->ondisk) {
1290 				node->ondisk = NULL;
1291 				hammer_rel_buffer(buffer, 0);
1292 			}
1293 		}
1294 		kfree(node, M_HAMMER);
1295 	}
1296 }
1297 
1298 /*
1299  * Remove a node from the buffer's clist.  Adjust save_scan as appropriate.
1300  * This is in its own little routine to properly handle interactions with
1301  * save_scan, so it is possible to block while scanning a buffer's node list.
1302  */
1303 static
1304 void
1305 hammer_remove_node_clist(hammer_buffer_t buffer, hammer_node_t node)
1306 {
1307 	if (buffer->save_scan == node)
1308 		buffer->save_scan = TAILQ_NEXT(node, entry);
1309 	TAILQ_REMOVE(&buffer->clist, node, entry);
1310 }
1311 
1312 /************************************************************************
1313  *				A-LIST ALLOCATORS			*
1314  ************************************************************************/
1315 
1316 /*
1317  * Allocate HAMMER elements - btree nodes, data storage, and record elements
1318  *
1319  * The passed *bufferp should be initialized to NULL.  On successive calls
1320  * *bufferp caches the most recent buffer used until put away by the caller.
1321  * Note that previously returned pointers using the cached buffer become
1322  * invalid on successive calls which reuse *bufferp.
1323  *
1324  * All allocations first attempt to use the block found at the specified
1325  * iterator.  If that fails the first available block is used.  If that
1326  * fails a new buffer is allocated and associated with the buffer type
1327  * A-list and the element is allocated out of the new buffer.
1328  */
1329 
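/*
 * Illustrative sketch (not compiled) of the *bufferp convention described
 * above: the caller seeds a NULL buffer pointer, lets successive
 * allocations reuse or replace it, and releases it once at the end.  The
 * helper name is hypothetical.
 */
#if 0
static void
example_alloc_records(hammer_cluster_t cluster, int *errorp)
{
	struct hammer_buffer *buffer = NULL;
	union hammer_record_ondisk *rec;
	int i;

	*errorp = 0;
	for (i = 0; i < 4 && *errorp == 0; ++i) {
		rec = hammer_alloc_record(cluster, errorp, &buffer);
		/*
		 * Use 'rec' immediately; a later call that switches
		 * *bufferp invalidates pointers into the old buffer.
		 */
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
}
#endif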
1330 hammer_node_t
1331 hammer_alloc_btree(hammer_cluster_t cluster, int *errorp)
1332 {
1333 	hammer_buffer_t buffer;
1334 	hammer_alist_t live;
1335 	hammer_node_t node;
1336 	int32_t elm_no;
1337 	int32_t buf_no;
1338 	int32_t node_offset;
1339 
1340 	/*
1341 	 * Allocate a B-Tree element
1342 	 */
1343 	buffer = NULL;
1344 	live = &cluster->alist_btree;
1345 	elm_no = hammer_alist_alloc_fwd(live, 1, cluster->ondisk->idx_index);
1346 	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1347 		elm_no = hammer_alist_alloc_fwd(live, 1, 0);
1348 	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1349 		alloc_new_buffer(cluster, live,
1350 				 HAMMER_FSBUF_BTREE, HAMMER_BTREE_NODES,
1351 				 cluster->ondisk->idx_index, errorp, &buffer);
1352 		elm_no = hammer_alist_alloc(live, 1);
1353 		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1354 			*errorp = ENOSPC;
1355 			if (buffer)
1356 				hammer_rel_buffer(buffer, 0);
1357 			return(NULL);
1358 		}
1359 	}
1360 	cluster->ondisk->idx_index = elm_no;
1361 
1362 	/*
1363 	 * Load and return the B-Tree element
1364 	 */
1365 	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1366 	node_offset = buf_no * HAMMER_BUFSIZE +
1367 		      offsetof(union hammer_fsbuf_ondisk,
1368 			       btree.nodes[elm_no & HAMMER_FSBUF_BLKMASK]);
1369 	node = hammer_get_node(cluster, node_offset, errorp);
1370 	if (node) {
1371 		bzero(node->ondisk, sizeof(*node->ondisk));
1372 	} else {
1373 		hammer_alist_free(live, elm_no, 1);
1374 		hammer_alist_free(live, elm_no, 1);
1376 		node = NULL;	/* hammer_get_node() failed; nothing to release */
	}
1377 	if (buffer)
1378 		hammer_rel_buffer(buffer, 0);
1379 	return(node);
1380 }
1381 
1382 void *
1383 hammer_alloc_data(hammer_cluster_t cluster, int32_t bytes,
1384 		  int *errorp, struct hammer_buffer **bufferp)
1385 {
1386 	hammer_buffer_t buffer;
1387 	hammer_alist_t live;
1388 	int32_t elm_no;
1389 	int32_t buf_no;
1390 	int32_t nblks;
1391 	void *item;
1392 
1393 	/*
1394 	 * Allocate a data element
1395 	 */
1396 	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1397 	live = &cluster->alist_mdata;
1398 	elm_no = hammer_alist_alloc_fwd(live, nblks, cluster->ondisk->idx_data);
1399 	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1400 		elm_no = hammer_alist_alloc_fwd(live, nblks, 0);
1401 	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1402 		alloc_new_buffer(cluster, live,
1403 				 HAMMER_FSBUF_DATA, HAMMER_DATA_NODES,
1404 				 cluster->ondisk->idx_data, errorp, bufferp);
1405 		elm_no = hammer_alist_alloc(live, nblks);
1406 		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1407 			*errorp = ENOSPC;
1408 			return(NULL);
1409 		}
1410 	}
1411 	cluster->ondisk->idx_data = elm_no;
1412 
1413 	/*
1414 	 * Load and return the data element
1415 	 */
1416 	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1417 	buffer = *bufferp;
1418 	if (buffer == NULL || buffer->cluster != cluster ||
1419 	    buffer->buf_no != buf_no) {
1420 		if (buffer)
1421 			hammer_rel_buffer(buffer, 0);
1422 		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1423 		*bufferp = buffer;
1424 	}
1425 	KKASSERT(buffer->ondisk->head.buf_type == HAMMER_FSBUF_DATA);
1426 	item = &buffer->ondisk->data.data[elm_no & HAMMER_FSBUF_BLKMASK];
1427 	bzero(item, nblks * HAMMER_DATA_BLKSIZE);
1428 	*errorp = 0;
1429 	return(item);
1430 }
1431 
1432 void *
1433 hammer_alloc_record(hammer_cluster_t cluster,
1434 		    int *errorp, struct hammer_buffer **bufferp)
1435 {
1436 	hammer_buffer_t buffer;
1437 	hammer_alist_t live;
1438 	int32_t elm_no;
1439 	int32_t buf_no;
1440 	void *item;
1441 
1442 	/*
1443 	 * Allocate a record element
1444 	 */
1445 	live = &cluster->alist_record;
1446 	elm_no = hammer_alist_alloc_rev(live, 1, cluster->ondisk->idx_record);
1447 	if (elm_no == HAMMER_ALIST_BLOCK_NONE)
1448 		elm_no = hammer_alist_alloc_rev(live, 1,HAMMER_ALIST_BLOCK_MAX);
1449 	if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1450 		alloc_new_buffer(cluster, live,
1451 				 HAMMER_FSBUF_RECORDS, HAMMER_RECORD_NODES,
1452 				 cluster->ondisk->idx_record, errorp, bufferp);
1453 		elm_no = hammer_alist_alloc(live, 1);
1454 		if (elm_no == HAMMER_ALIST_BLOCK_NONE) {
1455 			*errorp = ENOSPC;
1456 			return(NULL);
1457 		}
1458 	}
1459 	cluster->ondisk->idx_record = elm_no;
1460 
1461 	/*
1462 	 * Load and return the record element
1463 	 */
1464 	buf_no = elm_no / HAMMER_FSBUF_MAXBLKS;
1465 	buffer = *bufferp;
1466 	if (buffer == NULL || buffer->cluster != cluster ||
1467 	    buffer->buf_no != buf_no) {
1468 		if (buffer)
1469 			hammer_rel_buffer(buffer, 0);
1470 		buffer = hammer_get_buffer(cluster, buf_no, 0, errorp);
1471 		*bufferp = buffer;
1472 	}
1473 	KKASSERT(buffer->ondisk->head.buf_type != 0);
1474 	item = &buffer->ondisk->record.recs[elm_no & HAMMER_FSBUF_BLKMASK];
1475 	bzero(item, sizeof(union hammer_record_ondisk));
1476 	*errorp = 0;
1477 	return(item);
1478 }
1479 
1480 /*
1481  * Free HAMMER elements based on either a hammer_buffer and element pointer
1482  * or a cluster-relative byte offset.
1483  */
1484 void
1485 hammer_free_btree_ptr(hammer_buffer_t buffer, hammer_node_ondisk_t node)
1486 {
1487 	int32_t elm_no;
1488 	hammer_alist_t live;
1489 
1490 	elm_no = node - &buffer->ondisk->btree.nodes[0];
1491 	KKASSERT(elm_no >= 0 && elm_no < HAMMER_BTREE_NODES);
1492 	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1493 	live = &buffer->cluster->alist_btree;
1494 	hammer_alist_free(live, elm_no, 1);
1495 }
1496 
1497 void
1498 hammer_free_data_ptr(hammer_buffer_t buffer, void *data, int bytes)
1499 {
1500 	int32_t elm_no;
1501 	int32_t nblks;
1502 	hammer_alist_t live;
1503 
1504 	elm_no = ((char *)data - (char *)buffer->ondisk->data.data) /
1505 		 HAMMER_DATA_BLKSIZE;
1506 	KKASSERT(elm_no >= 0 && elm_no < HAMMER_DATA_NODES);
1507 	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1508 	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1509 	live = &buffer->cluster->alist_mdata;
1510 	hammer_alist_free(live, elm_no, nblks);
1511 }
1512 
1513 void
1514 hammer_free_record_ptr(hammer_buffer_t buffer, union hammer_record_ondisk *rec)
1515 {
1516 	int32_t elm_no;
1517 	hammer_alist_t live;
1518 
1519 	elm_no = rec - &buffer->ondisk->record.recs[0];
1520 	KKASSERT(elm_no >= 0 && elm_no < HAMMER_RECORD_NODES);
1521 	elm_no += buffer->buf_no * HAMMER_FSBUF_MAXBLKS;
1522 	live = &buffer->cluster->alist_record;
1523 	hammer_alist_free(live, elm_no, 1);
1524 }
1525 
1526 void
1527 hammer_free_btree(hammer_cluster_t cluster, int32_t bclu_offset)
1528 {
1529 	const int32_t blksize = sizeof(struct hammer_node_ondisk);
1530 	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1531 	hammer_alist_t live;
1532 	int32_t elm_no;
1533 
1534 	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1535 	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, btree.nodes[0]);
1536 	live = &cluster->alist_btree;
1537 	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1538 	elm_no += fsbuf_offset / blksize;
1539 	hammer_alist_free(live, elm_no, 1);
1540 }
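/*
 * Illustrative sketch (not compiled): the node_offset produced by
 * hammer_alloc_btree() is a cluster-relative byte offset, so it can be
 * handed back to the offset-based hammer_free_btree() above.  The helper
 * name is hypothetical.
 */
#if 0
static void
example_btree_alloc_then_free(hammer_cluster_t cluster, int *errorp)
{
	hammer_node_t node;
	int32_t node_offset;

	node = hammer_alloc_btree(cluster, errorp);
	if (node) {
		node_offset = node->node_offset;
		hammer_rel_node(node);
		hammer_free_btree(cluster, node_offset);
	}
}
#endif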
1541 
1542 void
1543 hammer_free_data(hammer_cluster_t cluster, int32_t bclu_offset, int32_t bytes)
1544 {
1545 	const int32_t blksize = HAMMER_DATA_BLKSIZE;
1546 	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1547 	hammer_alist_t live;
1548 	int32_t elm_no;
1549 	int32_t nblks;
1550 
1551 	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1552 	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, data.data[0][0]);
1553 	live = &cluster->alist_mdata;
1554 	nblks = (bytes + HAMMER_DATA_BLKMASK) & ~HAMMER_DATA_BLKMASK;
1555 	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1556 	elm_no += fsbuf_offset / blksize;
1557 	hammer_alist_free(live, elm_no, nblks);
1558 }
1559 
1560 void
1561 hammer_free_record(hammer_cluster_t cluster, int32_t bclu_offset)
1562 {
1563 	const int32_t blksize = sizeof(union hammer_record_ondisk);
1564 	int32_t fsbuf_offset = bclu_offset & HAMMER_BUFMASK;
1565 	hammer_alist_t live;
1566 	int32_t elm_no;
1567 
1568 	elm_no = bclu_offset / HAMMER_BUFSIZE * HAMMER_FSBUF_MAXBLKS;
1569 	fsbuf_offset -= offsetof(union hammer_fsbuf_ondisk, record.recs[0]);
1570 	live = &cluster->alist_record;
1571 	KKASSERT(fsbuf_offset >= 0 && fsbuf_offset % blksize == 0);
1572 	elm_no += fsbuf_offset / blksize;
1573 	hammer_alist_free(live, elm_no, 1);
1574 }
1575 
1576 
1577 /*
1578  * Allocate a new filesystem buffer and assign it to the specified
1579  * filesystem buffer type.  The new buffer will be added to the
1580  * type-specific A-list and initialized.
1581  */
1582 static void
1583 alloc_new_buffer(hammer_cluster_t cluster, hammer_alist_t live,
1584 		 u_int64_t type, int32_t nelements,
1585 		 int start, int *errorp, struct hammer_buffer **bufferp)
1586 {
1587 	hammer_buffer_t buffer;
1588 	int32_t buf_no;
1589 
1590 	start = start / HAMMER_FSBUF_MAXBLKS;	/* convert to buf_no */
1591 
1592 	if (type == HAMMER_FSBUF_RECORDS) {
1593 		buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1594 						1, start);
1595 		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1596 			buf_no = hammer_alist_alloc_rev(&cluster->alist_master,
1597 						1, HAMMER_ALIST_BLOCK_MAX);
1598 		}
1599 	} else {
1600 		buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1601 						1, start);
1602 		if (buf_no == HAMMER_ALIST_BLOCK_NONE) {
1603 			buf_no = hammer_alist_alloc_fwd(&cluster->alist_master,
1604 						1, 0);
1605 		}
1606 	}
1607 	KKASSERT(buf_no != HAMMER_ALIST_BLOCK_NONE); /* XXX */
1608 
1609 	/*
1610 	 * The new buffer must be initialized (type != 0) regardless of
1611 	 * whether we already have it cached or not, so don't try to
1612 	 * optimize the cached buffer check.  Just call hammer_get_buffer().
1613 	 */
1614 	buffer = hammer_get_buffer(cluster, buf_no, type, errorp);
1615 	if (*bufferp)
1616 		hammer_rel_buffer(*bufferp, 0);
1617 	*bufferp = buffer;
1618 }
1619 
1620 #if 0
1621 
1626 /*
1627  * Flush various tracking structures to disk
1628  */
1629 void
1630 flush_all_volumes(void)
1631 {
1632 	hammer_volume_t vol;
1633 
1634 	for (vol = VolBase; vol; vol = vol->next)
1635 		flush_volume(vol);
1636 }
1637 
1638 void
1639 flush_volume(hammer_volume_t vol)
1640 {
1641 	hammer_supercl_t supercl;
1642 	hammer_cluster_t cl;
1643 
1644 	for (supercl = vol->supercl_base; supercl; supercl = supercl->next)
1645 		flush_supercl(supercl);
1646 	for (cl = vol->cluster_base; cl; cl = cl->next)
1647 		flush_cluster(cl);
1648 	writehammerbuf(vol, vol->ondisk, 0);
1649 }
1650 
1651 void
1652 flush_supercl(hammer_supercl_t supercl)
1653 {
1654 	int64_t supercl_offset;
1655 
1656 	supercl_offset = supercl->scl_offset;
1657 	writehammerbuf(supercl->volume, supercl->ondisk, supercl_offset);
1658 }
1659 
1660 void
1661 flush_cluster(hammer_cluster_t cl)
1662 {
1663 	hammer_buffer_t buf;
1664 	int64_t cluster_offset;
1665 
1666 	for (buf = cl->buffer_base; buf; buf = buf->next)
1667 		flush_buffer(buf);
1668 	cluster_offset = cl->clu_offset;
1669 	writehammerbuf(cl->volume, cl->ondisk, cluster_offset);
1670 }
1671 
1672 void
1673 flush_buffer(hammer_buffer_t buf)
1674 {
1675 	int64_t buffer_offset;
1676 
1677 	buffer_offset = buf->buf_offset + buf->cluster->clu_offset;
1678 	writehammerbuf(buf->volume, buf->ondisk, buffer_offset);
1679 }
1680 
1681 #endif
1682 
1683 /*
1684  * Generic buffer initialization
1685  */
1686 static void
1687 initbuffer(hammer_alist_t live, hammer_fsbuf_head_t head, u_int64_t type)
1688 {
1689 	head->buf_type = type;
1690 	hammer_alist_init(live);
1691 }
1692 
1693 /*
1694  * Calculate the cluster's offset in the volume.  This calculation is
1695  * slightly more complex when using superclusters because superclusters
1696  * are grouped in blocks of 16, followed by 16 x N clusters where N
1697  * is the number of clusters a supercluster can manage.
1698  */
1699 static int64_t
1700 calculate_cluster_offset(hammer_volume_t volume, int32_t clu_no)
1701 {
1702 	int32_t scl_group;
1703 	int64_t scl_group_size;
1704 	int64_t off;
1705 
1706 	if (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) {
1707 		scl_group = clu_no / HAMMER_VOL_SUPERCLUSTER_GROUP /
1708 			    HAMMER_SCL_MAXCLUSTERS;
1709 		scl_group_size =
1710 			    ((int64_t)HAMMER_BUFSIZE *
1711 			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
1712 			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1713 			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1714 		scl_group_size +=
1715 			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1716 
1717 		off = volume->cluster_base +
1718 		      scl_group * scl_group_size +
1719 		      (HAMMER_BUFSIZE * HAMMER_VOL_SUPERCLUSTER_GROUP) +
1720 		      ((int64_t)clu_no % ((int64_t)HAMMER_SCL_MAXCLUSTERS *
1721 		       HAMMER_VOL_SUPERCLUSTER_GROUP))
1722 		      * HAMMER_BUFSIZE;
1723 	} else {
1724 		off = volume->cluster_base +
1725 		      (int64_t)clu_no * volume->vol_clsize;
1726 	}
1727 	return(off);
1728 }
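/*
 * Illustrative sketch (not compiled): without super-clusters the
 * calculation above reduces to a simple linear mapping from the cluster
 * base.  Shown only to make the common case explicit; the helper name is
 * hypothetical.
 */
#if 0
static int64_t
example_cluster_offset_simple(hammer_volume_t volume, int32_t clu_no)
{
	KKASSERT((volume->vol_flags & HAMMER_VOLF_USINGSUPERCL) == 0);
	return(volume->cluster_base + (int64_t)clu_no * volume->vol_clsize);
}
#endif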
1729 
1730 /*
1731  * Calculate a super-cluster's offset in the volume.
1732  */
1733 static int64_t
1734 calculate_supercl_offset(hammer_volume_t volume, int32_t scl_no)
1735 {
1736 	int64_t off;
1737 	int32_t scl_group;
1738 	int64_t scl_group_size;
1739 
1740 	KKASSERT (volume->vol_flags & HAMMER_VOLF_USINGSUPERCL);
1741 	scl_group = scl_no / HAMMER_VOL_SUPERCLUSTER_GROUP;
1742 	if (scl_group) {
1743 		scl_group_size =
1744 			    ((int64_t)HAMMER_BUFSIZE *
1745 			     HAMMER_VOL_SUPERCLUSTER_GROUP) +
1746 			    ((int64_t)HAMMER_VOL_SUPERCLUSTER_GROUP *
1747 			     volume->vol_clsize * HAMMER_SCL_MAXCLUSTERS);
1748 		scl_group_size +=
1749 			    HAMMER_VOL_SUPERCLUSTER_GROUP * HAMMER_BUFSIZE;
1750 		off = volume->cluster_base + (scl_group * scl_group_size) +
1751 		      (scl_no % HAMMER_VOL_SUPERCLUSTER_GROUP) * HAMMER_BUFSIZE;
1752 	} else {
1753 		off = volume->cluster_base + (scl_no * HAMMER_BUFSIZE);
1754 	}
1755 	return(off);
1756 }
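/*
 * Illustrative sketch (not compiled): the super-cluster responsible for a
 * given cluster follows directly from HAMMER_SCL_MAXCLUSTERS, mirroring
 * the scl_no calculation used by super_alist_init() below.  The helper
 * name is hypothetical.
 */
#if 0
static int32_t
example_clu_to_scl(int32_t clu_no)
{
	return(clu_no / HAMMER_SCL_MAXCLUSTERS);
}
#endif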
1757 
1758 /*
1759  * A-LIST SUPPORT
1760  *
1761  * Setup the parameters for the various A-lists we use in hammer.  The
1762  * supercluster A-list must be chained to the cluster A-list and cluster
1763  * slave A-lists are chained to buffer A-lists.
1764  *
1765  * See hammer_init_alist_config() below.
1766  */
1767 
1768 /*
1769  * A-LIST - cluster recursion into a filesystem buffer
1770  */
1771 static int
1772 buffer_alist_init(void *info, int32_t blk, int32_t radix)
1773 {
1774 	hammer_cluster_t cluster = info;
1775 	hammer_buffer_t buffer;
1776 	int32_t buf_no;
1777 	int error = 0;
1778 
1779 	/*
1780 	 * Calculate the buffer number, initialize based on the buffer type.
1781 	 * The buffer has already been allocated so assert that it has been
1782 	 * initialized.
1783 	 */
1784 	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1785 	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1786 	if (buffer)
1787 		hammer_rel_buffer(buffer, 0);
1788 	return (error);
1789 }
1790 
1791 static int
1792 buffer_alist_destroy(void *info, int32_t blk, int32_t radix)
1793 {
1794 	return (0);
1795 }
1796 
1797 static int
1798 buffer_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1799                       int32_t count, int32_t atblk, int32_t *fullp)
1800 {
1801 	hammer_cluster_t cluster = info;
1802 	hammer_buffer_t buffer;
1803 	int32_t buf_no;
1804 	int32_t r;
1805 	int error = 0;
1806 
1807 	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1808 	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1809 	if (buffer) {
1810 		KKASSERT(buffer->ondisk->head.buf_type != 0);
1811 
1812 		r = hammer_alist_alloc_fwd(&buffer->alist, count, atblk - blk);
1813 		if (r != HAMMER_ALIST_BLOCK_NONE)
1814 			r += blk;
1815 		*fullp = hammer_alist_isfull(&buffer->alist);
1816 		hammer_rel_buffer(buffer, 0);
1817 	} else {
1818 		r = HAMMER_ALIST_BLOCK_NONE;
		*fullp = 0;
1819 	}
1820 	return(r);
1821 }
1822 
1823 static int
1824 buffer_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1825                       int32_t count, int32_t atblk, int32_t *fullp)
1826 {
1827 	hammer_cluster_t cluster = info;
1828 	hammer_buffer_t buffer;
1829 	int32_t buf_no;
1830 	int32_t r;
1831 	int error = 0;
1832 
1833 	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1834 	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1835 	if (buffer) {
1836 		KKASSERT(buffer->ondisk->head.buf_type != 0);
1837 
1838 		r = hammer_alist_alloc_rev(&buffer->alist, count, atblk - blk);
1839 		if (r != HAMMER_ALIST_BLOCK_NONE)
1840 			r += blk;
1841 		*fullp = hammer_alist_isfull(&buffer->alist);
1842 		hammer_rel_buffer(buffer, 0);
1843 	} else {
1844 		r = HAMMER_ALIST_BLOCK_NONE;
1845 		*fullp = 0;
1846 	}
1847 	return(r);
1848 }
1849 
1850 static void
1851 buffer_alist_free(void *info, int32_t blk, int32_t radix,
1852                  int32_t base_blk, int32_t count, int32_t *emptyp)
1853 {
1854 	hammer_cluster_t cluster = info;
1855 	hammer_buffer_t buffer;
1856 	int32_t buf_no;
1857 	int error = 0;
1858 
1859 	buf_no = blk / HAMMER_FSBUF_MAXBLKS;
1860 	buffer = hammer_get_buffer(cluster, buf_no, 0, &error);
1861 	if (buffer) {
1862 		KKASSERT(buffer->ondisk->head.buf_type != 0);
1863 		hammer_alist_free(&buffer->alist, base_blk, count);
1864 		*emptyp = hammer_alist_isempty(&buffer->alist);
1865 		hammer_rel_buffer(buffer, 0);
1866 	} else {
1867 		*emptyp = 0;
1868 	}
1869 }
1870 
1871 static void
1872 buffer_alist_print(void *info, int32_t blk, int32_t radix, int tab)
1873 {
1874 }
1875 
1876 /*
1877  * A-LIST - super-cluster recursion into a cluster and cluster recursion
1878  * into a filesystem buffer.  A-Lists are mostly self-contained entities,
1879  * but callbacks must be installed to recurse from one A-List to another.
1880  *
1881  * Implementing these callbacks allows us to operate a multi-layered A-List
1882  * as a single entity.
1883  */
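/*
 * For instance, assuming HAMMER_SCL_MAXCLUSTERS is 4096, the volume
 * A-list subtree rooted at blk = 8192 corresponds to supercluster 2;
 * an allocation satisfied at relative block r inside that supercluster's
 * A-list is translated back to the volume A-list as r + 8192, and the
 * *fullp / *emptyp outputs propagate the supercluster's state up one
 * layer.
 */
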
1884 static int
1885 super_alist_init(void *info, int32_t blk, int32_t radix)
1886 {
1887 	hammer_volume_t volume = info;
1888 	hammer_supercl_t supercl;
1889 	int32_t scl_no;
1890 	int error = 0;
1891 
1892 	/*
1893 	 * Calculate the super-cluster number containing the cluster (blk)
1894 	 * and obtain the super-cluster buffer.
1895 	 */
1896 	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1897 	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1898 	if (supercl)
1899 		hammer_rel_supercl(supercl, 0);
1900 	return (error);
1901 }
1902 
1903 static int
1904 super_alist_destroy(void *info, int32_t blk, int32_t radix)
1905 {
1906 	return(0);
1907 }
1908 
1909 static int
1910 super_alist_alloc_fwd(void *info, int32_t blk, int32_t radix,
1911 		      int32_t count, int32_t atblk, int32_t *fullp)
1912 {
1913 	hammer_volume_t volume = info;
1914 	hammer_supercl_t supercl;
1915 	int32_t scl_no;
1916 	int32_t r;
1917 	int error = 0;
1918 
1919 	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1920 	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1921 	if (supercl) {
1922 		r = hammer_alist_alloc_fwd(&supercl->alist, count, atblk - blk);
1923 		if (r != HAMMER_ALIST_BLOCK_NONE)
1924 			r += blk;
1925 		*fullp = hammer_alist_isfull(&supercl->alist);
1926 		hammer_rel_supercl(supercl, 0);
1927 	} else {
1928 		r = HAMMER_ALIST_BLOCK_NONE;
1929 		*fullp = 0;
1930 	}
1931 	return(r);
1932 }
1933 
1934 static int
1935 super_alist_alloc_rev(void *info, int32_t blk, int32_t radix,
1936 		      int32_t count, int32_t atblk, int32_t *fullp)
1937 {
1938 	hammer_volume_t volume = info;
1939 	hammer_supercl_t supercl;
1940 	int32_t scl_no;
1941 	int32_t r;
1942 	int error = 0;
1943 
1944 	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1945 	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1946 	if (supercl) {
1947 		r = hammer_alist_alloc_rev(&supercl->alist, count, atblk - blk);
1948 		if (r != HAMMER_ALIST_BLOCK_NONE)
1949 			r += blk;
1950 		*fullp = hammer_alist_isfull(&supercl->alist);
1951 		hammer_rel_supercl(supercl, 0);
1952 	} else {
1953 		r = HAMMER_ALIST_BLOCK_NONE;
1954 		*fullp = 0;
1955 	}
1956 	return(r);
1957 }
1958 
1959 static void
1960 super_alist_free(void *info, int32_t blk, int32_t radix,
1961 		 int32_t base_blk, int32_t count, int32_t *emptyp)
1962 {
1963 	hammer_volume_t volume = info;
1964 	hammer_supercl_t supercl;
1965 	int32_t scl_no;
1966 	int error = 0;
1967 
1968 	scl_no = blk / HAMMER_SCL_MAXCLUSTERS;
1969 	supercl = hammer_get_supercl(volume, scl_no, &error, 1);
1970 	if (supercl) {
1971 		hammer_alist_free(&supercl->alist, base_blk, count);
1972 		*emptyp = hammer_alist_isempty(&supercl->alist);
1973 		hammer_rel_supercl(supercl, 0);
1974 	} else {
1975 		*emptyp = 0;
1976 	}
1977 }
1978 
1979 static void
1980 super_alist_print(void *info, int32_t blk, int32_t radix, int tab)
1981 {
1982 }
1983 
1984 void
1985 hammer_init_alist_config(void)
1986 {
1987 	hammer_alist_config_t config;
1988 
1989 	hammer_alist_template(&Buf_alist_config, HAMMER_FSBUF_MAXBLKS,
1990 			      1, HAMMER_FSBUF_METAELMS);
1991 	hammer_alist_template(&Vol_normal_alist_config, HAMMER_VOL_MAXCLUSTERS,
1992 			      1, HAMMER_VOL_METAELMS_1LYR);
1993 	hammer_alist_template(&Vol_super_alist_config,
1994 			      HAMMER_VOL_MAXSUPERCLUSTERS,
1995 			      HAMMER_SCL_MAXCLUSTERS, HAMMER_VOL_METAELMS_2LYR);
1996 	hammer_alist_template(&Supercl_alist_config, HAMMER_VOL_MAXCLUSTERS,
1997 			      1, HAMMER_SUPERCL_METAELMS);
1998 	hammer_alist_template(&Clu_master_alist_config, HAMMER_CLU_MAXBUFFERS,
1999 			      1, HAMMER_CLU_MASTER_METAELMS);
2000 	hammer_alist_template(&Clu_slave_alist_config, HAMMER_CLU_MAXBUFFERS,
2001 			      HAMMER_FSBUF_MAXBLKS, HAMMER_CLU_SLAVE_METAELMS);
2002 
2003 	config = &Vol_super_alist_config;
2004 	config->bl_radix_init = super_alist_init;
2005 	config->bl_radix_destroy = super_alist_destroy;
2006 	config->bl_radix_alloc_fwd = super_alist_alloc_fwd;
2007 	config->bl_radix_alloc_rev = super_alist_alloc_rev;
2008 	config->bl_radix_free = super_alist_free;
2009 	config->bl_radix_print = super_alist_print;
2010 
2011 	config = &Clu_slave_alist_config;
2012 	config->bl_radix_init = buffer_alist_init;
2013 	config->bl_radix_destroy = buffer_alist_destroy;
2014 	config->bl_radix_alloc_fwd = buffer_alist_alloc_fwd;
2015 	config->bl_radix_alloc_rev = buffer_alist_alloc_rev;
2016 	config->bl_radix_free = buffer_alist_free;
2017 	config->bl_radix_print = buffer_alist_print;
2018 }
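
/*
 * With the callbacks installed above, an allocation issued against a
 * volume A-list built from Vol_super_alist_config transparently recurses
 * into the per-supercluster A-lists, and one issued against a cluster
 * A-list built from Clu_slave_alist_config recurses into the per-buffer
 * A-lists, so each multi-layered A-list behaves as a single allocator.
 */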
2019 
2020