xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision de78d61c)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
38 #include <vm/vm_page2.h>
39 
40 #include "hammer.h"
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 				    hammer_off_t base_offset, int zone,
45 				    hammer_blockmap_layer2_t layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
48 static void hammer_skip_volume(hammer_off_t *offsetp);
49 
50 /*
51  * Reserved big-blocks red-black tree support
52  */
53 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
54 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
55 
56 static int
57 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
58 {
59 	if (res1->zone_offset < res2->zone_offset)
60 		return(-1);
61 	if (res1->zone_offset > res2->zone_offset)
62 		return(1);
63 	return(0);
64 }
65 
66 /*
67  * Allocate bytes from a zone
68  */
69 hammer_off_t
70 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
71 		      hammer_off_t hint, int *errorp)
72 {
73 	hammer_mount_t hmp;
74 	hammer_volume_t root_volume;
75 	hammer_blockmap_t blockmap;
76 	hammer_blockmap_t freemap;
77 	hammer_reserve_t resv;
78 	hammer_blockmap_layer1_t layer1;
79 	hammer_blockmap_layer2_t layer2;
80 	hammer_buffer_t buffer1 = NULL;
81 	hammer_buffer_t buffer2 = NULL;
82 	hammer_buffer_t buffer3 = NULL;
83 	hammer_off_t tmp_offset;
84 	hammer_off_t next_offset;
85 	hammer_off_t result_offset;
86 	hammer_off_t layer1_offset;
87 	hammer_off_t layer2_offset;
88 	hammer_off_t base_off;
89 	int loops = 0;
90 	int offset;		/* offset within big-block */
91 	int use_hint;
92 
93 	hmp = trans->hmp;
94 
95 	/*
96 	 * Deal with alignment and buffer-boundary issues.
97 	 *
98 	 * Be careful, certain primary alignments are used below to allocate
99 	 * new blockmap blocks.
100 	 */
101 	bytes = HAMMER_DATA_DOALIGN(bytes);
102 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
103 	KKASSERT(hammer_is_index_record(zone));
104 
105 	/*
106 	 * Setup
107 	 */
108 	root_volume = trans->rootvol;
109 	*errorp = 0;
110 	blockmap = &hmp->blockmap[zone];
111 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
112 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
113 
114 	/*
115 	 * Use the hint if we have one.
116 	 */
117 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
118 		next_offset = HAMMER_DATA_DOALIGN_WITH(hammer_off_t, hint);
119 		use_hint = 1;
120 	} else {
121 		next_offset = blockmap->next_offset;
122 		use_hint = 0;
123 	}
124 again:
125 
126 	/*
127 	 * use_hint is turned off if we leave the hinted big-block.
128 	 */
129 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
130 		next_offset = blockmap->next_offset;
131 		use_hint = 0;
132 	}
133 
134 	/*
135 	 * Check for wrap
136 	 */
137 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
138 		if (++loops == 2) {
139 			hmkprintf(hmp, "No space left for zone %d "
140 				"allocation\n", zone);
141 			result_offset = 0;
142 			*errorp = ENOSPC;
143 			goto failed;
144 		}
145 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
146 	}
147 
148 	/*
149 	 * The allocation request may not cross a buffer boundary.  Special
150 	 * large allocations must not cross a big-block boundary.
151 	 */
152 	tmp_offset = next_offset + bytes - 1;
153 	if (bytes <= HAMMER_BUFSIZE) {
154 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
155 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
156 			goto again;
157 		}
158 	} else {
159 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
160 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
161 			goto again;
162 		}
163 	}
164 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
165 
166 	/*
167 	 * Dive layer 1.
168 	 */
169 	layer1_offset = freemap->phys_offset +
170 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
171 
172 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
173 	if (*errorp) {
174 		result_offset = 0;
175 		goto failed;
176 	}
177 
178 	/*
179 	 * Check CRC.
180 	 */
181 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
182 		hammer_lock_ex(&hmp->blkmap_lock);
183 		if (!hammer_crc_test_layer1(hmp->version, layer1))
184 			hpanic("CRC FAILED: LAYER1");
185 		hammer_unlock(&hmp->blkmap_lock);
186 	}
187 
188 	/*
189 	 * If we are at a big-block boundary and layer1 indicates no
190 	 * free big-blocks, then we cannot allocate a new big-block in
191 	 * layer2, skip to the next layer1 entry.
192 	 */
193 	if (offset == 0 && layer1->blocks_free == 0) {
194 		next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
195 		if (hammer_check_volume(hmp, &next_offset)) {
196 			result_offset = 0;
197 			goto failed;
198 		}
199 		goto again;
200 	}
201 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
202 
203 	/*
204 	 * Skip the whole volume if it is pointing to a layer2 big-block
205 	 * on a volume that we are currently trying to remove from the
206 	 * file-system. This is used by the volume-del code together with
207 	 * the reblocker to free up a volume.
208 	 */
209 	if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
210 		hammer_skip_volume(&next_offset);
211 		goto again;
212 	}
213 
214 	/*
215 	 * Dive layer 2, each entry represents a big-block.
216 	 */
217 	layer2_offset = layer1->phys_offset +
218 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
219 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
220 	if (*errorp) {
221 		result_offset = 0;
222 		goto failed;
223 	}
224 
225 	/*
226 	 * Check CRC.  This can race another thread holding the lock
227 	 * and in the middle of modifying layer2.
228 	 */
229 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
230 		hammer_lock_ex(&hmp->blkmap_lock);
231 		if (!hammer_crc_test_layer2(hmp->version, layer2))
232 			hpanic("CRC FAILED: LAYER2");
233 		hammer_unlock(&hmp->blkmap_lock);
234 	}
235 
236 	/*
237 	 * Skip the layer if the zone is owned by someone other then us.
238 	 */
239 	if (layer2->zone && layer2->zone != zone) {
240 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
241 		goto again;
242 	}
243 	if (offset < layer2->append_off) {
244 		next_offset += layer2->append_off - offset;
245 		goto again;
246 	}
247 
248 #if 0
249 	/*
250 	 * If operating in the current non-hint blockmap block, do not
251 	 * allow it to get over-full.  Also drop any active hinting so
252 	 * blockmap->next_offset is updated at the end.
253 	 *
254 	 * We do this for B-Tree and meta-data allocations to provide
255 	 * localization for updates.
256 	 */
257 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
258 	     zone == HAMMER_ZONE_META_INDEX) &&
259 	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
260 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
261 		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
262 			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
263 			use_hint = 0;
264 			goto again;
265 		}
266 	}
267 #endif
268 
269 	/*
270 	 * We need the lock from this point on.  We have to re-check zone
271 	 * ownership after acquiring the lock and also check for reservations.
272 	 */
273 	hammer_lock_ex(&hmp->blkmap_lock);
274 
275 	if (layer2->zone && layer2->zone != zone) {
276 		hammer_unlock(&hmp->blkmap_lock);
277 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
278 		goto again;
279 	}
280 	if (offset < layer2->append_off) {
281 		hammer_unlock(&hmp->blkmap_lock);
282 		next_offset += layer2->append_off - offset;
283 		goto again;
284 	}
285 
286 	/*
287 	 * The big-block might be reserved by another zone.  If it is reserved
288 	 * by our zone we may have to move next_offset past the append_off.
289 	 */
290 	base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
291 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
292 	if (resv) {
293 		if (resv->zone != zone) {
294 			hammer_unlock(&hmp->blkmap_lock);
295 			next_offset = HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset);
296 			goto again;
297 		}
298 		if (offset < resv->append_off) {
299 			hammer_unlock(&hmp->blkmap_lock);
300 			next_offset += resv->append_off - offset;
301 			goto again;
302 		}
303 		++resv->refs;
304 	}
305 
306 	/*
307 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
308 	 * of the layer for real.  At this point we've validated any
309 	 * reservation that might exist and can just ignore resv.
310 	 */
311 	if (layer2->zone == 0) {
312 		/*
313 		 * Assign the big-block to our zone
314 		 */
315 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
316 		--layer1->blocks_free;
317 		hammer_crc_set_layer1(hmp->version, layer1);
318 		hammer_modify_buffer_done(buffer1);
319 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
320 		layer2->zone = zone;
321 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
322 		KKASSERT(layer2->append_off == 0);
323 		hammer_modify_volume_field(trans, trans->rootvol,
324 					   vol0_stat_freebigblocks);
325 		--root_volume->ondisk->vol0_stat_freebigblocks;
326 		hmp->copy_stat_freebigblocks =
327 			root_volume->ondisk->vol0_stat_freebigblocks;
328 		hammer_modify_volume_done(trans->rootvol);
329 	} else {
330 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
331 	}
332 	KKASSERT(layer2->zone == zone);
333 
334 	/*
335 	 * NOTE: bytes_free can legally go negative due to de-dup.
336 	 */
337 	layer2->bytes_free -= bytes;
338 	KKASSERT(layer2->append_off <= offset);
339 	layer2->append_off = offset + bytes;
340 	hammer_crc_set_layer2(hmp->version, layer2);
341 	hammer_modify_buffer_done(buffer2);
342 
343 	/*
344 	 * We hold the blockmap lock and should be the only ones
345 	 * capable of modifying resv->append_off.  Track the allocation
346 	 * as appropriate.
347 	 */
348 	KKASSERT(bytes != 0);
349 	if (resv) {
350 		KKASSERT(resv->append_off <= offset);
351 		resv->append_off = offset + bytes;
352 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
353 		hammer_blockmap_reserve_complete(hmp, resv);
354 	}
355 
356 	/*
357 	 * If we are allocating from the base of a new buffer we can avoid
358 	 * a disk read by calling hammer_bnew_ext().
359 	 */
360 	if ((next_offset & HAMMER_BUFMASK) == 0) {
361 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
362 				errorp, &buffer3);
363 		if (*errorp) {
364 			result_offset = 0;
365 			goto failed;
366 		}
367 	}
368 	result_offset = next_offset;
369 
370 	/*
371 	 * If we weren't supplied with a hint or could not use the hint
372 	 * then we wound up using blockmap->next_offset as the hint and
373 	 * need to save it.
374 	 */
375 	if (use_hint == 0) {
376 		hammer_modify_volume_noundo(NULL, root_volume);
377 		blockmap->next_offset = next_offset + bytes;
378 		hammer_modify_volume_done(root_volume);
379 	}
380 	hammer_unlock(&hmp->blkmap_lock);
381 failed:
382 
383 	/*
384 	 * Cleanup
385 	 */
386 	if (buffer1)
387 		hammer_rel_buffer(buffer1, 0);
388 	if (buffer2)
389 		hammer_rel_buffer(buffer2, 0);
390 	if (buffer3)
391 		hammer_rel_buffer(buffer3, 0);
392 
393 	return(result_offset);
394 }
395 
396 /*
397  * Frontend function - Reserve bytes in a zone.
398  *
399  * This code reserves bytes out of a blockmap without committing to any
400  * meta-data modifications, allowing the front-end to directly issue disk
401  * write I/O for big-blocks of data
402  *
403  * The backend later finalizes the reservation with hammer_blockmap_finalize()
404  * upon committing the related record.
405  */
406 hammer_reserve_t
407 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
408 			hammer_off_t *zone_offp, int *errorp)
409 {
410 	hammer_volume_t root_volume;
411 	hammer_blockmap_t blockmap;
412 	hammer_blockmap_t freemap;
413 	hammer_blockmap_layer1_t layer1;
414 	hammer_blockmap_layer2_t layer2;
415 	hammer_buffer_t buffer1 = NULL;
416 	hammer_buffer_t buffer2 = NULL;
417 	hammer_buffer_t buffer3 = NULL;
418 	hammer_off_t tmp_offset;
419 	hammer_off_t next_offset;
420 	hammer_off_t layer1_offset;
421 	hammer_off_t layer2_offset;
422 	hammer_off_t base_off;
423 	hammer_reserve_t resv;
424 	hammer_reserve_t resx = NULL;
425 	int loops = 0;
426 	int offset;
427 
428 	/*
429 	 * Setup
430 	 */
431 	KKASSERT(hammer_is_index_record(zone));
432 	root_volume = hammer_get_root_volume(hmp, errorp);
433 	if (*errorp)
434 		return(NULL);
435 	blockmap = &hmp->blockmap[zone];
436 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
437 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
438 
439 	/*
440 	 * Deal with alignment and buffer-boundary issues.
441 	 *
442 	 * Be careful, certain primary alignments are used below to allocate
443 	 * new blockmap blocks.
444 	 */
445 	bytes = HAMMER_DATA_DOALIGN(bytes);
446 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
447 
448 	next_offset = blockmap->next_offset;
449 again:
450 	resv = NULL;
451 	/*
452 	 * Check for wrap
453 	 */
454 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
455 		if (++loops == 2) {
456 			hmkprintf(hmp, "No space left for zone %d "
457 				"reservation\n", zone);
458 			*errorp = ENOSPC;
459 			goto failed;
460 		}
461 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
462 	}
463 
464 	/*
465 	 * The allocation request may not cross a buffer boundary.  Special
466 	 * large allocations must not cross a big-block boundary.
467 	 */
468 	tmp_offset = next_offset + bytes - 1;
469 	if (bytes <= HAMMER_BUFSIZE) {
470 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
471 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
472 			goto again;
473 		}
474 	} else {
475 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
476 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
477 			goto again;
478 		}
479 	}
480 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
481 
482 	/*
483 	 * Dive layer 1.
484 	 */
485 	layer1_offset = freemap->phys_offset +
486 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
487 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
488 	if (*errorp)
489 		goto failed;
490 
491 	/*
492 	 * Check CRC.
493 	 */
494 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
495 		hammer_lock_ex(&hmp->blkmap_lock);
496 		if (!hammer_crc_test_layer1(hmp->version, layer1))
497 			hpanic("CRC FAILED: LAYER1");
498 		hammer_unlock(&hmp->blkmap_lock);
499 	}
500 
501 	/*
502 	 * If we are at a big-block boundary and layer1 indicates no
503 	 * free big-blocks, then we cannot allocate a new big-block in
504 	 * layer2, skip to the next layer1 entry.
505 	 */
506 	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
507 	    layer1->blocks_free == 0) {
508 		next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
509 		if (hammer_check_volume(hmp, &next_offset))
510 			goto failed;
511 		goto again;
512 	}
513 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
514 
515 	/*
516 	 * Dive layer 2, each entry represents a big-block.
517 	 */
518 	layer2_offset = layer1->phys_offset +
519 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
520 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
521 	if (*errorp)
522 		goto failed;
523 
524 	/*
525 	 * Check CRC if not allocating into uninitialized space (which we
526 	 * aren't when reserving space).
527 	 */
528 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
529 		hammer_lock_ex(&hmp->blkmap_lock);
530 		if (!hammer_crc_test_layer2(hmp->version, layer2))
531 			hpanic("CRC FAILED: LAYER2");
532 		hammer_unlock(&hmp->blkmap_lock);
533 	}
534 
535 	/*
536 	 * Skip the layer if the zone is owned by someone other then us.
537 	 */
538 	if (layer2->zone && layer2->zone != zone) {
539 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
540 		goto again;
541 	}
542 	if (offset < layer2->append_off) {
543 		next_offset += layer2->append_off - offset;
544 		goto again;
545 	}
546 
547 	/*
548 	 * We need the lock from this point on.  We have to re-check zone
549 	 * ownership after acquiring the lock and also check for reservations.
550 	 */
551 	hammer_lock_ex(&hmp->blkmap_lock);
552 
553 	if (layer2->zone && layer2->zone != zone) {
554 		hammer_unlock(&hmp->blkmap_lock);
555 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
556 		goto again;
557 	}
558 	if (offset < layer2->append_off) {
559 		hammer_unlock(&hmp->blkmap_lock);
560 		next_offset += layer2->append_off - offset;
561 		goto again;
562 	}
563 
564 	/*
565 	 * The big-block might be reserved by another zone.  If it is reserved
566 	 * by our zone we may have to move next_offset past the append_off.
567 	 */
568 	base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
569 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
570 	if (resv) {
571 		if (resv->zone != zone) {
572 			hammer_unlock(&hmp->blkmap_lock);
573 			next_offset = HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset);
574 			goto again;
575 		}
576 		if (offset < resv->append_off) {
577 			hammer_unlock(&hmp->blkmap_lock);
578 			next_offset += resv->append_off - offset;
579 			goto again;
580 		}
581 		++resv->refs;
582 	} else {
583 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
584 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
585 		resx->refs = 1;
586 		resx->zone = zone;
587 		resx->zone_offset = base_off;
588 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
589 			resx->flags |= HAMMER_RESF_LAYER2FREE;
590 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
591 		KKASSERT(resv == NULL);
592 		resv = resx;
593 		++hammer_count_reservations;
594 	}
595 	resv->append_off = offset + bytes;
596 
597 	/*
598 	 * If we are not reserving a whole buffer but are at the start of
599 	 * a new block, call hammer_bnew() to avoid a disk read.
600 	 *
601 	 * If we are reserving a whole buffer (or more), the caller will
602 	 * probably use a direct read, so do nothing.
603 	 *
604 	 * If we do not have a whole lot of system memory we really can't
605 	 * afford to block while holding the blkmap_lock!
606 	 */
607 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
608 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
609 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
610 			if (*errorp)
611 				goto failed;
612 		}
613 	}
614 
615 	blockmap->next_offset = next_offset + bytes;
616 	hammer_unlock(&hmp->blkmap_lock);
617 
618 failed:
619 	if (buffer1)
620 		hammer_rel_buffer(buffer1, 0);
621 	if (buffer2)
622 		hammer_rel_buffer(buffer2, 0);
623 	if (buffer3)
624 		hammer_rel_buffer(buffer3, 0);
625 	hammer_rel_volume(root_volume, 0);
626 	*zone_offp = next_offset;
627 
628 	return(resv);
629 }
630 
631 /*
632  * Dereference a reservation structure.  Upon the final release the
633  * underlying big-block is checked and if it is entirely free we delete
634  * any related HAMMER buffers to avoid potential conflicts with future
635  * reuse of the big-block.
636  */
637 void
638 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
639 {
640 	hammer_off_t base_offset;
641 	int error;
642 
643 	KKASSERT(resv->refs > 0);
644 	KKASSERT(hammer_is_zone_raw_buffer(resv->zone_offset));
645 
646 	/*
647 	 * Setting append_off to the max prevents any new allocations
648 	 * from occuring while we are trying to dispose of the reservation,
649 	 * allowing us to safely delete any related HAMMER buffers.
650 	 *
651 	 * If we are unable to clean out all related HAMMER buffers we
652 	 * requeue the delay.
653 	 */
654 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
655 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
656 		base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
657 		error = hammer_del_buffers(hmp, base_offset,
658 					   resv->zone_offset,
659 					   HAMMER_BIGBLOCK_SIZE,
660 					   1);
661 		if (hammer_debug_general & 0x20000) {
662 			hkprintf("delbgblk %016jx error %d\n",
663 				(intmax_t)base_offset, error);
664 		}
665 		if (error)
666 			hammer_reserve_setdelay(hmp, resv);
667 	}
668 	if (--resv->refs == 0) {
669 		if (hammer_debug_general & 0x20000) {
670 			hkprintf("delresvr %016jx zone %02x\n",
671 				(intmax_t)resv->zone_offset, resv->zone);
672 		}
673 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
674 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
675 		kfree(resv, hmp->m_misc);
676 		--hammer_count_reservations;
677 	}
678 }
679 
680 /*
681  * Prevent a potentially free big-block from being reused until after
682  * the related flushes have completely cycled, otherwise crash recovery
683  * could resurrect a data block that was already reused and overwritten.
684  *
685  * The caller might reset the underlying layer2 entry's append_off to 0, so
686  * our covering append_off must be set to max to prevent any reallocation
687  * until after the flush delays complete, not to mention proper invalidation
688  * of any underlying cached blocks.
689  */
690 static void
691 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
692 			int zone, hammer_blockmap_layer2_t layer2)
693 {
694 	hammer_reserve_t resv;
695 
696 	/*
697 	 * Allocate the reservation if necessary.
698 	 *
699 	 * NOTE: need lock in future around resv lookup/allocation and
700 	 * the setdelay call, currently refs is not bumped until the call.
701 	 */
702 again:
703 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
704 	if (resv == NULL) {
705 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
706 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
707 		resv->zone = zone;
708 		resv->zone_offset = base_offset;
709 		resv->refs = 0;
710 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
711 
712 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
713 			resv->flags |= HAMMER_RESF_LAYER2FREE;
714 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
715 			kfree(resv, hmp->m_misc);
716 			goto again;
717 		}
718 		++hammer_count_reservations;
719 	} else {
720 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
721 			resv->flags |= HAMMER_RESF_LAYER2FREE;
722 	}
723 	hammer_reserve_setdelay(hmp, resv);
724 }
725 
726 /*
727  * Enter the reservation on the on-delay list, or move it if it
728  * is already on the list.
729  */
730 static void
731 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
732 {
733 	if (resv->flags & HAMMER_RESF_ONDELAY) {
734 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
735 		resv->flg_no = hmp->flusher.next + 1;
736 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
737 	} else {
738 		++resv->refs;
739 		++hmp->rsv_fromdelay;
740 		resv->flags |= HAMMER_RESF_ONDELAY;
741 		resv->flg_no = hmp->flusher.next + 1;
742 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
743 	}
744 }
745 
746 /*
747  * Reserve has reached its flush point, remove it from the delay list
748  * and finish it off.  hammer_blockmap_reserve_complete() inherits
749  * the ondelay reference.
750  */
751 void
752 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
753 {
754 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
755 	resv->flags &= ~HAMMER_RESF_ONDELAY;
756 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
757 	--hmp->rsv_fromdelay;
758 	hammer_blockmap_reserve_complete(hmp, resv);
759 }
760 
761 /*
762  * Backend function - free (offset, bytes) in a zone.
763  *
764  * XXX error return
765  */
766 void
767 hammer_blockmap_free(hammer_transaction_t trans,
768 		     hammer_off_t zone_offset, int bytes)
769 {
770 	hammer_mount_t hmp;
771 	hammer_volume_t root_volume;
772 	hammer_blockmap_t freemap;
773 	hammer_blockmap_layer1_t layer1;
774 	hammer_blockmap_layer2_t layer2;
775 	hammer_buffer_t buffer1 = NULL;
776 	hammer_buffer_t buffer2 = NULL;
777 	hammer_off_t layer1_offset;
778 	hammer_off_t layer2_offset;
779 	hammer_off_t base_off;
780 	int error;
781 	int zone;
782 
783 	if (bytes == 0)
784 		return;
785 	hmp = trans->hmp;
786 
787 	/*
788 	 * Alignment
789 	 */
790 	bytes = HAMMER_DATA_DOALIGN(bytes);
791 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
792 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
793 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
794 
795 	/*
796 	 * Basic zone validation & locking
797 	 */
798 	zone = HAMMER_ZONE_DECODE(zone_offset);
799 	KKASSERT(hammer_is_index_record(zone));
800 	root_volume = trans->rootvol;
801 	error = 0;
802 
803 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
804 
805 	/*
806 	 * Dive layer 1.
807 	 */
808 	layer1_offset = freemap->phys_offset +
809 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
810 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
811 	if (error)
812 		goto failed;
813 	KKASSERT(layer1->phys_offset &&
814 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
815 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
816 		hammer_lock_ex(&hmp->blkmap_lock);
817 		if (!hammer_crc_test_layer1(hmp->version, layer1))
818 			hpanic("CRC FAILED: LAYER1");
819 		hammer_unlock(&hmp->blkmap_lock);
820 	}
821 
822 	/*
823 	 * Dive layer 2, each entry represents a big-block.
824 	 */
825 	layer2_offset = layer1->phys_offset +
826 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
827 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
828 	if (error)
829 		goto failed;
830 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
831 		hammer_lock_ex(&hmp->blkmap_lock);
832 		if (!hammer_crc_test_layer2(hmp->version, layer2))
833 			hpanic("CRC FAILED: LAYER2");
834 		hammer_unlock(&hmp->blkmap_lock);
835 	}
836 
837 	hammer_lock_ex(&hmp->blkmap_lock);
838 
839 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
840 
841 	/*
842 	 * Free space previously allocated via blockmap_alloc().
843 	 *
844 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
845 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
846 	 */
847 	KKASSERT(layer2->zone == zone);
848 	layer2->bytes_free += bytes;
849 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
850 
851 	/*
852 	 * If a big-block becomes entirely free we must create a covering
853 	 * reservation to prevent premature reuse.  Note, however, that
854 	 * the big-block and/or reservation may still have an append_off
855 	 * that allows further (non-reused) allocations.
856 	 *
857 	 * Once the reservation has been made we re-check layer2 and if
858 	 * the big-block is still entirely free we reset the layer2 entry.
859 	 * The reservation will prevent premature reuse.
860 	 *
861 	 * NOTE: hammer_buffer's are only invalidated when the reservation
862 	 * is completed, if the layer2 entry is still completely free at
863 	 * that time.  Any allocations from the reservation that may have
864 	 * occured in the mean time, or active references on the reservation
865 	 * from new pending allocations, will prevent the invalidation from
866 	 * occuring.
867 	 */
868 	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
869 		base_off = hammer_xlate_to_zone2(zone_offset &
870 						~HAMMER_BIGBLOCK_MASK64);
871 
872 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
873 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
874 			layer2->zone = 0;
875 			layer2->append_off = 0;
876 			hammer_modify_buffer(trans, buffer1,
877 					     layer1, sizeof(*layer1));
878 			++layer1->blocks_free;
879 			hammer_crc_set_layer1(hmp->version, layer1);
880 			hammer_modify_buffer_done(buffer1);
881 			hammer_modify_volume_field(trans,
882 					trans->rootvol,
883 					vol0_stat_freebigblocks);
884 			++root_volume->ondisk->vol0_stat_freebigblocks;
885 			hmp->copy_stat_freebigblocks =
886 			   root_volume->ondisk->vol0_stat_freebigblocks;
887 			hammer_modify_volume_done(trans->rootvol);
888 		}
889 	}
890 	hammer_crc_set_layer2(hmp->version, layer2);
891 	hammer_modify_buffer_done(buffer2);
892 	hammer_unlock(&hmp->blkmap_lock);
893 
894 failed:
895 	if (buffer1)
896 		hammer_rel_buffer(buffer1, 0);
897 	if (buffer2)
898 		hammer_rel_buffer(buffer2, 0);
899 }
900 
901 int
902 hammer_blockmap_dedup(hammer_transaction_t trans,
903 		     hammer_off_t zone_offset, int bytes)
904 {
905 	hammer_mount_t hmp;
906 	hammer_blockmap_t freemap;
907 	hammer_blockmap_layer1_t layer1;
908 	hammer_blockmap_layer2_t layer2;
909 	hammer_buffer_t buffer1 = NULL;
910 	hammer_buffer_t buffer2 = NULL;
911 	hammer_off_t layer1_offset;
912 	hammer_off_t layer2_offset;
913 	int32_t temp;
914 	int error;
915 	int zone __debugvar;
916 
917 	if (bytes == 0)
918 		return (0);
919 	hmp = trans->hmp;
920 
921 	/*
922 	 * Alignment
923 	 */
924 	bytes = HAMMER_DATA_DOALIGN(bytes);
925 	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
926 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
927 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
928 
929 	/*
930 	 * Basic zone validation & locking
931 	 */
932 	zone = HAMMER_ZONE_DECODE(zone_offset);
933 	KKASSERT(hammer_is_index_record(zone));
934 	error = 0;
935 
936 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
937 
938 	/*
939 	 * Dive layer 1.
940 	 */
941 	layer1_offset = freemap->phys_offset +
942 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
943 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
944 	if (error)
945 		goto failed;
946 	KKASSERT(layer1->phys_offset &&
947 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
948 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
949 		hammer_lock_ex(&hmp->blkmap_lock);
950 		if (!hammer_crc_test_layer1(hmp->version, layer1))
951 			hpanic("CRC FAILED: LAYER1");
952 		hammer_unlock(&hmp->blkmap_lock);
953 	}
954 
955 	/*
956 	 * Dive layer 2, each entry represents a big-block.
957 	 */
958 	layer2_offset = layer1->phys_offset +
959 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
960 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
961 	if (error)
962 		goto failed;
963 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
964 		hammer_lock_ex(&hmp->blkmap_lock);
965 		if (!hammer_crc_test_layer2(hmp->version, layer2))
966 			hpanic("CRC FAILED: LAYER2");
967 		hammer_unlock(&hmp->blkmap_lock);
968 	}
969 
970 	hammer_lock_ex(&hmp->blkmap_lock);
971 
972 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
973 
974 	/*
975 	 * Free space previously allocated via blockmap_alloc().
976 	 *
977 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
978 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
979 	 */
980 	KKASSERT(layer2->zone == zone);
981 	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
982 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
983 	if (temp > layer2->bytes_free) {
984 		error = ERANGE;
985 		goto underflow;
986 	}
987 	layer2->bytes_free -= bytes;
988 
989 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
990 
991 	hammer_crc_set_layer2(hmp->version, layer2);
992 underflow:
993 	hammer_modify_buffer_done(buffer2);
994 	hammer_unlock(&hmp->blkmap_lock);
995 
996 failed:
997 	if (buffer1)
998 		hammer_rel_buffer(buffer1, 0);
999 	if (buffer2)
1000 		hammer_rel_buffer(buffer2, 0);
1001 	return (error);
1002 }
1003 
1004 /*
1005  * Backend function - finalize (offset, bytes) in a zone.
1006  *
1007  * Allocate space that was previously reserved by the frontend.
1008  */
1009 int
1010 hammer_blockmap_finalize(hammer_transaction_t trans,
1011 			 hammer_reserve_t resv,
1012 			 hammer_off_t zone_offset, int bytes)
1013 {
1014 	hammer_mount_t hmp;
1015 	hammer_volume_t root_volume;
1016 	hammer_blockmap_t freemap;
1017 	hammer_blockmap_layer1_t layer1;
1018 	hammer_blockmap_layer2_t layer2;
1019 	hammer_buffer_t buffer1 = NULL;
1020 	hammer_buffer_t buffer2 = NULL;
1021 	hammer_off_t layer1_offset;
1022 	hammer_off_t layer2_offset;
1023 	int error;
1024 	int zone;
1025 	int offset;
1026 
1027 	if (bytes == 0)
1028 		return(0);
1029 	hmp = trans->hmp;
1030 
1031 	/*
1032 	 * Alignment
1033 	 */
1034 	bytes = HAMMER_DATA_DOALIGN(bytes);
1035 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1036 
1037 	/*
1038 	 * Basic zone validation & locking
1039 	 */
1040 	zone = HAMMER_ZONE_DECODE(zone_offset);
1041 	KKASSERT(hammer_is_index_record(zone));
1042 	root_volume = trans->rootvol;
1043 	error = 0;
1044 
1045 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1046 
1047 	/*
1048 	 * Dive layer 1.
1049 	 */
1050 	layer1_offset = freemap->phys_offset +
1051 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1052 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1053 	if (error)
1054 		goto failed;
1055 	KKASSERT(layer1->phys_offset &&
1056 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1057 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1058 		hammer_lock_ex(&hmp->blkmap_lock);
1059 		if (!hammer_crc_test_layer1(hmp->version, layer1))
1060 			hpanic("CRC FAILED: LAYER1");
1061 		hammer_unlock(&hmp->blkmap_lock);
1062 	}
1063 
1064 	/*
1065 	 * Dive layer 2, each entry represents a big-block.
1066 	 */
1067 	layer2_offset = layer1->phys_offset +
1068 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1069 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1070 	if (error)
1071 		goto failed;
1072 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1073 		hammer_lock_ex(&hmp->blkmap_lock);
1074 		if (!hammer_crc_test_layer2(hmp->version, layer2))
1075 			hpanic("CRC FAILED: LAYER2");
1076 		hammer_unlock(&hmp->blkmap_lock);
1077 	}
1078 
1079 	hammer_lock_ex(&hmp->blkmap_lock);
1080 
1081 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1082 
1083 	/*
1084 	 * Finalize some or all of the space covered by a current
1085 	 * reservation.  An allocation in the same layer may have
1086 	 * already assigned ownership.
1087 	 */
1088 	if (layer2->zone == 0) {
1089 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
1090 		--layer1->blocks_free;
1091 		hammer_crc_set_layer1(hmp->version, layer1);
1092 		hammer_modify_buffer_done(buffer1);
1093 		layer2->zone = zone;
1094 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1095 		KKASSERT(layer2->append_off == 0);
1096 		hammer_modify_volume_field(trans,
1097 				trans->rootvol,
1098 				vol0_stat_freebigblocks);
1099 		--root_volume->ondisk->vol0_stat_freebigblocks;
1100 		hmp->copy_stat_freebigblocks =
1101 		   root_volume->ondisk->vol0_stat_freebigblocks;
1102 		hammer_modify_volume_done(trans->rootvol);
1103 	}
1104 	if (layer2->zone != zone)
1105 		hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1106 	KKASSERT(layer2->zone == zone);
1107 	KKASSERT(bytes != 0);
1108 	layer2->bytes_free -= bytes;
1109 	if (resv)
1110 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1111 
1112 	/*
1113 	 * Finalizations can occur out of order, or combined with allocations.
1114 	 * append_off must be set to the highest allocated offset.
1115 	 */
1116 	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1117 	if (layer2->append_off < offset)
1118 		layer2->append_off = offset;
1119 
1120 	hammer_crc_set_layer2(hmp->version, layer2);
1121 	hammer_modify_buffer_done(buffer2);
1122 	hammer_unlock(&hmp->blkmap_lock);
1123 
1124 failed:
1125 	if (buffer1)
1126 		hammer_rel_buffer(buffer1, 0);
1127 	if (buffer2)
1128 		hammer_rel_buffer(buffer2, 0);
1129 	return(error);
1130 }
1131 
1132 /*
1133  * Return the approximate number of free bytes in the big-block
1134  * containing the specified blockmap offset.
1135  *
1136  * WARNING: A negative number can be returned if data de-dup exists,
1137  *	    and the result will also not represent he actual number
1138  *	    of free bytes in this case.
1139  *
1140  *	    This code is used only by the reblocker.
1141  */
1142 int
1143 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1144 			int *curp, int *errorp)
1145 {
1146 	hammer_volume_t root_volume;
1147 	hammer_blockmap_t blockmap;
1148 	hammer_blockmap_t freemap;
1149 	hammer_blockmap_layer1_t layer1;
1150 	hammer_blockmap_layer2_t layer2;
1151 	hammer_buffer_t buffer = NULL;
1152 	hammer_off_t layer1_offset;
1153 	hammer_off_t layer2_offset;
1154 	int32_t bytes;
1155 	int zone;
1156 
1157 	zone = HAMMER_ZONE_DECODE(zone_offset);
1158 	KKASSERT(hammer_is_index_record(zone));
1159 	root_volume = hammer_get_root_volume(hmp, errorp);
1160 	if (*errorp) {
1161 		*curp = 0;
1162 		return(0);
1163 	}
1164 	blockmap = &hmp->blockmap[zone];
1165 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1166 
1167 	/*
1168 	 * Dive layer 1.
1169 	 */
1170 	layer1_offset = freemap->phys_offset +
1171 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1172 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1173 	if (*errorp) {
1174 		*curp = 0;
1175 		bytes = 0;
1176 		goto failed;
1177 	}
1178 	KKASSERT(layer1->phys_offset);
1179 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1180 		hammer_lock_ex(&hmp->blkmap_lock);
1181 		if (!hammer_crc_test_layer1(hmp->version, layer1))
1182 			hpanic("CRC FAILED: LAYER1");
1183 		hammer_unlock(&hmp->blkmap_lock);
1184 	}
1185 
1186 	/*
1187 	 * Dive layer 2, each entry represents a big-block.
1188 	 *
1189 	 * (reuse buffer, layer1 pointer becomes invalid)
1190 	 */
1191 	layer2_offset = layer1->phys_offset +
1192 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1193 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1194 	if (*errorp) {
1195 		*curp = 0;
1196 		bytes = 0;
1197 		goto failed;
1198 	}
1199 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1200 		hammer_lock_ex(&hmp->blkmap_lock);
1201 		if (!hammer_crc_test_layer2(hmp->version, layer2))
1202 			hpanic("CRC FAILED: LAYER2");
1203 		hammer_unlock(&hmp->blkmap_lock);
1204 	}
1205 	KKASSERT(layer2->zone == zone);
1206 
1207 	bytes = layer2->bytes_free;
1208 
1209 	/*
1210 	 * *curp becomes 1 only when no error and,
1211 	 * next_offset and zone_offset are in the same big-block.
1212 	 */
1213 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1214 		*curp = 0;  /* not same */
1215 	else
1216 		*curp = 1;
1217 failed:
1218 	if (buffer)
1219 		hammer_rel_buffer(buffer, 0);
1220 	hammer_rel_volume(root_volume, 0);
1221 	if (hammer_debug_general & 0x4000) {
1222 		hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset, bytes);
1223 	}
1224 	return(bytes);
1225 }
1226 
1227 
1228 /*
1229  * Lookup a blockmap offset and verify blockmap layers.
1230  */
1231 hammer_off_t
1232 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1233 			int *errorp)
1234 {
1235 	hammer_volume_t root_volume;
1236 	hammer_blockmap_t freemap;
1237 	hammer_blockmap_layer1_t layer1;
1238 	hammer_blockmap_layer2_t layer2;
1239 	hammer_buffer_t buffer = NULL;
1240 	hammer_off_t layer1_offset;
1241 	hammer_off_t layer2_offset;
1242 	hammer_off_t result_offset;
1243 	hammer_off_t base_off;
1244 	hammer_reserve_t resv __debugvar;
1245 	int zone;
1246 
1247 	/*
1248 	 * Calculate the zone-2 offset.
1249 	 */
1250 	zone = HAMMER_ZONE_DECODE(zone_offset);
1251 	result_offset = hammer_xlate_to_zone2(zone_offset);
1252 
1253 	/*
1254 	 * Validate the allocation zone
1255 	 */
1256 	root_volume = hammer_get_root_volume(hmp, errorp);
1257 	if (*errorp)
1258 		return(0);
1259 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1260 	KKASSERT(freemap->phys_offset != 0);
1261 
1262 	/*
1263 	 * Dive layer 1.
1264 	 */
1265 	layer1_offset = freemap->phys_offset +
1266 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1267 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1268 	if (*errorp)
1269 		goto failed;
1270 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1271 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1272 		hammer_lock_ex(&hmp->blkmap_lock);
1273 		if (!hammer_crc_test_layer1(hmp->version, layer1))
1274 			hpanic("CRC FAILED: LAYER1");
1275 		hammer_unlock(&hmp->blkmap_lock);
1276 	}
1277 
1278 	/*
1279 	 * Dive layer 2, each entry represents a big-block.
1280 	 */
1281 	layer2_offset = layer1->phys_offset +
1282 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1283 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1284 
1285 	if (*errorp)
1286 		goto failed;
1287 	if (layer2->zone == 0) {
1288 		base_off = hammer_xlate_to_zone2(zone_offset &
1289 						~HAMMER_BIGBLOCK_MASK64);
1290 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1291 				 base_off);
1292 		KKASSERT(resv && resv->zone == zone);
1293 
1294 	} else if (layer2->zone != zone) {
1295 		hpanic("bad zone %d/%d", layer2->zone, zone);
1296 	}
1297 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1298 		hammer_lock_ex(&hmp->blkmap_lock);
1299 		if (!hammer_crc_test_layer2(hmp->version, layer2))
1300 			hpanic("CRC FAILED: LAYER2");
1301 		hammer_unlock(&hmp->blkmap_lock);
1302 	}
1303 
1304 failed:
1305 	if (buffer)
1306 		hammer_rel_buffer(buffer, 0);
1307 	hammer_rel_volume(root_volume, 0);
1308 	if (hammer_debug_general & 0x0800) {
1309 		hdkprintf("%016jx -> %016jx\n",
1310 			(intmax_t)zone_offset, (intmax_t)result_offset);
1311 	}
1312 	return(result_offset);
1313 }
1314 
1315 
1316 /*
1317  * Check space availability
1318  *
1319  * MPSAFE - does not require fs_token
1320  */
1321 int
1322 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1323 {
1324 	const int in_size = sizeof(struct hammer_inode_data) +
1325 			    sizeof(union hammer_btree_elm);
1326 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1327 	int64_t usedbytes;
1328 
1329 	usedbytes = hmp->rsv_inodes * in_size +
1330 		    hmp->rsv_recs * rec_size +
1331 		    hmp->rsv_databytes +
1332 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1333 		    ((int64_t)hammer_limit_dirtybufspace) +
1334 		    (slop << HAMMER_BIGBLOCK_BITS);
1335 
1336 	if (resp)
1337 		*resp = usedbytes;
1338 
1339 	if (hmp->copy_stat_freebigblocks >=
1340 	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1341 		return(0);
1342 	}
1343 
1344 	return (ENOSPC);
1345 }
1346 
1347 static int
1348 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1349 {
1350 	hammer_blockmap_t freemap;
1351 	hammer_blockmap_layer1_t layer1;
1352 	hammer_buffer_t buffer1 = NULL;
1353 	hammer_off_t layer1_offset;
1354 	int error = 0;
1355 
1356 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1357 
1358 	layer1_offset = freemap->phys_offset +
1359 			HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
1360 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1361 	if (error)
1362 		goto end;
1363 
1364 	/*
1365 	 * No more physically available space in layer1s
1366 	 * of the current volume, go to the next volume.
1367 	 */
1368 	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
1369 		hammer_skip_volume(offsetp);
1370 end:
1371 	if (buffer1)
1372 		hammer_rel_buffer(buffer1, 0);
1373 	return(error);
1374 }
1375 
1376 static void
1377 hammer_skip_volume(hammer_off_t *offsetp)
1378 {
1379 	hammer_off_t offset;
1380 	int zone, vol_no;
1381 
1382 	offset = *offsetp;
1383 	zone = HAMMER_ZONE_DECODE(offset);
1384 	vol_no = HAMMER_VOL_DECODE(offset) + 1;
1385 	KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1386 
1387 	if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
1388 		vol_no = 0;
1389 		++zone;
1390 	}
1391 
1392 	*offsetp = HAMMER_ENCODE(zone, vol_no, 0);
1393 }
1394