xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision bcb3e04d)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36 
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 				    hammer_off_t base_offset, int zone,
45 				    struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 
48 /*
49  * Reserved big-blocks red-black tree support
50  */
51 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
52 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
53 
54 static int
55 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
56 {
57 	if (res1->zone_offset < res2->zone_offset)
58 		return(-1);
59 	if (res1->zone_offset > res2->zone_offset)
60 		return(1);
61 	return(0);
62 }
63 
64 /*
65  * Allocate bytes from a zone
66  */
67 hammer_off_t
68 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
69 		      hammer_off_t hint, int *errorp)
70 {
71 	hammer_mount_t hmp;
72 	hammer_volume_t root_volume;
73 	hammer_blockmap_t blockmap;
74 	hammer_blockmap_t freemap;
75 	hammer_reserve_t resv;
76 	struct hammer_blockmap_layer1 *layer1;
77 	struct hammer_blockmap_layer2 *layer2;
78 	hammer_buffer_t buffer1 = NULL;
79 	hammer_buffer_t buffer2 = NULL;
80 	hammer_buffer_t buffer3 = NULL;
81 	hammer_off_t tmp_offset;
82 	hammer_off_t next_offset;
83 	hammer_off_t result_offset;
84 	hammer_off_t layer1_offset;
85 	hammer_off_t layer2_offset;
86 	hammer_off_t base_off;
87 	int loops = 0;
88 	int offset;		/* offset within big-block */
89 	int use_hint;
90 
91 	hmp = trans->hmp;
92 
93 	/*
94 	 * Deal with alignment and buffer-boundary issues.
95 	 *
96 	 * Be careful, certain primary alignments are used below to allocate
97 	 * new blockmap blocks.
98 	 */
99 	bytes = (bytes + 15) & ~15;
100 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
101 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
102 
103 	/*
104 	 * Setup
105 	 */
106 	root_volume = trans->rootvol;
107 	*errorp = 0;
108 	blockmap = &hmp->blockmap[zone];
109 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
110 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
111 
112 	/*
113 	 * Use the hint if we have one.
114 	 */
115 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
116 		next_offset = (hint + 15) & ~(hammer_off_t)15;
117 		use_hint = 1;
118 	} else {
119 		next_offset = blockmap->next_offset;
120 		use_hint = 0;
121 	}
122 again:
123 
124 	/*
125 	 * use_hint is turned off if we leave the hinted big-block.
126 	 */
127 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
128 		next_offset = blockmap->next_offset;
129 		use_hint = 0;
130 	}
131 
132 	/*
133 	 * Check for wrap
134 	 */
135 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
136 		if (++loops == 2) {
137 			result_offset = 0;
138 			*errorp = ENOSPC;
139 			goto failed;
140 		}
141 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
142 	}
143 
144 	/*
145 	 * The allocation request may not cross a buffer boundary.  Special
146 	 * large allocations must not cross a large-block boundary.
147 	 */
148 	tmp_offset = next_offset + bytes - 1;
149 	if (bytes <= HAMMER_BUFSIZE) {
150 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
151 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
152 			goto again;
153 		}
154 	} else {
155 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
156 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
157 			goto again;
158 		}
159 	}
160 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
161 
162 	/*
163 	 * Dive layer 1.
164 	 */
165 	layer1_offset = freemap->phys_offset +
166 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
167 
168 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
169 	if (*errorp) {
170 		result_offset = 0;
171 		goto failed;
172 	}
173 
174 	/*
175 	 * Check CRC.
176 	 */
177 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
178 		hammer_lock_ex(&hmp->blkmap_lock);
179 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
180 			panic("CRC FAILED: LAYER1");
181 		hammer_unlock(&hmp->blkmap_lock);
182 	}
183 
184 	/*
185 	 * If we are at a big-block boundary and layer1 indicates no
186 	 * free big-blocks, then we cannot allocate a new bigblock in
187 	 * layer2, skip to the next layer1 entry.
188 	 */
189 	if (offset == 0 && layer1->blocks_free == 0) {
190 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
191 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
192 		goto again;
193 	}
194 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
195 
196 	/*
197 	 * Skip this layer1 entry if it is pointing to a layer2 big-block
198 	 * on a volume that we are currently trying to remove from the
199 	 * file-system. This is used by the volume-del code together with
200 	 * the reblocker to free up a volume.
201 	 */
202 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
203 	    hmp->volume_to_remove) {
204 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
205 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
206 		goto again;
207 	}
208 
209 	/*
210 	 * Dive layer 2, each entry represents a large-block.
211 	 */
212 	layer2_offset = layer1->phys_offset +
213 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
214 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
215 	if (*errorp) {
216 		result_offset = 0;
217 		goto failed;
218 	}
219 
220 	/*
221 	 * Check CRC.  This can race another thread holding the lock
222 	 * and in the middle of modifying layer2.
223 	 */
224 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
225 		hammer_lock_ex(&hmp->blkmap_lock);
226 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
227 			panic("CRC FAILED: LAYER2");
228 		hammer_unlock(&hmp->blkmap_lock);
229 	}
230 
231 	/*
232 	 * Skip the layer if the zone is owned by someone other then us.
233 	 */
234 	if (layer2->zone && layer2->zone != zone) {
235 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
236 		goto again;
237 	}
238 	if (offset < layer2->append_off) {
239 		next_offset += layer2->append_off - offset;
240 		goto again;
241 	}
242 
243 	/*
244 	 * If operating in the current non-hint blockmap block, do not
245 	 * allow it to get over-full.  Also drop any active hinting so
246 	 * blockmap->next_offset is updated at the end.
247 	 *
248 	 * We do this for B-Tree and meta-data allocations to provide
249 	 * localization for updates.
250 	 */
251 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
252 	     zone == HAMMER_ZONE_META_INDEX) &&
253 	    offset >= HAMMER_LARGEBLOCK_OVERFILL &&
254 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
255 	) {
256 		if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
257 			next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
258 			use_hint = 0;
259 			goto again;
260 		}
261 	}
262 
263 	/*
264 	 * We need the lock from this point on.  We have to re-check zone
265 	 * ownership after acquiring the lock and also check for reservations.
266 	 */
267 	hammer_lock_ex(&hmp->blkmap_lock);
268 
269 	if (layer2->zone && layer2->zone != zone) {
270 		hammer_unlock(&hmp->blkmap_lock);
271 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
272 		goto again;
273 	}
274 	if (offset < layer2->append_off) {
275 		hammer_unlock(&hmp->blkmap_lock);
276 		next_offset += layer2->append_off - offset;
277 		goto again;
278 	}
279 
280 	/*
281 	 * The bigblock might be reserved by another zone.  If it is reserved
282 	 * by our zone we may have to move next_offset past the append_off.
283 	 */
284 	base_off = (next_offset &
285 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
286 		    HAMMER_ZONE_RAW_BUFFER;
287 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
288 	if (resv) {
289 		if (resv->zone != zone) {
290 			hammer_unlock(&hmp->blkmap_lock);
291 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
292 				      ~HAMMER_LARGEBLOCK_MASK64;
293 			goto again;
294 		}
295 		if (offset < resv->append_off) {
296 			hammer_unlock(&hmp->blkmap_lock);
297 			next_offset += resv->append_off - offset;
298 			goto again;
299 		}
300 		++resv->refs;
301 	}
302 
303 	/*
304 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
305 	 * of the layer for real.  At this point we've validated any
306 	 * reservation that might exist and can just ignore resv.
307 	 */
308 	if (layer2->zone == 0) {
309 		/*
310 		 * Assign the bigblock to our zone
311 		 */
312 		hammer_modify_buffer(trans, buffer1,
313 				     layer1, sizeof(*layer1));
314 		--layer1->blocks_free;
315 		layer1->layer1_crc = crc32(layer1,
316 					   HAMMER_LAYER1_CRCSIZE);
317 		hammer_modify_buffer_done(buffer1);
318 		hammer_modify_buffer(trans, buffer2,
319 				     layer2, sizeof(*layer2));
320 		layer2->zone = zone;
321 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
322 		KKASSERT(layer2->append_off == 0);
323 		hammer_modify_volume_field(trans, trans->rootvol,
324 					   vol0_stat_freebigblocks);
325 		--root_volume->ondisk->vol0_stat_freebigblocks;
326 		hmp->copy_stat_freebigblocks =
327 			root_volume->ondisk->vol0_stat_freebigblocks;
328 		hammer_modify_volume_done(trans->rootvol);
329 	} else {
330 		hammer_modify_buffer(trans, buffer2,
331 				     layer2, sizeof(*layer2));
332 	}
333 	KKASSERT(layer2->zone == zone);
334 
335 	/*
336 	 * NOTE: bytes_free can legally go negative due to de-dup.
337 	 */
338 	layer2->bytes_free -= bytes;
339 	KKASSERT(layer2->append_off <= offset);
340 	layer2->append_off = offset + bytes;
341 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
342 	hammer_modify_buffer_done(buffer2);
343 
344 	/*
345 	 * We hold the blockmap lock and should be the only ones
346 	 * capable of modifying resv->append_off.  Track the allocation
347 	 * as appropriate.
348 	 */
349 	KKASSERT(bytes != 0);
350 	if (resv) {
351 		KKASSERT(resv->append_off <= offset);
352 		resv->append_off = offset + bytes;
353 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
354 		hammer_blockmap_reserve_complete(hmp, resv);
355 	}
356 
357 	/*
358 	 * If we are allocating from the base of a new buffer we can avoid
359 	 * a disk read by calling hammer_bnew().
360 	 */
361 	if ((next_offset & HAMMER_BUFMASK) == 0) {
362 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
363 				errorp, &buffer3);
364 	}
365 	result_offset = next_offset;
366 
367 	/*
368 	 * If we weren't supplied with a hint or could not use the hint
369 	 * then we wound up using blockmap->next_offset as the hint and
370 	 * need to save it.
371 	 */
372 	if (use_hint == 0) {
373 		hammer_modify_volume(NULL, root_volume, NULL, 0);
374 		blockmap->next_offset = next_offset + bytes;
375 		hammer_modify_volume_done(root_volume);
376 	}
377 	hammer_unlock(&hmp->blkmap_lock);
378 failed:
379 
380 	/*
381 	 * Cleanup
382 	 */
383 	if (buffer1)
384 		hammer_rel_buffer(buffer1, 0);
385 	if (buffer2)
386 		hammer_rel_buffer(buffer2, 0);
387 	if (buffer3)
388 		hammer_rel_buffer(buffer3, 0);
389 
390 	return(result_offset);
391 }
392 
393 /*
394  * Frontend function - Reserve bytes in a zone.
395  *
396  * This code reserves bytes out of a blockmap without committing to any
397  * meta-data modifications, allowing the front-end to directly issue disk
398  * write I/O for large blocks of data
399  *
400  * The backend later finalizes the reservation with hammer_blockmap_finalize()
401  * upon committing the related record.
402  */
403 hammer_reserve_t
404 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
405 			hammer_off_t *zone_offp, int *errorp)
406 {
407 	hammer_volume_t root_volume;
408 	hammer_blockmap_t blockmap;
409 	hammer_blockmap_t freemap;
410 	struct hammer_blockmap_layer1 *layer1;
411 	struct hammer_blockmap_layer2 *layer2;
412 	hammer_buffer_t buffer1 = NULL;
413 	hammer_buffer_t buffer2 = NULL;
414 	hammer_buffer_t buffer3 = NULL;
415 	hammer_off_t tmp_offset;
416 	hammer_off_t next_offset;
417 	hammer_off_t layer1_offset;
418 	hammer_off_t layer2_offset;
419 	hammer_off_t base_off;
420 	hammer_reserve_t resv;
421 	hammer_reserve_t resx;
422 	int loops = 0;
423 	int offset;
424 
425 	/*
426 	 * Setup
427 	 */
428 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
429 	root_volume = hammer_get_root_volume(hmp, errorp);
430 	if (*errorp)
431 		return(NULL);
432 	blockmap = &hmp->blockmap[zone];
433 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
434 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
435 
436 	/*
437 	 * Deal with alignment and buffer-boundary issues.
438 	 *
439 	 * Be careful, certain primary alignments are used below to allocate
440 	 * new blockmap blocks.
441 	 */
442 	bytes = (bytes + 15) & ~15;
443 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
444 
445 	next_offset = blockmap->next_offset;
446 again:
447 	resv = NULL;
448 	/*
449 	 * Check for wrap
450 	 */
451 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
452 		if (++loops == 2) {
453 			*errorp = ENOSPC;
454 			goto failed;
455 		}
456 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
457 	}
458 
459 	/*
460 	 * The allocation request may not cross a buffer boundary.  Special
461 	 * large allocations must not cross a large-block boundary.
462 	 */
463 	tmp_offset = next_offset + bytes - 1;
464 	if (bytes <= HAMMER_BUFSIZE) {
465 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
466 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
467 			goto again;
468 		}
469 	} else {
470 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
471 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
472 			goto again;
473 		}
474 	}
475 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
476 
477 	/*
478 	 * Dive layer 1.
479 	 */
480 	layer1_offset = freemap->phys_offset +
481 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
482 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
483 	if (*errorp)
484 		goto failed;
485 
486 	/*
487 	 * Check CRC.
488 	 */
489 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
490 		hammer_lock_ex(&hmp->blkmap_lock);
491 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
492 			panic("CRC FAILED: LAYER1");
493 		hammer_unlock(&hmp->blkmap_lock);
494 	}
495 
496 	/*
497 	 * If we are at a big-block boundary and layer1 indicates no
498 	 * free big-blocks, then we cannot allocate a new bigblock in
499 	 * layer2, skip to the next layer1 entry.
500 	 */
501 	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
502 	    layer1->blocks_free == 0) {
503 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
504 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
505 		goto again;
506 	}
507 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
508 
509 	/*
510 	 * Dive layer 2, each entry represents a large-block.
511 	 */
512 	layer2_offset = layer1->phys_offset +
513 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
514 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
515 	if (*errorp)
516 		goto failed;
517 
518 	/*
519 	 * Check CRC if not allocating into uninitialized space (which we
520 	 * aren't when reserving space).
521 	 */
522 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
523 		hammer_lock_ex(&hmp->blkmap_lock);
524 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
525 			panic("CRC FAILED: LAYER2");
526 		hammer_unlock(&hmp->blkmap_lock);
527 	}
528 
529 	/*
530 	 * Skip the layer if the zone is owned by someone other then us.
531 	 */
532 	if (layer2->zone && layer2->zone != zone) {
533 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
534 		goto again;
535 	}
536 	if (offset < layer2->append_off) {
537 		next_offset += layer2->append_off - offset;
538 		goto again;
539 	}
540 
541 	/*
542 	 * We need the lock from this point on.  We have to re-check zone
543 	 * ownership after acquiring the lock and also check for reservations.
544 	 */
545 	hammer_lock_ex(&hmp->blkmap_lock);
546 
547 	if (layer2->zone && layer2->zone != zone) {
548 		hammer_unlock(&hmp->blkmap_lock);
549 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
550 		goto again;
551 	}
552 	if (offset < layer2->append_off) {
553 		hammer_unlock(&hmp->blkmap_lock);
554 		next_offset += layer2->append_off - offset;
555 		goto again;
556 	}
557 
558 	/*
559 	 * The bigblock might be reserved by another zone.  If it is reserved
560 	 * by our zone we may have to move next_offset past the append_off.
561 	 */
562 	base_off = (next_offset &
563 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
564 		    HAMMER_ZONE_RAW_BUFFER;
565 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
566 	if (resv) {
567 		if (resv->zone != zone) {
568 			hammer_unlock(&hmp->blkmap_lock);
569 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
570 				      ~HAMMER_LARGEBLOCK_MASK64;
571 			goto again;
572 		}
573 		if (offset < resv->append_off) {
574 			hammer_unlock(&hmp->blkmap_lock);
575 			next_offset += resv->append_off - offset;
576 			goto again;
577 		}
578 		++resv->refs;
579 		resx = NULL;
580 	} else {
581 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
582 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
583 		resx->refs = 1;
584 		resx->zone = zone;
585 		resx->zone_offset = base_off;
586 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
587 			resx->flags |= HAMMER_RESF_LAYER2FREE;
588 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
589 		KKASSERT(resv == NULL);
590 		resv = resx;
591 		++hammer_count_reservations;
592 	}
593 	resv->append_off = offset + bytes;
594 
595 	/*
596 	 * If we are not reserving a whole buffer but are at the start of
597 	 * a new block, call hammer_bnew() to avoid a disk read.
598 	 *
599 	 * If we are reserving a whole buffer (or more), the caller will
600 	 * probably use a direct read, so do nothing.
601 	 */
602 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
603 		hammer_bnew(hmp, next_offset, errorp, &buffer3);
604 	}
605 
606 	/*
607 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
608 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
609 	 * be big-block aligned.
610 	 */
611 	blockmap->next_offset = next_offset + bytes;
612 	hammer_unlock(&hmp->blkmap_lock);
613 
614 failed:
615 	if (buffer1)
616 		hammer_rel_buffer(buffer1, 0);
617 	if (buffer2)
618 		hammer_rel_buffer(buffer2, 0);
619 	if (buffer3)
620 		hammer_rel_buffer(buffer3, 0);
621 	hammer_rel_volume(root_volume, 0);
622 	*zone_offp = next_offset;
623 
624 	return(resv);
625 }
626 
627 /*
628  * Dereference a reservation structure.  Upon the final release the
629  * underlying big-block is checked and if it is entirely free we delete
630  * any related HAMMER buffers to avoid potential conflicts with future
631  * reuse of the big-block.
632  */
633 void
634 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
635 {
636 	hammer_off_t base_offset;
637 	int error;
638 
639 	KKASSERT(resv->refs > 0);
640 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
641 		 HAMMER_ZONE_RAW_BUFFER);
642 
643 	/*
644 	 * Setting append_off to the max prevents any new allocations
645 	 * from occuring while we are trying to dispose of the reservation,
646 	 * allowing us to safely delete any related HAMMER buffers.
647 	 *
648 	 * If we are unable to clean out all related HAMMER buffers we
649 	 * requeue the delay.
650 	 */
651 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
652 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
653 		base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
654 		base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
655 		error = hammer_del_buffers(hmp, base_offset,
656 					   resv->zone_offset,
657 					   HAMMER_LARGEBLOCK_SIZE,
658 					   1);
659 		if (hammer_debug_general & 0x20000) {
660 			kprintf("hammer: dellgblk %016jx error %d\n",
661 				(intmax_t)base_offset, error);
662 		}
663 		if (error)
664 			hammer_reserve_setdelay(hmp, resv);
665 	}
666 	if (--resv->refs == 0) {
667 		if (hammer_debug_general & 0x20000) {
668 			kprintf("hammer: delresvr %016jx zone %02x\n",
669 				(intmax_t)resv->zone_offset, resv->zone);
670 		}
671 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
672 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
673 		kfree(resv, hmp->m_misc);
674 		--hammer_count_reservations;
675 	}
676 }
677 
678 /*
679  * Prevent a potentially free big-block from being reused until after
680  * the related flushes have completely cycled, otherwise crash recovery
681  * could resurrect a data block that was already reused and overwritten.
682  *
683  * The caller might reset the underlying layer2 entry's append_off to 0, so
684  * our covering append_off must be set to max to prevent any reallocation
685  * until after the flush delays complete, not to mention proper invalidation
686  * of any underlying cached blocks.
687  */
688 static void
689 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
690 			int zone, struct hammer_blockmap_layer2 *layer2)
691 {
692 	hammer_reserve_t resv;
693 
694 	/*
695 	 * Allocate the reservation if necessary.
696 	 *
697 	 * NOTE: need lock in future around resv lookup/allocation and
698 	 * the setdelay call, currently refs is not bumped until the call.
699 	 */
700 again:
701 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
702 	if (resv == NULL) {
703 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
704 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
705 		resv->zone = zone;
706 		resv->zone_offset = base_offset;
707 		resv->refs = 0;
708 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
709 
710 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
711 			resv->flags |= HAMMER_RESF_LAYER2FREE;
712 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
713 			kfree(resv, hmp->m_misc);
714 			goto again;
715 		}
716 		++hammer_count_reservations;
717 	} else {
718 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
719 			resv->flags |= HAMMER_RESF_LAYER2FREE;
720 	}
721 	hammer_reserve_setdelay(hmp, resv);
722 }
723 
724 /*
725  * Enter the reservation on the on-delay list, or move it if it
726  * is already on the list.
727  */
728 static void
729 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
730 {
731 	if (resv->flags & HAMMER_RESF_ONDELAY) {
732 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
733 		resv->flush_group = hmp->flusher.next + 1;
734 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
735 	} else {
736 		++resv->refs;
737 		++hmp->rsv_fromdelay;
738 		resv->flags |= HAMMER_RESF_ONDELAY;
739 		resv->flush_group = hmp->flusher.next + 1;
740 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
741 	}
742 }
743 
744 /*
745  * Reserve has reached its flush point, remove it from the delay list
746  * and finish it off.  hammer_blockmap_reserve_complete() inherits
747  * the ondelay reference.
748  */
749 void
750 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
751 {
752 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
753 	resv->flags &= ~HAMMER_RESF_ONDELAY;
754 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
755 	--hmp->rsv_fromdelay;
756 	hammer_blockmap_reserve_complete(hmp, resv);
757 }
758 
759 /*
760  * Backend function - free (offset, bytes) in a zone.
761  *
762  * XXX error return
763  */
764 void
765 hammer_blockmap_free(hammer_transaction_t trans,
766 		     hammer_off_t zone_offset, int bytes)
767 {
768 	hammer_mount_t hmp;
769 	hammer_volume_t root_volume;
770 	hammer_blockmap_t blockmap;
771 	hammer_blockmap_t freemap;
772 	struct hammer_blockmap_layer1 *layer1;
773 	struct hammer_blockmap_layer2 *layer2;
774 	hammer_buffer_t buffer1 = NULL;
775 	hammer_buffer_t buffer2 = NULL;
776 	hammer_off_t layer1_offset;
777 	hammer_off_t layer2_offset;
778 	hammer_off_t base_off;
779 	int error;
780 	int zone;
781 
782 	if (bytes == 0)
783 		return;
784 	hmp = trans->hmp;
785 
786 	/*
787 	 * Alignment
788 	 */
789 	bytes = (bytes + 15) & ~15;
790 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
791 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
792 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
793 
794 	/*
795 	 * Basic zone validation & locking
796 	 */
797 	zone = HAMMER_ZONE_DECODE(zone_offset);
798 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
799 	root_volume = trans->rootvol;
800 	error = 0;
801 
802 	blockmap = &hmp->blockmap[zone];
803 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
804 
805 	/*
806 	 * Dive layer 1.
807 	 */
808 	layer1_offset = freemap->phys_offset +
809 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
810 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
811 	if (error)
812 		goto failed;
813 	KKASSERT(layer1->phys_offset &&
814 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
815 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
816 		hammer_lock_ex(&hmp->blkmap_lock);
817 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
818 			panic("CRC FAILED: LAYER1");
819 		hammer_unlock(&hmp->blkmap_lock);
820 	}
821 
822 	/*
823 	 * Dive layer 2, each entry represents a large-block.
824 	 */
825 	layer2_offset = layer1->phys_offset +
826 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
827 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
828 	if (error)
829 		goto failed;
830 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
831 		hammer_lock_ex(&hmp->blkmap_lock);
832 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
833 			panic("CRC FAILED: LAYER2");
834 		hammer_unlock(&hmp->blkmap_lock);
835 	}
836 
837 	hammer_lock_ex(&hmp->blkmap_lock);
838 
839 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
840 
841 	/*
842 	 * Free space previously allocated via blockmap_alloc().
843 	 *
844 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
845 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
846 	 */
847 	KKASSERT(layer2->zone == zone);
848 	layer2->bytes_free += bytes;
849 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
850 
851 	/*
852 	 * If a big-block becomes entirely free we must create a covering
853 	 * reservation to prevent premature reuse.  Note, however, that
854 	 * the big-block and/or reservation may still have an append_off
855 	 * that allows further (non-reused) allocations.
856 	 *
857 	 * Once the reservation has been made we re-check layer2 and if
858 	 * the big-block is still entirely free we reset the layer2 entry.
859 	 * The reservation will prevent premature reuse.
860 	 *
861 	 * NOTE: hammer_buffer's are only invalidated when the reservation
862 	 * is completed, if the layer2 entry is still completely free at
863 	 * that time.  Any allocations from the reservation that may have
864 	 * occured in the mean time, or active references on the reservation
865 	 * from new pending allocations, will prevent the invalidation from
866 	 * occuring.
867 	 */
868 	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
869 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
870 
871 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
872 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
873 			layer2->zone = 0;
874 			layer2->append_off = 0;
875 			hammer_modify_buffer(trans, buffer1,
876 					     layer1, sizeof(*layer1));
877 			++layer1->blocks_free;
878 			layer1->layer1_crc = crc32(layer1,
879 						   HAMMER_LAYER1_CRCSIZE);
880 			hammer_modify_buffer_done(buffer1);
881 			hammer_modify_volume_field(trans,
882 					trans->rootvol,
883 					vol0_stat_freebigblocks);
884 			++root_volume->ondisk->vol0_stat_freebigblocks;
885 			hmp->copy_stat_freebigblocks =
886 			   root_volume->ondisk->vol0_stat_freebigblocks;
887 			hammer_modify_volume_done(trans->rootvol);
888 		}
889 	}
890 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
891 	hammer_modify_buffer_done(buffer2);
892 	hammer_unlock(&hmp->blkmap_lock);
893 
894 failed:
895 	if (buffer1)
896 		hammer_rel_buffer(buffer1, 0);
897 	if (buffer2)
898 		hammer_rel_buffer(buffer2, 0);
899 }
900 
901 int
902 hammer_blockmap_dedup(hammer_transaction_t trans,
903 		     hammer_off_t zone_offset, int bytes)
904 {
905 	hammer_mount_t hmp;
906 	hammer_volume_t root_volume;
907 	hammer_blockmap_t blockmap;
908 	hammer_blockmap_t freemap;
909 	struct hammer_blockmap_layer1 *layer1;
910 	struct hammer_blockmap_layer2 *layer2;
911 	hammer_buffer_t buffer1 = NULL;
912 	hammer_buffer_t buffer2 = NULL;
913 	hammer_off_t layer1_offset;
914 	hammer_off_t layer2_offset;
915 	int32_t temp;
916 	int error;
917 	int zone;
918 
919 	if (bytes == 0)
920 		return (0);
921 	hmp = trans->hmp;
922 
923 	/*
924 	 * Alignment
925 	 */
926 	bytes = (bytes + 15) & ~15;
927 	KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
928 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
929 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
930 
931 	/*
932 	 * Basic zone validation & locking
933 	 */
934 	zone = HAMMER_ZONE_DECODE(zone_offset);
935 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
936 	root_volume = trans->rootvol;
937 	error = 0;
938 
939 	blockmap = &hmp->blockmap[zone];
940 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
941 
942 	/*
943 	 * Dive layer 1.
944 	 */
945 	layer1_offset = freemap->phys_offset +
946 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
947 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
948 	if (error)
949 		goto failed;
950 	KKASSERT(layer1->phys_offset &&
951 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
952 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
953 		hammer_lock_ex(&hmp->blkmap_lock);
954 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
955 			panic("CRC FAILED: LAYER1");
956 		hammer_unlock(&hmp->blkmap_lock);
957 	}
958 
959 	/*
960 	 * Dive layer 2, each entry represents a large-block.
961 	 */
962 	layer2_offset = layer1->phys_offset +
963 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
964 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
965 	if (error)
966 		goto failed;
967 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
968 		hammer_lock_ex(&hmp->blkmap_lock);
969 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
970 			panic("CRC FAILED: LAYER2");
971 		hammer_unlock(&hmp->blkmap_lock);
972 	}
973 
974 	hammer_lock_ex(&hmp->blkmap_lock);
975 
976 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
977 
978 	/*
979 	 * Free space previously allocated via blockmap_alloc().
980 	 *
981 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
982 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
983 	 */
984 	KKASSERT(layer2->zone == zone);
985 	temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
986 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
987 	if (temp > layer2->bytes_free) {
988 		error = ERANGE;
989 		goto underflow;
990 	}
991 	layer2->bytes_free -= bytes;
992 
993 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
994 
995 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
996 underflow:
997 	hammer_modify_buffer_done(buffer2);
998 	hammer_unlock(&hmp->blkmap_lock);
999 
1000 failed:
1001 	if (buffer1)
1002 		hammer_rel_buffer(buffer1, 0);
1003 	if (buffer2)
1004 		hammer_rel_buffer(buffer2, 0);
1005 	return (error);
1006 }
1007 
1008 /*
1009  * Backend function - finalize (offset, bytes) in a zone.
1010  *
1011  * Allocate space that was previously reserved by the frontend.
1012  */
1013 int
1014 hammer_blockmap_finalize(hammer_transaction_t trans,
1015 			 hammer_reserve_t resv,
1016 			 hammer_off_t zone_offset, int bytes)
1017 {
1018 	hammer_mount_t hmp;
1019 	hammer_volume_t root_volume;
1020 	hammer_blockmap_t blockmap;
1021 	hammer_blockmap_t freemap;
1022 	struct hammer_blockmap_layer1 *layer1;
1023 	struct hammer_blockmap_layer2 *layer2;
1024 	hammer_buffer_t buffer1 = NULL;
1025 	hammer_buffer_t buffer2 = NULL;
1026 	hammer_off_t layer1_offset;
1027 	hammer_off_t layer2_offset;
1028 	int error;
1029 	int zone;
1030 	int offset;
1031 
1032 	if (bytes == 0)
1033 		return(0);
1034 	hmp = trans->hmp;
1035 
1036 	/*
1037 	 * Alignment
1038 	 */
1039 	bytes = (bytes + 15) & ~15;
1040 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1041 
1042 	/*
1043 	 * Basic zone validation & locking
1044 	 */
1045 	zone = HAMMER_ZONE_DECODE(zone_offset);
1046 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1047 	root_volume = trans->rootvol;
1048 	error = 0;
1049 
1050 	blockmap = &hmp->blockmap[zone];
1051 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1052 
1053 	/*
1054 	 * Dive layer 1.
1055 	 */
1056 	layer1_offset = freemap->phys_offset +
1057 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1058 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1059 	if (error)
1060 		goto failed;
1061 	KKASSERT(layer1->phys_offset &&
1062 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1063 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1064 		hammer_lock_ex(&hmp->blkmap_lock);
1065 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1066 			panic("CRC FAILED: LAYER1");
1067 		hammer_unlock(&hmp->blkmap_lock);
1068 	}
1069 
1070 	/*
1071 	 * Dive layer 2, each entry represents a large-block.
1072 	 */
1073 	layer2_offset = layer1->phys_offset +
1074 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1075 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1076 	if (error)
1077 		goto failed;
1078 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1079 		hammer_lock_ex(&hmp->blkmap_lock);
1080 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1081 			panic("CRC FAILED: LAYER2");
1082 		hammer_unlock(&hmp->blkmap_lock);
1083 	}
1084 
1085 	hammer_lock_ex(&hmp->blkmap_lock);
1086 
1087 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1088 
1089 	/*
1090 	 * Finalize some or all of the space covered by a current
1091 	 * reservation.  An allocation in the same layer may have
1092 	 * already assigned ownership.
1093 	 */
1094 	if (layer2->zone == 0) {
1095 		hammer_modify_buffer(trans, buffer1,
1096 				     layer1, sizeof(*layer1));
1097 		--layer1->blocks_free;
1098 		layer1->layer1_crc = crc32(layer1,
1099 					   HAMMER_LAYER1_CRCSIZE);
1100 		hammer_modify_buffer_done(buffer1);
1101 		layer2->zone = zone;
1102 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1103 		KKASSERT(layer2->append_off == 0);
1104 		hammer_modify_volume_field(trans,
1105 				trans->rootvol,
1106 				vol0_stat_freebigblocks);
1107 		--root_volume->ondisk->vol0_stat_freebigblocks;
1108 		hmp->copy_stat_freebigblocks =
1109 		   root_volume->ondisk->vol0_stat_freebigblocks;
1110 		hammer_modify_volume_done(trans->rootvol);
1111 	}
1112 	if (layer2->zone != zone)
1113 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1114 	KKASSERT(layer2->zone == zone);
1115 	KKASSERT(bytes != 0);
1116 	layer2->bytes_free -= bytes;
1117 	if (resv)
1118 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1119 
1120 	/*
1121 	 * Finalizations can occur out of order, or combined with allocations.
1122 	 * append_off must be set to the highest allocated offset.
1123 	 */
1124 	offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1125 	if (layer2->append_off < offset)
1126 		layer2->append_off = offset;
1127 
1128 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1129 	hammer_modify_buffer_done(buffer2);
1130 	hammer_unlock(&hmp->blkmap_lock);
1131 
1132 failed:
1133 	if (buffer1)
1134 		hammer_rel_buffer(buffer1, 0);
1135 	if (buffer2)
1136 		hammer_rel_buffer(buffer2, 0);
1137 	return(error);
1138 }
1139 
1140 /*
1141  * Return the approximate number of free bytes in the big-block
1142  * containing the specified blockmap offset.
1143  *
1144  * WARNING: A negative number can be returned if data de-dup exists,
1145  *	    and the result will also not represent he actual number
1146  *	    of free bytes in this case.
1147  *
1148  *	    This code is used only by the reblocker.
1149  */
1150 int
1151 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1152 			int *curp, int *errorp)
1153 {
1154 	hammer_volume_t root_volume;
1155 	hammer_blockmap_t blockmap;
1156 	hammer_blockmap_t freemap;
1157 	struct hammer_blockmap_layer1 *layer1;
1158 	struct hammer_blockmap_layer2 *layer2;
1159 	hammer_buffer_t buffer = NULL;
1160 	hammer_off_t layer1_offset;
1161 	hammer_off_t layer2_offset;
1162 	int32_t bytes;
1163 	int zone;
1164 
1165 	zone = HAMMER_ZONE_DECODE(zone_offset);
1166 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1167 	root_volume = hammer_get_root_volume(hmp, errorp);
1168 	if (*errorp) {
1169 		*curp = 0;
1170 		return(0);
1171 	}
1172 	blockmap = &hmp->blockmap[zone];
1173 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1174 
1175 	/*
1176 	 * Dive layer 1.
1177 	 */
1178 	layer1_offset = freemap->phys_offset +
1179 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1180 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1181 	if (*errorp) {
1182 		bytes = 0;
1183 		goto failed;
1184 	}
1185 	KKASSERT(layer1->phys_offset);
1186 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1187 		hammer_lock_ex(&hmp->blkmap_lock);
1188 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1189 			panic("CRC FAILED: LAYER1");
1190 		hammer_unlock(&hmp->blkmap_lock);
1191 	}
1192 
1193 	/*
1194 	 * Dive layer 2, each entry represents a large-block.
1195 	 *
1196 	 * (reuse buffer, layer1 pointer becomes invalid)
1197 	 */
1198 	layer2_offset = layer1->phys_offset +
1199 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1200 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1201 	if (*errorp) {
1202 		bytes = 0;
1203 		goto failed;
1204 	}
1205 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1206 		hammer_lock_ex(&hmp->blkmap_lock);
1207 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1208 			panic("CRC FAILED: LAYER2");
1209 		hammer_unlock(&hmp->blkmap_lock);
1210 	}
1211 	KKASSERT(layer2->zone == zone);
1212 
1213 	bytes = layer2->bytes_free;
1214 
1215 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1216 		*curp = 0;
1217 	else
1218 		*curp = 1;
1219 failed:
1220 	if (buffer)
1221 		hammer_rel_buffer(buffer, 0);
1222 	hammer_rel_volume(root_volume, 0);
1223 	if (hammer_debug_general & 0x0800) {
1224 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1225 			(long long)zone_offset, bytes);
1226 	}
1227 	return(bytes);
1228 }
1229 
1230 
1231 /*
1232  * Lookup a blockmap offset.
1233  */
1234 hammer_off_t
1235 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1236 		       int *errorp)
1237 {
1238 	hammer_volume_t root_volume;
1239 	hammer_blockmap_t freemap;
1240 	struct hammer_blockmap_layer1 *layer1;
1241 	struct hammer_blockmap_layer2 *layer2;
1242 	hammer_buffer_t buffer = NULL;
1243 	hammer_off_t layer1_offset;
1244 	hammer_off_t layer2_offset;
1245 	hammer_off_t result_offset;
1246 	hammer_off_t base_off;
1247 	hammer_reserve_t resv;
1248 	int zone;
1249 
1250 	/*
1251 	 * Calculate the zone-2 offset.
1252 	 */
1253 	zone = HAMMER_ZONE_DECODE(zone_offset);
1254 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1255 
1256 	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1257 			HAMMER_ZONE_RAW_BUFFER;
1258 
1259 	/*
1260 	 * We can actually stop here, normal blockmaps are now direct-mapped
1261 	 * onto the freemap and so represent zone-2 addresses.
1262 	 */
1263 	if (hammer_verify_zone == 0) {
1264 		*errorp = 0;
1265 		return(result_offset);
1266 	}
1267 
1268 	/*
1269 	 * Validate the allocation zone
1270 	 */
1271 	root_volume = hammer_get_root_volume(hmp, errorp);
1272 	if (*errorp)
1273 		return(0);
1274 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1275 	KKASSERT(freemap->phys_offset != 0);
1276 
1277 	/*
1278 	 * Dive layer 1.
1279 	 */
1280 	layer1_offset = freemap->phys_offset +
1281 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1282 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1283 	if (*errorp)
1284 		goto failed;
1285 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1286 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1287 		hammer_lock_ex(&hmp->blkmap_lock);
1288 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1289 			panic("CRC FAILED: LAYER1");
1290 		hammer_unlock(&hmp->blkmap_lock);
1291 	}
1292 
1293 	/*
1294 	 * Dive layer 2, each entry represents a large-block.
1295 	 */
1296 	layer2_offset = layer1->phys_offset +
1297 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1298 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1299 
1300 	if (*errorp)
1301 		goto failed;
1302 	if (layer2->zone == 0) {
1303 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1304 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1305 				 base_off);
1306 		KKASSERT(resv && resv->zone == zone);
1307 
1308 	} else if (layer2->zone != zone) {
1309 		panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1310 			layer2->zone, zone);
1311 	}
1312 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1313 		hammer_lock_ex(&hmp->blkmap_lock);
1314 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1315 			panic("CRC FAILED: LAYER2");
1316 		hammer_unlock(&hmp->blkmap_lock);
1317 	}
1318 
1319 failed:
1320 	if (buffer)
1321 		hammer_rel_buffer(buffer, 0);
1322 	hammer_rel_volume(root_volume, 0);
1323 	if (hammer_debug_general & 0x0800) {
1324 		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1325 			(long long)zone_offset, (long long)result_offset);
1326 	}
1327 	return(result_offset);
1328 }
1329 
1330 
1331 /*
1332  * Check space availability
1333  *
1334  * MPSAFE - does not require fs_token
1335  */
1336 int
1337 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1338 {
1339 	const int in_size = sizeof(struct hammer_inode_data) +
1340 			    sizeof(union hammer_btree_elm);
1341 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1342 	int64_t usedbytes;
1343 
1344 	usedbytes = hmp->rsv_inodes * in_size +
1345 		    hmp->rsv_recs * rec_size +
1346 		    hmp->rsv_databytes +
1347 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1348 		    ((int64_t)hidirtybufspace << 2) +
1349 		    (slop << HAMMER_LARGEBLOCK_BITS);
1350 
1351 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1352 	if (resp)
1353 		*resp = usedbytes;
1354 
1355 	if (hmp->copy_stat_freebigblocks >=
1356 	    (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1357 		return(0);
1358 	}
1359 	return (ENOSPC);
1360 }
1361 
1362