xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision 650094e1)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36 
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 				    hammer_off_t base_offset, int zone,
45 				    struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48 
49 /*
50  * Reserved big-blocks red-black tree support
51  */
52 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
53 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
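/*
 * RB_GENERATE2() is the two-key form of the red-black tree generator: in
 * addition to the usual compare-based operations it is expected to emit a
 * direct lookup keyed on the zone_offset field, which is what the
 * RB_LOOKUP(hammer_res_rb_tree, ...) calls below rely on to find the
 * reservation covering a big-block by its base offset.
 */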
54 
55 static int
56 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57 {
58 	if (res1->zone_offset < res2->zone_offset)
59 		return(-1);
60 	if (res1->zone_offset > res2->zone_offset)
61 		return(1);
62 	return(0);
63 }
64 
65 /*
66  * Allocate bytes from a zone
67  */
68 hammer_off_t
69 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
70 		      hammer_off_t hint, int *errorp)
71 {
72 	hammer_mount_t hmp;
73 	hammer_volume_t root_volume;
74 	hammer_blockmap_t blockmap;
75 	hammer_blockmap_t freemap;
76 	hammer_reserve_t resv;
77 	struct hammer_blockmap_layer1 *layer1;
78 	struct hammer_blockmap_layer2 *layer2;
79 	hammer_buffer_t buffer1 = NULL;
80 	hammer_buffer_t buffer2 = NULL;
81 	hammer_buffer_t buffer3 = NULL;
82 	hammer_off_t tmp_offset;
83 	hammer_off_t next_offset;
84 	hammer_off_t result_offset;
85 	hammer_off_t layer1_offset;
86 	hammer_off_t layer2_offset;
87 	hammer_off_t base_off;
88 	int loops = 0;
89 	int offset;		/* offset within big-block */
90 	int use_hint;
91 
92 	hmp = trans->hmp;
93 
94 	/*
95 	 * Deal with alignment and buffer-boundary issues.
96 	 *
97 	 * Be careful, certain primary alignments are used below to allocate
98 	 * new blockmap blocks.
99 	 */
100 	bytes = (bytes + 15) & ~15;
101 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
102 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
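	/*
	 * Illustration of the rounding above: (bytes + 15) & ~15 rounds the
	 * request up to the next 16-byte boundary, e.g. a 100-byte request
	 * becomes 112, while a request that is already a multiple of 16 is
	 * left unchanged.
	 */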
103 
104 	/*
105 	 * Setup
106 	 */
107 	root_volume = trans->rootvol;
108 	*errorp = 0;
109 	blockmap = &hmp->blockmap[zone];
110 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
111 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
112 
113 	/*
114 	 * Use the hint if we have one.
115 	 */
116 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
117 		next_offset = (hint + 15) & ~(hammer_off_t)15;
118 		use_hint = 1;
119 	} else {
120 		next_offset = blockmap->next_offset;
121 		use_hint = 0;
122 	}
123 again:
124 
125 	/*
126 	 * use_hint is turned off if we leave the hinted big-block.
127 	 */
128 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
129 		next_offset = blockmap->next_offset;
130 		use_hint = 0;
131 	}
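	/*
	 * This is the same XOR trick used by the boundary checks below:
	 * the hint is only honored while next_offset remains inside the
	 * region covered by HAMMER_HINTBLOCK_MASK64 around the original
	 * hint offset.
	 */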
132 
133 	/*
134 	 * Check for wrap
135 	 */
136 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
137 		if (++loops == 2) {
138 			result_offset = 0;
139 			*errorp = ENOSPC;
140 			goto failed;
141 		}
142 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
143 	}
144 
145 	/*
146 	 * The allocation request may not cross a buffer boundary.  Special
147 	 * large allocations must not cross a large-block boundary.
148 	 */
149 	tmp_offset = next_offset + bytes - 1;
150 	if (bytes <= HAMMER_BUFSIZE) {
151 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
152 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
153 			goto again;
154 		}
155 	} else {
156 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
157 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
158 			goto again;
159 		}
160 	}
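	/*
	 * The XOR tests above work because (start ^ end) has bits set
	 * wherever the two offsets differ; masking off the in-buffer (or
	 * in-big-block) bits leaves a non-zero value only when the first
	 * and last byte of the request fall in different buffers (or
	 * big-blocks), in which case next_offset is advanced to the next
	 * boundary and the loop is retried.
	 */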
161 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
162 
163 	/*
164 	 * Dive layer 1.
165 	 */
166 	layer1_offset = freemap->phys_offset +
167 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
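	/*
	 * The freemap is a two-level radix tree: the macro above selects
	 * the layer1 entry covering next_offset (each layer1 entry spans
	 * HAMMER_BLOCKMAP_LAYER2 bytes of zone address space), and that
	 * entry's phys_offset points at the array of layer2 entries, one
	 * per big-block, which is indexed further below.
	 */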
168 
169 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
170 	if (*errorp) {
171 		result_offset = 0;
172 		goto failed;
173 	}
174 
175 	/*
176 	 * Check CRC.
177 	 */
178 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
179 		hammer_lock_ex(&hmp->blkmap_lock);
180 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
181 			panic("CRC FAILED: LAYER1");
182 		hammer_unlock(&hmp->blkmap_lock);
183 	}
184 
185 	/*
186 	 * If we are at a big-block boundary and layer1 indicates no
187 	 * free big-blocks, then we cannot allocate a new big-block in
188 	 * layer2; skip to the next layer1 entry.
189 	 */
190 	if (offset == 0 && layer1->blocks_free == 0) {
191 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
192 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
193 		goto again;
194 	}
195 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
196 
197 	/*
198 	 * Skip this layer1 entry if it is pointing to a layer2 big-block
199 	 * on a volume that we are currently trying to remove from the
200 	 * file-system. This is used by the volume-del code together with
201 	 * the reblocker to free up a volume.
202 	 */
203 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
204 	    hmp->volume_to_remove) {
205 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
206 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
207 		goto again;
208 	}
209 
210 	/*
211 	 * Dive layer 2, each entry represents a large-block.
212 	 */
213 	layer2_offset = layer1->phys_offset +
214 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
215 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
216 	if (*errorp) {
217 		result_offset = 0;
218 		goto failed;
219 	}
220 
221 	/*
222 	 * Check CRC.  This can race another thread holding the lock
223 	 * and in the middle of modifying layer2.
224 	 */
225 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
226 		hammer_lock_ex(&hmp->blkmap_lock);
227 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
228 			panic("CRC FAILED: LAYER2");
229 		hammer_unlock(&hmp->blkmap_lock);
230 	}
231 
232 	/*
233 	 * Skip the layer if the zone is owned by someone other than us.
234 	 */
235 	if (layer2->zone && layer2->zone != zone) {
236 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
237 		goto again;
238 	}
239 	if (offset < layer2->append_off) {
240 		next_offset += layer2->append_off - offset;
241 		goto again;
242 	}
243 
244 #if 0
245 	/*
246 	 * If operating in the current non-hint blockmap block, do not
247 	 * allow it to get over-full.  Also drop any active hinting so
248 	 * blockmap->next_offset is updated at the end.
249 	 *
250 	 * We do this for B-Tree and meta-data allocations to provide
251 	 * localization for updates.
252 	 */
253 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
254 	     zone == HAMMER_ZONE_META_INDEX) &&
255 	    offset >= HAMMER_LARGEBLOCK_OVERFILL &&
256 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
257 	) {
258 		if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
259 			next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
260 			use_hint = 0;
261 			goto again;
262 		}
263 	}
264 #endif
265 
266 	/*
267 	 * We need the lock from this point on.  We have to re-check zone
268 	 * ownership after acquiring the lock and also check for reservations.
269 	 */
270 	hammer_lock_ex(&hmp->blkmap_lock);
271 
272 	if (layer2->zone && layer2->zone != zone) {
273 		hammer_unlock(&hmp->blkmap_lock);
274 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
275 		goto again;
276 	}
277 	if (offset < layer2->append_off) {
278 		hammer_unlock(&hmp->blkmap_lock);
279 		next_offset += layer2->append_off - offset;
280 		goto again;
281 	}
282 
283 	/*
284 	 * The bigblock might be reserved by another zone.  If it is reserved
285 	 * by our zone we may have to move next_offset past the append_off.
286 	 */
287 	base_off = (next_offset &
288 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
289 		    HAMMER_ZONE_RAW_BUFFER;
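	/*
	 * base_off strips the intra-big-block bits and the zone field from
	 * next_offset and substitutes the raw-buffer (zone-2) zone, yielding
	 * the canonical big-block base address used as the key for the
	 * reservation red-black tree.
	 */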
290 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
291 	if (resv) {
292 		if (resv->zone != zone) {
293 			hammer_unlock(&hmp->blkmap_lock);
294 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
295 				      ~HAMMER_LARGEBLOCK_MASK64;
296 			goto again;
297 		}
298 		if (offset < resv->append_off) {
299 			hammer_unlock(&hmp->blkmap_lock);
300 			next_offset += resv->append_off - offset;
301 			goto again;
302 		}
303 		++resv->refs;
304 	}
305 
306 	/*
307 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
308 	 * of the layer for real.  At this point we've validated any
309 	 * reservation that might exist and can just ignore resv.
310 	 */
311 	if (layer2->zone == 0) {
312 		/*
313 		 * Assign the bigblock to our zone
314 		 */
315 		hammer_modify_buffer(trans, buffer1,
316 				     layer1, sizeof(*layer1));
317 		--layer1->blocks_free;
318 		layer1->layer1_crc = crc32(layer1,
319 					   HAMMER_LAYER1_CRCSIZE);
320 		hammer_modify_buffer_done(buffer1);
321 		hammer_modify_buffer(trans, buffer2,
322 				     layer2, sizeof(*layer2));
323 		layer2->zone = zone;
324 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
325 		KKASSERT(layer2->append_off == 0);
326 		hammer_modify_volume_field(trans, trans->rootvol,
327 					   vol0_stat_freebigblocks);
328 		--root_volume->ondisk->vol0_stat_freebigblocks;
329 		hmp->copy_stat_freebigblocks =
330 			root_volume->ondisk->vol0_stat_freebigblocks;
331 		hammer_modify_volume_done(trans->rootvol);
332 	} else {
333 		hammer_modify_buffer(trans, buffer2,
334 				     layer2, sizeof(*layer2));
335 	}
336 	KKASSERT(layer2->zone == zone);
337 
338 	/*
339 	 * NOTE: bytes_free can legally go negative due to de-dup.
340 	 */
341 	layer2->bytes_free -= bytes;
342 	KKASSERT(layer2->append_off <= offset);
343 	layer2->append_off = offset + bytes;
344 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
345 	hammer_modify_buffer_done(buffer2);
346 
347 	/*
348 	 * We hold the blockmap lock and should be the only ones
349 	 * capable of modifying resv->append_off.  Track the allocation
350 	 * as appropriate.
351 	 */
352 	KKASSERT(bytes != 0);
353 	if (resv) {
354 		KKASSERT(resv->append_off <= offset);
355 		resv->append_off = offset + bytes;
356 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
357 		hammer_blockmap_reserve_complete(hmp, resv);
358 	}
359 
360 	/*
361 	 * If we are allocating from the base of a new buffer we can avoid
362 	 * a disk read by calling hammer_bnew().
363 	 */
364 	if ((next_offset & HAMMER_BUFMASK) == 0) {
365 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
366 				errorp, &buffer3);
367 	}
368 	result_offset = next_offset;
369 
370 	/*
371 	 * If we weren't supplied with a hint or could not use the hint
372 	 * then we wound up using blockmap->next_offset as the hint and
373 	 * need to save it.
374 	 */
375 	if (use_hint == 0) {
376 		hammer_modify_volume(NULL, root_volume, NULL, 0);
377 		blockmap->next_offset = next_offset + bytes;
378 		hammer_modify_volume_done(root_volume);
379 	}
380 	hammer_unlock(&hmp->blkmap_lock);
381 failed:
382 
383 	/*
384 	 * Cleanup
385 	 */
386 	if (buffer1)
387 		hammer_rel_buffer(buffer1, 0);
388 	if (buffer2)
389 		hammer_rel_buffer(buffer2, 0);
390 	if (buffer3)
391 		hammer_rel_buffer(buffer3, 0);
392 
393 	return(result_offset);
394 }
395 
396 /*
397  * Frontend function - Reserve bytes in a zone.
398  *
399  * This code reserves bytes out of a blockmap without committing to any
400  * meta-data modifications, allowing the front-end to directly issue disk
401  * write I/O for large blocks of data.
402  *
403  * The backend later finalizes the reservation with hammer_blockmap_finalize()
404  * upon committing the related record.
405  */
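/*
 * Rough sketch of the intended lifecycle, pieced together from the comments
 * above and the functions below (the actual frontend call sites live outside
 * this file):
 *
 *	resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *	... frontend issues direct write I/O against zone_off ...
 *	hammer_blockmap_finalize(trans, resv, zone_off, bytes);  - backend
 *	hammer_blockmap_reserve_complete(hmp, resv);             - drop ref
 */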
406 hammer_reserve_t
407 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
408 			hammer_off_t *zone_offp, int *errorp)
409 {
410 	hammer_volume_t root_volume;
411 	hammer_blockmap_t blockmap;
412 	hammer_blockmap_t freemap;
413 	struct hammer_blockmap_layer1 *layer1;
414 	struct hammer_blockmap_layer2 *layer2;
415 	hammer_buffer_t buffer1 = NULL;
416 	hammer_buffer_t buffer2 = NULL;
417 	hammer_buffer_t buffer3 = NULL;
418 	hammer_off_t tmp_offset;
419 	hammer_off_t next_offset;
420 	hammer_off_t layer1_offset;
421 	hammer_off_t layer2_offset;
422 	hammer_off_t base_off;
423 	hammer_reserve_t resv;
424 	hammer_reserve_t resx;
425 	int loops = 0;
426 	int offset;
427 
428 	/*
429 	 * Setup
430 	 */
431 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
432 	root_volume = hammer_get_root_volume(hmp, errorp);
433 	if (*errorp)
434 		return(NULL);
435 	blockmap = &hmp->blockmap[zone];
436 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
437 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
438 
439 	/*
440 	 * Deal with alignment and buffer-boundary issues.
441 	 *
442 	 * Be careful, certain primary alignments are used below to allocate
443 	 * new blockmap blocks.
444 	 */
445 	bytes = (bytes + 15) & ~15;
446 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
447 
448 	next_offset = blockmap->next_offset;
449 again:
450 	resv = NULL;
451 	/*
452 	 * Check for wrap
453 	 */
454 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
455 		if (++loops == 2) {
456 			*errorp = ENOSPC;
457 			goto failed;
458 		}
459 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
460 	}
461 
462 	/*
463 	 * The allocation request may not cross a buffer boundary.  Special
464 	 * large allocations must not cross a large-block boundary.
465 	 */
466 	tmp_offset = next_offset + bytes - 1;
467 	if (bytes <= HAMMER_BUFSIZE) {
468 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
469 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
470 			goto again;
471 		}
472 	} else {
473 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
474 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
475 			goto again;
476 		}
477 	}
478 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
479 
480 	/*
481 	 * Dive layer 1.
482 	 */
483 	layer1_offset = freemap->phys_offset +
484 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
485 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
486 	if (*errorp)
487 		goto failed;
488 
489 	/*
490 	 * Check CRC.
491 	 */
492 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
493 		hammer_lock_ex(&hmp->blkmap_lock);
494 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
495 			panic("CRC FAILED: LAYER1");
496 		hammer_unlock(&hmp->blkmap_lock);
497 	}
498 
499 	/*
500 	 * If we are at a big-block boundary and layer1 indicates no
501 	 * free big-blocks, then we cannot allocate a new big-block in
502 	 * layer2; skip to the next layer1 entry.
503 	 */
504 	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
505 	    layer1->blocks_free == 0) {
506 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
507 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
508 		goto again;
509 	}
510 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
511 
512 	/*
513 	 * Dive layer 2, each entry represents a large-block.
514 	 */
515 	layer2_offset = layer1->phys_offset +
516 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
517 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
518 	if (*errorp)
519 		goto failed;
520 
521 	/*
522 	 * Check CRC if not allocating into uninitialized space (which we
523 	 * aren't when reserving space).
524 	 */
525 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
526 		hammer_lock_ex(&hmp->blkmap_lock);
527 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
528 			panic("CRC FAILED: LAYER2");
529 		hammer_unlock(&hmp->blkmap_lock);
530 	}
531 
532 	/*
533 	 * Skip the layer if the zone is owned by someone other than us.
534 	 */
535 	if (layer2->zone && layer2->zone != zone) {
536 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
537 		goto again;
538 	}
539 	if (offset < layer2->append_off) {
540 		next_offset += layer2->append_off - offset;
541 		goto again;
542 	}
543 
544 	/*
545 	 * We need the lock from this point on.  We have to re-check zone
546 	 * ownership after acquiring the lock and also check for reservations.
547 	 */
548 	hammer_lock_ex(&hmp->blkmap_lock);
549 
550 	if (layer2->zone && layer2->zone != zone) {
551 		hammer_unlock(&hmp->blkmap_lock);
552 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
553 		goto again;
554 	}
555 	if (offset < layer2->append_off) {
556 		hammer_unlock(&hmp->blkmap_lock);
557 		next_offset += layer2->append_off - offset;
558 		goto again;
559 	}
560 
561 	/*
562 	 * The bigblock might be reserved by another zone.  If it is reserved
563 	 * by our zone we may have to move next_offset past the append_off.
564 	 */
565 	base_off = (next_offset &
566 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
567 		    HAMMER_ZONE_RAW_BUFFER;
568 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
569 	if (resv) {
570 		if (resv->zone != zone) {
571 			hammer_unlock(&hmp->blkmap_lock);
572 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
573 				      ~HAMMER_LARGEBLOCK_MASK64;
574 			goto again;
575 		}
576 		if (offset < resv->append_off) {
577 			hammer_unlock(&hmp->blkmap_lock);
578 			next_offset += resv->append_off - offset;
579 			goto again;
580 		}
581 		++resv->refs;
582 		resx = NULL;
583 	} else {
584 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
585 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
586 		resx->refs = 1;
587 		resx->zone = zone;
588 		resx->zone_offset = base_off;
589 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
590 			resx->flags |= HAMMER_RESF_LAYER2FREE;
591 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
592 		KKASSERT(resv == NULL);
593 		resv = resx;
594 		++hammer_count_reservations;
595 	}
596 	resv->append_off = offset + bytes;
597 
598 	/*
599 	 * If we are not reserving a whole buffer but are at the start of
600 	 * a new block, call hammer_bnew() to avoid a disk read.
601 	 *
602 	 * If we are reserving a whole buffer (or more), the caller will
603 	 * probably use a direct read, so do nothing.
604 	 *
605 	 * If we do not have a whole lot of system memory we really can't
606 	 * afford to block while holding the blkmap_lock!
607 	 */
608 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
609 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE))
610 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
611 	}
612 
613 	/*
614 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
615 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
616 	 * be big-block aligned.
617 	 */
618 	blockmap->next_offset = next_offset + bytes;
619 	hammer_unlock(&hmp->blkmap_lock);
620 
621 failed:
622 	if (buffer1)
623 		hammer_rel_buffer(buffer1, 0);
624 	if (buffer2)
625 		hammer_rel_buffer(buffer2, 0);
626 	if (buffer3)
627 		hammer_rel_buffer(buffer3, 0);
628 	hammer_rel_volume(root_volume, 0);
629 	*zone_offp = next_offset;
630 
631 	return(resv);
632 }
633 
634 /*
635  * Frontend function - Dedup bytes in a zone.
636  *
637  * Dedup reservations work exactly the same as normal write reservations
638  * except that we only adjust the bytes_free field and do not touch the
639  * append offset.  The finalization mechanism for dedup reservations is
640  * also the same as for normal write ones - the backend finalizes the
641  * reservation with hammer_blockmap_finalize().
642  */
643 hammer_reserve_t
644 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
645 			      hammer_off_t zone_offset, int *errorp)
646 {
647 	hammer_volume_t root_volume;
648 	hammer_blockmap_t freemap;
649 	struct hammer_blockmap_layer1 *layer1;
650 	struct hammer_blockmap_layer2 *layer2;
651 	hammer_buffer_t buffer1 = NULL;
652 	hammer_buffer_t buffer2 = NULL;
653 	hammer_off_t layer1_offset;
654 	hammer_off_t layer2_offset;
655 	hammer_off_t base_off;
656 	hammer_reserve_t resv = NULL;
657 	hammer_reserve_t resx = NULL;
658 
659 	/*
660 	 * Setup
661 	 */
662 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
663 	root_volume = hammer_get_root_volume(hmp, errorp);
664 	if (*errorp)
665 		return (NULL);
666 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
667 	KKASSERT(freemap->phys_offset != 0);
668 
669 	bytes = (bytes + 15) & ~15;
670 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
671 
672 	/*
673 	 * Dive layer 1.
674 	 */
675 	layer1_offset = freemap->phys_offset +
676 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
677 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
678 	if (*errorp)
679 		goto failed;
680 
681 	/*
682 	 * Check CRC.
683 	 */
684 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
685 		hammer_lock_ex(&hmp->blkmap_lock);
686 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
687 			panic("CRC FAILED: LAYER1");
688 		hammer_unlock(&hmp->blkmap_lock);
689 	}
690 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
691 
692 	/*
693 	 * Dive layer 2, each entry represents a large-block.
694 	 */
695 	layer2_offset = layer1->phys_offset +
696 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
697 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
698 	if (*errorp)
699 		goto failed;
700 
701 	/*
702 	 * Check CRC.
703 	 */
704 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
705 		hammer_lock_ex(&hmp->blkmap_lock);
706 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
707 			panic("CRC FAILED: LAYER2");
708 		hammer_unlock(&hmp->blkmap_lock);
709 	}
710 
711 	/*
712 	 * Fail if the zone is owned by someone other than us.
713 	 */
714 	if (layer2->zone && layer2->zone != zone)
715 		goto failed;
716 
717 	/*
718 	 * We need the lock from this point on.  We have to re-check zone
719 	 * ownership after acquiring the lock and also check for reservations.
720 	 */
721 	hammer_lock_ex(&hmp->blkmap_lock);
722 
723 	if (layer2->zone && layer2->zone != zone) {
724 		hammer_unlock(&hmp->blkmap_lock);
725 		goto failed;
726 	}
727 
728 	base_off = (zone_offset &
729 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
730 		    HAMMER_ZONE_RAW_BUFFER;
731 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
732 	if (resv) {
733 		if (resv->zone != zone) {
734 			hammer_unlock(&hmp->blkmap_lock);
735 			resv = NULL;
736 			goto failed;
737 		}
738 		/*
739 		 * Due to possible big block underflow we can't simply
740 		 * subtract bytes from bytes_free.
741 		 */
742 		if (update_bytes_free(resv, bytes) == 0) {
743 			hammer_unlock(&hmp->blkmap_lock);
744 			resv = NULL;
745 			goto failed;
746 		}
747 		++resv->refs;
748 		resx = NULL;
749 	} else {
750 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
751 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
752 		resx->refs = 1;
753 		resx->zone = zone;
754 		resx->bytes_free = layer2->bytes_free;
755 		/*
756 		 * Due to possible big block underflow we can't simply
757 		 * subtract bytes from bytes_free.
758 		 */
759 		if (update_bytes_free(resx, bytes) == 0) {
760 			hammer_unlock(&hmp->blkmap_lock);
761 			kfree(resx, hmp->m_misc);
762 			goto failed;
763 		}
764 		resx->zone_offset = base_off;
765 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
766 		KKASSERT(resv == NULL);
767 		resv = resx;
768 		++hammer_count_reservations;
769 	}
770 
771 	hammer_unlock(&hmp->blkmap_lock);
772 
773 failed:
774 	if (buffer1)
775 		hammer_rel_buffer(buffer1, 0);
776 	if (buffer2)
777 		hammer_rel_buffer(buffer2, 0);
778 	hammer_rel_volume(root_volume, 0);
779 
780 	return(resv);
781 }
782 
783 static int
784 update_bytes_free(hammer_reserve_t resv, int bytes)
785 {
786 	int32_t temp;
787 
788 	/*
789 	 * Big-block underflow check
790 	 */
791 	temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
792 	cpu_ccfence(); /* XXX do we really need it? */
793 	if (temp > resv->bytes_free) {
794 		kprintf("BIGBLOCK UNDERFLOW\n");
795 		return (0);
796 	}
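	/*
	 * Note on the check above: subtracting two full big-blocks from the
	 * 32-bit signed counter can only compare greater than the original
	 * value if the subtraction wrapped, i.e. bytes_free has already been
	 * driven deeply negative by de-dup.  In that case the reservation is
	 * refused rather than letting the counter wrap further.
	 */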
797 
798 	resv->bytes_free -= bytes;
799 	return (1);
800 }
801 
802 /*
803  * Dereference a reservation structure.  Upon the final release the
804  * underlying big-block is checked and if it is entirely free we delete
805  * any related HAMMER buffers to avoid potential conflicts with future
806  * reuse of the big-block.
807  */
808 void
809 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
810 {
811 	hammer_off_t base_offset;
812 	int error;
813 
814 	KKASSERT(resv->refs > 0);
815 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
816 		 HAMMER_ZONE_RAW_BUFFER);
817 
818 	/*
819 	 * Setting append_off to the max prevents any new allocations
820 	 * from occurring while we are trying to dispose of the reservation,
821 	 * allowing us to safely delete any related HAMMER buffers.
822 	 *
823 	 * If we are unable to clean out all related HAMMER buffers we
824 	 * requeue the delay.
825 	 */
826 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
827 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
828 		base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
829 		base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
830 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
831 			hammer_dedup_cache_inval(hmp, base_offset);
832 		error = hammer_del_buffers(hmp, base_offset,
833 					   resv->zone_offset,
834 					   HAMMER_LARGEBLOCK_SIZE,
835 					   1);
836 		if (hammer_debug_general & 0x20000) {
837 			kprintf("hammer: dellgblk %016jx error %d\n",
838 				(intmax_t)base_offset, error);
839 		}
840 		if (error)
841 			hammer_reserve_setdelay(hmp, resv);
842 	}
843 	if (--resv->refs == 0) {
844 		if (hammer_debug_general & 0x20000) {
845 			kprintf("hammer: delresvr %016jx zone %02x\n",
846 				(intmax_t)resv->zone_offset, resv->zone);
847 		}
848 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
849 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
850 		kfree(resv, hmp->m_misc);
851 		--hammer_count_reservations;
852 	}
853 }
854 
855 /*
856  * Prevent a potentially free big-block from being reused until after
857  * the related flushes have completely cycled, otherwise crash recovery
858  * could resurrect a data block that was already reused and overwritten.
859  *
860  * The caller might reset the underlying layer2 entry's append_off to 0, so
861  * our covering append_off must be set to max to prevent any reallocation
862  * until after the flush delays complete and any underlying cached blocks
863  * have been properly invalidated.
864  */
865 static void
866 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
867 			int zone, struct hammer_blockmap_layer2 *layer2)
868 {
869 	hammer_reserve_t resv;
870 
871 	/*
872 	 * Allocate the reservation if necessary.
873 	 *
874 	 * NOTE: A lock is eventually needed around the resv lookup/allocation
875 	 * and the setdelay call; currently refs is not bumped until the call.
876 	 */
877 again:
878 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
879 	if (resv == NULL) {
880 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
881 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
882 		resv->zone = zone;
883 		resv->zone_offset = base_offset;
884 		resv->refs = 0;
885 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
886 
887 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
888 			resv->flags |= HAMMER_RESF_LAYER2FREE;
889 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
890 			kfree(resv, hmp->m_misc);
891 			goto again;
892 		}
893 		++hammer_count_reservations;
894 	} else {
895 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
896 			resv->flags |= HAMMER_RESF_LAYER2FREE;
897 	}
898 	hammer_reserve_setdelay(hmp, resv);
899 }
900 
901 /*
902  * Enter the reservation on the on-delay list, or move it if it
903  * is already on the list.
904  */
905 static void
906 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
907 {
908 	if (resv->flags & HAMMER_RESF_ONDELAY) {
909 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
910 		resv->flush_group = hmp->flusher.next + 1;
911 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
912 	} else {
913 		++resv->refs;
914 		++hmp->rsv_fromdelay;
915 		resv->flags |= HAMMER_RESF_ONDELAY;
916 		resv->flush_group = hmp->flusher.next + 1;
917 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
918 	}
919 }
920 
921 /*
922  * Reserve has reached its flush point, remove it from the delay list
923  * and finish it off.  hammer_blockmap_reserve_complete() inherits
924  * the ondelay reference.
925  */
926 void
927 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
928 {
929 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
930 	resv->flags &= ~HAMMER_RESF_ONDELAY;
931 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
932 	--hmp->rsv_fromdelay;
933 	hammer_blockmap_reserve_complete(hmp, resv);
934 }
935 
936 /*
937  * Backend function - free (offset, bytes) in a zone.
938  *
939  * XXX error return
940  */
941 void
942 hammer_blockmap_free(hammer_transaction_t trans,
943 		     hammer_off_t zone_offset, int bytes)
944 {
945 	hammer_mount_t hmp;
946 	hammer_volume_t root_volume;
947 	hammer_blockmap_t freemap;
948 	struct hammer_blockmap_layer1 *layer1;
949 	struct hammer_blockmap_layer2 *layer2;
950 	hammer_buffer_t buffer1 = NULL;
951 	hammer_buffer_t buffer2 = NULL;
952 	hammer_off_t layer1_offset;
953 	hammer_off_t layer2_offset;
954 	hammer_off_t base_off;
955 	int error;
956 	int zone;
957 
958 	if (bytes == 0)
959 		return;
960 	hmp = trans->hmp;
961 
962 	/*
963 	 * Alignment
964 	 */
965 	bytes = (bytes + 15) & ~15;
966 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
967 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
968 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
969 
970 	/*
971 	 * Basic zone validation & locking
972 	 */
973 	zone = HAMMER_ZONE_DECODE(zone_offset);
974 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
975 	root_volume = trans->rootvol;
976 	error = 0;
977 
978 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
979 
980 	/*
981 	 * Dive layer 1.
982 	 */
983 	layer1_offset = freemap->phys_offset +
984 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
985 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
986 	if (error)
987 		goto failed;
988 	KKASSERT(layer1->phys_offset &&
989 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
990 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
991 		hammer_lock_ex(&hmp->blkmap_lock);
992 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
993 			panic("CRC FAILED: LAYER1");
994 		hammer_unlock(&hmp->blkmap_lock);
995 	}
996 
997 	/*
998 	 * Dive layer 2, each entry represents a large-block.
999 	 */
1000 	layer2_offset = layer1->phys_offset +
1001 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1002 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1003 	if (error)
1004 		goto failed;
1005 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1006 		hammer_lock_ex(&hmp->blkmap_lock);
1007 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1008 			panic("CRC FAILED: LAYER2");
1009 		hammer_unlock(&hmp->blkmap_lock);
1010 	}
1011 
1012 	hammer_lock_ex(&hmp->blkmap_lock);
1013 
1014 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1015 
1016 	/*
1017 	 * Free space previously allocated via blockmap_alloc().
1018 	 *
1019 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1020 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1021 	 */
1022 	KKASSERT(layer2->zone == zone);
1023 	layer2->bytes_free += bytes;
1024 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1025 
1026 	/*
1027 	 * If a big-block becomes entirely free we must create a covering
1028 	 * reservation to prevent premature reuse.  Note, however, that
1029 	 * the big-block and/or reservation may still have an append_off
1030 	 * that allows further (non-reused) allocations.
1031 	 *
1032 	 * Once the reservation has been made we re-check layer2 and if
1033 	 * the big-block is still entirely free we reset the layer2 entry.
1034 	 * The reservation will prevent premature reuse.
1035 	 *
1036 	 * NOTE: hammer_buffer's are only invalidated when the reservation
1037 	 * is completed, if the layer2 entry is still completely free at
1038 	 * that time.  Any allocations from the reservation that may have
1039 	 * occurred in the meantime, or active references on the reservation
1040 	 * from new pending allocations, will prevent the invalidation from
1041 	 * occurring.
1042 	 */
1043 	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1044 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1045 
1046 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1047 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1048 			layer2->zone = 0;
1049 			layer2->append_off = 0;
1050 			hammer_modify_buffer(trans, buffer1,
1051 					     layer1, sizeof(*layer1));
1052 			++layer1->blocks_free;
1053 			layer1->layer1_crc = crc32(layer1,
1054 						   HAMMER_LAYER1_CRCSIZE);
1055 			hammer_modify_buffer_done(buffer1);
1056 			hammer_modify_volume_field(trans,
1057 					trans->rootvol,
1058 					vol0_stat_freebigblocks);
1059 			++root_volume->ondisk->vol0_stat_freebigblocks;
1060 			hmp->copy_stat_freebigblocks =
1061 			   root_volume->ondisk->vol0_stat_freebigblocks;
1062 			hammer_modify_volume_done(trans->rootvol);
1063 		}
1064 	}
1065 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1066 	hammer_modify_buffer_done(buffer2);
1067 	hammer_unlock(&hmp->blkmap_lock);
1068 
1069 failed:
1070 	if (buffer1)
1071 		hammer_rel_buffer(buffer1, 0);
1072 	if (buffer2)
1073 		hammer_rel_buffer(buffer2, 0);
1074 }
1075 
1076 int
1077 hammer_blockmap_dedup(hammer_transaction_t trans,
1078 		     hammer_off_t zone_offset, int bytes)
1079 {
1080 	hammer_mount_t hmp;
1081 	hammer_volume_t root_volume;
1082 	hammer_blockmap_t freemap;
1083 	struct hammer_blockmap_layer1 *layer1;
1084 	struct hammer_blockmap_layer2 *layer2;
1085 	hammer_buffer_t buffer1 = NULL;
1086 	hammer_buffer_t buffer2 = NULL;
1087 	hammer_off_t layer1_offset;
1088 	hammer_off_t layer2_offset;
1089 	int32_t temp;
1090 	int error;
1091 	int zone;
1092 
1093 	if (bytes == 0)
1094 		return (0);
1095 	hmp = trans->hmp;
1096 
1097 	/*
1098 	 * Alignment
1099 	 */
1100 	bytes = (bytes + 15) & ~15;
1101 	KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
1102 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1103 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
1104 
1105 	/*
1106 	 * Basic zone validation & locking
1107 	 */
1108 	zone = HAMMER_ZONE_DECODE(zone_offset);
1109 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1110 	root_volume = trans->rootvol;
1111 	error = 0;
1112 
1113 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1114 
1115 	/*
1116 	 * Dive layer 1.
1117 	 */
1118 	layer1_offset = freemap->phys_offset +
1119 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1120 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1121 	if (error)
1122 		goto failed;
1123 	KKASSERT(layer1->phys_offset &&
1124 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1125 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1126 		hammer_lock_ex(&hmp->blkmap_lock);
1127 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1128 			panic("CRC FAILED: LAYER1");
1129 		hammer_unlock(&hmp->blkmap_lock);
1130 	}
1131 
1132 	/*
1133 	 * Dive layer 2, each entry represents a large-block.
1134 	 */
1135 	layer2_offset = layer1->phys_offset +
1136 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1137 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1138 	if (error)
1139 		goto failed;
1140 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1141 		hammer_lock_ex(&hmp->blkmap_lock);
1142 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1143 			panic("CRC FAILED: LAYER2");
1144 		hammer_unlock(&hmp->blkmap_lock);
1145 	}
1146 
1147 	hammer_lock_ex(&hmp->blkmap_lock);
1148 
1149 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1150 
1151 	/*
1152 	 * Free space previously allocated via blockmap_alloc().
1153 	 *
1154 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1155 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1156 	 */
1157 	KKASSERT(layer2->zone == zone);
1158 	temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
1159 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1160 	if (temp > layer2->bytes_free) {
1161 		error = ERANGE;
1162 		goto underflow;
1163 	}
1164 	layer2->bytes_free -= bytes;
1165 
1166 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1167 
1168 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1169 underflow:
1170 	hammer_modify_buffer_done(buffer2);
1171 	hammer_unlock(&hmp->blkmap_lock);
1172 
1173 failed:
1174 	if (buffer1)
1175 		hammer_rel_buffer(buffer1, 0);
1176 	if (buffer2)
1177 		hammer_rel_buffer(buffer2, 0);
1178 	return (error);
1179 }
1180 
1181 /*
1182  * Backend function - finalize (offset, bytes) in a zone.
1183  *
1184  * Allocate space that was previously reserved by the frontend.
1185  */
1186 int
1187 hammer_blockmap_finalize(hammer_transaction_t trans,
1188 			 hammer_reserve_t resv,
1189 			 hammer_off_t zone_offset, int bytes)
1190 {
1191 	hammer_mount_t hmp;
1192 	hammer_volume_t root_volume;
1193 	hammer_blockmap_t freemap;
1194 	struct hammer_blockmap_layer1 *layer1;
1195 	struct hammer_blockmap_layer2 *layer2;
1196 	hammer_buffer_t buffer1 = NULL;
1197 	hammer_buffer_t buffer2 = NULL;
1198 	hammer_off_t layer1_offset;
1199 	hammer_off_t layer2_offset;
1200 	int error;
1201 	int zone;
1202 	int offset;
1203 
1204 	if (bytes == 0)
1205 		return(0);
1206 	hmp = trans->hmp;
1207 
1208 	/*
1209 	 * Alignment
1210 	 */
1211 	bytes = (bytes + 15) & ~15;
1212 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1213 
1214 	/*
1215 	 * Basic zone validation & locking
1216 	 */
1217 	zone = HAMMER_ZONE_DECODE(zone_offset);
1218 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1219 	root_volume = trans->rootvol;
1220 	error = 0;
1221 
1222 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1223 
1224 	/*
1225 	 * Dive layer 1.
1226 	 */
1227 	layer1_offset = freemap->phys_offset +
1228 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1229 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1230 	if (error)
1231 		goto failed;
1232 	KKASSERT(layer1->phys_offset &&
1233 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1234 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1235 		hammer_lock_ex(&hmp->blkmap_lock);
1236 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1237 			panic("CRC FAILED: LAYER1");
1238 		hammer_unlock(&hmp->blkmap_lock);
1239 	}
1240 
1241 	/*
1242 	 * Dive layer 2, each entry represents a large-block.
1243 	 */
1244 	layer2_offset = layer1->phys_offset +
1245 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1246 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1247 	if (error)
1248 		goto failed;
1249 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1250 		hammer_lock_ex(&hmp->blkmap_lock);
1251 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1252 			panic("CRC FAILED: LAYER2");
1253 		hammer_unlock(&hmp->blkmap_lock);
1254 	}
1255 
1256 	hammer_lock_ex(&hmp->blkmap_lock);
1257 
1258 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1259 
1260 	/*
1261 	 * Finalize some or all of the space covered by a current
1262 	 * reservation.  An allocation in the same layer may have
1263 	 * already assigned ownership.
1264 	 */
1265 	if (layer2->zone == 0) {
1266 		hammer_modify_buffer(trans, buffer1,
1267 				     layer1, sizeof(*layer1));
1268 		--layer1->blocks_free;
1269 		layer1->layer1_crc = crc32(layer1,
1270 					   HAMMER_LAYER1_CRCSIZE);
1271 		hammer_modify_buffer_done(buffer1);
1272 		layer2->zone = zone;
1273 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1274 		KKASSERT(layer2->append_off == 0);
1275 		hammer_modify_volume_field(trans,
1276 				trans->rootvol,
1277 				vol0_stat_freebigblocks);
1278 		--root_volume->ondisk->vol0_stat_freebigblocks;
1279 		hmp->copy_stat_freebigblocks =
1280 		   root_volume->ondisk->vol0_stat_freebigblocks;
1281 		hammer_modify_volume_done(trans->rootvol);
1282 	}
1283 	if (layer2->zone != zone)
1284 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1285 	KKASSERT(layer2->zone == zone);
1286 	KKASSERT(bytes != 0);
1287 	layer2->bytes_free -= bytes;
1288 
1289 	if (resv) {
1290 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1291 	}
1292 
1293 	/*
1294 	 * Finalizations can occur out of order, or combined with allocations.
1295 	 * append_off must be set to the highest allocated offset.
1296 	 */
1297 	offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1298 	if (layer2->append_off < offset)
1299 		layer2->append_off = offset;
1300 
1301 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1302 	hammer_modify_buffer_done(buffer2);
1303 	hammer_unlock(&hmp->blkmap_lock);
1304 
1305 failed:
1306 	if (buffer1)
1307 		hammer_rel_buffer(buffer1, 0);
1308 	if (buffer2)
1309 		hammer_rel_buffer(buffer2, 0);
1310 	return(error);
1311 }
1312 
1313 /*
1314  * Return the approximate number of free bytes in the big-block
1315  * containing the specified blockmap offset.
1316  *
1317  * WARNING: A negative number can be returned if data de-dup exists,
1318  *	    and the result will also not represent the actual number
1319  *	    of free bytes in this case.
1320  *
1321  *	    This code is used only by the reblocker.
1322  */
1323 int
1324 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1325 			int *curp, int *errorp)
1326 {
1327 	hammer_volume_t root_volume;
1328 	hammer_blockmap_t blockmap;
1329 	hammer_blockmap_t freemap;
1330 	struct hammer_blockmap_layer1 *layer1;
1331 	struct hammer_blockmap_layer2 *layer2;
1332 	hammer_buffer_t buffer = NULL;
1333 	hammer_off_t layer1_offset;
1334 	hammer_off_t layer2_offset;
1335 	int32_t bytes;
1336 	int zone;
1337 
1338 	zone = HAMMER_ZONE_DECODE(zone_offset);
1339 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1340 	root_volume = hammer_get_root_volume(hmp, errorp);
1341 	if (*errorp) {
1342 		*curp = 0;
1343 		return(0);
1344 	}
1345 	blockmap = &hmp->blockmap[zone];
1346 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1347 
1348 	/*
1349 	 * Dive layer 1.
1350 	 */
1351 	layer1_offset = freemap->phys_offset +
1352 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1353 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1354 	if (*errorp) {
1355 		bytes = 0;
1356 		goto failed;
1357 	}
1358 	KKASSERT(layer1->phys_offset);
1359 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1360 		hammer_lock_ex(&hmp->blkmap_lock);
1361 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1362 			panic("CRC FAILED: LAYER1");
1363 		hammer_unlock(&hmp->blkmap_lock);
1364 	}
1365 
1366 	/*
1367 	 * Dive layer 2, each entry represents a large-block.
1368 	 *
1369 	 * (reuse buffer, layer1 pointer becomes invalid)
1370 	 */
1371 	layer2_offset = layer1->phys_offset +
1372 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1373 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1374 	if (*errorp) {
1375 		bytes = 0;
1376 		goto failed;
1377 	}
1378 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1379 		hammer_lock_ex(&hmp->blkmap_lock);
1380 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1381 			panic("CRC FAILED: LAYER2");
1382 		hammer_unlock(&hmp->blkmap_lock);
1383 	}
1384 	KKASSERT(layer2->zone == zone);
1385 
1386 	bytes = layer2->bytes_free;
1387 
1388 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1389 		*curp = 0;
1390 	else
1391 		*curp = 1;
1392 failed:
1393 	if (buffer)
1394 		hammer_rel_buffer(buffer, 0);
1395 	hammer_rel_volume(root_volume, 0);
1396 	if (hammer_debug_general & 0x0800) {
1397 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1398 			(long long)zone_offset, bytes);
1399 	}
1400 	return(bytes);
1401 }
1402 
1403 
1404 /*
1405  * Lookup a blockmap offset.
1406  */
1407 hammer_off_t
1408 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1409 		       int *errorp)
1410 {
1411 	hammer_volume_t root_volume;
1412 	hammer_blockmap_t freemap;
1413 	struct hammer_blockmap_layer1 *layer1;
1414 	struct hammer_blockmap_layer2 *layer2;
1415 	hammer_buffer_t buffer = NULL;
1416 	hammer_off_t layer1_offset;
1417 	hammer_off_t layer2_offset;
1418 	hammer_off_t result_offset;
1419 	hammer_off_t base_off;
1420 	hammer_reserve_t resv;
1421 	int zone;
1422 
1423 	/*
1424 	 * Calculate the zone-2 offset.
1425 	 */
1426 	zone = HAMMER_ZONE_DECODE(zone_offset);
1427 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1428 
1429 	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1430 			HAMMER_ZONE_RAW_BUFFER;
1431 
1432 	/*
1433 	 * We can actually stop here; normal blockmaps are now direct-mapped
1434 	 * onto the freemap and so represent zone-2 addresses.
1435 	 */
1436 	if (hammer_verify_zone == 0) {
1437 		*errorp = 0;
1438 		return(result_offset);
1439 	}
1440 
1441 	/*
1442 	 * Validate the allocation zone
1443 	 */
1444 	root_volume = hammer_get_root_volume(hmp, errorp);
1445 	if (*errorp)
1446 		return(0);
1447 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1448 	KKASSERT(freemap->phys_offset != 0);
1449 
1450 	/*
1451 	 * Dive layer 1.
1452 	 */
1453 	layer1_offset = freemap->phys_offset +
1454 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1455 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1456 	if (*errorp)
1457 		goto failed;
1458 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1459 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1460 		hammer_lock_ex(&hmp->blkmap_lock);
1461 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1462 			panic("CRC FAILED: LAYER1");
1463 		hammer_unlock(&hmp->blkmap_lock);
1464 	}
1465 
1466 	/*
1467 	 * Dive layer 2, each entry represents a large-block.
1468 	 */
1469 	layer2_offset = layer1->phys_offset +
1470 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1471 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1472 
1473 	if (*errorp)
1474 		goto failed;
1475 	if (layer2->zone == 0) {
1476 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1477 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1478 				 base_off);
1479 		KKASSERT(resv && resv->zone == zone);
1480 
1481 	} else if (layer2->zone != zone) {
1482 		panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1483 			layer2->zone, zone);
1484 	}
1485 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1486 		hammer_lock_ex(&hmp->blkmap_lock);
1487 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1488 			panic("CRC FAILED: LAYER2");
1489 		hammer_unlock(&hmp->blkmap_lock);
1490 	}
1491 
1492 failed:
1493 	if (buffer)
1494 		hammer_rel_buffer(buffer, 0);
1495 	hammer_rel_volume(root_volume, 0);
1496 	if (hammer_debug_general & 0x0800) {
1497 		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1498 			(long long)zone_offset, (long long)result_offset);
1499 	}
1500 	return(result_offset);
1501 }
1502 
1503 
1504 /*
1505  * Check space availability
1506  *
1507  * MPSAFE - does not require fs_token
1508  */
1509 int
1510 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1511 {
1512 	const int in_size = sizeof(struct hammer_inode_data) +
1513 			    sizeof(union hammer_btree_elm);
1514 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1515 	int64_t usedbytes;
1516 
1517 	usedbytes = hmp->rsv_inodes * in_size +
1518 		    hmp->rsv_recs * rec_size +
1519 		    hmp->rsv_databytes +
1520 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1521 		    ((int64_t)hidirtybufspace << 2) +
1522 		    (slop << HAMMER_LARGEBLOCK_BITS);
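	/*
	 * usedbytes is a pessimistic byte estimate of everything the
	 * frontend may still turn into media writes: reserved inode and
	 * record meta-data, reserved data bytes, big-blocks held on the
	 * reuse-delay list, a multiple of the dirty buffer space, and the
	 * caller-supplied slop expressed in big-blocks.  It is compared
	 * below, in big-block units, against the cached count of free
	 * big-blocks.
	 */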
1523 
1524 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1525 	if (resp)
1526 		*resp = usedbytes;
1527 
1528 	if (hmp->copy_stat_freebigblocks >=
1529 	    (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1530 		return(0);
1531 	}
1532 	return (ENOSPC);
1533 }
1534 
1535