xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision b0d289c2)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
38 #include "hammer.h"
39 
40 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
41 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
42 				    hammer_off_t base_offset, int zone,
43 				    struct hammer_blockmap_layer2 *layer2);
44 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
45 static int update_bytes_free(hammer_reserve_t resv, int bytes);
46 
47 /*
48  * Reserved big-blocks red-black tree support
49  */
50 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
51 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
52 
53 static int
54 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
55 {
56 	if (res1->zone_offset < res2->zone_offset)
57 		return(-1);
58 	if (res1->zone_offset > res2->zone_offset)
59 		return(1);
60 	return(0);
61 }
62 
63 /*
64  * Allocate bytes from a zone
65  */
66 hammer_off_t
67 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
68 		      hammer_off_t hint, int *errorp)
69 {
70 	hammer_mount_t hmp;
71 	hammer_volume_t root_volume;
72 	hammer_blockmap_t blockmap;
73 	hammer_blockmap_t freemap;
74 	hammer_reserve_t resv;
75 	struct hammer_blockmap_layer1 *layer1;
76 	struct hammer_blockmap_layer2 *layer2;
77 	hammer_buffer_t buffer1 = NULL;
78 	hammer_buffer_t buffer2 = NULL;
79 	hammer_buffer_t buffer3 = NULL;
80 	hammer_off_t tmp_offset;
81 	hammer_off_t next_offset;
82 	hammer_off_t result_offset;
83 	hammer_off_t layer1_offset;
84 	hammer_off_t layer2_offset;
85 	hammer_off_t base_off;
86 	int loops = 0;
87 	int offset;		/* offset within big-block */
88 	int use_hint;
89 
90 	hmp = trans->hmp;
91 
92 	/*
93 	 * Deal with alignment and buffer-boundary issues.
94 	 *
95 	 * Be careful, certain primary alignments are used below to allocate
96 	 * new blockmap blocks.
97 	 */
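	/* round the request size up to a multiple of 16 bytes */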
98 	bytes = (bytes + 15) & ~15;
99 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
100 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
101 
102 	/*
103 	 * Setup
104 	 */
105 	root_volume = trans->rootvol;
106 	*errorp = 0;
107 	blockmap = &hmp->blockmap[zone];
108 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
109 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
110 
111 	/*
112 	 * Use the hint if we have one.
113 	 */
114 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
115 		next_offset = (hint + 15) & ~(hammer_off_t)15;
116 		use_hint = 1;
117 	} else {
118 		next_offset = blockmap->next_offset;
119 		use_hint = 0;
120 	}
121 again:
122 
123 	/*
124 	 * use_hint is turned off if we leave the hinted big-block.
125 	 */
126 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
127 		next_offset = blockmap->next_offset;
128 		use_hint = 0;
129 	}
130 
131 	/*
132 	 * Check for wrap
133 	 */
134 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
135 		if (++loops == 2) {
136 			result_offset = 0;
137 			*errorp = ENOSPC;
138 			goto failed;
139 		}
140 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
141 	}
142 
143 	/*
144 	 * The allocation request may not cross a buffer boundary.  Special
145 	 * large allocations must not cross a big-block boundary.
146 	 */
147 	tmp_offset = next_offset + bytes - 1;
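	/* advance past the boundary and retry if we would straddle it */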
148 	if (bytes <= HAMMER_BUFSIZE) {
149 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
150 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
151 			goto again;
152 		}
153 	} else {
154 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
155 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
156 			goto again;
157 		}
158 	}
159 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
160 
161 	/*
162 	 * Dive layer 1.
163 	 */
164 	layer1_offset = freemap->phys_offset +
165 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
166 
167 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
168 	if (*errorp) {
169 		result_offset = 0;
170 		goto failed;
171 	}
172 
173 	/*
174 	 * Check CRC.
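	 * A mismatch is re-tested while holding blkmap_lock to filter out
	 * a race against an in-progress update; a persistent mismatch is
	 * treated as real corruption.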
175 	 */
176 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
177 		hammer_lock_ex(&hmp->blkmap_lock);
178 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
179 			panic("CRC FAILED: LAYER1");
180 		hammer_unlock(&hmp->blkmap_lock);
181 	}
182 
183 	/*
184 	 * If we are at a big-block boundary and layer1 indicates no
185 	 * free big-blocks, then we cannot allocate a new big-block in
186 	 * layer2; skip to the next layer1 entry.
187 	 */
188 	if (offset == 0 && layer1->blocks_free == 0) {
189 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
190 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
191 		goto again;
192 	}
193 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
194 
195 	/*
196 	 * Skip this layer1 entry if it is pointing to a layer2 big-block
197 	 * on a volume that we are currently trying to remove from the
198 	 * file-system. This is used by the volume-del code together with
199 	 * the reblocker to free up a volume.
200 	 */
201 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
202 	    hmp->volume_to_remove) {
203 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
204 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
205 		goto again;
206 	}
207 
208 	/*
209 	 * Dive layer 2, each entry represents a big-block.
210 	 */
211 	layer2_offset = layer1->phys_offset +
212 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
213 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
214 	if (*errorp) {
215 		result_offset = 0;
216 		goto failed;
217 	}
218 
219 	/*
220 	 * Check CRC.  This can race another thread holding the lock
221 	 * and in the middle of modifying layer2.
222 	 */
223 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
224 		hammer_lock_ex(&hmp->blkmap_lock);
225 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
226 			panic("CRC FAILED: LAYER2");
227 		hammer_unlock(&hmp->blkmap_lock);
228 	}
229 
230 	/*
231 	 * Skip the layer if the zone is owned by someone other than us.
232 	 */
233 	if (layer2->zone && layer2->zone != zone) {
234 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
235 		goto again;
236 	}
237 	if (offset < layer2->append_off) {
238 		next_offset += layer2->append_off - offset;
239 		goto again;
240 	}
241 
242 #if 0
243 	/*
244 	 * If operating in the current non-hint blockmap block, do not
245 	 * allow it to get over-full.  Also drop any active hinting so
246 	 * blockmap->next_offset is updated at the end.
247 	 *
248 	 * We do this for B-Tree and meta-data allocations to provide
249 	 * localization for updates.
250 	 */
251 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
252 	     zone == HAMMER_ZONE_META_INDEX) &&
253 	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
254 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)
255 	) {
256 		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
257 			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
258 			use_hint = 0;
259 			goto again;
260 		}
261 	}
262 #endif
263 
264 	/*
265 	 * We need the lock from this point on.  We have to re-check zone
266 	 * ownership after acquiring the lock and also check for reservations.
267 	 */
268 	hammer_lock_ex(&hmp->blkmap_lock);
269 
270 	if (layer2->zone && layer2->zone != zone) {
271 		hammer_unlock(&hmp->blkmap_lock);
272 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
273 		goto again;
274 	}
275 	if (offset < layer2->append_off) {
276 		hammer_unlock(&hmp->blkmap_lock);
277 		next_offset += layer2->append_off - offset;
278 		goto again;
279 	}
280 
281 	/*
282 	 * The big-block might be reserved by another zone.  If it is reserved
283 	 * by our zone we may have to move next_offset past the append_off.
284 	 */
285 	base_off = hammer_xlate_to_zone2(next_offset &
286 					~HAMMER_BIGBLOCK_MASK64);
287 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
288 	if (resv) {
289 		if (resv->zone != zone) {
290 			hammer_unlock(&hmp->blkmap_lock);
291 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
292 				      ~HAMMER_BIGBLOCK_MASK64;
293 			goto again;
294 		}
295 		if (offset < resv->append_off) {
296 			hammer_unlock(&hmp->blkmap_lock);
297 			next_offset += resv->append_off - offset;
298 			goto again;
299 		}
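		/*
		 * Temporary ref, released below via
		 * hammer_blockmap_reserve_complete().
		 */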
300 		++resv->refs;
301 	}
302 
303 	/*
304 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
305 	 * of the layer for real.  At this point we've validated any
306 	 * reservation that might exist and can just ignore resv.
307 	 */
308 	if (layer2->zone == 0) {
309 		/*
310 		 * Assign the big-block to our zone
311 		 */
312 		hammer_modify_buffer(trans, buffer1,
313 				     layer1, sizeof(*layer1));
314 		--layer1->blocks_free;
315 		layer1->layer1_crc = crc32(layer1,
316 					   HAMMER_LAYER1_CRCSIZE);
317 		hammer_modify_buffer_done(buffer1);
318 		hammer_modify_buffer(trans, buffer2,
319 				     layer2, sizeof(*layer2));
320 		layer2->zone = zone;
321 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
322 		KKASSERT(layer2->append_off == 0);
323 		hammer_modify_volume_field(trans, trans->rootvol,
324 					   vol0_stat_freebigblocks);
325 		--root_volume->ondisk->vol0_stat_freebigblocks;
326 		hmp->copy_stat_freebigblocks =
327 			root_volume->ondisk->vol0_stat_freebigblocks;
328 		hammer_modify_volume_done(trans->rootvol);
329 	} else {
330 		hammer_modify_buffer(trans, buffer2,
331 				     layer2, sizeof(*layer2));
332 	}
333 	KKASSERT(layer2->zone == zone);
334 
335 	/*
336 	 * NOTE: bytes_free can legally go negative due to de-dup.
337 	 */
338 	layer2->bytes_free -= bytes;
339 	KKASSERT(layer2->append_off <= offset);
340 	layer2->append_off = offset + bytes;
341 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
342 	hammer_modify_buffer_done(buffer2);
343 
344 	/*
345 	 * We hold the blockmap lock and should be the only ones
346 	 * capable of modifying resv->append_off.  Track the allocation
347 	 * as appropriate.
348 	 */
349 	KKASSERT(bytes != 0);
350 	if (resv) {
351 		KKASSERT(resv->append_off <= offset);
352 		resv->append_off = offset + bytes;
353 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
354 		hammer_blockmap_reserve_complete(hmp, resv);
355 	}
356 
357 	/*
358 	 * If we are allocating from the base of a new buffer we can avoid
359 	 * a disk read by calling hammer_bnew_ext().
360 	 */
361 	if ((next_offset & HAMMER_BUFMASK) == 0) {
362 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
363 				errorp, &buffer3);
364 	}
365 	result_offset = next_offset;
366 
367 	/*
368 	 * If we weren't supplied with a hint or could not use the hint
369 	 * then we wound up using blockmap->next_offset as the hint and
370 	 * need to save it.
371 	 */
372 	if (use_hint == 0) {
373 		hammer_modify_volume_noundo(NULL, root_volume);
374 		blockmap->next_offset = next_offset + bytes;
375 		hammer_modify_volume_done(root_volume);
376 	}
377 	hammer_unlock(&hmp->blkmap_lock);
378 failed:
379 
380 	/*
381 	 * Cleanup
382 	 */
383 	if (buffer1)
384 		hammer_rel_buffer(buffer1, 0);
385 	if (buffer2)
386 		hammer_rel_buffer(buffer2, 0);
387 	if (buffer3)
388 		hammer_rel_buffer(buffer3, 0);
389 
390 	return(result_offset);
391 }
392 
393 /*
394  * Frontend function - Reserve bytes in a zone.
395  *
396  * This code reserves bytes out of a blockmap without committing to any
397  * meta-data modifications, allowing the frontend to directly issue disk
398  * write I/O for big blocks of data.
399  *
400  * The backend later finalizes the reservation with hammer_blockmap_finalize()
401  * upon committing the related record.
402  */
403 hammer_reserve_t
404 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
405 			hammer_off_t *zone_offp, int *errorp)
406 {
407 	hammer_volume_t root_volume;
408 	hammer_blockmap_t blockmap;
409 	hammer_blockmap_t freemap;
410 	struct hammer_blockmap_layer1 *layer1;
411 	struct hammer_blockmap_layer2 *layer2;
412 	hammer_buffer_t buffer1 = NULL;
413 	hammer_buffer_t buffer2 = NULL;
414 	hammer_buffer_t buffer3 = NULL;
415 	hammer_off_t tmp_offset;
416 	hammer_off_t next_offset;
417 	hammer_off_t layer1_offset;
418 	hammer_off_t layer2_offset;
419 	hammer_off_t base_off;
420 	hammer_reserve_t resv;
421 	hammer_reserve_t resx;
422 	int loops = 0;
423 	int offset;
424 
425 	/*
426 	 * Setup
427 	 */
428 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
429 	root_volume = hammer_get_root_volume(hmp, errorp);
430 	if (*errorp)
431 		return(NULL);
432 	blockmap = &hmp->blockmap[zone];
433 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
434 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
435 
436 	/*
437 	 * Deal with alignment and buffer-boundary issues.
438 	 *
439 	 * Be careful, certain primary alignments are used below to allocate
440 	 * new blockmap blocks.
441 	 */
442 	bytes = (bytes + 15) & ~15;
443 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
444 
445 	next_offset = blockmap->next_offset;
446 again:
447 	resv = NULL;
448 	/*
449 	 * Check for wrap
450 	 */
451 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
452 		if (++loops == 2) {
453 			*errorp = ENOSPC;
454 			goto failed;
455 		}
456 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
457 	}
458 
459 	/*
460 	 * The allocation request may not cross a buffer boundary.  Special
461 	 * large allocations must not cross a big-block boundary.
462 	 */
463 	tmp_offset = next_offset + bytes - 1;
464 	if (bytes <= HAMMER_BUFSIZE) {
465 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
466 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
467 			goto again;
468 		}
469 	} else {
470 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
471 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
472 			goto again;
473 		}
474 	}
475 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
476 
477 	/*
478 	 * Dive layer 1.
479 	 */
480 	layer1_offset = freemap->phys_offset +
481 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
482 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
483 	if (*errorp)
484 		goto failed;
485 
486 	/*
487 	 * Check CRC.
488 	 */
489 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
490 		hammer_lock_ex(&hmp->blkmap_lock);
491 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
492 			panic("CRC FAILED: LAYER1");
493 		hammer_unlock(&hmp->blkmap_lock);
494 	}
495 
496 	/*
497 	 * If we are at a big-block boundary and layer1 indicates no
498 	 * free big-blocks, then we cannot allocate a new big-block in
499 	 * layer2; skip to the next layer1 entry.
500 	 */
501 	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
502 	    layer1->blocks_free == 0) {
503 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
504 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
505 		goto again;
506 	}
507 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
508 
509 	/*
510 	 * Dive layer 2, each entry represents a big-block.
511 	 */
512 	layer2_offset = layer1->phys_offset +
513 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
514 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
515 	if (*errorp)
516 		goto failed;
517 
518 	/*
519 	 * Check CRC if not allocating into uninitialized space (which we
520 	 * aren't when reserving space).
521 	 */
522 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
523 		hammer_lock_ex(&hmp->blkmap_lock);
524 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
525 			panic("CRC FAILED: LAYER2");
526 		hammer_unlock(&hmp->blkmap_lock);
527 	}
528 
529 	/*
530 	 * Skip the layer if the zone is owned by someone other then us.
531 	 * Skip the layer if the zone is owned by someone other than us.
532 	if (layer2->zone && layer2->zone != zone) {
533 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
534 		goto again;
535 	}
536 	if (offset < layer2->append_off) {
537 		next_offset += layer2->append_off - offset;
538 		goto again;
539 	}
540 
541 	/*
542 	 * We need the lock from this point on.  We have to re-check zone
543 	 * ownership after acquiring the lock and also check for reservations.
544 	 */
545 	hammer_lock_ex(&hmp->blkmap_lock);
546 
547 	if (layer2->zone && layer2->zone != zone) {
548 		hammer_unlock(&hmp->blkmap_lock);
549 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
550 		goto again;
551 	}
552 	if (offset < layer2->append_off) {
553 		hammer_unlock(&hmp->blkmap_lock);
554 		next_offset += layer2->append_off - offset;
555 		goto again;
556 	}
557 
558 	/*
559 	 * The big-block might be reserved by another zone.  If it is reserved
560 	 * by our zone we may have to move next_offset past the append_off.
561 	 */
562 	base_off = hammer_xlate_to_zone2(next_offset &
563 					~HAMMER_BIGBLOCK_MASK64);
564 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
565 	if (resv) {
566 		if (resv->zone != zone) {
567 			hammer_unlock(&hmp->blkmap_lock);
568 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
569 				      ~HAMMER_BIGBLOCK_MASK64;
570 			goto again;
571 		}
572 		if (offset < resv->append_off) {
573 			hammer_unlock(&hmp->blkmap_lock);
574 			next_offset += resv->append_off - offset;
575 			goto again;
576 		}
577 		++resv->refs;
578 		resx = NULL;
579 	} else {
580 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
581 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
582 		resx->refs = 1;
583 		resx->zone = zone;
584 		resx->zone_offset = base_off;
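		/* note if the big-block is still entirely free */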
585 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
586 			resx->flags |= HAMMER_RESF_LAYER2FREE;
587 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
588 		KKASSERT(resv == NULL);
589 		resv = resx;
590 		++hammer_count_reservations;
591 	}
592 	resv->append_off = offset + bytes;
593 
594 	/*
595 	 * If we are not reserving a whole buffer but are at the start of
596 	 * a new block, call hammer_bnew() to avoid a disk read.
597 	 *
598 	 * If we are reserving a whole buffer (or more), the caller will
599 	 * probably use a direct read, so do nothing.
600 	 *
601 	 * If we do not have a whole lot of system memory we really can't
602 	 * afford to block while holding the blkmap_lock!
603 	 */
604 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
605 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE))
606 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
607 	}
608 
609 	/*
610 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
611 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
612 	 * be big-block aligned.
613 	 */
614 	blockmap->next_offset = next_offset + bytes;
615 	hammer_unlock(&hmp->blkmap_lock);
616 
617 failed:
618 	if (buffer1)
619 		hammer_rel_buffer(buffer1, 0);
620 	if (buffer2)
621 		hammer_rel_buffer(buffer2, 0);
622 	if (buffer3)
623 		hammer_rel_buffer(buffer3, 0);
624 	hammer_rel_volume(root_volume, 0);
625 	*zone_offp = next_offset;
626 
627 	return(resv);
628 }
629 
630 /*
631  * Frontend function - Dedup bytes in a zone.
632  *
633  * Dedup reservations work exactly the same as normal write reservations
634  * except we only adjust the bytes_free field and don't touch the append
635  * offset.  The finalization mechanism for dedup reservations is also the
636  * same as for normal writes - the backend finalizes the reservation with
637  * hammer_blockmap_finalize().
638  */
639 hammer_reserve_t
640 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
641 			      hammer_off_t zone_offset, int *errorp)
642 {
643 	hammer_volume_t root_volume;
644 	hammer_blockmap_t freemap;
645 	struct hammer_blockmap_layer1 *layer1;
646 	struct hammer_blockmap_layer2 *layer2;
647 	hammer_buffer_t buffer1 = NULL;
648 	hammer_buffer_t buffer2 = NULL;
649 	hammer_off_t layer1_offset;
650 	hammer_off_t layer2_offset;
651 	hammer_off_t base_off;
652 	hammer_reserve_t resv = NULL;
653 	hammer_reserve_t resx = NULL;
654 
655 	/*
656 	 * Setup
657 	 */
658 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
659 	root_volume = hammer_get_root_volume(hmp, errorp);
660 	if (*errorp)
661 		return (NULL);
662 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
663 	KKASSERT(freemap->phys_offset != 0);
664 
665 	bytes = (bytes + 15) & ~15;
666 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
667 
668 	/*
669 	 * Dive layer 1.
670 	 */
671 	layer1_offset = freemap->phys_offset +
672 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
673 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
674 	if (*errorp)
675 		goto failed;
676 
677 	/*
678 	 * Check CRC.
679 	 */
680 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
681 		hammer_lock_ex(&hmp->blkmap_lock);
682 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
683 			panic("CRC FAILED: LAYER1");
684 		hammer_unlock(&hmp->blkmap_lock);
685 	}
686 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
687 
688 	/*
689 	 * Dive layer 2, each entry represents a big-block.
690 	 */
691 	layer2_offset = layer1->phys_offset +
692 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
693 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
694 	if (*errorp)
695 		goto failed;
696 
697 	/*
698 	 * Check CRC.
699 	 */
700 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
701 		hammer_lock_ex(&hmp->blkmap_lock);
702 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
703 			panic("CRC FAILED: LAYER2");
704 		hammer_unlock(&hmp->blkmap_lock);
705 	}
706 
707 	/*
708 	 * Fail if the zone is owned by someone other than us.
709 	 */
710 	if (layer2->zone && layer2->zone != zone)
711 		goto failed;
712 
713 	/*
714 	 * We need the lock from this point on.  We have to re-check zone
715 	 * ownership after acquiring the lock and also check for reservations.
716 	 */
717 	hammer_lock_ex(&hmp->blkmap_lock);
718 
719 	if (layer2->zone && layer2->zone != zone) {
720 		hammer_unlock(&hmp->blkmap_lock);
721 		goto failed;
722 	}
723 
724 	base_off = hammer_xlate_to_zone2(zone_offset &
725 					~HAMMER_BIGBLOCK_MASK64);
726 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
727 	if (resv) {
728 		if (resv->zone != zone) {
729 			hammer_unlock(&hmp->blkmap_lock);
730 			resv = NULL;
731 			goto failed;
732 		}
733 		/*
734 		 * Due to possible big-block underflow we can't simply
735 		 * subtract bytes from bytes_free.
736 		 */
737 		if (update_bytes_free(resv, bytes) == 0) {
738 			hammer_unlock(&hmp->blkmap_lock);
739 			resv = NULL;
740 			goto failed;
741 		}
742 		++resv->refs;
743 		resx = NULL;
744 	} else {
745 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
746 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
747 		resx->refs = 1;
748 		resx->zone = zone;
749 		resx->bytes_free = layer2->bytes_free;
750 		/*
751 		 * Due to possible big-block underflow we can't simply
752 		 * subtract bytes from bytes_free.
753 		 */
754 		if (update_bytes_free(resx, bytes) == 0) {
755 			hammer_unlock(&hmp->blkmap_lock);
756 			kfree(resx, hmp->m_misc);
757 			goto failed;
758 		}
759 		resx->zone_offset = base_off;
760 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
761 		KKASSERT(resv == NULL);
762 		resv = resx;
763 		++hammer_count_reservations;
764 	}
765 
766 	hammer_unlock(&hmp->blkmap_lock);
767 
768 failed:
769 	if (buffer1)
770 		hammer_rel_buffer(buffer1, 0);
771 	if (buffer2)
772 		hammer_rel_buffer(buffer2, 0);
773 	hammer_rel_volume(root_volume, 0);
774 
775 	return(resv);
776 }
777 
778 static int
779 update_bytes_free(hammer_reserve_t resv, int bytes)
780 {
781 	int32_t temp;
782 
783 	/*
784 	 * Big-block underflow check
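	 * (temp can only exceed bytes_free when the subtraction wraps,
	 * i.e. bytes_free has already drifted close to INT32_MIN)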
785 	 */
786 	temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
787 	cpu_ccfence(); /* XXX do we really need it? */
788 	if (temp > resv->bytes_free) {
789 		kprintf("BIGBLOCK UNDERFLOW\n");
790 		return (0);
791 	}
792 
793 	resv->bytes_free -= bytes;
794 	return (1);
795 }
796 
797 /*
798  * Dereference a reservation structure.  Upon the final release the
799  * underlying big-block is checked and if it is entirely free we delete
800  * any related HAMMER buffers to avoid potential conflicts with future
801  * reuse of the big-block.
802  */
803 void
804 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
805 {
806 	hammer_off_t base_offset;
807 	int error;
808 
809 	KKASSERT(resv->refs > 0);
810 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
811 		 HAMMER_ZONE_RAW_BUFFER);
812 
813 	/*
814 	 * Setting append_off to the max prevents any new allocations
815 	 * from occurring while we are trying to dispose of the reservation,
816 	 * allowing us to safely delete any related HAMMER buffers.
817 	 *
818 	 * If we are unable to clean out all related HAMMER buffers we
819 	 * requeue the delay.
820 	 */
821 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
822 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
823 		base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
824 		base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
825 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
826 			hammer_dedup_cache_inval(hmp, base_offset);
827 		error = hammer_del_buffers(hmp, base_offset,
828 					   resv->zone_offset,
829 					   HAMMER_BIGBLOCK_SIZE,
830 					   1);
831 		if (hammer_debug_general & 0x20000) {
832 			kprintf("hammer: delbgblk %016jx error %d\n",
833 				(intmax_t)base_offset, error);
834 		}
835 		if (error)
836 			hammer_reserve_setdelay(hmp, resv);
837 	}
838 	if (--resv->refs == 0) {
839 		if (hammer_debug_general & 0x20000) {
840 			kprintf("hammer: delresvr %016jx zone %02x\n",
841 				(intmax_t)resv->zone_offset, resv->zone);
842 		}
843 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
844 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
845 		kfree(resv, hmp->m_misc);
846 		--hammer_count_reservations;
847 	}
848 }
849 
850 /*
851  * Prevent a potentially free big-block from being reused until after
852  * the related flushes have completely cycled; otherwise crash recovery
853  * could resurrect a data block that was already reused and overwritten.
854  *
855  * The caller might reset the underlying layer2 entry's append_off to 0, so
856  * our covering append_off must be set to max to prevent any reallocation
857  * until after the flush delays complete and any underlying cached
858  * blocks have been properly invalidated.
859  */
860 static void
861 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
862 			int zone, struct hammer_blockmap_layer2 *layer2)
863 {
864 	hammer_reserve_t resv;
865 
866 	/*
867 	 * Allocate the reservation if necessary.
868 	 *
869 	 * NOTE: a lock is eventually needed around the resv lookup/allocation
870 	 * and the setdelay call; currently refs is not bumped until the call.
871 	 */
872 again:
873 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
874 	if (resv == NULL) {
875 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
876 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
877 		resv->zone = zone;
878 		resv->zone_offset = base_offset;
879 		resv->refs = 0;
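		/* block further allocations while the delay is pending */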
880 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
881 
882 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
883 			resv->flags |= HAMMER_RESF_LAYER2FREE;
884 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
885 			kfree(resv, hmp->m_misc);
886 			goto again;
887 		}
888 		++hammer_count_reservations;
889 	} else {
890 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
891 			resv->flags |= HAMMER_RESF_LAYER2FREE;
892 	}
893 	hammer_reserve_setdelay(hmp, resv);
894 }
895 
896 /*
897  * Enter the reservation on the on-delay list, or move it if it
898  * is already on the list.
899  */
900 static void
901 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
902 {
903 	if (resv->flags & HAMMER_RESF_ONDELAY) {
904 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
905 		resv->flush_group = hmp->flusher.next + 1;
906 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
907 	} else {
908 		++resv->refs;
909 		++hmp->rsv_fromdelay;
910 		resv->flags |= HAMMER_RESF_ONDELAY;
911 		resv->flush_group = hmp->flusher.next + 1;
912 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
913 	}
914 }
915 
916 /*
917  * The reservation has reached its flush point; remove it from the delay list
918  * and finish it off.  hammer_blockmap_reserve_complete() inherits
919  * the ondelay reference.
920  */
921 void
922 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
923 {
924 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
925 	resv->flags &= ~HAMMER_RESF_ONDELAY;
926 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
927 	--hmp->rsv_fromdelay;
928 	hammer_blockmap_reserve_complete(hmp, resv);
929 }
930 
931 /*
932  * Backend function - free (offset, bytes) in a zone.
933  *
934  * XXX error return
935  */
936 void
937 hammer_blockmap_free(hammer_transaction_t trans,
938 		     hammer_off_t zone_offset, int bytes)
939 {
940 	hammer_mount_t hmp;
941 	hammer_volume_t root_volume;
942 	hammer_blockmap_t freemap;
943 	struct hammer_blockmap_layer1 *layer1;
944 	struct hammer_blockmap_layer2 *layer2;
945 	hammer_buffer_t buffer1 = NULL;
946 	hammer_buffer_t buffer2 = NULL;
947 	hammer_off_t layer1_offset;
948 	hammer_off_t layer2_offset;
949 	hammer_off_t base_off;
950 	int error;
951 	int zone;
952 
953 	if (bytes == 0)
954 		return;
955 	hmp = trans->hmp;
956 
957 	/*
958 	 * Alignment
959 	 */
960 	bytes = (bytes + 15) & ~15;
961 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
962 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
963 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
964 
965 	/*
966 	 * Basic zone validation & locking
967 	 */
968 	zone = HAMMER_ZONE_DECODE(zone_offset);
969 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
970 	root_volume = trans->rootvol;
971 	error = 0;
972 
973 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
974 
975 	/*
976 	 * Dive layer 1.
977 	 */
978 	layer1_offset = freemap->phys_offset +
979 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
980 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
981 	if (error)
982 		goto failed;
983 	KKASSERT(layer1->phys_offset &&
984 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
985 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
986 		hammer_lock_ex(&hmp->blkmap_lock);
987 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
988 			panic("CRC FAILED: LAYER1");
989 		hammer_unlock(&hmp->blkmap_lock);
990 	}
991 
992 	/*
993 	 * Dive layer 2, each entry represents a big-block.
994 	 */
995 	layer2_offset = layer1->phys_offset +
996 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
997 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
998 	if (error)
999 		goto failed;
1000 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1001 		hammer_lock_ex(&hmp->blkmap_lock);
1002 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1003 			panic("CRC FAILED: LAYER2");
1004 		hammer_unlock(&hmp->blkmap_lock);
1005 	}
1006 
1007 	hammer_lock_ex(&hmp->blkmap_lock);
1008 
1009 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1010 
1011 	/*
1012 	 * Free space previously allocated via blockmap_alloc().
1013 	 *
1014 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1015 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1016 	 */
1017 	KKASSERT(layer2->zone == zone);
1018 	layer2->bytes_free += bytes;
1019 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1020 
1021 	/*
1022 	 * If a big-block becomes entirely free we must create a covering
1023 	 * reservation to prevent premature reuse.  Note, however, that
1024 	 * the big-block and/or reservation may still have an append_off
1025 	 * that allows further (non-reused) allocations.
1026 	 *
1027 	 * Once the reservation has been made we re-check layer2 and if
1028 	 * the big-block is still entirely free we reset the layer2 entry.
1029 	 * The reservation will prevent premature reuse.
1030 	 *
1031 	 * NOTE: hammer_buffers are only invalidated when the reservation
1032 	 * is completed, if the layer2 entry is still completely free at
1033 	 * that time.  Any allocations from the reservation that may have
1034 	 * occurred in the meantime, or active references on the reservation
1035 	 * from new pending allocations, will prevent the invalidation from
1036 	 * occurring.
1037 	 */
1038 	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1039 		base_off = hammer_xlate_to_zone2(zone_offset &
1040 						~HAMMER_BIGBLOCK_MASK64);
1041 
1042 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1043 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1044 			layer2->zone = 0;
1045 			layer2->append_off = 0;
1046 			hammer_modify_buffer(trans, buffer1,
1047 					     layer1, sizeof(*layer1));
1048 			++layer1->blocks_free;
1049 			layer1->layer1_crc = crc32(layer1,
1050 						   HAMMER_LAYER1_CRCSIZE);
1051 			hammer_modify_buffer_done(buffer1);
1052 			hammer_modify_volume_field(trans,
1053 					trans->rootvol,
1054 					vol0_stat_freebigblocks);
1055 			++root_volume->ondisk->vol0_stat_freebigblocks;
1056 			hmp->copy_stat_freebigblocks =
1057 			   root_volume->ondisk->vol0_stat_freebigblocks;
1058 			hammer_modify_volume_done(trans->rootvol);
1059 		}
1060 	}
1061 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1062 	hammer_modify_buffer_done(buffer2);
1063 	hammer_unlock(&hmp->blkmap_lock);
1064 
1065 failed:
1066 	if (buffer1)
1067 		hammer_rel_buffer(buffer1, 0);
1068 	if (buffer2)
1069 		hammer_rel_buffer(buffer2, 0);
1070 }
1071 
1072 int
1073 hammer_blockmap_dedup(hammer_transaction_t trans,
1074 		     hammer_off_t zone_offset, int bytes)
1075 {
1076 	hammer_mount_t hmp;
1077 	hammer_blockmap_t freemap;
1078 	struct hammer_blockmap_layer1 *layer1;
1079 	struct hammer_blockmap_layer2 *layer2;
1080 	hammer_buffer_t buffer1 = NULL;
1081 	hammer_buffer_t buffer2 = NULL;
1082 	hammer_off_t layer1_offset;
1083 	hammer_off_t layer2_offset;
1084 	int32_t temp;
1085 	int error;
1086 	int zone __debugvar;
1087 
1088 	if (bytes == 0)
1089 		return (0);
1090 	hmp = trans->hmp;
1091 
1092 	/*
1093 	 * Alignment
1094 	 */
1095 	bytes = (bytes + 15) & ~15;
1096 	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
1097 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1098 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
1099 
1100 	/*
1101 	 * Basic zone validation & locking
1102 	 */
1103 	zone = HAMMER_ZONE_DECODE(zone_offset);
1104 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1105 	error = 0;
1106 
1107 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1108 
1109 	/*
1110 	 * Dive layer 1.
1111 	 */
1112 	layer1_offset = freemap->phys_offset +
1113 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1114 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1115 	if (error)
1116 		goto failed;
1117 	KKASSERT(layer1->phys_offset &&
1118 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1119 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1120 		hammer_lock_ex(&hmp->blkmap_lock);
1121 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1122 			panic("CRC FAILED: LAYER1");
1123 		hammer_unlock(&hmp->blkmap_lock);
1124 	}
1125 
1126 	/*
1127 	 * Dive layer 2, each entry represents a big-block.
1128 	 */
1129 	layer2_offset = layer1->phys_offset +
1130 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1131 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1132 	if (error)
1133 		goto failed;
1134 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1135 		hammer_lock_ex(&hmp->blkmap_lock);
1136 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1137 			panic("CRC FAILED: LAYER2");
1138 		hammer_unlock(&hmp->blkmap_lock);
1139 	}
1140 
1141 	hammer_lock_ex(&hmp->blkmap_lock);
1142 
1143 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1144 
1145 	/*
1146 	 * Free space previously allocated via blockmap_alloc().
1147 	 *
1148 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1149 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1150 	 */
1151 	KKASSERT(layer2->zone == zone);
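	/* wrap-around underflow guard, same idea as update_bytes_free() */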
1152 	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
1153 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1154 	if (temp > layer2->bytes_free) {
1155 		error = ERANGE;
1156 		goto underflow;
1157 	}
1158 	layer2->bytes_free -= bytes;
1159 
1160 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1161 
1162 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1163 underflow:
1164 	hammer_modify_buffer_done(buffer2);
1165 	hammer_unlock(&hmp->blkmap_lock);
1166 
1167 failed:
1168 	if (buffer1)
1169 		hammer_rel_buffer(buffer1, 0);
1170 	if (buffer2)
1171 		hammer_rel_buffer(buffer2, 0);
1172 	return (error);
1173 }
1174 
1175 /*
1176  * Backend function - finalize (offset, bytes) in a zone.
1177  *
1178  * Allocate space that was previously reserved by the frontend.
1179  */
1180 int
1181 hammer_blockmap_finalize(hammer_transaction_t trans,
1182 			 hammer_reserve_t resv,
1183 			 hammer_off_t zone_offset, int bytes)
1184 {
1185 	hammer_mount_t hmp;
1186 	hammer_volume_t root_volume;
1187 	hammer_blockmap_t freemap;
1188 	struct hammer_blockmap_layer1 *layer1;
1189 	struct hammer_blockmap_layer2 *layer2;
1190 	hammer_buffer_t buffer1 = NULL;
1191 	hammer_buffer_t buffer2 = NULL;
1192 	hammer_off_t layer1_offset;
1193 	hammer_off_t layer2_offset;
1194 	int error;
1195 	int zone;
1196 	int offset;
1197 
1198 	if (bytes == 0)
1199 		return(0);
1200 	hmp = trans->hmp;
1201 
1202 	/*
1203 	 * Alignment
1204 	 */
1205 	bytes = (bytes + 15) & ~15;
1206 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1207 
1208 	/*
1209 	 * Basic zone validation & locking
1210 	 */
1211 	zone = HAMMER_ZONE_DECODE(zone_offset);
1212 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1213 	root_volume = trans->rootvol;
1214 	error = 0;
1215 
1216 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1217 
1218 	/*
1219 	 * Dive layer 1.
1220 	 */
1221 	layer1_offset = freemap->phys_offset +
1222 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1223 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1224 	if (error)
1225 		goto failed;
1226 	KKASSERT(layer1->phys_offset &&
1227 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1228 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1229 		hammer_lock_ex(&hmp->blkmap_lock);
1230 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1231 			panic("CRC FAILED: LAYER1");
1232 		hammer_unlock(&hmp->blkmap_lock);
1233 	}
1234 
1235 	/*
1236 	 * Dive layer 2, each entry represents a big-block.
1237 	 */
1238 	layer2_offset = layer1->phys_offset +
1239 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1240 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1241 	if (error)
1242 		goto failed;
1243 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1244 		hammer_lock_ex(&hmp->blkmap_lock);
1245 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1246 			panic("CRC FAILED: LAYER2");
1247 		hammer_unlock(&hmp->blkmap_lock);
1248 	}
1249 
1250 	hammer_lock_ex(&hmp->blkmap_lock);
1251 
1252 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1253 
1254 	/*
1255 	 * Finalize some or all of the space covered by a current
1256 	 * reservation.  An allocation in the same layer may have
1257 	 * already assigned ownership.
1258 	 */
1259 	if (layer2->zone == 0) {
1260 		hammer_modify_buffer(trans, buffer1,
1261 				     layer1, sizeof(*layer1));
1262 		--layer1->blocks_free;
1263 		layer1->layer1_crc = crc32(layer1,
1264 					   HAMMER_LAYER1_CRCSIZE);
1265 		hammer_modify_buffer_done(buffer1);
1266 		layer2->zone = zone;
1267 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1268 		KKASSERT(layer2->append_off == 0);
1269 		hammer_modify_volume_field(trans,
1270 				trans->rootvol,
1271 				vol0_stat_freebigblocks);
1272 		--root_volume->ondisk->vol0_stat_freebigblocks;
1273 		hmp->copy_stat_freebigblocks =
1274 		   root_volume->ondisk->vol0_stat_freebigblocks;
1275 		hammer_modify_volume_done(trans->rootvol);
1276 	}
1277 	if (layer2->zone != zone)
1278 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1279 	KKASSERT(layer2->zone == zone);
1280 	KKASSERT(bytes != 0);
1281 	layer2->bytes_free -= bytes;
1282 
1283 	if (resv) {
1284 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1285 	}
1286 
1287 	/*
1288 	 * Finalizations can occur out of order, or combined with allocations.
1289 	 * append_off must be set to the highest allocated offset.
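	 * offset below is the end of this chunk within its big-block.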
1290 	 */
1291 	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1292 	if (layer2->append_off < offset)
1293 		layer2->append_off = offset;
1294 
1295 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1296 	hammer_modify_buffer_done(buffer2);
1297 	hammer_unlock(&hmp->blkmap_lock);
1298 
1299 failed:
1300 	if (buffer1)
1301 		hammer_rel_buffer(buffer1, 0);
1302 	if (buffer2)
1303 		hammer_rel_buffer(buffer2, 0);
1304 	return(error);
1305 }
1306 
1307 /*
1308  * Return the approximate number of free bytes in the big-block
1309  * containing the specified blockmap offset.
1310  *
1311  * WARNING: A negative number can be returned if data de-dup exists,
1312  *	    and the result will also not represent the actual number
1313  *	    of free bytes in this case.
1314  *
1315  *	    This code is used only by the reblocker.
1316  */
1317 int
1318 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1319 			int *curp, int *errorp)
1320 {
1321 	hammer_volume_t root_volume;
1322 	hammer_blockmap_t blockmap;
1323 	hammer_blockmap_t freemap;
1324 	struct hammer_blockmap_layer1 *layer1;
1325 	struct hammer_blockmap_layer2 *layer2;
1326 	hammer_buffer_t buffer = NULL;
1327 	hammer_off_t layer1_offset;
1328 	hammer_off_t layer2_offset;
1329 	int32_t bytes;
1330 	int zone;
1331 
1332 	zone = HAMMER_ZONE_DECODE(zone_offset);
1333 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1334 	root_volume = hammer_get_root_volume(hmp, errorp);
1335 	if (*errorp) {
1336 		*curp = 0;
1337 		return(0);
1338 	}
1339 	blockmap = &hmp->blockmap[zone];
1340 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1341 
1342 	/*
1343 	 * Dive layer 1.
1344 	 */
1345 	layer1_offset = freemap->phys_offset +
1346 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1347 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1348 	if (*errorp) {
1349 		*curp = 0;
1350 		bytes = 0;
1351 		goto failed;
1352 	}
1353 	KKASSERT(layer1->phys_offset);
1354 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1355 		hammer_lock_ex(&hmp->blkmap_lock);
1356 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1357 			panic("CRC FAILED: LAYER1");
1358 		hammer_unlock(&hmp->blkmap_lock);
1359 	}
1360 
1361 	/*
1362 	 * Dive layer 2, each entry represents a big-block.
1363 	 *
1364 	 * (reuse buffer, layer1 pointer becomes invalid)
1365 	 */
1366 	layer2_offset = layer1->phys_offset +
1367 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1368 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1369 	if (*errorp) {
1370 		*curp = 0;
1371 		bytes = 0;
1372 		goto failed;
1373 	}
1374 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1375 		hammer_lock_ex(&hmp->blkmap_lock);
1376 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1377 			panic("CRC FAILED: LAYER2");
1378 		hammer_unlock(&hmp->blkmap_lock);
1379 	}
1380 	KKASSERT(layer2->zone == zone);
1381 
1382 	bytes = layer2->bytes_free;
1383 
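	/*
	 * *curp is set when the zone's allocation iterator currently
	 * points into this big-block.
	 */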
1384 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1385 		*curp = 0;
1386 	else
1387 		*curp = 1;
1388 failed:
1389 	if (buffer)
1390 		hammer_rel_buffer(buffer, 0);
1391 	hammer_rel_volume(root_volume, 0);
1392 	if (hammer_debug_general & 0x0800) {
1393 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1394 			(long long)zone_offset, bytes);
1395 	}
1396 	return(bytes);
1397 }
1398 
1399 
1400 /*
1401  * Lookup a blockmap offset and verify blockmap layers.
1402  */
1403 hammer_off_t
1404 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1405 			int *errorp)
1406 {
1407 	hammer_volume_t root_volume;
1408 	hammer_blockmap_t freemap;
1409 	struct hammer_blockmap_layer1 *layer1;
1410 	struct hammer_blockmap_layer2 *layer2;
1411 	hammer_buffer_t buffer = NULL;
1412 	hammer_off_t layer1_offset;
1413 	hammer_off_t layer2_offset;
1414 	hammer_off_t result_offset;
1415 	hammer_off_t base_off;
1416 	hammer_reserve_t resv __debugvar;
1417 	int zone;
1418 
1419 	/*
1420 	 * Calculate the zone-2 offset.
1421 	 */
1422 	zone = HAMMER_ZONE_DECODE(zone_offset);
1423 	result_offset = hammer_xlate_to_zone2(zone_offset);
1424 
1425 	/*
1426 	 * Validate the allocation zone
1427 	 */
1428 	root_volume = hammer_get_root_volume(hmp, errorp);
1429 	if (*errorp)
1430 		return(0);
1431 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1432 	KKASSERT(freemap->phys_offset != 0);
1433 
1434 	/*
1435 	 * Dive layer 1.
1436 	 */
1437 	layer1_offset = freemap->phys_offset +
1438 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1439 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1440 	if (*errorp)
1441 		goto failed;
1442 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1443 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1444 		hammer_lock_ex(&hmp->blkmap_lock);
1445 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1446 			panic("CRC FAILED: LAYER1");
1447 		hammer_unlock(&hmp->blkmap_lock);
1448 	}
1449 
1450 	/*
1451 	 * Dive layer 2, each entry represents a big-block.
1452 	 */
1453 	layer2_offset = layer1->phys_offset +
1454 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1455 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1456 
1457 	if (*errorp)
1458 		goto failed;
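	/*
	 * A zero zone is only legal while a frontend reservation still
	 * covers the big-block.
	 */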
1459 	if (layer2->zone == 0) {
1460 		base_off = hammer_xlate_to_zone2(zone_offset &
1461 						~HAMMER_BIGBLOCK_MASK64);
1462 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1463 				 base_off);
1464 		KKASSERT(resv && resv->zone == zone);
1465 
1466 	} else if (layer2->zone != zone) {
1467 		panic("hammer_blockmap_lookup_verify: bad zone %d/%d",
1468 			layer2->zone, zone);
1469 	}
1470 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1471 		hammer_lock_ex(&hmp->blkmap_lock);
1472 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1473 			panic("CRC FAILED: LAYER2");
1474 		hammer_unlock(&hmp->blkmap_lock);
1475 	}
1476 
1477 failed:
1478 	if (buffer)
1479 		hammer_rel_buffer(buffer, 0);
1480 	hammer_rel_volume(root_volume, 0);
1481 	if (hammer_debug_general & 0x0800) {
1482 		kprintf("hammer_blockmap_lookup_verify: %016llx -> %016llx\n",
1483 			(long long)zone_offset, (long long)result_offset);
1484 	}
1485 	return(result_offset);
1486 }
1487 
1488 
1489 /*
1490  * Check space availability
1491  *
1492  * MPSAFE - does not require fs_token
1493  */
1494 int
1495 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1496 {
1497 	const int in_size = sizeof(struct hammer_inode_data) +
1498 			    sizeof(union hammer_btree_elm);
1499 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1500 	int64_t usedbytes;
1501 
1502 	usedbytes = hmp->rsv_inodes * in_size +
1503 		    hmp->rsv_recs * rec_size +
1504 		    hmp->rsv_databytes +
1505 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1506 		    ((int64_t)hammer_limit_dirtybufspace) +
1507 		    (slop << HAMMER_BIGBLOCK_BITS);
1508 
1509 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1510 	if (resp)
1511 		*resp = usedbytes;
1512 
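	/*
	 * Convert the worst-case reservation to big-blocks and compare
	 * it against the cached free big-block count.
	 */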
1513 	if (hmp->copy_stat_freebigblocks >=
1514 	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1515 		return(0);
1516 	}
1517 	return (ENOSPC);
1518 }
1519 
1520