xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision 279dd846)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
38 #include "hammer.h"
39 
40 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
41 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
42 				    hammer_off_t base_offset, int zone,
43 				    struct hammer_blockmap_layer2 *layer2);
44 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
45 static int update_bytes_free(hammer_reserve_t resv, int bytes);
46 static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
47 static void hammer_skip_volume(hammer_off_t *offsetp);
48 
49 /*
50  * Reserved big-blocks red-black tree support
51  */
52 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
53 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
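/*
 * The tree is keyed by the zone-2 (raw buffer) base offset of the
 * big-block covered by each reservation.
 */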
54 
55 static int
56 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57 {
58 	if (res1->zone_offset < res2->zone_offset)
59 		return(-1);
60 	if (res1->zone_offset > res2->zone_offset)
61 		return(1);
62 	return(0);
63 }
64 
65 /*
66  * Allocate bytes from a zone
67  */
68 hammer_off_t
69 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
70 		      hammer_off_t hint, int *errorp)
71 {
72 	hammer_mount_t hmp;
73 	hammer_volume_t root_volume;
74 	hammer_blockmap_t blockmap;
75 	hammer_blockmap_t freemap;
76 	hammer_reserve_t resv;
77 	struct hammer_blockmap_layer1 *layer1;
78 	struct hammer_blockmap_layer2 *layer2;
79 	hammer_buffer_t buffer1 = NULL;
80 	hammer_buffer_t buffer2 = NULL;
81 	hammer_buffer_t buffer3 = NULL;
82 	hammer_off_t tmp_offset;
83 	hammer_off_t next_offset;
84 	hammer_off_t result_offset;
85 	hammer_off_t layer1_offset;
86 	hammer_off_t layer2_offset;
87 	hammer_off_t base_off;
88 	int loops = 0;
89 	int offset;		/* offset within big-block */
90 	int use_hint;
91 
92 	hmp = trans->hmp;
93 
94 	/*
95 	 * Deal with alignment and buffer-boundary issues.
96 	 *
97 	 * Be careful, certain primary alignments are used below to allocate
98 	 * new blockmap blocks.
99 	 */
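	/* round bytes up to a 16-byte boundary, e.g. 1 -> 16, 17 -> 32 */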
100 	bytes = (bytes + 15) & ~15;
101 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
102 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
103 
104 	/*
105 	 * Setup
106 	 */
107 	root_volume = trans->rootvol;
108 	*errorp = 0;
109 	blockmap = &hmp->blockmap[zone];
110 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
111 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
112 
113 	/*
114 	 * Use the hint if we have one.
115 	 */
116 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
117 		next_offset = (hint + 15) & ~(hammer_off_t)15;
118 		use_hint = 1;
119 	} else {
120 		next_offset = blockmap->next_offset;
121 		use_hint = 0;
122 	}
123 again:
124 
125 	/*
126 	 * use_hint is turned off if we leave the hinted big-block.
127 	 */
128 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
129 		next_offset = blockmap->next_offset;
130 		use_hint = 0;
131 	}
132 
133 	/*
134 	 * Check for wrap
135 	 */
136 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
137 		if (++loops == 2) {
138 			result_offset = 0;
139 			*errorp = ENOSPC;
140 			goto failed;
141 		}
142 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
143 	}
144 
145 	/*
146 	 * The allocation request may not cross a buffer boundary.  Special
147 	 * large allocations must not cross a big-block boundary.
148 	 */
149 	tmp_offset = next_offset + bytes - 1;
150 	if (bytes <= HAMMER_BUFSIZE) {
151 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
152 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
153 			goto again;
154 		}
155 	} else {
156 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
157 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
158 			goto again;
159 		}
160 	}
161 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
162 
163 	/*
164 	 * Dive layer 1.
165 	 */
166 	layer1_offset = freemap->phys_offset +
167 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
168 
169 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
170 	if (*errorp) {
171 		result_offset = 0;
172 		goto failed;
173 	}
174 
175 	/*
176 	 * Check CRC.
177 	 */
178 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
179 		hammer_lock_ex(&hmp->blkmap_lock);
180 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
181 			panic("CRC FAILED: LAYER1");
182 		hammer_unlock(&hmp->blkmap_lock);
183 	}
184 
185 	/*
186 	 * If we are at a big-block boundary and layer1 indicates no
187 	 * free big-blocks, then we cannot allocate a new big-block in
188 	 * layer2, skip to the next layer1 entry.
189 	 */
190 	if (offset == 0 && layer1->blocks_free == 0) {
191 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
192 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
193 		if (hammer_check_volume(hmp, &next_offset)) {
194 			result_offset = 0;
195 			goto failed;
196 		}
197 		goto again;
198 	}
199 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
200 
201 	/*
202 	 * Skip the whole volume if it is pointing to a layer2 big-block
203 	 * on a volume that we are currently trying to remove from the
204 	 * file-system. This is used by the volume-del code together with
205 	 * the reblocker to free up a volume.
206 	 */
207 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
208 	    hmp->volume_to_remove) {
209 		hammer_skip_volume(&next_offset);
210 		goto again;
211 	}
212 
213 	/*
214 	 * Dive layer 2, each entry represents a big-block.
215 	 */
216 	layer2_offset = layer1->phys_offset +
217 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
218 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
219 	if (*errorp) {
220 		result_offset = 0;
221 		goto failed;
222 	}
223 
224 	/*
225 	 * Check CRC.  This can race another thread holding the lock
226 	 * and in the middle of modifying layer2.
227 	 */
228 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
229 		hammer_lock_ex(&hmp->blkmap_lock);
230 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
231 			panic("CRC FAILED: LAYER2");
232 		hammer_unlock(&hmp->blkmap_lock);
233 	}
234 
235 	/*
236 	 * Skip the layer if the zone is owned by someone other than us.
237 	 */
238 	if (layer2->zone && layer2->zone != zone) {
239 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
240 		goto again;
241 	}
242 	if (offset < layer2->append_off) {
243 		next_offset += layer2->append_off - offset;
244 		goto again;
245 	}
246 
247 #if 0
248 	/*
249 	 * If operating in the current non-hint blockmap block, do not
250 	 * allow it to get over-full.  Also drop any active hinting so
251 	 * blockmap->next_offset is updated at the end.
252 	 *
253 	 * We do this for B-Tree and meta-data allocations to provide
254 	 * localization for updates.
255 	 */
256 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
257 	     zone == HAMMER_ZONE_META_INDEX) &&
258 	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
259 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
260 		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
261 			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
262 			use_hint = 0;
263 			goto again;
264 		}
265 	}
266 #endif
267 
268 	/*
269 	 * We need the lock from this point on.  We have to re-check zone
270 	 * ownership after acquiring the lock and also check for reservations.
271 	 */
272 	hammer_lock_ex(&hmp->blkmap_lock);
273 
274 	if (layer2->zone && layer2->zone != zone) {
275 		hammer_unlock(&hmp->blkmap_lock);
276 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
277 		goto again;
278 	}
279 	if (offset < layer2->append_off) {
280 		hammer_unlock(&hmp->blkmap_lock);
281 		next_offset += layer2->append_off - offset;
282 		goto again;
283 	}
284 
285 	/*
286 	 * The big-block might be reserved by another zone.  If it is reserved
287 	 * by our zone we may have to move next_offset past the append_off.
288 	 */
289 	base_off = hammer_xlate_to_zone2(next_offset &
290 					~HAMMER_BIGBLOCK_MASK64);
291 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
292 	if (resv) {
293 		if (resv->zone != zone) {
294 			hammer_unlock(&hmp->blkmap_lock);
295 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
296 				      ~HAMMER_BIGBLOCK_MASK64;
297 			goto again;
298 		}
299 		if (offset < resv->append_off) {
300 			hammer_unlock(&hmp->blkmap_lock);
301 			next_offset += resv->append_off - offset;
302 			goto again;
303 		}
304 		++resv->refs;
305 	}
306 
307 	/*
308 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
309 	 * of the layer for real.  At this point we've validated any
310 	 * reservation that might exist and can just ignore resv.
311 	 */
312 	if (layer2->zone == 0) {
313 		/*
314 		 * Assign the big-block to our zone
315 		 */
316 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
317 		--layer1->blocks_free;
318 		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
319 		hammer_modify_buffer_done(buffer1);
320 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
321 		layer2->zone = zone;
322 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
323 		KKASSERT(layer2->append_off == 0);
324 		hammer_modify_volume_field(trans, trans->rootvol,
325 					   vol0_stat_freebigblocks);
326 		--root_volume->ondisk->vol0_stat_freebigblocks;
327 		hmp->copy_stat_freebigblocks =
328 			root_volume->ondisk->vol0_stat_freebigblocks;
329 		hammer_modify_volume_done(trans->rootvol);
330 	} else {
331 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
332 	}
333 	KKASSERT(layer2->zone == zone);
334 
335 	/*
336 	 * NOTE: bytes_free can legally go negative due to de-dup.
337 	 */
338 	layer2->bytes_free -= bytes;
339 	KKASSERT(layer2->append_off <= offset);
340 	layer2->append_off = offset + bytes;
341 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
342 	hammer_modify_buffer_done(buffer2);
343 
344 	/*
345 	 * We hold the blockmap lock and should be the only ones
346 	 * capable of modifying resv->append_off.  Track the allocation
347 	 * as appropriate.
348 	 */
349 	KKASSERT(bytes != 0);
350 	if (resv) {
351 		KKASSERT(resv->append_off <= offset);
352 		resv->append_off = offset + bytes;
353 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
354 		hammer_blockmap_reserve_complete(hmp, resv);
355 	}
356 
357 	/*
358 	 * If we are allocating from the base of a new buffer we can avoid
359 	 * a disk read by calling hammer_bnew_ext().
360 	 */
361 	if ((next_offset & HAMMER_BUFMASK) == 0) {
362 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
363 				errorp, &buffer3);
364 		if (*errorp) {
365 			result_offset = 0;
366 			goto failed;
367 		}
368 	}
369 	result_offset = next_offset;
370 
371 	/*
372 	 * If we weren't supplied with a hint or could not use the hint
373 	 * then we wound up using blockmap->next_offset as the hint and
374 	 * need to save it.
375 	 */
376 	if (use_hint == 0) {
377 		hammer_modify_volume_noundo(NULL, root_volume);
378 		blockmap->next_offset = next_offset + bytes;
379 		hammer_modify_volume_done(root_volume);
380 	}
381 	hammer_unlock(&hmp->blkmap_lock);
382 failed:
383 
384 	/*
385 	 * Cleanup
386 	 */
387 	if (buffer1)
388 		hammer_rel_buffer(buffer1, 0);
389 	if (buffer2)
390 		hammer_rel_buffer(buffer2, 0);
391 	if (buffer3)
392 		hammer_rel_buffer(buffer3, 0);
393 
394 	return(result_offset);
395 }
396 
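/*
 * Illustrative caller sketch (not from this file): a typical allocation
 * of record data.  A zero return indicates failure and *errorp holds the
 * reason (e.g. ENOSPC).  The zone index, length and hint shown here are
 * hypothetical.
 *
 *	int error;
 *	hammer_off_t data_off;
 *
 *	data_off = hammer_blockmap_alloc(trans, HAMMER_ZONE_LARGE_DATA_INDEX,
 *					 data_len, hint_off, &error);
 *	if (data_off == 0)
 *		return (error);
 */
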
397 /*
398  * Frontend function - Reserve bytes in a zone.
399  *
400  * This code reserves bytes out of a blockmap without committing to any
401  * meta-data modifications, allowing the front-end to directly issue disk
402  * write I/O for big-blocks of data.
403  *
404  * The backend later finalizes the reservation with hammer_blockmap_finalize()
405  * upon committing the related record.
406  */
407 hammer_reserve_t
408 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
409 			hammer_off_t *zone_offp, int *errorp)
410 {
411 	hammer_volume_t root_volume;
412 	hammer_blockmap_t blockmap;
413 	hammer_blockmap_t freemap;
414 	struct hammer_blockmap_layer1 *layer1;
415 	struct hammer_blockmap_layer2 *layer2;
416 	hammer_buffer_t buffer1 = NULL;
417 	hammer_buffer_t buffer2 = NULL;
418 	hammer_buffer_t buffer3 = NULL;
419 	hammer_off_t tmp_offset;
420 	hammer_off_t next_offset;
421 	hammer_off_t layer1_offset;
422 	hammer_off_t layer2_offset;
423 	hammer_off_t base_off;
424 	hammer_reserve_t resv;
425 	hammer_reserve_t resx;
426 	int loops = 0;
427 	int offset;
428 
429 	/*
430 	 * Setup
431 	 */
432 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
433 	root_volume = hammer_get_root_volume(hmp, errorp);
434 	if (*errorp)
435 		return(NULL);
436 	blockmap = &hmp->blockmap[zone];
437 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
438 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
439 
440 	/*
441 	 * Deal with alignment and buffer-boundary issues.
442 	 *
443 	 * Be careful, certain primary alignments are used below to allocate
444 	 * new blockmap blocks.
445 	 */
446 	bytes = (bytes + 15) & ~15;
447 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
448 
449 	next_offset = blockmap->next_offset;
450 again:
451 	resv = NULL;
452 	/*
453 	 * Check for wrap
454 	 */
455 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
456 		if (++loops == 2) {
457 			*errorp = ENOSPC;
458 			goto failed;
459 		}
460 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
461 	}
462 
463 	/*
464 	 * The allocation request may not cross a buffer boundary.  Special
465 	 * large allocations must not cross a big-block boundary.
466 	 */
467 	tmp_offset = next_offset + bytes - 1;
468 	if (bytes <= HAMMER_BUFSIZE) {
469 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
470 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
471 			goto again;
472 		}
473 	} else {
474 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
475 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
476 			goto again;
477 		}
478 	}
479 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
480 
481 	/*
482 	 * Dive layer 1.
483 	 */
484 	layer1_offset = freemap->phys_offset +
485 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
486 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
487 	if (*errorp)
488 		goto failed;
489 
490 	/*
491 	 * Check CRC.
492 	 */
493 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
494 		hammer_lock_ex(&hmp->blkmap_lock);
495 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
496 			panic("CRC FAILED: LAYER1");
497 		hammer_unlock(&hmp->blkmap_lock);
498 	}
499 
500 	/*
501 	 * If we are at a big-block boundary and layer1 indicates no
502 	 * free big-blocks, then we cannot allocate a new big-block in
503 	 * layer2, skip to the next layer1 entry.
504 	 */
505 	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
506 	    layer1->blocks_free == 0) {
507 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
508 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
509 		if (hammer_check_volume(hmp, &next_offset))
510 			goto failed;
511 		goto again;
512 	}
513 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
514 
515 	/*
516 	 * Dive layer 2, each entry represents a big-block.
517 	 */
518 	layer2_offset = layer1->phys_offset +
519 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
520 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
521 	if (*errorp)
522 		goto failed;
523 
524 	/*
525 	 * Check CRC if not allocating into uninitialized space (which we
526 	 * aren't when reserving space).
527 	 */
528 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
529 		hammer_lock_ex(&hmp->blkmap_lock);
530 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
531 			panic("CRC FAILED: LAYER2");
532 		hammer_unlock(&hmp->blkmap_lock);
533 	}
534 
535 	/*
536 	 * Skip the layer if the zone is owned by someone other than us.
537 	 */
538 	if (layer2->zone && layer2->zone != zone) {
539 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
540 		goto again;
541 	}
542 	if (offset < layer2->append_off) {
543 		next_offset += layer2->append_off - offset;
544 		goto again;
545 	}
546 
547 	/*
548 	 * We need the lock from this point on.  We have to re-check zone
549 	 * ownership after acquiring the lock and also check for reservations.
550 	 */
551 	hammer_lock_ex(&hmp->blkmap_lock);
552 
553 	if (layer2->zone && layer2->zone != zone) {
554 		hammer_unlock(&hmp->blkmap_lock);
555 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
556 		goto again;
557 	}
558 	if (offset < layer2->append_off) {
559 		hammer_unlock(&hmp->blkmap_lock);
560 		next_offset += layer2->append_off - offset;
561 		goto again;
562 	}
563 
564 	/*
565 	 * The big-block might be reserved by another zone.  If it is reserved
566 	 * by our zone we may have to move next_offset past the append_off.
567 	 */
568 	base_off = hammer_xlate_to_zone2(next_offset &
569 					~HAMMER_BIGBLOCK_MASK64);
570 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
571 	if (resv) {
572 		if (resv->zone != zone) {
573 			hammer_unlock(&hmp->blkmap_lock);
574 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
575 				      ~HAMMER_BIGBLOCK_MASK64;
576 			goto again;
577 		}
578 		if (offset < resv->append_off) {
579 			hammer_unlock(&hmp->blkmap_lock);
580 			next_offset += resv->append_off - offset;
581 			goto again;
582 		}
583 		++resv->refs;
584 		resx = NULL;
585 	} else {
586 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
587 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
588 		resx->refs = 1;
589 		resx->zone = zone;
590 		resx->zone_offset = base_off;
591 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
592 			resx->flags |= HAMMER_RESF_LAYER2FREE;
593 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
594 		KKASSERT(resv == NULL);
595 		resv = resx;
596 		++hammer_count_reservations;
597 	}
598 	resv->append_off = offset + bytes;
599 
600 	/*
601 	 * If we are not reserving a whole buffer but are at the start of
602 	 * a new block, call hammer_bnew() to avoid a disk read.
603 	 *
604 	 * If we are reserving a whole buffer (or more), the caller will
605 	 * probably use a direct read, so do nothing.
606 	 *
607 	 * If we do not have a whole lot of system memory we really can't
608 	 * afford to block while holding the blkmap_lock!
609 	 */
610 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
611 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
612 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
613 			if (*errorp)
614 				goto failed;
615 		}
616 	}
617 
618 	/*
619 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
620 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
621 	 * be big-block aligned.
622 	 */
623 	blockmap->next_offset = next_offset + bytes;
624 	hammer_unlock(&hmp->blkmap_lock);
625 
626 failed:
627 	if (buffer1)
628 		hammer_rel_buffer(buffer1, 0);
629 	if (buffer2)
630 		hammer_rel_buffer(buffer2, 0);
631 	if (buffer3)
632 		hammer_rel_buffer(buffer3, 0);
633 	hammer_rel_volume(root_volume, 0);
634 	*zone_offp = next_offset;
635 
636 	return(resv);
637 }
638 
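/*
 * Rough shape of the frontend/backend interaction described above
 * (illustrative only; the real callers live elsewhere in HAMMER):
 *
 *	resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *	... frontend issues direct write I/O against zone_off ...
 *	hammer_blockmap_finalize(trans, resv, zone_off, bytes);
 *	hammer_blockmap_reserve_complete(hmp, resv);
 */
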
639 /*
640  * Frontend function - Dedup bytes in a zone.
641  *
642  * Dedup reservations work exactly the same as normal write reservations
643  * except that only the bytes_free field is adjusted and the append offset
644  * is left untouched.  The finalization mechanism is also the same as for
645  * normal write ones - the backend finalizes the reservation with
646  * hammer_blockmap_finalize().
647  */
648 hammer_reserve_t
649 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
650 			      hammer_off_t zone_offset, int *errorp)
651 {
652 	hammer_volume_t root_volume;
653 	hammer_blockmap_t freemap;
654 	struct hammer_blockmap_layer1 *layer1;
655 	struct hammer_blockmap_layer2 *layer2;
656 	hammer_buffer_t buffer1 = NULL;
657 	hammer_buffer_t buffer2 = NULL;
658 	hammer_off_t layer1_offset;
659 	hammer_off_t layer2_offset;
660 	hammer_off_t base_off;
661 	hammer_reserve_t resv = NULL;
662 	hammer_reserve_t resx = NULL;
663 
664 	/*
665 	 * Setup
666 	 */
667 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
668 	root_volume = hammer_get_root_volume(hmp, errorp);
669 	if (*errorp)
670 		return (NULL);
671 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
672 	KKASSERT(freemap->phys_offset != 0);
673 
674 	bytes = (bytes + 15) & ~15;
675 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
676 
677 	/*
678 	 * Dive layer 1.
679 	 */
680 	layer1_offset = freemap->phys_offset +
681 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
682 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
683 	if (*errorp)
684 		goto failed;
685 
686 	/*
687 	 * Check CRC.
688 	 */
689 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
690 		hammer_lock_ex(&hmp->blkmap_lock);
691 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
692 			panic("CRC FAILED: LAYER1");
693 		hammer_unlock(&hmp->blkmap_lock);
694 	}
695 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
696 
697 	/*
698 	 * Dive layer 2, each entry represents a big-block.
699 	 */
700 	layer2_offset = layer1->phys_offset +
701 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
702 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
703 	if (*errorp)
704 		goto failed;
705 
706 	/*
707 	 * Check CRC.
708 	 */
709 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
710 		hammer_lock_ex(&hmp->blkmap_lock);
711 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
712 			panic("CRC FAILED: LAYER2");
713 		hammer_unlock(&hmp->blkmap_lock);
714 	}
715 
716 	/*
717 	 * Fail if the zone is owned by someone other than us.
718 	 */
719 	if (layer2->zone && layer2->zone != zone)
720 		goto failed;
721 
722 	/*
723 	 * We need the lock from this point on.  We have to re-check zone
724 	 * ownership after acquiring the lock and also check for reservations.
725 	 */
726 	hammer_lock_ex(&hmp->blkmap_lock);
727 
728 	if (layer2->zone && layer2->zone != zone) {
729 		hammer_unlock(&hmp->blkmap_lock);
730 		goto failed;
731 	}
732 
733 	base_off = hammer_xlate_to_zone2(zone_offset &
734 					~HAMMER_BIGBLOCK_MASK64);
735 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
736 	if (resv) {
737 		if (resv->zone != zone) {
738 			hammer_unlock(&hmp->blkmap_lock);
739 			resv = NULL;
740 			goto failed;
741 		}
742 		/*
743 		 * Due to possible big-block underflow we can't simply
744 		 * subtract bytes from bytes_free.
745 		 */
746 		if (update_bytes_free(resv, bytes) == 0) {
747 			hammer_unlock(&hmp->blkmap_lock);
748 			resv = NULL;
749 			goto failed;
750 		}
751 		++resv->refs;
752 		resx = NULL;
753 	} else {
754 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
755 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
756 		resx->refs = 1;
757 		resx->zone = zone;
758 		resx->bytes_free = layer2->bytes_free;
759 		/*
760 		 * Due to possible big-block underflow we can't simply
761 		 * subtract bytes from bytes_free.
762 		 */
763 		if (update_bytes_free(resx, bytes) == 0) {
764 			hammer_unlock(&hmp->blkmap_lock);
765 			kfree(resx, hmp->m_misc);
766 			goto failed;
767 		}
768 		resx->zone_offset = base_off;
769 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
770 		KKASSERT(resv == NULL);
771 		resv = resx;
772 		++hammer_count_reservations;
773 	}
774 
775 	hammer_unlock(&hmp->blkmap_lock);
776 
777 failed:
778 	if (buffer1)
779 		hammer_rel_buffer(buffer1, 0);
780 	if (buffer2)
781 		hammer_rel_buffer(buffer2, 0);
782 	hammer_rel_volume(root_volume, 0);
783 
784 	return(resv);
785 }
786 
787 static int
788 update_bytes_free(hammer_reserve_t resv, int bytes)
789 {
790 	int32_t temp;
791 
792 	/*
793 	 * Big-block underflow check
794 	 */
795 	temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
796 	cpu_ccfence(); /* XXX do we really need it ? */
797 	if (temp > resv->bytes_free) {
798 		kprintf("BIGBLOCK UNDERFLOW\n");
799 		return (0);
800 	}
801 
802 	resv->bytes_free -= bytes;
803 	return (1);
804 }
805 
806 /*
807  * Dereference a reservation structure.  Upon the final release the
808  * underlying big-block is checked and if it is entirely free we delete
809  * any related HAMMER buffers to avoid potential conflicts with future
810  * reuse of the big-block.
811  */
812 void
813 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
814 {
815 	hammer_off_t base_offset;
816 	int error;
817 
818 	KKASSERT(resv->refs > 0);
819 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
820 		 HAMMER_ZONE_RAW_BUFFER);
821 
822 	/*
823 	 * Setting append_off to the max prevents any new allocations
824 	 * from occurring while we are trying to dispose of the reservation,
825 	 * allowing us to safely delete any related HAMMER buffers.
826 	 *
827 	 * If we are unable to clean out all related HAMMER buffers we
828 	 * requeue the delay.
829 	 */
830 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
831 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
832 		base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
833 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
834 			hammer_dedup_cache_inval(hmp, base_offset);
835 		error = hammer_del_buffers(hmp, base_offset,
836 					   resv->zone_offset,
837 					   HAMMER_BIGBLOCK_SIZE,
838 					   1);
839 		if (hammer_debug_general & 0x20000) {
840 			kprintf("HAMMER: delbgblk %016jx error %d\n",
841 				(intmax_t)base_offset, error);
842 		}
843 		if (error)
844 			hammer_reserve_setdelay(hmp, resv);
845 	}
846 	if (--resv->refs == 0) {
847 		if (hammer_debug_general & 0x20000) {
848 			kprintf("HAMMER: delresvr %016jx zone %02x\n",
849 				(intmax_t)resv->zone_offset, resv->zone);
850 		}
851 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
852 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
853 		kfree(resv, hmp->m_misc);
854 		--hammer_count_reservations;
855 	}
856 }
857 
858 /*
859  * Prevent a potentially free big-block from being reused until after
860  * the related flushes have completely cycled; otherwise crash recovery
861  * could resurrect a data block that was already reused and overwritten.
862  *
863  * The caller might reset the underlying layer2 entry's append_off to 0, so
864  * our covering append_off must be set to max to prevent any reallocation
865  * until after the flush delays complete, not to mention proper invalidation
866  * of any underlying cached blocks.
867  */
868 static void
869 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
870 			int zone, struct hammer_blockmap_layer2 *layer2)
871 {
872 	hammer_reserve_t resv;
873 
874 	/*
875 	 * Allocate the reservation if necessary.
876 	 *
877 	 * NOTE: need lock in future around resv lookup/allocation and
878 	 * the setdelay call, currently refs is not bumped until the call.
879 	 */
880 again:
881 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
882 	if (resv == NULL) {
883 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
884 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
885 		resv->zone = zone;
886 		resv->zone_offset = base_offset;
887 		resv->refs = 0;
888 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
889 
890 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
891 			resv->flags |= HAMMER_RESF_LAYER2FREE;
892 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
893 			kfree(resv, hmp->m_misc);
894 			goto again;
895 		}
896 		++hammer_count_reservations;
897 	} else {
898 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
899 			resv->flags |= HAMMER_RESF_LAYER2FREE;
900 	}
901 	hammer_reserve_setdelay(hmp, resv);
902 }
903 
904 /*
905  * Enter the reservation on the on-delay list, or move it if it
906  * is already on the list.
907  */
908 static void
909 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
910 {
911 	if (resv->flags & HAMMER_RESF_ONDELAY) {
912 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
913 		resv->flush_group = hmp->flusher.next + 1;
914 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
915 	} else {
916 		++resv->refs;
917 		++hmp->rsv_fromdelay;
918 		resv->flags |= HAMMER_RESF_ONDELAY;
919 		resv->flush_group = hmp->flusher.next + 1;
920 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
921 	}
922 }
923 
924 /*
925  * Reserve has reached its flush point, remove it from the delay list
926  * and finish it off.  hammer_blockmap_reserve_complete() inherits
927  * the ondelay reference.
928  */
929 void
930 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
931 {
932 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
933 	resv->flags &= ~HAMMER_RESF_ONDELAY;
934 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
935 	--hmp->rsv_fromdelay;
936 	hammer_blockmap_reserve_complete(hmp, resv);
937 }
938 
939 /*
940  * Backend function - free (offset, bytes) in a zone.
941  *
942  * XXX error return
943  */
944 void
945 hammer_blockmap_free(hammer_transaction_t trans,
946 		     hammer_off_t zone_offset, int bytes)
947 {
948 	hammer_mount_t hmp;
949 	hammer_volume_t root_volume;
950 	hammer_blockmap_t freemap;
951 	struct hammer_blockmap_layer1 *layer1;
952 	struct hammer_blockmap_layer2 *layer2;
953 	hammer_buffer_t buffer1 = NULL;
954 	hammer_buffer_t buffer2 = NULL;
955 	hammer_off_t layer1_offset;
956 	hammer_off_t layer2_offset;
957 	hammer_off_t base_off;
958 	int error;
959 	int zone;
960 
961 	if (bytes == 0)
962 		return;
963 	hmp = trans->hmp;
964 
965 	/*
966 	 * Alignment
967 	 */
968 	bytes = (bytes + 15) & ~15;
969 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
970 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
971 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
972 
973 	/*
974 	 * Basic zone validation & locking
975 	 */
976 	zone = HAMMER_ZONE_DECODE(zone_offset);
977 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
978 	root_volume = trans->rootvol;
979 	error = 0;
980 
981 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
982 
983 	/*
984 	 * Dive layer 1.
985 	 */
986 	layer1_offset = freemap->phys_offset +
987 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
988 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
989 	if (error)
990 		goto failed;
991 	KKASSERT(layer1->phys_offset &&
992 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
993 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
994 		hammer_lock_ex(&hmp->blkmap_lock);
995 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
996 			panic("CRC FAILED: LAYER1");
997 		hammer_unlock(&hmp->blkmap_lock);
998 	}
999 
1000 	/*
1001 	 * Dive layer 2, each entry represents a big-block.
1002 	 */
1003 	layer2_offset = layer1->phys_offset +
1004 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1005 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1006 	if (error)
1007 		goto failed;
1008 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1009 		hammer_lock_ex(&hmp->blkmap_lock);
1010 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1011 			panic("CRC FAILED: LAYER2");
1012 		hammer_unlock(&hmp->blkmap_lock);
1013 	}
1014 
1015 	hammer_lock_ex(&hmp->blkmap_lock);
1016 
1017 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1018 
1019 	/*
1020 	 * Free space previously allocated via blockmap_alloc().
1021 	 *
1022 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1023 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1024 	 */
1025 	KKASSERT(layer2->zone == zone);
1026 	layer2->bytes_free += bytes;
1027 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1028 
1029 	/*
1030 	 * If a big-block becomes entirely free we must create a covering
1031 	 * reservation to prevent premature reuse.  Note, however, that
1032 	 * the big-block and/or reservation may still have an append_off
1033 	 * that allows further (non-reused) allocations.
1034 	 *
1035 	 * Once the reservation has been made we re-check layer2 and if
1036 	 * the big-block is still entirely free we reset the layer2 entry.
1037 	 * The reservation will prevent premature reuse.
1038 	 *
1039 	 * NOTE: hammer_buffers are only invalidated when the reservation
1040 	 * is completed, if the layer2 entry is still completely free at
1041 	 * that time.  Any allocations from the reservation that may have
1042 	 * occurred in the meantime, or active references on the reservation
1043 	 * from new pending allocations, will prevent the invalidation from
1044 	 * occurring.
1045 	 */
1046 	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1047 		base_off = hammer_xlate_to_zone2(zone_offset &
1048 						~HAMMER_BIGBLOCK_MASK64);
1049 
1050 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1051 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1052 			layer2->zone = 0;
1053 			layer2->append_off = 0;
1054 			hammer_modify_buffer(trans, buffer1,
1055 					     layer1, sizeof(*layer1));
1056 			++layer1->blocks_free;
1057 			layer1->layer1_crc = crc32(layer1,
1058 						   HAMMER_LAYER1_CRCSIZE);
1059 			hammer_modify_buffer_done(buffer1);
1060 			hammer_modify_volume_field(trans,
1061 					trans->rootvol,
1062 					vol0_stat_freebigblocks);
1063 			++root_volume->ondisk->vol0_stat_freebigblocks;
1064 			hmp->copy_stat_freebigblocks =
1065 			   root_volume->ondisk->vol0_stat_freebigblocks;
1066 			hammer_modify_volume_done(trans->rootvol);
1067 		}
1068 	}
1069 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1070 	hammer_modify_buffer_done(buffer2);
1071 	hammer_unlock(&hmp->blkmap_lock);
1072 
1073 failed:
1074 	if (buffer1)
1075 		hammer_rel_buffer(buffer1, 0);
1076 	if (buffer2)
1077 		hammer_rel_buffer(buffer2, 0);
1078 }
1079 
1080 int
1081 hammer_blockmap_dedup(hammer_transaction_t trans,
1082 		     hammer_off_t zone_offset, int bytes)
1083 {
1084 	hammer_mount_t hmp;
1085 	hammer_blockmap_t freemap;
1086 	struct hammer_blockmap_layer1 *layer1;
1087 	struct hammer_blockmap_layer2 *layer2;
1088 	hammer_buffer_t buffer1 = NULL;
1089 	hammer_buffer_t buffer2 = NULL;
1090 	hammer_off_t layer1_offset;
1091 	hammer_off_t layer2_offset;
1092 	int32_t temp;
1093 	int error;
1094 	int zone __debugvar;
1095 
1096 	if (bytes == 0)
1097 		return (0);
1098 	hmp = trans->hmp;
1099 
1100 	/*
1101 	 * Alignment
1102 	 */
1103 	bytes = (bytes + 15) & ~15;
1104 	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
1105 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1106 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
1107 
1108 	/*
1109 	 * Basic zone validation & locking
1110 	 */
1111 	zone = HAMMER_ZONE_DECODE(zone_offset);
1112 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1113 	error = 0;
1114 
1115 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1116 
1117 	/*
1118 	 * Dive layer 1.
1119 	 */
1120 	layer1_offset = freemap->phys_offset +
1121 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1122 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1123 	if (error)
1124 		goto failed;
1125 	KKASSERT(layer1->phys_offset &&
1126 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1127 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1128 		hammer_lock_ex(&hmp->blkmap_lock);
1129 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1130 			panic("CRC FAILED: LAYER1");
1131 		hammer_unlock(&hmp->blkmap_lock);
1132 	}
1133 
1134 	/*
1135 	 * Dive layer 2, each entry represents a big-block.
1136 	 */
1137 	layer2_offset = layer1->phys_offset +
1138 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1139 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1140 	if (error)
1141 		goto failed;
1142 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1143 		hammer_lock_ex(&hmp->blkmap_lock);
1144 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1145 			panic("CRC FAILED: LAYER2");
1146 		hammer_unlock(&hmp->blkmap_lock);
1147 	}
1148 
1149 	hammer_lock_ex(&hmp->blkmap_lock);
1150 
1151 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1152 
1153 	/*
1154 	 * Free space previously allocated via blockmap_alloc().
1155 	 *
1156 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1157 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1158 	 */
1159 	KKASSERT(layer2->zone == zone);
1160 	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
1161 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1162 	if (temp > layer2->bytes_free) {
1163 		error = ERANGE;
1164 		goto underflow;
1165 	}
1166 	layer2->bytes_free -= bytes;
1167 
1168 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1169 
1170 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1171 underflow:
1172 	hammer_modify_buffer_done(buffer2);
1173 	hammer_unlock(&hmp->blkmap_lock);
1174 
1175 failed:
1176 	if (buffer1)
1177 		hammer_rel_buffer(buffer1, 0);
1178 	if (buffer2)
1179 		hammer_rel_buffer(buffer2, 0);
1180 	return (error);
1181 }
1182 
1183 /*
1184  * Backend function - finalize (offset, bytes) in a zone.
1185  *
1186  * Allocate space that was previously reserved by the frontend.
1187  */
1188 int
1189 hammer_blockmap_finalize(hammer_transaction_t trans,
1190 			 hammer_reserve_t resv,
1191 			 hammer_off_t zone_offset, int bytes)
1192 {
1193 	hammer_mount_t hmp;
1194 	hammer_volume_t root_volume;
1195 	hammer_blockmap_t freemap;
1196 	struct hammer_blockmap_layer1 *layer1;
1197 	struct hammer_blockmap_layer2 *layer2;
1198 	hammer_buffer_t buffer1 = NULL;
1199 	hammer_buffer_t buffer2 = NULL;
1200 	hammer_off_t layer1_offset;
1201 	hammer_off_t layer2_offset;
1202 	int error;
1203 	int zone;
1204 	int offset;
1205 
1206 	if (bytes == 0)
1207 		return(0);
1208 	hmp = trans->hmp;
1209 
1210 	/*
1211 	 * Alignment
1212 	 */
1213 	bytes = (bytes + 15) & ~15;
1214 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1215 
1216 	/*
1217 	 * Basic zone validation & locking
1218 	 */
1219 	zone = HAMMER_ZONE_DECODE(zone_offset);
1220 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1221 	root_volume = trans->rootvol;
1222 	error = 0;
1223 
1224 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1225 
1226 	/*
1227 	 * Dive layer 1.
1228 	 */
1229 	layer1_offset = freemap->phys_offset +
1230 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1231 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1232 	if (error)
1233 		goto failed;
1234 	KKASSERT(layer1->phys_offset &&
1235 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1236 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1237 		hammer_lock_ex(&hmp->blkmap_lock);
1238 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1239 			panic("CRC FAILED: LAYER1");
1240 		hammer_unlock(&hmp->blkmap_lock);
1241 	}
1242 
1243 	/*
1244 	 * Dive layer 2, each entry represents a big-block.
1245 	 */
1246 	layer2_offset = layer1->phys_offset +
1247 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1248 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1249 	if (error)
1250 		goto failed;
1251 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1252 		hammer_lock_ex(&hmp->blkmap_lock);
1253 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1254 			panic("CRC FAILED: LAYER2");
1255 		hammer_unlock(&hmp->blkmap_lock);
1256 	}
1257 
1258 	hammer_lock_ex(&hmp->blkmap_lock);
1259 
1260 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1261 
1262 	/*
1263 	 * Finalize some or all of the space covered by a current
1264 	 * reservation.  An allocation in the same layer may have
1265 	 * already assigned ownership.
1266 	 */
1267 	if (layer2->zone == 0) {
1268 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
1269 		--layer1->blocks_free;
1270 		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
1271 		hammer_modify_buffer_done(buffer1);
1272 		layer2->zone = zone;
1273 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1274 		KKASSERT(layer2->append_off == 0);
1275 		hammer_modify_volume_field(trans,
1276 				trans->rootvol,
1277 				vol0_stat_freebigblocks);
1278 		--root_volume->ondisk->vol0_stat_freebigblocks;
1279 		hmp->copy_stat_freebigblocks =
1280 		   root_volume->ondisk->vol0_stat_freebigblocks;
1281 		hammer_modify_volume_done(trans->rootvol);
1282 	}
1283 	if (layer2->zone != zone)
1284 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1285 	KKASSERT(layer2->zone == zone);
1286 	KKASSERT(bytes != 0);
1287 	layer2->bytes_free -= bytes;
1288 
1289 	if (resv) {
1290 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1291 	}
1292 
1293 	/*
1294 	 * Finalizations can occur out of order, or combined with allocations.
1295 	 * append_off must be set to the highest allocated offset.
1296 	 */
1297 	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1298 	if (layer2->append_off < offset)
1299 		layer2->append_off = offset;
1300 
1301 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1302 	hammer_modify_buffer_done(buffer2);
1303 	hammer_unlock(&hmp->blkmap_lock);
1304 
1305 failed:
1306 	if (buffer1)
1307 		hammer_rel_buffer(buffer1, 0);
1308 	if (buffer2)
1309 		hammer_rel_buffer(buffer2, 0);
1310 	return(error);
1311 }
1312 
1313 /*
1314  * Return the approximate number of free bytes in the big-block
1315  * containing the specified blockmap offset.
1316  *
1317  * WARNING: A negative number can be returned if data de-dup exists,
1318 	 *	    and the result will also not represent the actual number
1319  *	    of free bytes in this case.
1320  *
1321  *	    This code is used only by the reblocker.
1322  */
1323 int
1324 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1325 			int *curp, int *errorp)
1326 {
1327 	hammer_volume_t root_volume;
1328 	hammer_blockmap_t blockmap;
1329 	hammer_blockmap_t freemap;
1330 	struct hammer_blockmap_layer1 *layer1;
1331 	struct hammer_blockmap_layer2 *layer2;
1332 	hammer_buffer_t buffer = NULL;
1333 	hammer_off_t layer1_offset;
1334 	hammer_off_t layer2_offset;
1335 	int32_t bytes;
1336 	int zone;
1337 
1338 	zone = HAMMER_ZONE_DECODE(zone_offset);
1339 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1340 	root_volume = hammer_get_root_volume(hmp, errorp);
1341 	if (*errorp) {
1342 		*curp = 0;
1343 		return(0);
1344 	}
1345 	blockmap = &hmp->blockmap[zone];
1346 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1347 
1348 	/*
1349 	 * Dive layer 1.
1350 	 */
1351 	layer1_offset = freemap->phys_offset +
1352 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1353 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1354 	if (*errorp) {
1355 		*curp = 0;
1356 		bytes = 0;
1357 		goto failed;
1358 	}
1359 	KKASSERT(layer1->phys_offset);
1360 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1361 		hammer_lock_ex(&hmp->blkmap_lock);
1362 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1363 			panic("CRC FAILED: LAYER1");
1364 		hammer_unlock(&hmp->blkmap_lock);
1365 	}
1366 
1367 	/*
1368 	 * Dive layer 2, each entry represents a big-block.
1369 	 *
1370 	 * (reuse buffer, layer1 pointer becomes invalid)
1371 	 */
1372 	layer2_offset = layer1->phys_offset +
1373 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1374 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1375 	if (*errorp) {
1376 		*curp = 0;
1377 		bytes = 0;
1378 		goto failed;
1379 	}
1380 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1381 		hammer_lock_ex(&hmp->blkmap_lock);
1382 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1383 			panic("CRC FAILED: LAYER2");
1384 		hammer_unlock(&hmp->blkmap_lock);
1385 	}
1386 	KKASSERT(layer2->zone == zone);
1387 
1388 	bytes = layer2->bytes_free;
1389 
1390 	/*
1391 	 * *curp becomes 1 only when there is no error and next_offset and
1392 	 * zone_offset are in the same big-block.
1393 	 */
1394 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1395 		*curp = 0;  /* not same */
1396 	else
1397 		*curp = 1;
1398 failed:
1399 	if (buffer)
1400 		hammer_rel_buffer(buffer, 0);
1401 	hammer_rel_volume(root_volume, 0);
1402 	if (hammer_debug_general & 0x4000) {
1403 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1404 			(long long)zone_offset, bytes);
1405 	}
1406 	return(bytes);
1407 }
1408 
1409 
1410 /*
1411  * Lookup a blockmap offset and verify blockmap layers.
1412  */
1413 hammer_off_t
1414 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1415 			int *errorp)
1416 {
1417 	hammer_volume_t root_volume;
1418 	hammer_blockmap_t freemap;
1419 	struct hammer_blockmap_layer1 *layer1;
1420 	struct hammer_blockmap_layer2 *layer2;
1421 	hammer_buffer_t buffer = NULL;
1422 	hammer_off_t layer1_offset;
1423 	hammer_off_t layer2_offset;
1424 	hammer_off_t result_offset;
1425 	hammer_off_t base_off;
1426 	hammer_reserve_t resv __debugvar;
1427 	int zone;
1428 
1429 	/*
1430 	 * Calculate the zone-2 offset.
1431 	 */
1432 	zone = HAMMER_ZONE_DECODE(zone_offset);
1433 	result_offset = hammer_xlate_to_zone2(zone_offset);
1434 
1435 	/*
1436 	 * Validate the allocation zone
1437 	 */
1438 	root_volume = hammer_get_root_volume(hmp, errorp);
1439 	if (*errorp)
1440 		return(0);
1441 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1442 	KKASSERT(freemap->phys_offset != 0);
1443 
1444 	/*
1445 	 * Dive layer 1.
1446 	 */
1447 	layer1_offset = freemap->phys_offset +
1448 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1449 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1450 	if (*errorp)
1451 		goto failed;
1452 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1453 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1454 		hammer_lock_ex(&hmp->blkmap_lock);
1455 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1456 			panic("CRC FAILED: LAYER1");
1457 		hammer_unlock(&hmp->blkmap_lock);
1458 	}
1459 
1460 	/*
1461 	 * Dive layer 2, each entry represents a big-block.
1462 	 */
1463 	layer2_offset = layer1->phys_offset +
1464 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1465 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1466 
1467 	if (*errorp)
1468 		goto failed;
1469 	if (layer2->zone == 0) {
1470 		base_off = hammer_xlate_to_zone2(zone_offset &
1471 						~HAMMER_BIGBLOCK_MASK64);
1472 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1473 				 base_off);
1474 		KKASSERT(resv && resv->zone == zone);
1475 
1476 	} else if (layer2->zone != zone) {
1477 		panic("hammer_blockmap_lookup_verify: bad zone %d/%d",
1478 			layer2->zone, zone);
1479 	}
1480 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1481 		hammer_lock_ex(&hmp->blkmap_lock);
1482 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1483 			panic("CRC FAILED: LAYER2");
1484 		hammer_unlock(&hmp->blkmap_lock);
1485 	}
1486 
1487 failed:
1488 	if (buffer)
1489 		hammer_rel_buffer(buffer, 0);
1490 	hammer_rel_volume(root_volume, 0);
1491 	if (hammer_debug_general & 0x0800) {
1492 		kprintf("hammer_blockmap_lookup_verify: %016llx -> %016llx\n",
1493 			(long long)zone_offset, (long long)result_offset);
1494 	}
1495 	return(result_offset);
1496 }
1497 
1498 
1499 /*
1500  * Check space availability
1501  *
1502  * MPSAFE - does not require fs_token
1503  */
1504 int
1505 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1506 {
1507 	const int in_size = sizeof(struct hammer_inode_data) +
1508 			    sizeof(union hammer_btree_elm);
1509 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1510 	int64_t usedbytes;
1511 
1512 	usedbytes = hmp->rsv_inodes * in_size +
1513 		    hmp->rsv_recs * rec_size +
1514 		    hmp->rsv_databytes +
1515 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1516 		    ((int64_t)hammer_limit_dirtybufspace) +
1517 		    (slop << HAMMER_BIGBLOCK_BITS);
1518 
1519 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1520 	if (resp)
1521 		*resp = usedbytes;
1522 
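	/*
	 * Pass only if the free big-block count covers the worst-case
	 * big-block cost of everything accounted for in usedbytes.
	 */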
1523 	if (hmp->copy_stat_freebigblocks >=
1524 	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1525 		return(0);
1526 	}
1527 	return (ENOSPC);
1528 }
1529 
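/*
 * Check that the layer1 entry covering *offsetp is physically available.
 * If it is marked HAMMER_BLOCKMAP_UNAVAIL there is no more allocatable
 * space on the current volume and *offsetp is advanced to the next
 * volume.  Returns 0 or an I/O error from hammer_bread().
 */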
1530 static int
1531 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1532 {
1533 	hammer_blockmap_t freemap;
1534 	struct hammer_blockmap_layer1 *layer1;
1535 	hammer_buffer_t buffer1 = NULL;
1536 	hammer_off_t layer1_offset;
1537 	int error = 0;
1538 
1539 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1540 
1541 	layer1_offset = freemap->phys_offset +
1542 			HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
1543 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1544 	if (error)
1545 		goto end;
1546 
1547 	/*
1548 	 * No more physically available space in layer1s
1549 	 * of the current volume, go to the next volume.
1550 	 */
1551 	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
1552 		hammer_skip_volume(offsetp);
1553 end:
1554 	if (buffer1)
1555 		hammer_rel_buffer(buffer1, 0);
1556 	return(error);
1557 }
1558 
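/*
 * Advance *offsetp to big-block offset 0 of the next volume.  If the
 * offset was on the last possible volume, wrap to volume 0 of the
 * next zone.
 */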
1559 static void
1560 hammer_skip_volume(hammer_off_t *offsetp)
1561 {
1562 	hammer_off_t offset;
1563 	int zone, vol_no;
1564 
1565 	offset = *offsetp;
1566 	zone = HAMMER_ZONE_DECODE(offset);
1567 	vol_no = HAMMER_VOL_DECODE(offset) + 1;
1568 	KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1569 
1570 	if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
1571 		vol_no = 0;
1572 		++zone;
1573 	}
1574 
1575 	*offsetp = HAMMER_ENCODE(zone, vol_no, 0);
1576 }
1577