xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision 31524921)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
38 #include <vm/vm_page2.h>
39 
40 #include "hammer.h"
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 				    hammer_off_t base_offset, int zone,
45 				    struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48 static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
49 static void hammer_skip_volume(hammer_off_t *offsetp);
50 
51 /*
52  * Reserved big-blocks red-black tree support
53  */
54 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
55 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
56 
57 static int
58 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
59 {
60 	if (res1->zone_offset < res2->zone_offset)
61 		return(-1);
62 	if (res1->zone_offset > res2->zone_offset)
63 		return(1);
64 	return(0);
65 }
66 
67 /*
68  * Allocate bytes from a zone
69  */
70 hammer_off_t
71 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
72 		      hammer_off_t hint, int *errorp)
73 {
74 	hammer_mount_t hmp;
75 	hammer_volume_t root_volume;
76 	hammer_blockmap_t blockmap;
77 	hammer_blockmap_t freemap;
78 	hammer_reserve_t resv;
79 	struct hammer_blockmap_layer1 *layer1;
80 	struct hammer_blockmap_layer2 *layer2;
81 	hammer_buffer_t buffer1 = NULL;
82 	hammer_buffer_t buffer2 = NULL;
83 	hammer_buffer_t buffer3 = NULL;
84 	hammer_off_t tmp_offset;
85 	hammer_off_t next_offset;
86 	hammer_off_t result_offset;
87 	hammer_off_t layer1_offset;
88 	hammer_off_t layer2_offset;
89 	hammer_off_t base_off;
90 	int loops = 0;
91 	int offset;		/* offset within big-block */
92 	int use_hint;
93 
94 	hmp = trans->hmp;
95 
96 	/*
97 	 * Deal with alignment and buffer-boundary issues.
98 	 *
99 	 * Be careful, certain primary alignments are used below to allocate
100 	 * new blockmap blocks.
101 	 */
102 	bytes = (bytes + 15) & ~15;
103 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
104 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
105 
106 	/*
107 	 * Setup
108 	 */
109 	root_volume = trans->rootvol;
110 	*errorp = 0;
111 	blockmap = &hmp->blockmap[zone];
112 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
113 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
114 
115 	/*
116 	 * Use the hint if we have one.
117 	 */
118 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
119 		next_offset = (hint + 15) & ~(hammer_off_t)15;
120 		use_hint = 1;
121 	} else {
122 		next_offset = blockmap->next_offset;
123 		use_hint = 0;
124 	}
125 again:
126 
127 	/*
128 	 * use_hint is turned off if we leave the hinted big-block.
129 	 */
130 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
131 		next_offset = blockmap->next_offset;
132 		use_hint = 0;
133 	}
134 
135 	/*
136 	 * Check for wrap
137 	 */
138 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
139 		if (++loops == 2) {
140 			result_offset = 0;
141 			*errorp = ENOSPC;
142 			goto failed;
143 		}
144 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
145 	}
146 
147 	/*
148 	 * The allocation request may not cross a buffer boundary.  Special
149 	 * large allocations must not cross a big-block boundary.
150 	 */
151 	tmp_offset = next_offset + bytes - 1;
152 	if (bytes <= HAMMER_BUFSIZE) {
153 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
154 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
155 			goto again;
156 		}
157 	} else {
158 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
159 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
160 			goto again;
161 		}
162 	}
163 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
164 
165 	/*
166 	 * Dive layer 1.
167 	 */
168 	layer1_offset = freemap->phys_offset +
169 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
170 
171 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
172 	if (*errorp) {
173 		result_offset = 0;
174 		goto failed;
175 	}
176 
177 	/*
178 	 * Check CRC.
179 	 */
180 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
181 		hammer_lock_ex(&hmp->blkmap_lock);
182 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
183 			hpanic("CRC FAILED: LAYER1");
184 		hammer_unlock(&hmp->blkmap_lock);
185 	}
186 
187 	/*
188 	 * If we are at a big-block boundary and layer1 indicates no
189 	 * free big-blocks, then we cannot allocate a new big-block in
190 	 * layer2; skip to the next layer1 entry.
191 	 */
192 	if (offset == 0 && layer1->blocks_free == 0) {
193 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
194 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
195 		if (hammer_check_volume(hmp, &next_offset)) {
196 			result_offset = 0;
197 			goto failed;
198 		}
199 		goto again;
200 	}
201 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
202 
203 	/*
204 	 * Skip the whole volume if the layer1 entry points to a layer2 big-block
205 	 * on a volume that we are currently trying to remove from the
206 	 * file-system. This is used by the volume-del code together with
207 	 * the reblocker to free up a volume.
208 	 */
209 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
210 	    hmp->volume_to_remove) {
211 		hammer_skip_volume(&next_offset);
212 		goto again;
213 	}
214 
215 	/*
216 	 * Dive layer 2, each entry represents a big-block.
217 	 */
218 	layer2_offset = layer1->phys_offset +
219 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
220 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
221 	if (*errorp) {
222 		result_offset = 0;
223 		goto failed;
224 	}
225 
226 	/*
227 	 * Check CRC.  This can race another thread holding the lock
228 	 * and in the middle of modifying layer2.
229 	 */
230 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
231 		hammer_lock_ex(&hmp->blkmap_lock);
232 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
233 			hpanic("CRC FAILED: LAYER2");
234 		hammer_unlock(&hmp->blkmap_lock);
235 	}
236 
237 	/*
238 	 * Skip the layer if the zone is owned by someone other than us.
239 	 */
240 	if (layer2->zone && layer2->zone != zone) {
241 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
242 		goto again;
243 	}
244 	if (offset < layer2->append_off) {
245 		next_offset += layer2->append_off - offset;
246 		goto again;
247 	}
248 
249 #if 0
250 	/*
251 	 * If operating in the current non-hint blockmap block, do not
252 	 * allow it to get over-full.  Also drop any active hinting so
253 	 * blockmap->next_offset is updated at the end.
254 	 *
255 	 * We do this for B-Tree and meta-data allocations to provide
256 	 * localization for updates.
257 	 */
258 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
259 	     zone == HAMMER_ZONE_META_INDEX) &&
260 	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
261 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
262 		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
263 			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
264 			use_hint = 0;
265 			goto again;
266 		}
267 	}
268 #endif
269 
270 	/*
271 	 * We need the lock from this point on.  We have to re-check zone
272 	 * ownership after acquiring the lock and also check for reservations.
273 	 */
274 	hammer_lock_ex(&hmp->blkmap_lock);
275 
276 	if (layer2->zone && layer2->zone != zone) {
277 		hammer_unlock(&hmp->blkmap_lock);
278 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
279 		goto again;
280 	}
281 	if (offset < layer2->append_off) {
282 		hammer_unlock(&hmp->blkmap_lock);
283 		next_offset += layer2->append_off - offset;
284 		goto again;
285 	}
286 
287 	/*
288 	 * The big-block might be reserved by another zone.  If it is reserved
289 	 * by our zone we may have to move next_offset past the append_off.
290 	 */
291 	base_off = hammer_xlate_to_zone2(next_offset &
292 					~HAMMER_BIGBLOCK_MASK64);
293 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
294 	if (resv) {
295 		if (resv->zone != zone) {
296 			hammer_unlock(&hmp->blkmap_lock);
297 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
298 				      ~HAMMER_BIGBLOCK_MASK64;
299 			goto again;
300 		}
301 		if (offset < resv->append_off) {
302 			hammer_unlock(&hmp->blkmap_lock);
303 			next_offset += resv->append_off - offset;
304 			goto again;
305 		}
306 		++resv->refs;
307 	}
308 
309 	/*
310 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
311 	 * of the layer for real.  At this point we've validated any
312 	 * reservation that might exist and can just ignore resv.
313 	 */
314 	if (layer2->zone == 0) {
315 		/*
316 		 * Assign the big-block to our zone
317 		 */
318 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
319 		--layer1->blocks_free;
320 		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
321 		hammer_modify_buffer_done(buffer1);
322 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
323 		layer2->zone = zone;
324 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
325 		KKASSERT(layer2->append_off == 0);
326 		hammer_modify_volume_field(trans, trans->rootvol,
327 					   vol0_stat_freebigblocks);
328 		--root_volume->ondisk->vol0_stat_freebigblocks;
329 		hmp->copy_stat_freebigblocks =
330 			root_volume->ondisk->vol0_stat_freebigblocks;
331 		hammer_modify_volume_done(trans->rootvol);
332 	} else {
333 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
334 	}
335 	KKASSERT(layer2->zone == zone);
336 
337 	/*
338 	 * NOTE: bytes_free can legally go negative due to de-dup.
339 	 */
340 	layer2->bytes_free -= bytes;
341 	KKASSERT(layer2->append_off <= offset);
342 	layer2->append_off = offset + bytes;
343 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
344 	hammer_modify_buffer_done(buffer2);
345 
346 	/*
347 	 * We hold the blockmap lock and should be the only ones
348 	 * capable of modifying resv->append_off.  Track the allocation
349 	 * as appropriate.
350 	 */
351 	KKASSERT(bytes != 0);
352 	if (resv) {
353 		KKASSERT(resv->append_off <= offset);
354 		resv->append_off = offset + bytes;
355 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
356 		hammer_blockmap_reserve_complete(hmp, resv);
357 	}
358 
359 	/*
360 	 * If we are allocating from the base of a new buffer we can avoid
361 	 * a disk read by calling hammer_bnew_ext().
362 	 */
363 	if ((next_offset & HAMMER_BUFMASK) == 0) {
364 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
365 				errorp, &buffer3);
366 		if (*errorp) {
367 			result_offset = 0;
368 			goto failed;
369 		}
370 	}
371 	result_offset = next_offset;
372 
373 	/*
374 	 * If we weren't supplied with a hint or could not use the hint
375 	 * then we wound up using blockmap->next_offset as the hint and
376 	 * need to save it.
377 	 */
378 	if (use_hint == 0) {
379 		hammer_modify_volume_noundo(NULL, root_volume);
380 		blockmap->next_offset = next_offset + bytes;
381 		hammer_modify_volume_done(root_volume);
382 	}
383 	hammer_unlock(&hmp->blkmap_lock);
384 failed:
385 
386 	/*
387 	 * Cleanup
388 	 */
389 	if (buffer1)
390 		hammer_rel_buffer(buffer1, 0);
391 	if (buffer2)
392 		hammer_rel_buffer(buffer2, 0);
393 	if (buffer3)
394 		hammer_rel_buffer(buffer3, 0);
395 
396 	return(result_offset);
397 }
398 
399 /*
400  * Frontend function - Reserve bytes in a zone.
401  *
402  * This code reserves bytes out of a blockmap without committing to any
403  * meta-data modifications, allowing the front-end to directly issue disk
404  * write I/O for big-blocks of data.
405  *
406  * The backend later finalizes the reservation with hammer_blockmap_finalize()
407  * upon committing the related record.
408  */
409 hammer_reserve_t
410 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
411 			hammer_off_t *zone_offp, int *errorp)
412 {
413 	hammer_volume_t root_volume;
414 	hammer_blockmap_t blockmap;
415 	hammer_blockmap_t freemap;
416 	struct hammer_blockmap_layer1 *layer1;
417 	struct hammer_blockmap_layer2 *layer2;
418 	hammer_buffer_t buffer1 = NULL;
419 	hammer_buffer_t buffer2 = NULL;
420 	hammer_buffer_t buffer3 = NULL;
421 	hammer_off_t tmp_offset;
422 	hammer_off_t next_offset;
423 	hammer_off_t layer1_offset;
424 	hammer_off_t layer2_offset;
425 	hammer_off_t base_off;
426 	hammer_reserve_t resv;
427 	hammer_reserve_t resx;
428 	int loops = 0;
429 	int offset;
430 
431 	/*
432 	 * Setup
433 	 */
434 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
435 	root_volume = hammer_get_root_volume(hmp, errorp);
436 	if (*errorp)
437 		return(NULL);
438 	blockmap = &hmp->blockmap[zone];
439 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
440 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
441 
442 	/*
443 	 * Deal with alignment and buffer-boundary issues.
444 	 *
445 	 * Be careful, certain primary alignments are used below to allocate
446 	 * new blockmap blocks.
447 	 */
448 	bytes = (bytes + 15) & ~15;
449 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
450 
451 	next_offset = blockmap->next_offset;
452 again:
453 	resv = NULL;
454 	/*
455 	 * Check for wrap
456 	 */
457 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
458 		if (++loops == 2) {
459 			*errorp = ENOSPC;
460 			goto failed;
461 		}
462 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
463 	}
464 
465 	/*
466 	 * The allocation request may not cross a buffer boundary.  Special
467 	 * large allocations must not cross a big-block boundary.
468 	 */
469 	tmp_offset = next_offset + bytes - 1;
470 	if (bytes <= HAMMER_BUFSIZE) {
471 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
472 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
473 			goto again;
474 		}
475 	} else {
476 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
477 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
478 			goto again;
479 		}
480 	}
481 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
482 
483 	/*
484 	 * Dive layer 1.
485 	 */
486 	layer1_offset = freemap->phys_offset +
487 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
488 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
489 	if (*errorp)
490 		goto failed;
491 
492 	/*
493 	 * Check CRC.
494 	 */
495 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
496 		hammer_lock_ex(&hmp->blkmap_lock);
497 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
498 			hpanic("CRC FAILED: LAYER1");
499 		hammer_unlock(&hmp->blkmap_lock);
500 	}
501 
502 	/*
503 	 * If we are at a big-block boundary and layer1 indicates no
504 	 * free big-blocks, then we cannot allocate a new big-block in
505 	 * layer2; skip to the next layer1 entry.
506 	 */
507 	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
508 	    layer1->blocks_free == 0) {
509 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
510 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
511 		if (hammer_check_volume(hmp, &next_offset))
512 			goto failed;
513 		goto again;
514 	}
515 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
516 
517 	/*
518 	 * Dive layer 2, each entry represents a big-block.
519 	 */
520 	layer2_offset = layer1->phys_offset +
521 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
522 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
523 	if (*errorp)
524 		goto failed;
525 
526 	/*
527 	 * Check CRC if not allocating into uninitialized space (which we
528 	 * aren't when reserving space).
529 	 */
530 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
531 		hammer_lock_ex(&hmp->blkmap_lock);
532 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
533 			hpanic("CRC FAILED: LAYER2");
534 		hammer_unlock(&hmp->blkmap_lock);
535 	}
536 
537 	/*
538 	 * Skip the layer if the zone is owned by someone other then us.
539 	 * Skip the layer if the zone is owned by someone other than us.
540 	if (layer2->zone && layer2->zone != zone) {
541 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
542 		goto again;
543 	}
544 	if (offset < layer2->append_off) {
545 		next_offset += layer2->append_off - offset;
546 		goto again;
547 	}
548 
549 	/*
550 	 * We need the lock from this point on.  We have to re-check zone
551 	 * ownership after acquiring the lock and also check for reservations.
552 	 */
553 	hammer_lock_ex(&hmp->blkmap_lock);
554 
555 	if (layer2->zone && layer2->zone != zone) {
556 		hammer_unlock(&hmp->blkmap_lock);
557 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
558 		goto again;
559 	}
560 	if (offset < layer2->append_off) {
561 		hammer_unlock(&hmp->blkmap_lock);
562 		next_offset += layer2->append_off - offset;
563 		goto again;
564 	}
565 
566 	/*
567 	 * The big-block might be reserved by another zone.  If it is reserved
568 	 * by our zone we may have to move next_offset past the append_off.
569 	 */
570 	base_off = hammer_xlate_to_zone2(next_offset &
571 					~HAMMER_BIGBLOCK_MASK64);
572 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
573 	if (resv) {
574 		if (resv->zone != zone) {
575 			hammer_unlock(&hmp->blkmap_lock);
576 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
577 				      ~HAMMER_BIGBLOCK_MASK64;
578 			goto again;
579 		}
580 		if (offset < resv->append_off) {
581 			hammer_unlock(&hmp->blkmap_lock);
582 			next_offset += resv->append_off - offset;
583 			goto again;
584 		}
585 		++resv->refs;
586 		resx = NULL;
587 	} else {
588 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
589 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
590 		resx->refs = 1;
591 		resx->zone = zone;
592 		resx->zone_offset = base_off;
593 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
594 			resx->flags |= HAMMER_RESF_LAYER2FREE;
595 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
596 		KKASSERT(resv == NULL);
597 		resv = resx;
598 		++hammer_count_reservations;
599 	}
600 	resv->append_off = offset + bytes;
601 
602 	/*
603 	 * If we are not reserving a whole buffer but are at the start of
604 	 * a new block, call hammer_bnew() to avoid a disk read.
605 	 *
606 	 * If we are reserving a whole buffer (or more), the caller will
607 	 * probably use a direct read, so do nothing.
608 	 *
609 	 * If we do not have a whole lot of system memory we really can't
610 	 * afford to block while holding the blkmap_lock!
611 	 */
612 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
613 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
614 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
615 			if (*errorp)
616 				goto failed;
617 		}
618 	}
619 
620 	/*
621 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
622 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
623 	 * be big-block aligned.
624 	 */
625 	blockmap->next_offset = next_offset + bytes;
626 	hammer_unlock(&hmp->blkmap_lock);
627 
628 failed:
629 	if (buffer1)
630 		hammer_rel_buffer(buffer1, 0);
631 	if (buffer2)
632 		hammer_rel_buffer(buffer2, 0);
633 	if (buffer3)
634 		hammer_rel_buffer(buffer3, 0);
635 	hammer_rel_volume(root_volume, 0);
636 	*zone_offp = next_offset;
637 
638 	return(resv);
639 }
640 
641 /*
642  * Frontend function - Dedup bytes in a zone.
643  *
644  * Dedup reservations work exactly the same as normal write reservations
645  * except that we only adjust the bytes_free field and don't touch the
646  * append offset.  The finalization mechanism for dedup reservations is
647  * also the same as for normal write ones - the backend finalizes the
648  * reservation with hammer_blockmap_finalize().
649  */
650 hammer_reserve_t
651 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
652 			      hammer_off_t zone_offset, int *errorp)
653 {
654 	hammer_volume_t root_volume;
655 	hammer_blockmap_t freemap;
656 	struct hammer_blockmap_layer1 *layer1;
657 	struct hammer_blockmap_layer2 *layer2;
658 	hammer_buffer_t buffer1 = NULL;
659 	hammer_buffer_t buffer2 = NULL;
660 	hammer_off_t layer1_offset;
661 	hammer_off_t layer2_offset;
662 	hammer_off_t base_off;
663 	hammer_reserve_t resv = NULL;
664 	hammer_reserve_t resx = NULL;
665 
666 	/*
667 	 * Setup
668 	 */
669 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
670 	root_volume = hammer_get_root_volume(hmp, errorp);
671 	if (*errorp)
672 		return (NULL);
673 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
674 	KKASSERT(freemap->phys_offset != 0);
675 
676 	bytes = (bytes + 15) & ~15;
677 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
678 
679 	/*
680 	 * Dive layer 1.
681 	 */
682 	layer1_offset = freemap->phys_offset +
683 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
684 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
685 	if (*errorp)
686 		goto failed;
687 
688 	/*
689 	 * Check CRC.
690 	 */
691 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
692 		hammer_lock_ex(&hmp->blkmap_lock);
693 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
694 			hpanic("CRC FAILED: LAYER1");
695 		hammer_unlock(&hmp->blkmap_lock);
696 	}
697 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
698 
699 	/*
700 	 * Dive layer 2, each entry represents a big-block.
701 	 */
702 	layer2_offset = layer1->phys_offset +
703 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
704 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
705 	if (*errorp)
706 		goto failed;
707 
708 	/*
709 	 * Check CRC.
710 	 */
711 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
712 		hammer_lock_ex(&hmp->blkmap_lock);
713 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
714 			hpanic("CRC FAILED: LAYER2");
715 		hammer_unlock(&hmp->blkmap_lock);
716 	}
717 
718 	/*
719 	 * Fail if the zone is owned by someone other than us.
720 	 */
721 	if (layer2->zone && layer2->zone != zone)
722 		goto failed;
723 
724 	/*
725 	 * We need the lock from this point on.  We have to re-check zone
726 	 * ownership after acquiring the lock and also check for reservations.
727 	 */
728 	hammer_lock_ex(&hmp->blkmap_lock);
729 
730 	if (layer2->zone && layer2->zone != zone) {
731 		hammer_unlock(&hmp->blkmap_lock);
732 		goto failed;
733 	}
734 
735 	base_off = hammer_xlate_to_zone2(zone_offset &
736 					~HAMMER_BIGBLOCK_MASK64);
737 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
738 	if (resv) {
739 		if (resv->zone != zone) {
740 			hammer_unlock(&hmp->blkmap_lock);
741 			resv = NULL;
742 			goto failed;
743 		}
744 		/*
745 		 * Due to possible big-block underflow we can't simply
746 		 * subtract bytes from bytes_free.
747 		 */
748 		if (update_bytes_free(resv, bytes) == 0) {
749 			hammer_unlock(&hmp->blkmap_lock);
750 			resv = NULL;
751 			goto failed;
752 		}
753 		++resv->refs;
754 		resx = NULL;
755 	} else {
756 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
757 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
758 		resx->refs = 1;
759 		resx->zone = zone;
760 		resx->bytes_free = layer2->bytes_free;
761 		/*
762 		 * Due to possible big-block underflow we can't simply
763 		 * subtract bytes from bytes_free.
764 		 */
765 		if (update_bytes_free(resx, bytes) == 0) {
766 			hammer_unlock(&hmp->blkmap_lock);
767 			kfree(resx, hmp->m_misc);
768 			goto failed;
769 		}
770 		resx->zone_offset = base_off;
771 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
772 		KKASSERT(resv == NULL);
773 		resv = resx;
774 		++hammer_count_reservations;
775 	}
776 
777 	hammer_unlock(&hmp->blkmap_lock);
778 
779 failed:
780 	if (buffer1)
781 		hammer_rel_buffer(buffer1, 0);
782 	if (buffer2)
783 		hammer_rel_buffer(buffer2, 0);
784 	hammer_rel_volume(root_volume, 0);
785 
786 	return(resv);
787 }
788 
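/*
 * Subtract bytes from a reservation's bytes_free field.  bytes_free can
 * legally go negative due to de-dup, so the result cannot simply be
 * range-checked; instead we detect when the field is about to underflow
 * the 32-bit accounting and refuse the adjustment.  Returns 1 on success,
 * 0 if the underflow check fails.
 */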
789 static int
790 update_bytes_free(hammer_reserve_t resv, int bytes)
791 {
792 	int32_t temp;
793 
794 	/*
795 	 * Big-block underflow check
796 	 */
797 	temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
798 	cpu_ccfence(); /* XXX do we really need it ? */
799 	if (temp > resv->bytes_free) {
800 		hdkprintf("BIGBLOCK UNDERFLOW\n");
801 		return (0);
802 	}
803 
804 	resv->bytes_free -= bytes;
805 	return (1);
806 }
807 
808 /*
809  * Dereference a reservation structure.  Upon the final release the
810  * underlying big-block is checked and if it is entirely free we delete
811  * any related HAMMER buffers to avoid potential conflicts with future
812  * reuse of the big-block.
813  */
814 void
815 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
816 {
817 	hammer_off_t base_offset;
818 	int error;
819 
820 	KKASSERT(resv->refs > 0);
821 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
822 		 HAMMER_ZONE_RAW_BUFFER);
823 
824 	/*
825 	 * Setting append_off to the max prevents any new allocations
826 	 * from occurring while we are trying to dispose of the reservation,
827 	 * allowing us to safely delete any related HAMMER buffers.
828 	 *
829 	 * If we are unable to clean out all related HAMMER buffers we
830 	 * requeue the delay.
831 	 */
832 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
833 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
834 		base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
835 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
836 			hammer_dedup_cache_inval(hmp, base_offset);
837 		error = hammer_del_buffers(hmp, base_offset,
838 					   resv->zone_offset,
839 					   HAMMER_BIGBLOCK_SIZE,
840 					   1);
841 		if (hammer_debug_general & 0x20000) {
842 			hkprintf("delbgblk %016jx error %d\n",
843 				(intmax_t)base_offset, error);
844 		}
845 		if (error)
846 			hammer_reserve_setdelay(hmp, resv);
847 	}
848 	if (--resv->refs == 0) {
849 		if (hammer_debug_general & 0x20000) {
850 			hkprintf("delresvr %016jx zone %02x\n",
851 				(intmax_t)resv->zone_offset, resv->zone);
852 		}
853 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
854 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
855 		kfree(resv, hmp->m_misc);
856 		--hammer_count_reservations;
857 	}
858 }
859 
860 /*
861  * Prevent a potentially free big-block from being reused until after
862  * the related flushes have completely cycled; otherwise crash recovery
863  * could resurrect a data block that was already reused and overwritten.
864  *
865  * The caller might reset the underlying layer2 entry's append_off to 0, so
866  * our covering append_off must be set to max to prevent any reallocation
867  * until after the flush delays complete, not to mention proper invalidation
868  * of any underlying cached blocks.
869  */
870 static void
871 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
872 			int zone, struct hammer_blockmap_layer2 *layer2)
873 {
874 	hammer_reserve_t resv;
875 
876 	/*
877 	 * Allocate the reservation if necessary.
878 	 *
879 	 * NOTE: need lock in future around resv lookup/allocation and
880 	 * the setdelay call, currently refs is not bumped until the call.
881 	 */
882 again:
883 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
884 	if (resv == NULL) {
885 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
886 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
887 		resv->zone = zone;
888 		resv->zone_offset = base_offset;
889 		resv->refs = 0;
890 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
891 
892 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
893 			resv->flags |= HAMMER_RESF_LAYER2FREE;
894 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
895 			kfree(resv, hmp->m_misc);
896 			goto again;
897 		}
898 		++hammer_count_reservations;
899 	} else {
900 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
901 			resv->flags |= HAMMER_RESF_LAYER2FREE;
902 	}
903 	hammer_reserve_setdelay(hmp, resv);
904 }
905 
906 /*
907  * Enter the reservation on the on-delay list, or move it if it
908  * is already on the list.
909  */
910 static void
911 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
912 {
913 	if (resv->flags & HAMMER_RESF_ONDELAY) {
914 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
915 		resv->flush_group = hmp->flusher.next + 1;
916 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
917 	} else {
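		/*
		 * First time on the delay list: gain a ref on the reservation
		 * (dropped again via hammer_reserve_clrdelay() ->
		 * hammer_blockmap_reserve_complete()).
		 */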
918 		++resv->refs;
919 		++hmp->rsv_fromdelay;
920 		resv->flags |= HAMMER_RESF_ONDELAY;
921 		resv->flush_group = hmp->flusher.next + 1;
922 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
923 	}
924 }
925 
926 /*
927  * Reserve has reached its flush point, remove it from the delay list
928  * and finish it off.  hammer_blockmap_reserve_complete() inherits
929  * the ondelay reference.
930  */
931 void
932 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
933 {
934 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
935 	resv->flags &= ~HAMMER_RESF_ONDELAY;
936 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
937 	--hmp->rsv_fromdelay;
938 	hammer_blockmap_reserve_complete(hmp, resv);
939 }
940 
941 /*
942  * Backend function - free (offset, bytes) in a zone.
943  *
944  * XXX error return
945  */
946 void
947 hammer_blockmap_free(hammer_transaction_t trans,
948 		     hammer_off_t zone_offset, int bytes)
949 {
950 	hammer_mount_t hmp;
951 	hammer_volume_t root_volume;
952 	hammer_blockmap_t freemap;
953 	struct hammer_blockmap_layer1 *layer1;
954 	struct hammer_blockmap_layer2 *layer2;
955 	hammer_buffer_t buffer1 = NULL;
956 	hammer_buffer_t buffer2 = NULL;
957 	hammer_off_t layer1_offset;
958 	hammer_off_t layer2_offset;
959 	hammer_off_t base_off;
960 	int error;
961 	int zone;
962 
963 	if (bytes == 0)
964 		return;
965 	hmp = trans->hmp;
966 
967 	/*
968 	 * Alignment
969 	 */
970 	bytes = (bytes + 15) & ~15;
971 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
972 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
973 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
974 
975 	/*
976 	 * Basic zone validation & locking
977 	 */
978 	zone = HAMMER_ZONE_DECODE(zone_offset);
979 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
980 	root_volume = trans->rootvol;
981 	error = 0;
982 
983 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
984 
985 	/*
986 	 * Dive layer 1.
987 	 */
988 	layer1_offset = freemap->phys_offset +
989 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
990 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
991 	if (error)
992 		goto failed;
993 	KKASSERT(layer1->phys_offset &&
994 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
995 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
996 		hammer_lock_ex(&hmp->blkmap_lock);
997 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
998 			hpanic("CRC FAILED: LAYER1");
999 		hammer_unlock(&hmp->blkmap_lock);
1000 	}
1001 
1002 	/*
1003 	 * Dive layer 2, each entry represents a big-block.
1004 	 */
1005 	layer2_offset = layer1->phys_offset +
1006 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1007 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1008 	if (error)
1009 		goto failed;
1010 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1011 		hammer_lock_ex(&hmp->blkmap_lock);
1012 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1013 			hpanic("CRC FAILED: LAYER2");
1014 		hammer_unlock(&hmp->blkmap_lock);
1015 	}
1016 
1017 	hammer_lock_ex(&hmp->blkmap_lock);
1018 
1019 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1020 
1021 	/*
1022 	 * Free space previously allocated via blockmap_alloc().
1023 	 *
1024 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1025 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1026 	 */
1027 	KKASSERT(layer2->zone == zone);
1028 	layer2->bytes_free += bytes;
1029 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1030 
1031 	/*
1032 	 * If a big-block becomes entirely free we must create a covering
1033 	 * reservation to prevent premature reuse.  Note, however, that
1034 	 * the big-block and/or reservation may still have an append_off
1035 	 * that allows further (non-reused) allocations.
1036 	 *
1037 	 * Once the reservation has been made we re-check layer2 and if
1038 	 * the big-block is still entirely free we reset the layer2 entry.
1039 	 * The reservation will prevent premature reuse.
1040 	 *
1041 	 * NOTE: hammer_buffers are only invalidated when the reservation
1042 	 * is completed, if the layer2 entry is still completely free at
1043 	 * that time.  Any allocations from the reservation that may have
1044 	 * occurred in the meantime, or active references on the reservation
1045 	 * from new pending allocations, will prevent the invalidation from
1046 	 * occurring.
1047 	 */
1048 	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1049 		base_off = hammer_xlate_to_zone2(zone_offset &
1050 						~HAMMER_BIGBLOCK_MASK64);
1051 
1052 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1053 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1054 			layer2->zone = 0;
1055 			layer2->append_off = 0;
1056 			hammer_modify_buffer(trans, buffer1,
1057 					     layer1, sizeof(*layer1));
1058 			++layer1->blocks_free;
1059 			layer1->layer1_crc = crc32(layer1,
1060 						   HAMMER_LAYER1_CRCSIZE);
1061 			hammer_modify_buffer_done(buffer1);
1062 			hammer_modify_volume_field(trans,
1063 					trans->rootvol,
1064 					vol0_stat_freebigblocks);
1065 			++root_volume->ondisk->vol0_stat_freebigblocks;
1066 			hmp->copy_stat_freebigblocks =
1067 			   root_volume->ondisk->vol0_stat_freebigblocks;
1068 			hammer_modify_volume_done(trans->rootvol);
1069 		}
1070 	}
1071 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1072 	hammer_modify_buffer_done(buffer2);
1073 	hammer_unlock(&hmp->blkmap_lock);
1074 
1075 failed:
1076 	if (buffer1)
1077 		hammer_rel_buffer(buffer1, 0);
1078 	if (buffer2)
1079 		hammer_rel_buffer(buffer2, 0);
1080 }
1081 
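/*
 * Backend function - de-dup (offset, bytes) in a zone.
 *
 * Only the layer2 bytes_free accounting is adjusted; zone ownership and
 * append_off are left alone.  Returns 0 on success, ERANGE if the
 * adjustment would underflow the big-block accounting, or an I/O error
 * from the blockmap lookup.
 */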
1082 int
1083 hammer_blockmap_dedup(hammer_transaction_t trans,
1084 		     hammer_off_t zone_offset, int bytes)
1085 {
1086 	hammer_mount_t hmp;
1087 	hammer_blockmap_t freemap;
1088 	struct hammer_blockmap_layer1 *layer1;
1089 	struct hammer_blockmap_layer2 *layer2;
1090 	hammer_buffer_t buffer1 = NULL;
1091 	hammer_buffer_t buffer2 = NULL;
1092 	hammer_off_t layer1_offset;
1093 	hammer_off_t layer2_offset;
1094 	int32_t temp;
1095 	int error;
1096 	int zone __debugvar;
1097 
1098 	if (bytes == 0)
1099 		return (0);
1100 	hmp = trans->hmp;
1101 
1102 	/*
1103 	 * Alignment
1104 	 */
1105 	bytes = (bytes + 15) & ~15;
1106 	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
1107 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1108 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
1109 
1110 	/*
1111 	 * Basic zone validation & locking
1112 	 */
1113 	zone = HAMMER_ZONE_DECODE(zone_offset);
1114 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1115 	error = 0;
1116 
1117 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1118 
1119 	/*
1120 	 * Dive layer 1.
1121 	 */
1122 	layer1_offset = freemap->phys_offset +
1123 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1124 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1125 	if (error)
1126 		goto failed;
1127 	KKASSERT(layer1->phys_offset &&
1128 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1129 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1130 		hammer_lock_ex(&hmp->blkmap_lock);
1131 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1132 			hpanic("CRC FAILED: LAYER1");
1133 		hammer_unlock(&hmp->blkmap_lock);
1134 	}
1135 
1136 	/*
1137 	 * Dive layer 2, each entry represents a big-block.
1138 	 */
1139 	layer2_offset = layer1->phys_offset +
1140 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1141 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1142 	if (error)
1143 		goto failed;
1144 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1145 		hammer_lock_ex(&hmp->blkmap_lock);
1146 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1147 			hpanic("CRC FAILED: LAYER2");
1148 		hammer_unlock(&hmp->blkmap_lock);
1149 	}
1150 
1151 	hammer_lock_ex(&hmp->blkmap_lock);
1152 
1153 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1154 
1155 	/*
1156 	 * Free space previously allocated via blockmap_alloc().
1157 	 *
1158 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1159 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1160 	 */
1161 	KKASSERT(layer2->zone == zone);
1162 	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
1163 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1164 	if (temp > layer2->bytes_free) {
1165 		error = ERANGE;
1166 		goto underflow;
1167 	}
1168 	layer2->bytes_free -= bytes;
1169 
1170 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1171 
1172 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1173 underflow:
1174 	hammer_modify_buffer_done(buffer2);
1175 	hammer_unlock(&hmp->blkmap_lock);
1176 
1177 failed:
1178 	if (buffer1)
1179 		hammer_rel_buffer(buffer1, 0);
1180 	if (buffer2)
1181 		hammer_rel_buffer(buffer2, 0);
1182 	return (error);
1183 }
1184 
1185 /*
1186  * Backend function - finalize (offset, bytes) in a zone.
1187  *
1188  * Allocate space that was previously reserved by the frontend.
1189  */
1190 int
1191 hammer_blockmap_finalize(hammer_transaction_t trans,
1192 			 hammer_reserve_t resv,
1193 			 hammer_off_t zone_offset, int bytes)
1194 {
1195 	hammer_mount_t hmp;
1196 	hammer_volume_t root_volume;
1197 	hammer_blockmap_t freemap;
1198 	struct hammer_blockmap_layer1 *layer1;
1199 	struct hammer_blockmap_layer2 *layer2;
1200 	hammer_buffer_t buffer1 = NULL;
1201 	hammer_buffer_t buffer2 = NULL;
1202 	hammer_off_t layer1_offset;
1203 	hammer_off_t layer2_offset;
1204 	int error;
1205 	int zone;
1206 	int offset;
1207 
1208 	if (bytes == 0)
1209 		return(0);
1210 	hmp = trans->hmp;
1211 
1212 	/*
1213 	 * Alignment
1214 	 */
1215 	bytes = (bytes + 15) & ~15;
1216 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1217 
1218 	/*
1219 	 * Basic zone validation & locking
1220 	 */
1221 	zone = HAMMER_ZONE_DECODE(zone_offset);
1222 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1223 	root_volume = trans->rootvol;
1224 	error = 0;
1225 
1226 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1227 
1228 	/*
1229 	 * Dive layer 1.
1230 	 */
1231 	layer1_offset = freemap->phys_offset +
1232 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1233 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1234 	if (error)
1235 		goto failed;
1236 	KKASSERT(layer1->phys_offset &&
1237 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1238 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1239 		hammer_lock_ex(&hmp->blkmap_lock);
1240 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1241 			hpanic("CRC FAILED: LAYER1");
1242 		hammer_unlock(&hmp->blkmap_lock);
1243 	}
1244 
1245 	/*
1246 	 * Dive layer 2, each entry represents a big-block.
1247 	 */
1248 	layer2_offset = layer1->phys_offset +
1249 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1250 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1251 	if (error)
1252 		goto failed;
1253 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1254 		hammer_lock_ex(&hmp->blkmap_lock);
1255 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1256 			hpanic("CRC FAILED: LAYER2");
1257 		hammer_unlock(&hmp->blkmap_lock);
1258 	}
1259 
1260 	hammer_lock_ex(&hmp->blkmap_lock);
1261 
1262 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1263 
1264 	/*
1265 	 * Finalize some or all of the space covered by a current
1266 	 * reservation.  An allocation in the same layer may have
1267 	 * already assigned ownership.
1268 	 */
1269 	if (layer2->zone == 0) {
1270 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
1271 		--layer1->blocks_free;
1272 		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
1273 		hammer_modify_buffer_done(buffer1);
1274 		layer2->zone = zone;
1275 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1276 		KKASSERT(layer2->append_off == 0);
1277 		hammer_modify_volume_field(trans,
1278 				trans->rootvol,
1279 				vol0_stat_freebigblocks);
1280 		--root_volume->ondisk->vol0_stat_freebigblocks;
1281 		hmp->copy_stat_freebigblocks =
1282 		   root_volume->ondisk->vol0_stat_freebigblocks;
1283 		hammer_modify_volume_done(trans->rootvol);
1284 	}
1285 	if (layer2->zone != zone)
1286 		hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1287 	KKASSERT(layer2->zone == zone);
1288 	KKASSERT(bytes != 0);
1289 	layer2->bytes_free -= bytes;
1290 
1291 	if (resv) {
1292 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1293 	}
1294 
1295 	/*
1296 	 * Finalizations can occur out of order, or combined with allocations.
1297 	 * append_off must be set to the highest allocated offset.
1298 	 */
1299 	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1300 	if (layer2->append_off < offset)
1301 		layer2->append_off = offset;
1302 
1303 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1304 	hammer_modify_buffer_done(buffer2);
1305 	hammer_unlock(&hmp->blkmap_lock);
1306 
1307 failed:
1308 	if (buffer1)
1309 		hammer_rel_buffer(buffer1, 0);
1310 	if (buffer2)
1311 		hammer_rel_buffer(buffer2, 0);
1312 	return(error);
1313 }
1314 
1315 /*
1316  * Return the approximate number of free bytes in the big-block
1317  * containing the specified blockmap offset.
1318  *
1319  * WARNING: A negative number can be returned if data de-dup exists,
1320  *	    and the result will also not represent the actual number
1321  *	    of free bytes in this case.
1322  *
1323  *	    This code is used only by the reblocker.
1324  */
1325 int
1326 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1327 			int *curp, int *errorp)
1328 {
1329 	hammer_volume_t root_volume;
1330 	hammer_blockmap_t blockmap;
1331 	hammer_blockmap_t freemap;
1332 	struct hammer_blockmap_layer1 *layer1;
1333 	struct hammer_blockmap_layer2 *layer2;
1334 	hammer_buffer_t buffer = NULL;
1335 	hammer_off_t layer1_offset;
1336 	hammer_off_t layer2_offset;
1337 	int32_t bytes;
1338 	int zone;
1339 
1340 	zone = HAMMER_ZONE_DECODE(zone_offset);
1341 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1342 	root_volume = hammer_get_root_volume(hmp, errorp);
1343 	if (*errorp) {
1344 		*curp = 0;
1345 		return(0);
1346 	}
1347 	blockmap = &hmp->blockmap[zone];
1348 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1349 
1350 	/*
1351 	 * Dive layer 1.
1352 	 */
1353 	layer1_offset = freemap->phys_offset +
1354 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1355 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1356 	if (*errorp) {
1357 		*curp = 0;
1358 		bytes = 0;
1359 		goto failed;
1360 	}
1361 	KKASSERT(layer1->phys_offset);
1362 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1363 		hammer_lock_ex(&hmp->blkmap_lock);
1364 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1365 			hpanic("CRC FAILED: LAYER1");
1366 		hammer_unlock(&hmp->blkmap_lock);
1367 	}
1368 
1369 	/*
1370 	 * Dive layer 2, each entry represents a big-block.
1371 	 *
1372 	 * (reuse buffer, layer1 pointer becomes invalid)
1373 	 */
1374 	layer2_offset = layer1->phys_offset +
1375 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1376 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1377 	if (*errorp) {
1378 		*curp = 0;
1379 		bytes = 0;
1380 		goto failed;
1381 	}
1382 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1383 		hammer_lock_ex(&hmp->blkmap_lock);
1384 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1385 			hpanic("CRC FAILED: LAYER2");
1386 		hammer_unlock(&hmp->blkmap_lock);
1387 	}
1388 	KKASSERT(layer2->zone == zone);
1389 
1390 	bytes = layer2->bytes_free;
1391 
1392 	/*
1393 	 * *curp becomes 1 only when there is no error and
1394 	 * next_offset and zone_offset are in the same big-block.
1395 	 */
1396 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1397 		*curp = 0;  /* not same */
1398 	else
1399 		*curp = 1;
1400 failed:
1401 	if (buffer)
1402 		hammer_rel_buffer(buffer, 0);
1403 	hammer_rel_volume(root_volume, 0);
1404 	if (hammer_debug_general & 0x4000) {
1405 		hdkprintf("%016llx -> %d\n", (long long)zone_offset, bytes);
1406 	}
1407 	return(bytes);
1408 }
1409 
1410 
1411 /*
1412  * Lookup a blockmap offset and verify blockmap layers.
1413  */
1414 hammer_off_t
1415 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1416 			int *errorp)
1417 {
1418 	hammer_volume_t root_volume;
1419 	hammer_blockmap_t freemap;
1420 	struct hammer_blockmap_layer1 *layer1;
1421 	struct hammer_blockmap_layer2 *layer2;
1422 	hammer_buffer_t buffer = NULL;
1423 	hammer_off_t layer1_offset;
1424 	hammer_off_t layer2_offset;
1425 	hammer_off_t result_offset;
1426 	hammer_off_t base_off;
1427 	hammer_reserve_t resv __debugvar;
1428 	int zone;
1429 
1430 	/*
1431 	 * Calculate the zone-2 offset.
1432 	 */
1433 	zone = HAMMER_ZONE_DECODE(zone_offset);
1434 	result_offset = hammer_xlate_to_zone2(zone_offset);
1435 
1436 	/*
1437 	 * Validate the allocation zone
1438 	 */
1439 	root_volume = hammer_get_root_volume(hmp, errorp);
1440 	if (*errorp)
1441 		return(0);
1442 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1443 	KKASSERT(freemap->phys_offset != 0);
1444 
1445 	/*
1446 	 * Dive layer 1.
1447 	 */
1448 	layer1_offset = freemap->phys_offset +
1449 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1450 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1451 	if (*errorp)
1452 		goto failed;
1453 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1454 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1455 		hammer_lock_ex(&hmp->blkmap_lock);
1456 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1457 			hpanic("CRC FAILED: LAYER1");
1458 		hammer_unlock(&hmp->blkmap_lock);
1459 	}
1460 
1461 	/*
1462 	 * Dive layer 2, each entry represents a big-block.
1463 	 */
1464 	layer2_offset = layer1->phys_offset +
1465 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1466 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1467 
1468 	if (*errorp)
1469 		goto failed;
1470 	if (layer2->zone == 0) {
1471 		base_off = hammer_xlate_to_zone2(zone_offset &
1472 						~HAMMER_BIGBLOCK_MASK64);
1473 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1474 				 base_off);
1475 		KKASSERT(resv && resv->zone == zone);
1476 
1477 	} else if (layer2->zone != zone) {
1478 		hpanic("bad zone %d/%d", layer2->zone, zone);
1479 	}
1480 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1481 		hammer_lock_ex(&hmp->blkmap_lock);
1482 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1483 			hpanic("CRC FAILED: LAYER2");
1484 		hammer_unlock(&hmp->blkmap_lock);
1485 	}
1486 
1487 failed:
1488 	if (buffer)
1489 		hammer_rel_buffer(buffer, 0);
1490 	hammer_rel_volume(root_volume, 0);
1491 	if (hammer_debug_general & 0x0800) {
1492 		hdkprintf("%016llx -> %016llx\n",
1493 			(long long)zone_offset, (long long)result_offset);
1494 	}
1495 	return(result_offset);
1496 }
1497 
1498 
1499 /*
1500  * Check space availability
1501  *
1502  * MPSAFE - does not require fs_token
1503  */
1504 int
1505 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1506 {
1507 	const int in_size = sizeof(struct hammer_inode_data) +
1508 			    sizeof(union hammer_btree_elm);
1509 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1510 	int64_t usedbytes;
1511 
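	/*
	 * Sum a worst-case estimate of space already spoken for: pending
	 * inode and record reservations, reserved data bytes, big-blocks
	 * held on the reuse-delay list, the dirty buffer space limit, and
	 * the caller-supplied slop, all expressed in bytes.
	 */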
1512 	usedbytes = hmp->rsv_inodes * in_size +
1513 		    hmp->rsv_recs * rec_size +
1514 		    hmp->rsv_databytes +
1515 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1516 		    ((int64_t)hammer_limit_dirtybufspace) +
1517 		    (slop << HAMMER_BIGBLOCK_BITS);
1518 
1519 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1520 	if (resp)
1521 		*resp = usedbytes;
1522 
1523 	if (hmp->copy_stat_freebigblocks >=
1524 	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1525 		return(0);
1526 	}
1527 	return (ENOSPC);
1528 }
1529 
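/*
 * Check whether the freemap layer1 entry covering *offsetp is physically
 * backed.  If the layer1 entry is marked HAMMER_BLOCKMAP_UNAVAIL there is
 * no more physically available space at this offset on the current volume,
 * and *offsetp is advanced to the next volume.  Returns 0 on success or an
 * I/O error from the layer1 lookup.
 */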
1530 static int
1531 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1532 {
1533 	hammer_blockmap_t freemap;
1534 	struct hammer_blockmap_layer1 *layer1;
1535 	hammer_buffer_t buffer1 = NULL;
1536 	hammer_off_t layer1_offset;
1537 	int error = 0;
1538 
1539 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1540 
1541 	layer1_offset = freemap->phys_offset +
1542 			HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
1543 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1544 	if (error)
1545 		goto end;
1546 
1547 	/*
1548 	 * No more physically available space in layer1s
1549 	 * of the current volume; go to the next volume.
1550 	 */
1551 	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
1552 		hammer_skip_volume(offsetp);
1553 end:
1554 	if (buffer1)
1555 		hammer_rel_buffer(buffer1, 0);
1556 	return(error);
1557 }
1558 
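/*
 * Advance *offsetp to offset 0 of the next volume within the same zone,
 * wrapping to volume 0 of the next zone index when the last volume is
 * passed.
 */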
1559 static void
1560 hammer_skip_volume(hammer_off_t *offsetp)
1561 {
1562 	hammer_off_t offset;
1563 	int zone, vol_no;
1564 
1565 	offset = *offsetp;
1566 	zone = HAMMER_ZONE_DECODE(offset);
1567 	vol_no = HAMMER_VOL_DECODE(offset) + 1;
1568 	KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1569 
1570 	if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
1571 		vol_no = 0;
1572 		++zone;
1573 	}
1574 
1575 	*offsetp = HAMMER_ENCODE(zone, vol_no, 0);
1576 }
1577