1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
38 #include <vm/vm_page2.h>
39 
40 #include "hammer.h"
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 				    hammer_off_t base_offset, int zone,
45 				    hammer_blockmap_layer2_t layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48 static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
49 static void hammer_skip_volume(hammer_off_t *offsetp);
50 
51 /*
52  * Reserved big-blocks red-black tree support
53  */
54 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
55 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
56 
57 static int
58 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
59 {
60 	if (res1->zone_offset < res2->zone_offset)
61 		return(-1);
62 	if (res1->zone_offset > res2->zone_offset)
63 		return(1);
64 	return(0);
65 }
66 
67 /*
68  * Allocate bytes from a zone
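 *
 * The zone's next_offset cursor is walked through the two-layer freemap:
 * layer1 tracks how many free big-blocks remain per region while layer2
 * tracks each big-block's owning zone, append_off and bytes_free.
 * Big-blocks owned by other zones, covered by foreign reservations, or
 * already appended past the requested offset are skipped.  On success a
 * zone-encoded offset is returned; on failure 0 is returned and *errorp
 * is set (e.g. ENOSPC when the zone wraps without finding space).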
69  */
70 hammer_off_t
71 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
72 		      hammer_off_t hint, int *errorp)
73 {
74 	hammer_mount_t hmp;
75 	hammer_volume_t root_volume;
76 	hammer_blockmap_t blockmap;
77 	hammer_blockmap_t freemap;
78 	hammer_reserve_t resv;
79 	hammer_blockmap_layer1_t layer1;
80 	hammer_blockmap_layer2_t layer2;
81 	hammer_buffer_t buffer1 = NULL;
82 	hammer_buffer_t buffer2 = NULL;
83 	hammer_buffer_t buffer3 = NULL;
84 	hammer_off_t tmp_offset;
85 	hammer_off_t next_offset;
86 	hammer_off_t result_offset;
87 	hammer_off_t layer1_offset;
88 	hammer_off_t layer2_offset;
89 	hammer_off_t base_off;
90 	int loops = 0;
91 	int offset;		/* offset within big-block */
92 	int use_hint;
93 
94 	hmp = trans->hmp;
95 
96 	/*
97 	 * Deal with alignment and buffer-boundary issues.
98 	 *
99 	 * Be careful, certain primary alignments are used below to allocate
100 	 * new blockmap blocks.
101 	 */
102 	bytes = HAMMER_DATA_DOALIGN(bytes);
103 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
104 	KKASSERT(hammer_is_index_record(zone));
105 
106 	/*
107 	 * Setup
108 	 */
109 	root_volume = trans->rootvol;
110 	*errorp = 0;
111 	blockmap = &hmp->blockmap[zone];
112 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
113 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
114 
115 	/*
116 	 * Use the hint if we have one.
117 	 */
118 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
119 		next_offset = HAMMER_DATA_DOALIGN_WITH(hammer_off_t, hint);
120 		use_hint = 1;
121 	} else {
122 		next_offset = blockmap->next_offset;
123 		use_hint = 0;
124 	}
125 again:
126 
127 	/*
128 	 * use_hint is turned off if we leave the hinted big-block.
129 	 */
130 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
131 		next_offset = blockmap->next_offset;
132 		use_hint = 0;
133 	}
134 
135 	/*
136 	 * Check for wrap
137 	 */
138 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
139 		if (++loops == 2) {
140 			hmkprintf(hmp, "No space left for zone %d "
141 				"allocation\n", zone);
142 			result_offset = 0;
143 			*errorp = ENOSPC;
144 			goto failed;
145 		}
146 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
147 	}
148 
149 	/*
150 	 * The allocation request may not cross a buffer boundary.  Special
151 	 * large allocations must not cross a big-block boundary.
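	 *
	 * The XOR tests below detect a crossing: the first and last byte of
	 * the request fall within the same buffer (or big-block) only when
	 * their offsets agree in every bit above the respective mask.  If
	 * they differ, next_offset is advanced to the next boundary and the
	 * scan is retried.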
152 	 */
153 	tmp_offset = next_offset + bytes - 1;
154 	if (bytes <= HAMMER_BUFSIZE) {
155 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
156 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
157 			goto again;
158 		}
159 	} else {
160 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
161 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
162 			goto again;
163 		}
164 	}
165 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
166 
167 	/*
168 	 * Dive layer 1.
169 	 */
170 	layer1_offset = freemap->phys_offset +
171 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
172 
173 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
174 	if (*errorp) {
175 		result_offset = 0;
176 		goto failed;
177 	}
178 
179 	/*
180 	 * Check CRC.
181 	 */
182 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
183 		hammer_lock_ex(&hmp->blkmap_lock);
184 		if (!hammer_crc_test_layer1(hmp->version, layer1))
185 			hpanic("CRC FAILED: LAYER1");
186 		hammer_unlock(&hmp->blkmap_lock);
187 	}
188 
189 	/*
190 	 * If we are at a big-block boundary and layer1 indicates no
191 	 * free big-blocks, then we cannot allocate a new big-block in
192 	 * layer2, skip to the next layer1 entry.
193 	 */
194 	if (offset == 0 && layer1->blocks_free == 0) {
195 		next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
196 		if (hammer_check_volume(hmp, &next_offset)) {
197 			result_offset = 0;
198 			goto failed;
199 		}
200 		goto again;
201 	}
202 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
203 
204 	/*
205 	 * Skip the whole volume if the layer1 entry points to a layer2
206 	 * big-block on a volume that we are currently trying to remove from
207 	 * the file-system. This is used by the volume-del code together with
208 	 * the reblocker to free up a volume.
209 	 */
210 	if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
211 		hammer_skip_volume(&next_offset);
212 		goto again;
213 	}
214 
215 	/*
216 	 * Dive layer 2, each entry represents a big-block.
217 	 */
218 	layer2_offset = layer1->phys_offset +
219 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
220 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
221 	if (*errorp) {
222 		result_offset = 0;
223 		goto failed;
224 	}
225 
226 	/*
227 	 * Check CRC.  This can race another thread holding the lock
228 	 * and in the middle of modifying layer2.
229 	 */
230 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
231 		hammer_lock_ex(&hmp->blkmap_lock);
232 		if (!hammer_crc_test_layer2(hmp->version, layer2))
233 			hpanic("CRC FAILED: LAYER2");
234 		hammer_unlock(&hmp->blkmap_lock);
235 	}
236 
237 	/*
238 	 * Skip the layer if the zone is owned by someone other than us.
239 	 */
240 	if (layer2->zone && layer2->zone != zone) {
241 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
242 		goto again;
243 	}
244 	if (offset < layer2->append_off) {
245 		next_offset += layer2->append_off - offset;
246 		goto again;
247 	}
248 
249 #if 0
250 	/*
251 	 * If operating in the current non-hint blockmap block, do not
252 	 * allow it to get over-full.  Also drop any active hinting so
253 	 * blockmap->next_offset is updated at the end.
254 	 *
255 	 * We do this for B-Tree and meta-data allocations to provide
256 	 * localization for updates.
257 	 */
258 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
259 	     zone == HAMMER_ZONE_META_INDEX) &&
260 	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
261 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
262 		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
263 			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
264 			use_hint = 0;
265 			goto again;
266 		}
267 	}
268 #endif
269 
270 	/*
271 	 * We need the lock from this point on.  We have to re-check zone
272 	 * ownership after acquiring the lock and also check for reservations.
273 	 */
274 	hammer_lock_ex(&hmp->blkmap_lock);
275 
276 	if (layer2->zone && layer2->zone != zone) {
277 		hammer_unlock(&hmp->blkmap_lock);
278 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
279 		goto again;
280 	}
281 	if (offset < layer2->append_off) {
282 		hammer_unlock(&hmp->blkmap_lock);
283 		next_offset += layer2->append_off - offset;
284 		goto again;
285 	}
286 
287 	/*
288 	 * The big-block might be reserved by another zone.  If it is reserved
289 	 * by our zone we may have to move next_offset past the append_off.
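	 *
	 * If we do allocate from a reserved big-block we take a temporary
	 * reference on the reservation (released below, after resv->append_off
	 * has been advanced) so it cannot be torn down underneath us.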
290 	 */
291 	base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
292 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
293 	if (resv) {
294 		if (resv->zone != zone) {
295 			hammer_unlock(&hmp->blkmap_lock);
296 			next_offset = HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset);
297 			goto again;
298 		}
299 		if (offset < resv->append_off) {
300 			hammer_unlock(&hmp->blkmap_lock);
301 			next_offset += resv->append_off - offset;
302 			goto again;
303 		}
304 		++resv->refs;
305 	}
306 
307 	/*
308 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
309 	 * of the layer for real.  At this point we've validated any
310 	 * reservation that might exist and can just ignore resv.
311 	 */
312 	if (layer2->zone == 0) {
313 		/*
314 		 * Assign the big-block to our zone
315 		 */
316 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
317 		--layer1->blocks_free;
318 		hammer_crc_set_layer1(hmp->version, layer1);
319 		hammer_modify_buffer_done(buffer1);
320 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
321 		layer2->zone = zone;
322 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
323 		KKASSERT(layer2->append_off == 0);
324 		hammer_modify_volume_field(trans, trans->rootvol,
325 					   vol0_stat_freebigblocks);
326 		--root_volume->ondisk->vol0_stat_freebigblocks;
327 		hmp->copy_stat_freebigblocks =
328 			root_volume->ondisk->vol0_stat_freebigblocks;
329 		hammer_modify_volume_done(trans->rootvol);
330 	} else {
331 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
332 	}
333 	KKASSERT(layer2->zone == zone);
334 
335 	/*
336 	 * NOTE: bytes_free can legally go negative due to de-dup.
337 	 */
338 	layer2->bytes_free -= bytes;
339 	KKASSERT(layer2->append_off <= offset);
340 	layer2->append_off = offset + bytes;
341 	hammer_crc_set_layer2(hmp->version, layer2);
342 	hammer_modify_buffer_done(buffer2);
343 
344 	/*
345 	 * We hold the blockmap lock and should be the only ones
346 	 * capable of modifying resv->append_off.  Track the allocation
347 	 * as appropriate.
348 	 */
349 	KKASSERT(bytes != 0);
350 	if (resv) {
351 		KKASSERT(resv->append_off <= offset);
352 		resv->append_off = offset + bytes;
353 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
354 		hammer_blockmap_reserve_complete(hmp, resv);
355 	}
356 
357 	/*
358 	 * If we are allocating from the base of a new buffer we can avoid
359 	 * a disk read by calling hammer_bnew_ext().
360 	 */
361 	if ((next_offset & HAMMER_BUFMASK) == 0) {
362 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
363 				errorp, &buffer3);
364 		if (*errorp) {
365 			result_offset = 0;
366 			goto failed;
367 		}
368 	}
369 	result_offset = next_offset;
370 
371 	/*
372 	 * If we weren't supplied with a hint or could not use the hint
373 	 * then we wound up using blockmap->next_offset as the hint and
374 	 * need to save it.
375 	 */
376 	if (use_hint == 0) {
377 		hammer_modify_volume_noundo(NULL, root_volume);
378 		blockmap->next_offset = next_offset + bytes;
379 		hammer_modify_volume_done(root_volume);
380 	}
381 	hammer_unlock(&hmp->blkmap_lock);
382 failed:
383 
384 	/*
385 	 * Cleanup
386 	 */
387 	if (buffer1)
388 		hammer_rel_buffer(buffer1, 0);
389 	if (buffer2)
390 		hammer_rel_buffer(buffer2, 0);
391 	if (buffer3)
392 		hammer_rel_buffer(buffer3, 0);
393 
394 	return(result_offset);
395 }
396 
397 /*
398  * Frontend function - Reserve bytes in a zone.
399  *
400  * This code reserves bytes out of a blockmap without committing to any
401  * meta-data modifications, allowing the front-end to directly issue disk
402  * write I/O for big-blocks of data.
403  *
404  * The backend later finalizes the reservation with hammer_blockmap_finalize()
405  * upon committing the related record.
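 *
 * On success a referenced hammer_reserve_t is returned and *zone_offp is
 * set to the reserved zone offset; the reference is released via
 * hammer_blockmap_reserve_complete().  Unlike hammer_blockmap_alloc()
 * no on-media meta-data is modified here.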
406  */
407 hammer_reserve_t
408 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
409 			hammer_off_t *zone_offp, int *errorp)
410 {
411 	hammer_volume_t root_volume;
412 	hammer_blockmap_t blockmap;
413 	hammer_blockmap_t freemap;
414 	hammer_blockmap_layer1_t layer1;
415 	hammer_blockmap_layer2_t layer2;
416 	hammer_buffer_t buffer1 = NULL;
417 	hammer_buffer_t buffer2 = NULL;
418 	hammer_buffer_t buffer3 = NULL;
419 	hammer_off_t tmp_offset;
420 	hammer_off_t next_offset;
421 	hammer_off_t layer1_offset;
422 	hammer_off_t layer2_offset;
423 	hammer_off_t base_off;
424 	hammer_reserve_t resv;
425 	hammer_reserve_t resx = NULL;
426 	int loops = 0;
427 	int offset;
428 
429 	/*
430 	 * Setup
431 	 */
432 	KKASSERT(hammer_is_index_record(zone));
433 	root_volume = hammer_get_root_volume(hmp, errorp);
434 	if (*errorp)
435 		return(NULL);
436 	blockmap = &hmp->blockmap[zone];
437 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
438 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
439 
440 	/*
441 	 * Deal with alignment and buffer-boundary issues.
442 	 *
443 	 * Be careful, certain primary alignments are used below to allocate
444 	 * new blockmap blocks.
445 	 */
446 	bytes = HAMMER_DATA_DOALIGN(bytes);
447 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
448 
449 	next_offset = blockmap->next_offset;
450 again:
451 	resv = NULL;
452 	/*
453 	 * Check for wrap
454 	 */
455 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
456 		if (++loops == 2) {
457 			hmkprintf(hmp, "No space left for zone %d "
458 				"reservation\n", zone);
459 			*errorp = ENOSPC;
460 			goto failed;
461 		}
462 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
463 	}
464 
465 	/*
466 	 * The allocation request may not cross a buffer boundary.  Special
467 	 * large allocations must not cross a big-block boundary.
468 	 */
469 	tmp_offset = next_offset + bytes - 1;
470 	if (bytes <= HAMMER_BUFSIZE) {
471 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
472 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
473 			goto again;
474 		}
475 	} else {
476 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
477 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
478 			goto again;
479 		}
480 	}
481 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
482 
483 	/*
484 	 * Dive layer 1.
485 	 */
486 	layer1_offset = freemap->phys_offset +
487 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
488 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
489 	if (*errorp)
490 		goto failed;
491 
492 	/*
493 	 * Check CRC.
494 	 */
495 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
496 		hammer_lock_ex(&hmp->blkmap_lock);
497 		if (!hammer_crc_test_layer1(hmp->version, layer1))
498 			hpanic("CRC FAILED: LAYER1");
499 		hammer_unlock(&hmp->blkmap_lock);
500 	}
501 
502 	/*
503 	 * If we are at a big-block boundary and layer1 indicates no
504 	 * free big-blocks, then we cannot allocate a new big-block in
505 	 * layer2, skip to the next layer1 entry.
506 	 */
507 	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
508 	    layer1->blocks_free == 0) {
509 		next_offset = HAMMER_ZONE_LAYER1_NEXT_OFFSET(next_offset);
510 		if (hammer_check_volume(hmp, &next_offset))
511 			goto failed;
512 		goto again;
513 	}
514 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
515 
516 	/*
517 	 * Dive layer 2, each entry represents a big-block.
518 	 */
519 	layer2_offset = layer1->phys_offset +
520 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
521 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
522 	if (*errorp)
523 		goto failed;
524 
525 	/*
526 	 * Check CRC if not allocating into uninitialized space (which we
527 	 * aren't when reserving space).
528 	 */
529 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
530 		hammer_lock_ex(&hmp->blkmap_lock);
531 		if (!hammer_crc_test_layer2(hmp->version, layer2))
532 			hpanic("CRC FAILED: LAYER2");
533 		hammer_unlock(&hmp->blkmap_lock);
534 	}
535 
536 	/*
537 	 * Skip the layer if the zone is owned by someone other then us.
538 	 * Skip the layer if the zone is owned by someone other than us.
539 	if (layer2->zone && layer2->zone != zone) {
540 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
541 		goto again;
542 	}
543 	if (offset < layer2->append_off) {
544 		next_offset += layer2->append_off - offset;
545 		goto again;
546 	}
547 
548 	/*
549 	 * We need the lock from this point on.  We have to re-check zone
550 	 * ownership after acquiring the lock and also check for reservations.
551 	 */
552 	hammer_lock_ex(&hmp->blkmap_lock);
553 
554 	if (layer2->zone && layer2->zone != zone) {
555 		hammer_unlock(&hmp->blkmap_lock);
556 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
557 		goto again;
558 	}
559 	if (offset < layer2->append_off) {
560 		hammer_unlock(&hmp->blkmap_lock);
561 		next_offset += layer2->append_off - offset;
562 		goto again;
563 	}
564 
565 	/*
566 	 * The big-block might be reserved by another zone.  If it is reserved
567 	 * by our zone we may have to move next_offset past the append_off.
568 	 */
569 	base_off = hammer_xlate_to_zone2(next_offset & ~HAMMER_BIGBLOCK_MASK64);
570 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
571 	if (resv) {
572 		if (resv->zone != zone) {
573 			hammer_unlock(&hmp->blkmap_lock);
574 			next_offset = HAMMER_ZONE_LAYER2_NEXT_OFFSET(next_offset);
575 			goto again;
576 		}
577 		if (offset < resv->append_off) {
578 			hammer_unlock(&hmp->blkmap_lock);
579 			next_offset += resv->append_off - offset;
580 			goto again;
581 		}
582 		++resv->refs;
583 	} else {
584 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
585 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
586 		resx->refs = 1;
587 		resx->zone = zone;
588 		resx->zone_offset = base_off;
589 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
590 			resx->flags |= HAMMER_RESF_LAYER2FREE;
591 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
592 		KKASSERT(resv == NULL);
593 		resv = resx;
594 		++hammer_count_reservations;
595 	}
596 	resv->append_off = offset + bytes;
597 
598 	/*
599 	 * If we are not reserving a whole buffer but are at the start of
600 	 * a new block, call hammer_bnew() to avoid a disk read.
601 	 *
602 	 * If we are reserving a whole buffer (or more), the caller will
603 	 * probably use a direct read, so do nothing.
604 	 *
605 	 * If we do not have a whole lot of system memory we really can't
606 	 * afford to block while holding the blkmap_lock!
607 	 */
608 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
609 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
610 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
611 			if (*errorp)
612 				goto failed;
613 		}
614 	}
615 
616 	blockmap->next_offset = next_offset + bytes;
617 	hammer_unlock(&hmp->blkmap_lock);
618 
619 failed:
620 	if (buffer1)
621 		hammer_rel_buffer(buffer1, 0);
622 	if (buffer2)
623 		hammer_rel_buffer(buffer2, 0);
624 	if (buffer3)
625 		hammer_rel_buffer(buffer3, 0);
626 	hammer_rel_volume(root_volume, 0);
627 	*zone_offp = next_offset;
628 
629 	return(resv);
630 }
631 
632 /*
633  * Frontend function - Dedup bytes in a zone.
634  *
635  * Dedup reservations work exactly the same as normal write reservations
636  * except we only adjust bytes_free field and don't touch append offset.
637  * Finalization mechanic for dedup reservations is also the same as for
638  * normal write ones - the backend finalizes the reservation with
639  * hammer_blockmap_finalize().
640  */
641 hammer_reserve_t
642 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
643 			      hammer_off_t zone_offset, int *errorp)
644 {
645 	hammer_volume_t root_volume;
646 	hammer_blockmap_t freemap;
647 	hammer_blockmap_layer1_t layer1;
648 	hammer_blockmap_layer2_t layer2;
649 	hammer_buffer_t buffer1 = NULL;
650 	hammer_buffer_t buffer2 = NULL;
651 	hammer_off_t layer1_offset;
652 	hammer_off_t layer2_offset;
653 	hammer_off_t base_off;
654 	hammer_reserve_t resv = NULL;
655 	hammer_reserve_t resx = NULL;
656 
657 	/*
658 	 * Setup
659 	 */
660 	KKASSERT(hammer_is_index_record(zone));
661 	root_volume = hammer_get_root_volume(hmp, errorp);
662 	if (*errorp)
663 		return (NULL);
664 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
665 	KKASSERT(freemap->phys_offset != 0);
666 
667 	bytes = HAMMER_DATA_DOALIGN(bytes);
668 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
669 
670 	/*
671 	 * Dive layer 1.
672 	 */
673 	layer1_offset = freemap->phys_offset +
674 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
675 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
676 	if (*errorp)
677 		goto failed;
678 
679 	/*
680 	 * Check CRC.
681 	 */
682 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
683 		hammer_lock_ex(&hmp->blkmap_lock);
684 		if (!hammer_crc_test_layer1(hmp->version, layer1))
685 			hpanic("CRC FAILED: LAYER1");
686 		hammer_unlock(&hmp->blkmap_lock);
687 	}
688 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
689 
690 	/*
691 	 * Dive layer 2, each entry represents a big-block.
692 	 */
693 	layer2_offset = layer1->phys_offset +
694 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
695 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
696 	if (*errorp)
697 		goto failed;
698 
699 	/*
700 	 * Check CRC.
701 	 */
702 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
703 		hammer_lock_ex(&hmp->blkmap_lock);
704 		if (!hammer_crc_test_layer2(hmp->version, layer2))
705 			hpanic("CRC FAILED: LAYER2");
706 		hammer_unlock(&hmp->blkmap_lock);
707 	}
708 
709 	/*
710 	 * Fail if the zone is owned by someone other than us.
711 	 */
712 	if (layer2->zone && layer2->zone != zone)
713 		goto failed;
714 
715 	/*
716 	 * We need the lock from this point on.  We have to re-check zone
717 	 * ownership after acquiring the lock and also check for reservations.
718 	 */
719 	hammer_lock_ex(&hmp->blkmap_lock);
720 
721 	if (layer2->zone && layer2->zone != zone) {
722 		hammer_unlock(&hmp->blkmap_lock);
723 		goto failed;
724 	}
725 
726 	base_off = hammer_xlate_to_zone2(zone_offset & ~HAMMER_BIGBLOCK_MASK64);
727 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
728 	if (resv) {
729 		if (resv->zone != zone) {
730 			hammer_unlock(&hmp->blkmap_lock);
731 			resv = NULL;
732 			goto failed;
733 		}
734 		/*
735 		 * Due to possible big-block underflow we can't simply
736 		 * subtract bytes from bytes_free.
737 		 */
738 		if (update_bytes_free(resv, bytes) == 0) {
739 			hammer_unlock(&hmp->blkmap_lock);
740 			resv = NULL;
741 			goto failed;
742 		}
743 		++resv->refs;
744 	} else {
745 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
746 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
747 		resx->refs = 1;
748 		resx->zone = zone;
749 		resx->bytes_free = layer2->bytes_free;
750 		/*
751 		 * Due to possible big-block underflow we can't simply
752 		 * subtract bytes from bytes_free.
753 		 */
754 		if (update_bytes_free(resx, bytes) == 0) {
755 			hammer_unlock(&hmp->blkmap_lock);
756 			kfree(resx, hmp->m_misc);
757 			goto failed;
758 		}
759 		resx->zone_offset = base_off;
760 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
761 		KKASSERT(resv == NULL);
762 		resv = resx;
763 		++hammer_count_reservations;
764 	}
765 
766 	hammer_unlock(&hmp->blkmap_lock);
767 
768 failed:
769 	if (buffer1)
770 		hammer_rel_buffer(buffer1, 0);
771 	if (buffer2)
772 		hammer_rel_buffer(buffer2, 0);
773 	hammer_rel_volume(root_volume, 0);
774 
775 	return(resv);
776 }
777 
778 static int
779 update_bytes_free(hammer_reserve_t resv, int bytes)
780 {
781 	int32_t temp;
782 
783 	/*
784 	 * Big-block underflow check
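	 *
	 * bytes_free may legally be somewhat negative due to de-dup, but if
	 * it has sunk so low that subtracting two more big-blocks wraps the
	 * signed 32-bit value (temp ends up greater than bytes_free) the
	 * update is refused instead of letting the counter wrap.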
785 	 */
786 	temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
787 	cpu_ccfence(); /* XXX do we really need it ? */
788 	if (temp > resv->bytes_free) {
789 		hdkprintf("BIGBLOCK UNDERFLOW\n");
790 		return (0);
791 	}
792 
793 	resv->bytes_free -= bytes;
794 	return (1);
795 }
796 
797 /*
798  * Dereference a reservation structure.  Upon the final release the
799  * underlying big-block is checked and if it is entirely free we delete
800  * any related HAMMER buffers to avoid potential conflicts with future
801  * reuse of the big-block.
802  */
803 void
804 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
805 {
806 	hammer_off_t base_offset;
807 	int error;
808 
809 	KKASSERT(resv->refs > 0);
810 	KKASSERT(hammer_is_zone_raw_buffer(resv->zone_offset));
811 
812 	/*
813 	 * Setting append_off to the max prevents any new allocations
814 	 * from occurring while we are trying to dispose of the reservation,
815 	 * allowing us to safely delete any related HAMMER buffers.
816 	 *
817 	 * If we are unable to clean out all related HAMMER buffers we
818 	 * requeue the delay.
819 	 */
820 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
821 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
822 		base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
823 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
824 			hammer_dedup_cache_inval(hmp, base_offset);
825 		error = hammer_del_buffers(hmp, base_offset,
826 					   resv->zone_offset,
827 					   HAMMER_BIGBLOCK_SIZE,
828 					   1);
829 		if (hammer_debug_general & 0x20000) {
830 			hkprintf("delbgblk %016jx error %d\n",
831 				(intmax_t)base_offset, error);
832 		}
833 		if (error)
834 			hammer_reserve_setdelay(hmp, resv);
835 	}
836 	if (--resv->refs == 0) {
837 		if (hammer_debug_general & 0x20000) {
838 			hkprintf("delresvr %016jx zone %02x\n",
839 				(intmax_t)resv->zone_offset, resv->zone);
840 		}
841 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
842 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
843 		kfree(resv, hmp->m_misc);
844 		--hammer_count_reservations;
845 	}
846 }
847 
848 /*
849  * Prevent a potentially free big-block from being reused until after
850  * the related flushes have completely cycled, otherwise crash recovery
851  * could resurrect a data block that was already reused and overwritten.
852  *
853  * The caller might reset the underlying layer2 entry's append_off to 0, so
854  * our covering append_off must be set to max to prevent any reallocation
855  * until after the flush delays complete, not to mention proper invalidation
856  * of any underlying cached blocks.
857  */
858 static void
859 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
860 			int zone, hammer_blockmap_layer2_t layer2)
861 {
862 	hammer_reserve_t resv;
863 
864 	/*
865 	 * Allocate the reservation if necessary.
866 	 *
867 	 * NOTE: need lock in future around resv lookup/allocation and
868 	 * the setdelay call; currently refs is not bumped until the call.
869 	 */
870 again:
871 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
872 	if (resv == NULL) {
873 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
874 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
875 		resv->zone = zone;
876 		resv->zone_offset = base_offset;
877 		resv->refs = 0;
878 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
879 
880 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
881 			resv->flags |= HAMMER_RESF_LAYER2FREE;
882 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
883 			kfree(resv, hmp->m_misc);
884 			goto again;
885 		}
886 		++hammer_count_reservations;
887 	} else {
888 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
889 			resv->flags |= HAMMER_RESF_LAYER2FREE;
890 	}
891 	hammer_reserve_setdelay(hmp, resv);
892 }
893 
894 /*
895  * Enter the reservation on the on-delay list, or move it if it
896  * is already on the list.
897  */
898 static void
899 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
900 {
901 	if (resv->flags & HAMMER_RESF_ONDELAY) {
902 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
903 		resv->flg_no = hmp->flusher.next + 1;
904 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
905 	} else {
906 		++resv->refs;
907 		++hmp->rsv_fromdelay;
908 		resv->flags |= HAMMER_RESF_ONDELAY;
909 		resv->flg_no = hmp->flusher.next + 1;
910 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
911 	}
912 }
913 
914 /*
915  * Reserve has reached its flush point, remove it from the delay list
916  * and finish it off.  hammer_blockmap_reserve_complete() inherits
917  * the ondelay reference.
918  */
919 void
920 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
921 {
922 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
923 	resv->flags &= ~HAMMER_RESF_ONDELAY;
924 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
925 	--hmp->rsv_fromdelay;
926 	hammer_blockmap_reserve_complete(hmp, resv);
927 }
928 
929 /*
930  * Backend function - free (offset, bytes) in a zone.
931  *
932  * XXX error return
933  */
934 void
935 hammer_blockmap_free(hammer_transaction_t trans,
936 		     hammer_off_t zone_offset, int bytes)
937 {
938 	hammer_mount_t hmp;
939 	hammer_volume_t root_volume;
940 	hammer_blockmap_t freemap;
941 	hammer_blockmap_layer1_t layer1;
942 	hammer_blockmap_layer2_t layer2;
943 	hammer_buffer_t buffer1 = NULL;
944 	hammer_buffer_t buffer2 = NULL;
945 	hammer_off_t layer1_offset;
946 	hammer_off_t layer2_offset;
947 	hammer_off_t base_off;
948 	int error;
949 	int zone;
950 
951 	if (bytes == 0)
952 		return;
953 	hmp = trans->hmp;
954 
955 	/*
956 	 * Alignment
957 	 */
958 	bytes = HAMMER_DATA_DOALIGN(bytes);
959 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
960 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
961 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
962 
963 	/*
964 	 * Basic zone validation & locking
965 	 */
966 	zone = HAMMER_ZONE_DECODE(zone_offset);
967 	KKASSERT(hammer_is_index_record(zone));
968 	root_volume = trans->rootvol;
969 	error = 0;
970 
971 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
972 
973 	/*
974 	 * Dive layer 1.
975 	 */
976 	layer1_offset = freemap->phys_offset +
977 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
978 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
979 	if (error)
980 		goto failed;
981 	KKASSERT(layer1->phys_offset &&
982 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
983 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
984 		hammer_lock_ex(&hmp->blkmap_lock);
985 		if (!hammer_crc_test_layer1(hmp->version, layer1))
986 			hpanic("CRC FAILED: LAYER1");
987 		hammer_unlock(&hmp->blkmap_lock);
988 	}
989 
990 	/*
991 	 * Dive layer 2, each entry represents a big-block.
992 	 */
993 	layer2_offset = layer1->phys_offset +
994 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
995 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
996 	if (error)
997 		goto failed;
998 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
999 		hammer_lock_ex(&hmp->blkmap_lock);
1000 		if (!hammer_crc_test_layer2(hmp->version, layer2))
1001 			hpanic("CRC FAILED: LAYER2");
1002 		hammer_unlock(&hmp->blkmap_lock);
1003 	}
1004 
1005 	hammer_lock_ex(&hmp->blkmap_lock);
1006 
1007 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1008 
1009 	/*
1010 	 * Free space previously allocated via blockmap_alloc().
1011 	 *
1012 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1013 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1014 	 */
1015 	KKASSERT(layer2->zone == zone);
1016 	layer2->bytes_free += bytes;
1017 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1018 
1019 	/*
1020 	 * If a big-block becomes entirely free we must create a covering
1021 	 * reservation to prevent premature reuse.  Note, however, that
1022 	 * the big-block and/or reservation may still have an append_off
1023 	 * that allows further (non-reused) allocations.
1024 	 *
1025 	 * Once the reservation has been made we re-check layer2 and if
1026 	 * the big-block is still entirely free we reset the layer2 entry.
1027 	 * The reservation will prevent premature reuse.
1028 	 *
1029 	 * NOTE: hammer_buffer's are only invalidated when the reservation
1030 	 * is completed, if the layer2 entry is still completely free at
1031 	 * that time.  Any allocations from the reservation that may have
1032 	 * occurred in the meantime, or active references on the reservation
1033 	 * from new pending allocations, will prevent the invalidation from
1034 	 * occurring.
1035 	 */
1036 	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1037 		base_off = hammer_xlate_to_zone2(zone_offset &
1038 						~HAMMER_BIGBLOCK_MASK64);
1039 
1040 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1041 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1042 			layer2->zone = 0;
1043 			layer2->append_off = 0;
1044 			hammer_modify_buffer(trans, buffer1,
1045 					     layer1, sizeof(*layer1));
1046 			++layer1->blocks_free;
1047 			hammer_crc_set_layer1(hmp->version, layer1);
1048 			hammer_modify_buffer_done(buffer1);
1049 			hammer_modify_volume_field(trans,
1050 					trans->rootvol,
1051 					vol0_stat_freebigblocks);
1052 			++root_volume->ondisk->vol0_stat_freebigblocks;
1053 			hmp->copy_stat_freebigblocks =
1054 			   root_volume->ondisk->vol0_stat_freebigblocks;
1055 			hammer_modify_volume_done(trans->rootvol);
1056 		}
1057 	}
1058 	hammer_crc_set_layer2(hmp->version, layer2);
1059 	hammer_modify_buffer_done(buffer2);
1060 	hammer_unlock(&hmp->blkmap_lock);
1061 
1062 failed:
1063 	if (buffer1)
1064 		hammer_rel_buffer(buffer1, 0);
1065 	if (buffer2)
1066 		hammer_rel_buffer(buffer2, 0);
1067 }
1068 
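/*
 * Account for a de-dup reference against (zone_offset, bytes) by charging
 * the bytes against the big-block's bytes_free.  bytes_free may legally
 * go negative; ERANGE is returned if the decrement would wrap the 32-bit
 * counter.
 */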
1069 int
1070 hammer_blockmap_dedup(hammer_transaction_t trans,
1071 		     hammer_off_t zone_offset, int bytes)
1072 {
1073 	hammer_mount_t hmp;
1074 	hammer_blockmap_t freemap;
1075 	hammer_blockmap_layer1_t layer1;
1076 	hammer_blockmap_layer2_t layer2;
1077 	hammer_buffer_t buffer1 = NULL;
1078 	hammer_buffer_t buffer2 = NULL;
1079 	hammer_off_t layer1_offset;
1080 	hammer_off_t layer2_offset;
1081 	int32_t temp;
1082 	int error;
1083 	int zone __debugvar;
1084 
1085 	if (bytes == 0)
1086 		return (0);
1087 	hmp = trans->hmp;
1088 
1089 	/*
1090 	 * Alignment
1091 	 */
1092 	bytes = HAMMER_DATA_DOALIGN(bytes);
1093 	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
1094 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1095 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
1096 
1097 	/*
1098 	 * Basic zone validation & locking
1099 	 */
1100 	zone = HAMMER_ZONE_DECODE(zone_offset);
1101 	KKASSERT(hammer_is_index_record(zone));
1102 	error = 0;
1103 
1104 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1105 
1106 	/*
1107 	 * Dive layer 1.
1108 	 */
1109 	layer1_offset = freemap->phys_offset +
1110 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1111 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1112 	if (error)
1113 		goto failed;
1114 	KKASSERT(layer1->phys_offset &&
1115 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1116 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1117 		hammer_lock_ex(&hmp->blkmap_lock);
1118 		if (!hammer_crc_test_layer1(hmp->version, layer1))
1119 			hpanic("CRC FAILED: LAYER1");
1120 		hammer_unlock(&hmp->blkmap_lock);
1121 	}
1122 
1123 	/*
1124 	 * Dive layer 2, each entry represents a big-block.
1125 	 */
1126 	layer2_offset = layer1->phys_offset +
1127 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1128 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1129 	if (error)
1130 		goto failed;
1131 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1132 		hammer_lock_ex(&hmp->blkmap_lock);
1133 		if (!hammer_crc_test_layer2(hmp->version, layer2))
1134 			hpanic("CRC FAILED: LAYER2");
1135 		hammer_unlock(&hmp->blkmap_lock);
1136 	}
1137 
1138 	hammer_lock_ex(&hmp->blkmap_lock);
1139 
1140 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1141 
1142 	/*
1143 	 * Account for the de-dup'd bytes by charging them against bytes_free.
1144 	 *
1145 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1146 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1147 	 */
1148 	KKASSERT(layer2->zone == zone);
1149 	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
1150 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1151 	if (temp > layer2->bytes_free) {
1152 		error = ERANGE;
1153 		goto underflow;
1154 	}
1155 	layer2->bytes_free -= bytes;
1156 
1157 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1158 
1159 	hammer_crc_set_layer2(hmp->version, layer2);
1160 underflow:
1161 	hammer_modify_buffer_done(buffer2);
1162 	hammer_unlock(&hmp->blkmap_lock);
1163 
1164 failed:
1165 	if (buffer1)
1166 		hammer_rel_buffer(buffer1, 0);
1167 	if (buffer2)
1168 		hammer_rel_buffer(buffer2, 0);
1169 	return (error);
1170 }
1171 
1172 /*
1173  * Backend function - finalize (offset, bytes) in a zone.
1174  *
1175  * Allocate space that was previously reserved by the frontend.
1176  */
1177 int
1178 hammer_blockmap_finalize(hammer_transaction_t trans,
1179 			 hammer_reserve_t resv,
1180 			 hammer_off_t zone_offset, int bytes)
1181 {
1182 	hammer_mount_t hmp;
1183 	hammer_volume_t root_volume;
1184 	hammer_blockmap_t freemap;
1185 	hammer_blockmap_layer1_t layer1;
1186 	hammer_blockmap_layer2_t layer2;
1187 	hammer_buffer_t buffer1 = NULL;
1188 	hammer_buffer_t buffer2 = NULL;
1189 	hammer_off_t layer1_offset;
1190 	hammer_off_t layer2_offset;
1191 	int error;
1192 	int zone;
1193 	int offset;
1194 
1195 	if (bytes == 0)
1196 		return(0);
1197 	hmp = trans->hmp;
1198 
1199 	/*
1200 	 * Alignment
1201 	 */
1202 	bytes = HAMMER_DATA_DOALIGN(bytes);
1203 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1204 
1205 	/*
1206 	 * Basic zone validation & locking
1207 	 */
1208 	zone = HAMMER_ZONE_DECODE(zone_offset);
1209 	KKASSERT(hammer_is_index_record(zone));
1210 	root_volume = trans->rootvol;
1211 	error = 0;
1212 
1213 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1214 
1215 	/*
1216 	 * Dive layer 1.
1217 	 */
1218 	layer1_offset = freemap->phys_offset +
1219 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1220 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1221 	if (error)
1222 		goto failed;
1223 	KKASSERT(layer1->phys_offset &&
1224 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1225 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1226 		hammer_lock_ex(&hmp->blkmap_lock);
1227 		if (!hammer_crc_test_layer1(hmp->version, layer1))
1228 			hpanic("CRC FAILED: LAYER1");
1229 		hammer_unlock(&hmp->blkmap_lock);
1230 	}
1231 
1232 	/*
1233 	 * Dive layer 2, each entry represents a big-block.
1234 	 */
1235 	layer2_offset = layer1->phys_offset +
1236 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1237 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1238 	if (error)
1239 		goto failed;
1240 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1241 		hammer_lock_ex(&hmp->blkmap_lock);
1242 		if (!hammer_crc_test_layer2(hmp->version, layer2))
1243 			hpanic("CRC FAILED: LAYER2");
1244 		hammer_unlock(&hmp->blkmap_lock);
1245 	}
1246 
1247 	hammer_lock_ex(&hmp->blkmap_lock);
1248 
1249 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1250 
1251 	/*
1252 	 * Finalize some or all of the space covered by a current
1253 	 * reservation.  An allocation in the same layer may have
1254 	 * already assigned ownership.
1255 	 */
1256 	if (layer2->zone == 0) {
1257 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
1258 		--layer1->blocks_free;
1259 		hammer_crc_set_layer1(hmp->version, layer1);
1260 		hammer_modify_buffer_done(buffer1);
1261 		layer2->zone = zone;
1262 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1263 		KKASSERT(layer2->append_off == 0);
1264 		hammer_modify_volume_field(trans,
1265 				trans->rootvol,
1266 				vol0_stat_freebigblocks);
1267 		--root_volume->ondisk->vol0_stat_freebigblocks;
1268 		hmp->copy_stat_freebigblocks =
1269 		   root_volume->ondisk->vol0_stat_freebigblocks;
1270 		hammer_modify_volume_done(trans->rootvol);
1271 	}
1272 	if (layer2->zone != zone)
1273 		hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1274 	KKASSERT(layer2->zone == zone);
1275 	KKASSERT(bytes != 0);
1276 	layer2->bytes_free -= bytes;
1277 
1278 	if (resv) {
1279 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1280 	}
1281 
1282 	/*
1283 	 * Finalizations can occur out of order, or combined with allocations.
1284 	 * append_off must be set to the highest allocated offset.
1285 	 */
1286 	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1287 	if (layer2->append_off < offset)
1288 		layer2->append_off = offset;
1289 
1290 	hammer_crc_set_layer2(hmp->version, layer2);
1291 	hammer_modify_buffer_done(buffer2);
1292 	hammer_unlock(&hmp->blkmap_lock);
1293 
1294 failed:
1295 	if (buffer1)
1296 		hammer_rel_buffer(buffer1, 0);
1297 	if (buffer2)
1298 		hammer_rel_buffer(buffer2, 0);
1299 	return(error);
1300 }
1301 
1302 /*
1303  * Return the approximate number of free bytes in the big-block
1304  * containing the specified blockmap offset.
1305  *
1306  * WARNING: A negative number can be returned if data de-dup exists,
1307  *	    and the result will also not represent the actual number
1308  *	    of free bytes in this case.
1309  *
1310  *	    This code is used only by the reblocker.
1311  */
1312 int
1313 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1314 			int *curp, int *errorp)
1315 {
1316 	hammer_volume_t root_volume;
1317 	hammer_blockmap_t blockmap;
1318 	hammer_blockmap_t freemap;
1319 	hammer_blockmap_layer1_t layer1;
1320 	hammer_blockmap_layer2_t layer2;
1321 	hammer_buffer_t buffer = NULL;
1322 	hammer_off_t layer1_offset;
1323 	hammer_off_t layer2_offset;
1324 	int32_t bytes;
1325 	int zone;
1326 
1327 	zone = HAMMER_ZONE_DECODE(zone_offset);
1328 	KKASSERT(hammer_is_index_record(zone));
1329 	root_volume = hammer_get_root_volume(hmp, errorp);
1330 	if (*errorp) {
1331 		*curp = 0;
1332 		return(0);
1333 	}
1334 	blockmap = &hmp->blockmap[zone];
1335 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1336 
1337 	/*
1338 	 * Dive layer 1.
1339 	 */
1340 	layer1_offset = freemap->phys_offset +
1341 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1342 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1343 	if (*errorp) {
1344 		*curp = 0;
1345 		bytes = 0;
1346 		goto failed;
1347 	}
1348 	KKASSERT(layer1->phys_offset);
1349 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1350 		hammer_lock_ex(&hmp->blkmap_lock);
1351 		if (!hammer_crc_test_layer1(hmp->version, layer1))
1352 			hpanic("CRC FAILED: LAYER1");
1353 		hammer_unlock(&hmp->blkmap_lock);
1354 	}
1355 
1356 	/*
1357 	 * Dive layer 2, each entry represents a big-block.
1358 	 *
1359 	 * (reuse buffer, layer1 pointer becomes invalid)
1360 	 */
1361 	layer2_offset = layer1->phys_offset +
1362 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1363 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1364 	if (*errorp) {
1365 		*curp = 0;
1366 		bytes = 0;
1367 		goto failed;
1368 	}
1369 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1370 		hammer_lock_ex(&hmp->blkmap_lock);
1371 		if (!hammer_crc_test_layer2(hmp->version, layer2))
1372 			hpanic("CRC FAILED: LAYER2");
1373 		hammer_unlock(&hmp->blkmap_lock);
1374 	}
1375 	KKASSERT(layer2->zone == zone);
1376 
1377 	bytes = layer2->bytes_free;
1378 
1379 	/*
1380 	 * *curp becomes 1 only when there is no error and
1381 	 * next_offset and zone_offset are in the same big-block.
1382 	 */
1383 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1384 		*curp = 0;  /* not same */
1385 	else
1386 		*curp = 1;
1387 failed:
1388 	if (buffer)
1389 		hammer_rel_buffer(buffer, 0);
1390 	hammer_rel_volume(root_volume, 0);
1391 	if (hammer_debug_general & 0x4000) {
1392 		hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset, bytes);
1393 	}
1394 	return(bytes);
1395 }
1396 
1397 
1398 /*
1399  * Lookup a blockmap offset and verify blockmap layers.
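 *
 * Translates a zone-X offset into its raw zone-2 equivalent while sanity
 * checking the freemap: the layer1 and layer2 CRCs must pass and the
 * big-block must be owned by the expected zone (or still be covered by a
 * reservation, typically because the layer2 entry has not been finalized
 * yet).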
1400  */
1401 hammer_off_t
1402 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1403 			int *errorp)
1404 {
1405 	hammer_volume_t root_volume;
1406 	hammer_blockmap_t freemap;
1407 	hammer_blockmap_layer1_t layer1;
1408 	hammer_blockmap_layer2_t layer2;
1409 	hammer_buffer_t buffer = NULL;
1410 	hammer_off_t layer1_offset;
1411 	hammer_off_t layer2_offset;
1412 	hammer_off_t result_offset;
1413 	hammer_off_t base_off;
1414 	hammer_reserve_t resv __debugvar;
1415 	int zone;
1416 
1417 	/*
1418 	 * Calculate the zone-2 offset.
1419 	 */
1420 	zone = HAMMER_ZONE_DECODE(zone_offset);
1421 	result_offset = hammer_xlate_to_zone2(zone_offset);
1422 
1423 	/*
1424 	 * Validate the allocation zone
1425 	 */
1426 	root_volume = hammer_get_root_volume(hmp, errorp);
1427 	if (*errorp)
1428 		return(0);
1429 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1430 	KKASSERT(freemap->phys_offset != 0);
1431 
1432 	/*
1433 	 * Dive layer 1.
1434 	 */
1435 	layer1_offset = freemap->phys_offset +
1436 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1437 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1438 	if (*errorp)
1439 		goto failed;
1440 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1441 	if (!hammer_crc_test_layer1(hmp->version, layer1)) {
1442 		hammer_lock_ex(&hmp->blkmap_lock);
1443 		if (!hammer_crc_test_layer1(hmp->version, layer1))
1444 			hpanic("CRC FAILED: LAYER1");
1445 		hammer_unlock(&hmp->blkmap_lock);
1446 	}
1447 
1448 	/*
1449 	 * Dive layer 2, each entry represents a big-block.
1450 	 */
1451 	layer2_offset = layer1->phys_offset +
1452 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1453 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1454 
1455 	if (*errorp)
1456 		goto failed;
1457 	if (layer2->zone == 0) {
1458 		base_off = hammer_xlate_to_zone2(zone_offset &
1459 						~HAMMER_BIGBLOCK_MASK64);
1460 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1461 				 base_off);
1462 		KKASSERT(resv && resv->zone == zone);
1463 
1464 	} else if (layer2->zone != zone) {
1465 		hpanic("bad zone %d/%d", layer2->zone, zone);
1466 	}
1467 	if (!hammer_crc_test_layer2(hmp->version, layer2)) {
1468 		hammer_lock_ex(&hmp->blkmap_lock);
1469 		if (!hammer_crc_test_layer2(hmp->version, layer2))
1470 			hpanic("CRC FAILED: LAYER2");
1471 		hammer_unlock(&hmp->blkmap_lock);
1472 	}
1473 
1474 failed:
1475 	if (buffer)
1476 		hammer_rel_buffer(buffer, 0);
1477 	hammer_rel_volume(root_volume, 0);
1478 	if (hammer_debug_general & 0x0800) {
1479 		hdkprintf("%016jx -> %016jx\n",
1480 			(intmax_t)zone_offset, (intmax_t)result_offset);
1481 	}
1482 	return(result_offset);
1483 }
1484 
1485 
1486 /*
1487  * Check space availability
1488  *
1489  * MPSAFE - does not require fs_token
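 *
 * The estimate sums the approximate media cost of pending inodes, records
 * and data, big-blocks held on the reuse-delay list, the dirty buffer
 * limit and the caller-supplied slop (in big-blocks), then compares it
 * against the cached count of free big-blocks.  Returns 0 if space
 * appears sufficient, otherwise ENOSPC.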
1490  */
1491 int
1492 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1493 {
1494 	const int in_size = sizeof(struct hammer_inode_data) +
1495 			    sizeof(union hammer_btree_elm);
1496 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1497 	int64_t usedbytes;
1498 
1499 	usedbytes = hmp->rsv_inodes * in_size +
1500 		    hmp->rsv_recs * rec_size +
1501 		    hmp->rsv_databytes +
1502 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1503 		    ((int64_t)hammer_limit_dirtybufspace) +
1504 		    (slop << HAMMER_BIGBLOCK_BITS);
1505 
1506 	if (resp)
1507 		*resp = usedbytes;
1508 
1509 	if (hmp->copy_stat_freebigblocks >=
1510 	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1511 		return(0);
1512 	}
1513 
1514 	return (ENOSPC);
1515 }
1516 
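/*
 * Check that the layer1 entry covering *offsetp is physically available.
 * If it is marked HAMMER_BLOCKMAP_UNAVAIL (no more layer1 space on the
 * current volume) advance *offsetp to the next volume via
 * hammer_skip_volume().  Returns 0 on success or an I/O error code.
 */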
1517 static int
1518 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1519 {
1520 	hammer_blockmap_t freemap;
1521 	hammer_blockmap_layer1_t layer1;
1522 	hammer_buffer_t buffer1 = NULL;
1523 	hammer_off_t layer1_offset;
1524 	int error = 0;
1525 
1526 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1527 
1528 	layer1_offset = freemap->phys_offset +
1529 			HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
1530 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1531 	if (error)
1532 		goto end;
1533 
1534 	/*
1535 	 * No more physically available space in layer1s
1536 	 * of the current volume, go to the next volume.
1537 	 */
1538 	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
1539 		hammer_skip_volume(offsetp);
1540 end:
1541 	if (buffer1)
1542 		hammer_rel_buffer(buffer1, 0);
1543 	return(error);
1544 }
1545 
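/*
 * Advance *offsetp to big-block 0 of the next volume within the same zone.
 * Wrapping past the last volume yields the zone+1 sentinel offset which
 * callers detect as an end-of-zone wrap.
 */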
1546 static void
1547 hammer_skip_volume(hammer_off_t *offsetp)
1548 {
1549 	hammer_off_t offset;
1550 	int zone, vol_no;
1551 
1552 	offset = *offsetp;
1553 	zone = HAMMER_ZONE_DECODE(offset);
1554 	vol_no = HAMMER_VOL_DECODE(offset) + 1;
1555 	KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1556 
1557 	if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
1558 		vol_no = 0;
1559 		++zone;
1560 	}
1561 
1562 	*offsetp = HAMMER_ENCODE(zone, vol_no, 0);
1563 }
1564