xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision fcf53d9b)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36 
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 				    hammer_off_t base_offset, int zone,
45 				    struct hammer_blockmap_layer2 *layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48 
49 /*
50  * Reserved big-blocks red-black tree support
51  */
52 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
53 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
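/*
 * The tree is keyed on each reservation's zone_offset, which holds the
 * zone-2 (raw buffer) base address of the reserved big-block, so both the
 * frontend reservation code and the backend allocator can locate an
 * in-flight reservation for a given big-block with a single RB_LOOKUP.
 */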
54 
55 static int
56 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
57 {
58 	if (res1->zone_offset < res2->zone_offset)
59 		return(-1);
60 	if (res1->zone_offset > res2->zone_offset)
61 		return(1);
62 	return(0);
63 }
64 
65 /*
66  * Allocate bytes from a zone
67  */
68 hammer_off_t
69 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
70 		      hammer_off_t hint, int *errorp)
71 {
72 	hammer_mount_t hmp;
73 	hammer_volume_t root_volume;
74 	hammer_blockmap_t blockmap;
75 	hammer_blockmap_t freemap;
76 	hammer_reserve_t resv;
77 	struct hammer_blockmap_layer1 *layer1;
78 	struct hammer_blockmap_layer2 *layer2;
79 	hammer_buffer_t buffer1 = NULL;
80 	hammer_buffer_t buffer2 = NULL;
81 	hammer_buffer_t buffer3 = NULL;
82 	hammer_off_t tmp_offset;
83 	hammer_off_t next_offset;
84 	hammer_off_t result_offset;
85 	hammer_off_t layer1_offset;
86 	hammer_off_t layer2_offset;
87 	hammer_off_t base_off;
88 	int loops = 0;
89 	int offset;		/* offset within big-block */
90 	int use_hint;
91 
92 	hmp = trans->hmp;
93 
94 	/*
95 	 * Deal with alignment and buffer-boundary issues.
96 	 *
97 	 * Be careful, certain primary alignments are used below to allocate
98 	 * new blockmap blocks.
99 	 */
100 	bytes = (bytes + 15) & ~15;
101 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
102 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
103 
104 	/*
105 	 * Setup
106 	 */
107 	root_volume = trans->rootvol;
108 	*errorp = 0;
109 	blockmap = &hmp->blockmap[zone];
110 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
111 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
112 
113 	/*
114 	 * Use the hint if we have one.
115 	 */
116 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
117 		next_offset = (hint + 15) & ~(hammer_off_t)15;
118 		use_hint = 1;
119 	} else {
120 		next_offset = blockmap->next_offset;
121 		use_hint = 0;
122 	}
123 again:
124 
125 	/*
126 	 * use_hint is turned off if we leave the hinted big-block.
127 	 */
128 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
129 		next_offset = blockmap->next_offset;
130 		use_hint = 0;
131 	}
132 
133 	/*
134 	 * Check for wrap
135 	 */
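	/*
	 * Reaching the base of the next zone means next_offset ran off the
	 * end of this zone's address space.  We wrap back to the start of
	 * the zone once; hitting the end a second time means the whole zone
	 * was scanned without finding space.
	 */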
136 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
137 		if (++loops == 2) {
138 			result_offset = 0;
139 			*errorp = ENOSPC;
140 			goto failed;
141 		}
142 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
143 	}
144 
145 	/*
146 	 * The allocation request may not cross a buffer boundary.  Special
147 	 * large allocations must not cross a large-block boundary.
148 	 */
149 	tmp_offset = next_offset + bytes - 1;
150 	if (bytes <= HAMMER_BUFSIZE) {
151 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
152 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
153 			goto again;
154 		}
155 	} else {
156 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
157 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
158 			goto again;
159 		}
160 	}
161 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
162 
163 	/*
164 	 * Dive layer 1.
165 	 */
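	/*
	 * The freemap is a two-level radix structure: the layer1 entry
	 * selected here covers HAMMER_BLOCKMAP_LAYER2 bytes of zone address
	 * space and points at a layer2 array in which each entry describes
	 * a single big-block.
	 */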
166 	layer1_offset = freemap->phys_offset +
167 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
168 
169 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
170 	if (*errorp) {
171 		result_offset = 0;
172 		goto failed;
173 	}
174 
175 	/*
176 	 * Check CRC.
177 	 */
178 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
179 		hammer_lock_ex(&hmp->blkmap_lock);
180 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
181 			panic("CRC FAILED: LAYER1");
182 		hammer_unlock(&hmp->blkmap_lock);
183 	}
184 
185 	/*
186 	 * If we are at a big-block boundary and layer1 indicates no
187 	 * free big-blocks, then we cannot allocate a new big-block in
188 	 * layer2, so skip to the next layer1 entry.
189 	 */
190 	if (offset == 0 && layer1->blocks_free == 0) {
191 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
192 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
193 		goto again;
194 	}
195 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
196 
197 	/*
198 	 * Skip this layer1 entry if it is pointing to a layer2 big-block
199 	 * on a volume that we are currently trying to remove from the
200 	 * file-system. This is used by the volume-del code together with
201 	 * the reblocker to free up a volume.
202 	 */
203 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
204 	    hmp->volume_to_remove) {
205 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
206 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
207 		goto again;
208 	}
209 
210 	/*
211 	 * Dive layer 2, each entry represents a large-block.
212 	 */
213 	layer2_offset = layer1->phys_offset +
214 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
215 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
216 	if (*errorp) {
217 		result_offset = 0;
218 		goto failed;
219 	}
220 
221 	/*
222 	 * Check CRC.  This can race another thread holding the lock
223 	 * and in the middle of modifying layer2.
224 	 */
225 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
226 		hammer_lock_ex(&hmp->blkmap_lock);
227 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
228 			panic("CRC FAILED: LAYER2");
229 		hammer_unlock(&hmp->blkmap_lock);
230 	}
231 
232 	/*
233 	 * Skip the layer if the zone is owned by someone other than us.
234 	 */
235 	if (layer2->zone && layer2->zone != zone) {
236 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
237 		goto again;
238 	}
239 	if (offset < layer2->append_off) {
240 		next_offset += layer2->append_off - offset;
241 		goto again;
242 	}
243 
244 #if 0
245 	/*
246 	 * If operating in the current non-hint blockmap block, do not
247 	 * allow it to get over-full.  Also drop any active hinting so
248 	 * blockmap->next_offset is updated at the end.
249 	 *
250 	 * We do this for B-Tree and meta-data allocations to provide
251 	 * localization for updates.
252 	 */
253 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
254 	     zone == HAMMER_ZONE_META_INDEX) &&
255 	    offset >= HAMMER_LARGEBLOCK_OVERFILL &&
256 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
257 	) {
258 		if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
259 			next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
260 			use_hint = 0;
261 			goto again;
262 		}
263 	}
264 #endif
265 
266 	/*
267 	 * We need the lock from this point on.  We have to re-check zone
268 	 * ownership after acquiring the lock and also check for reservations.
269 	 */
270 	hammer_lock_ex(&hmp->blkmap_lock);
271 
272 	if (layer2->zone && layer2->zone != zone) {
273 		hammer_unlock(&hmp->blkmap_lock);
274 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
275 		goto again;
276 	}
277 	if (offset < layer2->append_off) {
278 		hammer_unlock(&hmp->blkmap_lock);
279 		next_offset += layer2->append_off - offset;
280 		goto again;
281 	}
282 
283 	/*
284 	 * The big-block might be reserved by another zone.  If it is reserved
285 	 * by our zone we may have to move next_offset past its append_off.
286 	 */
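	/*
	 * base_off strips the zone bits and the offset within the big-block
	 * from next_offset and re-encodes the result as a zone-2 (raw
	 * buffer) address, which is the key the reservation tree is
	 * indexed on.
	 */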
287 	base_off = (next_offset &
288 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
289 		    HAMMER_ZONE_RAW_BUFFER;
290 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
291 	if (resv) {
292 		if (resv->zone != zone) {
293 			hammer_unlock(&hmp->blkmap_lock);
294 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
295 				      ~HAMMER_LARGEBLOCK_MASK64;
296 			goto again;
297 		}
298 		if (offset < resv->append_off) {
299 			hammer_unlock(&hmp->blkmap_lock);
300 			next_offset += resv->append_off - offset;
301 			goto again;
302 		}
303 		++resv->refs;
304 	}
305 
306 	/*
307 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
308 	 * of the layer for real.  At this point we've validated any
309 	 * reservation that might exist and can just ignore resv.
310 	 */
311 	if (layer2->zone == 0) {
312 		/*
313 		 * Assign the bigblock to our zone
314 		 */
315 		hammer_modify_buffer(trans, buffer1,
316 				     layer1, sizeof(*layer1));
317 		--layer1->blocks_free;
318 		layer1->layer1_crc = crc32(layer1,
319 					   HAMMER_LAYER1_CRCSIZE);
320 		hammer_modify_buffer_done(buffer1);
321 		hammer_modify_buffer(trans, buffer2,
322 				     layer2, sizeof(*layer2));
323 		layer2->zone = zone;
324 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
325 		KKASSERT(layer2->append_off == 0);
326 		hammer_modify_volume_field(trans, trans->rootvol,
327 					   vol0_stat_freebigblocks);
328 		--root_volume->ondisk->vol0_stat_freebigblocks;
329 		hmp->copy_stat_freebigblocks =
330 			root_volume->ondisk->vol0_stat_freebigblocks;
331 		hammer_modify_volume_done(trans->rootvol);
332 	} else {
333 		hammer_modify_buffer(trans, buffer2,
334 				     layer2, sizeof(*layer2));
335 	}
336 	KKASSERT(layer2->zone == zone);
337 
338 	/*
339 	 * NOTE: bytes_free can legally go negative due to de-dup.
340 	 */
341 	layer2->bytes_free -= bytes;
342 	KKASSERT(layer2->append_off <= offset);
343 	layer2->append_off = offset + bytes;
344 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
345 	hammer_modify_buffer_done(buffer2);
346 
347 	/*
348 	 * We hold the blockmap lock and should be the only ones
349 	 * capable of modifying resv->append_off.  Track the allocation
350 	 * as appropriate.
351 	 */
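	/*
	 * The ref taken on the reservation earlier keeps it from being torn
	 * down while we push its append_off past our allocation;
	 * hammer_blockmap_reserve_complete() below drops that ref again.
	 */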
352 	KKASSERT(bytes != 0);
353 	if (resv) {
354 		KKASSERT(resv->append_off <= offset);
355 		resv->append_off = offset + bytes;
356 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
357 		hammer_blockmap_reserve_complete(hmp, resv);
358 	}
359 
360 	/*
361 	 * If we are allocating from the base of a new buffer we can avoid
362 	 * a disk read by calling hammer_bnew().
363 	 */
364 	if ((next_offset & HAMMER_BUFMASK) == 0) {
365 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
366 				errorp, &buffer3);
367 	}
368 	result_offset = next_offset;
369 
370 	/*
371 	 * If we weren't supplied with a hint or could not use the hint
372 	 * then we wound up using blockmap->next_offset as the hint and
373 	 * need to save it.
374 	 */
375 	if (use_hint == 0) {
376 		hammer_modify_volume(NULL, root_volume, NULL, 0);
377 		blockmap->next_offset = next_offset + bytes;
378 		hammer_modify_volume_done(root_volume);
379 	}
380 	hammer_unlock(&hmp->blkmap_lock);
381 failed:
382 
383 	/*
384 	 * Cleanup
385 	 */
386 	if (buffer1)
387 		hammer_rel_buffer(buffer1, 0);
388 	if (buffer2)
389 		hammer_rel_buffer(buffer2, 0);
390 	if (buffer3)
391 		hammer_rel_buffer(buffer3, 0);
392 
393 	return(result_offset);
394 }
395 
396 /*
397  * Frontend function - Reserve bytes in a zone.
398  *
399  * This code reserves bytes out of a blockmap without committing to any
400  * meta-data modifications, allowing the front-end to directly issue disk
401  * write I/O for large blocks of data.
402  *
403  * The backend later finalizes the reservation with hammer_blockmap_finalize()
404  * upon committing the related record.
405  */
406 hammer_reserve_t
407 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
408 			hammer_off_t *zone_offp, int *errorp)
409 {
410 	hammer_volume_t root_volume;
411 	hammer_blockmap_t blockmap;
412 	hammer_blockmap_t freemap;
413 	struct hammer_blockmap_layer1 *layer1;
414 	struct hammer_blockmap_layer2 *layer2;
415 	hammer_buffer_t buffer1 = NULL;
416 	hammer_buffer_t buffer2 = NULL;
417 	hammer_buffer_t buffer3 = NULL;
418 	hammer_off_t tmp_offset;
419 	hammer_off_t next_offset;
420 	hammer_off_t layer1_offset;
421 	hammer_off_t layer2_offset;
422 	hammer_off_t base_off;
423 	hammer_reserve_t resv;
424 	hammer_reserve_t resx;
425 	int loops = 0;
426 	int offset;
427 
428 	/*
429 	 * Setup
430 	 */
431 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
432 	root_volume = hammer_get_root_volume(hmp, errorp);
433 	if (*errorp)
434 		return(NULL);
435 	blockmap = &hmp->blockmap[zone];
436 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
437 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
438 
439 	/*
440 	 * Deal with alignment and buffer-boundary issues.
441 	 *
442 	 * Be careful, certain primary alignments are used below to allocate
443 	 * new blockmap blocks.
444 	 */
445 	bytes = (bytes + 15) & ~15;
446 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
447 
448 	next_offset = blockmap->next_offset;
449 again:
450 	resv = NULL;
451 	/*
452 	 * Check for wrap
453 	 */
454 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
455 		if (++loops == 2) {
456 			*errorp = ENOSPC;
457 			goto failed;
458 		}
459 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
460 	}
461 
462 	/*
463 	 * The allocation request may not cross a buffer boundary.  Special
464 	 * large allocations must not cross a large-block boundary.
465 	 */
466 	tmp_offset = next_offset + bytes - 1;
467 	if (bytes <= HAMMER_BUFSIZE) {
468 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
469 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
470 			goto again;
471 		}
472 	} else {
473 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
474 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
475 			goto again;
476 		}
477 	}
478 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
479 
480 	/*
481 	 * Dive layer 1.
482 	 */
483 	layer1_offset = freemap->phys_offset +
484 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
485 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
486 	if (*errorp)
487 		goto failed;
488 
489 	/*
490 	 * Check CRC.
491 	 */
492 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
493 		hammer_lock_ex(&hmp->blkmap_lock);
494 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
495 			panic("CRC FAILED: LAYER1");
496 		hammer_unlock(&hmp->blkmap_lock);
497 	}
498 
499 	/*
500 	 * If we are at a big-block boundary and layer1 indicates no
501 	 * free big-blocks, then we cannot allocate a new big-block in
502 	 * layer2, so skip to the next layer1 entry.
503 	 */
504 	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
505 	    layer1->blocks_free == 0) {
506 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
507 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
508 		goto again;
509 	}
510 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
511 
512 	/*
513 	 * Dive layer 2, each entry represents a large-block.
514 	 */
515 	layer2_offset = layer1->phys_offset +
516 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
517 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
518 	if (*errorp)
519 		goto failed;
520 
521 	/*
522 	 * Check CRC if not allocating into uninitialized space (which we
523 	 * aren't when reserving space).
524 	 */
525 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
526 		hammer_lock_ex(&hmp->blkmap_lock);
527 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
528 			panic("CRC FAILED: LAYER2");
529 		hammer_unlock(&hmp->blkmap_lock);
530 	}
531 
532 	/*
533 	 * Skip the layer if the zone is owned by someone other than us.
534 	 */
535 	if (layer2->zone && layer2->zone != zone) {
536 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
537 		goto again;
538 	}
539 	if (offset < layer2->append_off) {
540 		next_offset += layer2->append_off - offset;
541 		goto again;
542 	}
543 
544 	/*
545 	 * We need the lock from this point on.  We have to re-check zone
546 	 * ownership after acquiring the lock and also check for reservations.
547 	 */
548 	hammer_lock_ex(&hmp->blkmap_lock);
549 
550 	if (layer2->zone && layer2->zone != zone) {
551 		hammer_unlock(&hmp->blkmap_lock);
552 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
553 		goto again;
554 	}
555 	if (offset < layer2->append_off) {
556 		hammer_unlock(&hmp->blkmap_lock);
557 		next_offset += layer2->append_off - offset;
558 		goto again;
559 	}
560 
561 	/*
562 	 * The big-block might be reserved by another zone.  If it is reserved
563 	 * by our zone we may have to move next_offset past its append_off.
564 	 */
565 	base_off = (next_offset &
566 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
567 		    HAMMER_ZONE_RAW_BUFFER;
568 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
569 	if (resv) {
570 		if (resv->zone != zone) {
571 			hammer_unlock(&hmp->blkmap_lock);
572 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
573 				      ~HAMMER_LARGEBLOCK_MASK64;
574 			goto again;
575 		}
576 		if (offset < resv->append_off) {
577 			hammer_unlock(&hmp->blkmap_lock);
578 			next_offset += resv->append_off - offset;
579 			goto again;
580 		}
581 		++resv->refs;
582 		resx = NULL;
583 	} else {
584 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
585 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
586 		resx->refs = 1;
587 		resx->zone = zone;
588 		resx->zone_offset = base_off;
589 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
590 			resx->flags |= HAMMER_RESF_LAYER2FREE;
591 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
592 		KKASSERT(resv == NULL);
593 		resv = resx;
594 		++hammer_count_reservations;
595 	}
596 	resv->append_off = offset + bytes;
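	/*
	 * Note that the on-disk layer1/layer2 freemap entries are not
	 * modified here.  The reservation exists purely in memory and the
	 * meta-data updates are deferred until the backend finalizes the
	 * related records via hammer_blockmap_finalize().
	 */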
597 
598 	/*
599 	 * If we are not reserving a whole buffer but are at the start of
600 	 * a new block, call hammer_bnew() to avoid a disk read.
601 	 *
602 	 * If we are reserving a whole buffer (or more), the caller will
603 	 * probably use a direct read, so do nothing.
604 	 */
605 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
606 		hammer_bnew(hmp, next_offset, errorp, &buffer3);
607 	}
608 
609 	/*
610 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
611 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
612 	 * be big-block aligned.
613 	 */
614 	blockmap->next_offset = next_offset + bytes;
615 	hammer_unlock(&hmp->blkmap_lock);
616 
617 failed:
618 	if (buffer1)
619 		hammer_rel_buffer(buffer1, 0);
620 	if (buffer2)
621 		hammer_rel_buffer(buffer2, 0);
622 	if (buffer3)
623 		hammer_rel_buffer(buffer3, 0);
624 	hammer_rel_volume(root_volume, 0);
625 	*zone_offp = next_offset;
626 
627 	return(resv);
628 }
629 
630 /*
631  * Frontend function - Dedup bytes in a zone.
632  *
633  * Dedup reservations work exactly the same as normal write reservations
634  * except we only adjust the bytes_free field and don't touch the append
635  * offset.  The finalization mechanism for dedup reservations is the same
636  * as for normal write ones - the backend finalizes the reservation with
637  * hammer_blockmap_finalize().
638  */
639 hammer_reserve_t
640 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
641 			      hammer_off_t zone_offset, int *errorp)
642 {
643 	hammer_volume_t root_volume;
644 	hammer_blockmap_t freemap;
645 	struct hammer_blockmap_layer1 *layer1;
646 	struct hammer_blockmap_layer2 *layer2;
647 	hammer_buffer_t buffer1 = NULL;
648 	hammer_buffer_t buffer2 = NULL;
649 	hammer_off_t layer1_offset;
650 	hammer_off_t layer2_offset;
651 	hammer_off_t base_off;
652 	hammer_reserve_t resv = NULL;
653 	hammer_reserve_t resx = NULL;
654 
655 	/*
656 	 * Setup
657 	 */
658 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
659 	root_volume = hammer_get_root_volume(hmp, errorp);
660 	if (*errorp)
661 		return (NULL);
662 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
663 	KKASSERT(freemap->phys_offset != 0);
664 
665 	bytes = (bytes + 15) & ~15;
666 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
667 
668 	/*
669 	 * Dive layer 1.
670 	 */
671 	layer1_offset = freemap->phys_offset +
672 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
673 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
674 	if (*errorp)
675 		goto failed;
676 
677 	/*
678 	 * Check CRC.
679 	 */
680 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
681 		hammer_lock_ex(&hmp->blkmap_lock);
682 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
683 			panic("CRC FAILED: LAYER1");
684 		hammer_unlock(&hmp->blkmap_lock);
685 	}
686 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
687 
688 	/*
689 	 * Dive layer 2, each entry represents a large-block.
690 	 */
691 	layer2_offset = layer1->phys_offset +
692 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
693 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
694 	if (*errorp)
695 		goto failed;
696 
697 	/*
698 	 * Check CRC.
699 	 */
700 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
701 		hammer_lock_ex(&hmp->blkmap_lock);
702 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
703 			panic("CRC FAILED: LAYER2");
704 		hammer_unlock(&hmp->blkmap_lock);
705 	}
706 
707 	/*
708 	 * Fail if the zone is owned by someone other than us.
709 	 */
710 	if (layer2->zone && layer2->zone != zone)
711 		goto failed;
712 
713 	/*
714 	 * We need the lock from this point on.  We have to re-check zone
715 	 * ownership after acquiring the lock and also check for reservations.
716 	 */
717 	hammer_lock_ex(&hmp->blkmap_lock);
718 
719 	if (layer2->zone && layer2->zone != zone) {
720 		hammer_unlock(&hmp->blkmap_lock);
721 		goto failed;
722 	}
723 
724 	base_off = (zone_offset &
725 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
726 		    HAMMER_ZONE_RAW_BUFFER;
727 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
728 	if (resv) {
729 		if (resv->zone != zone) {
730 			hammer_unlock(&hmp->blkmap_lock);
731 			resv = NULL;
732 			goto failed;
733 		}
734 		/*
735 		 * Due to possible big block underflow we can't simply
736 		 * subtract bytes from bytes_free.
737 		 */
738 		if (update_bytes_free(resv, bytes) == 0) {
739 			hammer_unlock(&hmp->blkmap_lock);
740 			resv = NULL;
741 			goto failed;
742 		}
743 		++resv->refs;
744 		resx = NULL;
745 	} else {
746 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
747 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
748 		resx->refs = 1;
749 		resx->zone = zone;
750 		resx->bytes_free = layer2->bytes_free;
751 		/*
752 		 * Due to possible big block underflow we can't simply
753 		 * subtract bytes from bytes_free.
754 		 */
755 		if (update_bytes_free(resx, bytes) == 0) {
756 			hammer_unlock(&hmp->blkmap_lock);
757 			kfree(resx, hmp->m_misc);
758 			goto failed;
759 		}
760 		resx->zone_offset = base_off;
761 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
762 		KKASSERT(resv == NULL);
763 		resv = resx;
764 		++hammer_count_reservations;
765 	}
766 
767 	hammer_unlock(&hmp->blkmap_lock);
768 
769 failed:
770 	if (buffer1)
771 		hammer_rel_buffer(buffer1, 0);
772 	if (buffer2)
773 		hammer_rel_buffer(buffer2, 0);
774 	hammer_rel_volume(root_volume, 0);
775 
776 	return(resv);
777 }
778 
779 static int
780 update_bytes_free(hammer_reserve_t resv, int bytes)
781 {
782 	int32_t temp;
783 
784 	/*
785 	 * Big-block underflow check
786 	 */
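	/*
	 * temp can only exceed bytes_free after subtracting a positive
	 * constant if the signed 32-bit subtraction wrapped around, which
	 * indicates bytes_free has already underflowed to an absurd value.
	 */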
787 	temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
788 	cpu_ccfence(); /* XXX do we really need it ? */
789 	if (temp > resv->bytes_free) {
790 		kprintf("BIGBLOCK UNDERFLOW\n");
791 		return (0);
792 	}
793 
794 	resv->bytes_free -= bytes;
795 	return (1);
796 }
797 
798 /*
799  * Dereference a reservation structure.  Upon the final release the
800  * underlying big-block is checked and if it is entirely free we delete
801  * any related HAMMER buffers to avoid potential conflicts with future
802  * reuse of the big-block.
803  */
804 void
805 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
806 {
807 	hammer_off_t base_offset;
808 	int error;
809 
810 	KKASSERT(resv->refs > 0);
811 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
812 		 HAMMER_ZONE_RAW_BUFFER);
813 
814 	/*
815 	 * Setting append_off to the max prevents any new allocations
816  * from occurring while we are trying to dispose of the reservation,
817 	 * allowing us to safely delete any related HAMMER buffers.
818 	 *
819 	 * If we are unable to clean out all related HAMMER buffers we
820 	 * requeue the delay.
821 	 */
822 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
823 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
824 		base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
825 		base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
826 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
827 			hammer_dedup_cache_inval(hmp, base_offset);
828 		error = hammer_del_buffers(hmp, base_offset,
829 					   resv->zone_offset,
830 					   HAMMER_LARGEBLOCK_SIZE,
831 					   1);
832 		if (hammer_debug_general & 0x20000) {
833 			kprintf("hammer: dellgblk %016jx error %d\n",
834 				(intmax_t)base_offset, error);
835 		}
836 		if (error)
837 			hammer_reserve_setdelay(hmp, resv);
838 	}
839 	if (--resv->refs == 0) {
840 		if (hammer_debug_general & 0x20000) {
841 			kprintf("hammer: delresvr %016jx zone %02x\n",
842 				(intmax_t)resv->zone_offset, resv->zone);
843 		}
844 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
845 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
846 		kfree(resv, hmp->m_misc);
847 		--hammer_count_reservations;
848 	}
849 }
850 
851 /*
852  * Prevent a potentially free big-block from being reused until after
853  * the related flushes have completely cycled; otherwise crash recovery
854  * could resurrect a data block that was already reused and overwritten.
855  *
856  * The caller might reset the underlying layer2 entry's append_off to 0, so
857  * our covering append_off must be set to max to prevent any reallocation
858  * until after the flush delays complete, and to allow any underlying
859  * cached blocks to be properly invalidated.
860  */
861 static void
862 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
863 			int zone, struct hammer_blockmap_layer2 *layer2)
864 {
865 	hammer_reserve_t resv;
866 
867 	/*
868 	 * Allocate the reservation if necessary.
869 	 *
870 	 * NOTE: need lock in future around resv lookup/allocation and
871 	 * the setdelay call; currently refs is not bumped until the call.
872 	 */
873 again:
874 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
875 	if (resv == NULL) {
876 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
877 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
878 		resv->zone = zone;
879 		resv->zone_offset = base_offset;
880 		resv->refs = 0;
881 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
882 
883 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
884 			resv->flags |= HAMMER_RESF_LAYER2FREE;
885 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
886 			kfree(resv, hmp->m_misc);
887 			goto again;
888 		}
889 		++hammer_count_reservations;
890 	} else {
891 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
892 			resv->flags |= HAMMER_RESF_LAYER2FREE;
893 	}
894 	hammer_reserve_setdelay(hmp, resv);
895 }
896 
897 /*
898  * Enter the reservation on the on-delay list, or move it if it
899  * is already on the list.
900  */
901 static void
902 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
903 {
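	/*
	 * Entries on the delay list are released by hammer_reserve_clrdelay()
	 * once the flusher has cycled past the flush_group recorded here.
	 * Only the initial queueing takes a ref and bumps rsv_fromdelay;
	 * re-queueing an already-delayed reservation merely refreshes its
	 * flush_group and moves it to the tail of the list.
	 */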
904 	if (resv->flags & HAMMER_RESF_ONDELAY) {
905 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
906 		resv->flush_group = hmp->flusher.next + 1;
907 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
908 	} else {
909 		++resv->refs;
910 		++hmp->rsv_fromdelay;
911 		resv->flags |= HAMMER_RESF_ONDELAY;
912 		resv->flush_group = hmp->flusher.next + 1;
913 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
914 	}
915 }
916 
917  * Reserve has reached its flush point; remove it from the delay list
918  * Reserve has reached its flush point, remove it from the delay list
919  * and finish it off.  hammer_blockmap_reserve_complete() inherits
920  * the ondelay reference.
921  */
922 void
923 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
924 {
925 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
926 	resv->flags &= ~HAMMER_RESF_ONDELAY;
927 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
928 	--hmp->rsv_fromdelay;
929 	hammer_blockmap_reserve_complete(hmp, resv);
930 }
931 
932 /*
933  * Backend function - free (offset, bytes) in a zone.
934  *
935  * XXX error return
936  */
937 void
938 hammer_blockmap_free(hammer_transaction_t trans,
939 		     hammer_off_t zone_offset, int bytes)
940 {
941 	hammer_mount_t hmp;
942 	hammer_volume_t root_volume;
943 	hammer_blockmap_t freemap;
944 	struct hammer_blockmap_layer1 *layer1;
945 	struct hammer_blockmap_layer2 *layer2;
946 	hammer_buffer_t buffer1 = NULL;
947 	hammer_buffer_t buffer2 = NULL;
948 	hammer_off_t layer1_offset;
949 	hammer_off_t layer2_offset;
950 	hammer_off_t base_off;
951 	int error;
952 	int zone;
953 
954 	if (bytes == 0)
955 		return;
956 	hmp = trans->hmp;
957 
958 	/*
959 	 * Alignment
960 	 */
961 	bytes = (bytes + 15) & ~15;
962 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
963 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
964 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
965 
966 	/*
967 	 * Basic zone validation & locking
968 	 */
969 	zone = HAMMER_ZONE_DECODE(zone_offset);
970 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
971 	root_volume = trans->rootvol;
972 	error = 0;
973 
974 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
975 
976 	/*
977 	 * Dive layer 1.
978 	 */
979 	layer1_offset = freemap->phys_offset +
980 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
981 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
982 	if (error)
983 		goto failed;
984 	KKASSERT(layer1->phys_offset &&
985 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
986 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
987 		hammer_lock_ex(&hmp->blkmap_lock);
988 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
989 			panic("CRC FAILED: LAYER1");
990 		hammer_unlock(&hmp->blkmap_lock);
991 	}
992 
993 	/*
994 	 * Dive layer 2, each entry represents a large-block.
995 	 */
996 	layer2_offset = layer1->phys_offset +
997 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
998 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
999 	if (error)
1000 		goto failed;
1001 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1002 		hammer_lock_ex(&hmp->blkmap_lock);
1003 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1004 			panic("CRC FAILED: LAYER2");
1005 		hammer_unlock(&hmp->blkmap_lock);
1006 	}
1007 
1008 	hammer_lock_ex(&hmp->blkmap_lock);
1009 
1010 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1011 
1012 	/*
1013 	 * Free space previously allocated via blockmap_alloc().
1014 	 *
1015 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1016 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1017 	 */
1018 	KKASSERT(layer2->zone == zone);
1019 	layer2->bytes_free += bytes;
1020 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1021 
1022 	/*
1023 	 * If a big-block becomes entirely free we must create a covering
1024 	 * reservation to prevent premature reuse.  Note, however, that
1025 	 * the big-block and/or reservation may still have an append_off
1026 	 * that allows further (non-reused) allocations.
1027 	 *
1028 	 * Once the reservation has been made we re-check layer2 and if
1029 	 * the big-block is still entirely free we reset the layer2 entry.
1030 	 * The reservation will prevent premature reuse.
1031 	 *
1032 	 * NOTE: hammer_buffers are only invalidated when the reservation
1033 	 * is completed, if the layer2 entry is still completely free at
1034 	 * that time.  Any allocations from the reservation that may have
1035 	 * occurred in the meantime, or active references on the reservation
1036 	 * from new pending allocations, will prevent the invalidation from
1037 	 * occurring.
1038 	 */
1039 	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1040 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1041 
1042 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1043 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1044 			layer2->zone = 0;
1045 			layer2->append_off = 0;
1046 			hammer_modify_buffer(trans, buffer1,
1047 					     layer1, sizeof(*layer1));
1048 			++layer1->blocks_free;
1049 			layer1->layer1_crc = crc32(layer1,
1050 						   HAMMER_LAYER1_CRCSIZE);
1051 			hammer_modify_buffer_done(buffer1);
1052 			hammer_modify_volume_field(trans,
1053 					trans->rootvol,
1054 					vol0_stat_freebigblocks);
1055 			++root_volume->ondisk->vol0_stat_freebigblocks;
1056 			hmp->copy_stat_freebigblocks =
1057 			   root_volume->ondisk->vol0_stat_freebigblocks;
1058 			hammer_modify_volume_done(trans->rootvol);
1059 		}
1060 	}
1061 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1062 	hammer_modify_buffer_done(buffer2);
1063 	hammer_unlock(&hmp->blkmap_lock);
1064 
1065 failed:
1066 	if (buffer1)
1067 		hammer_rel_buffer(buffer1, 0);
1068 	if (buffer2)
1069 		hammer_rel_buffer(buffer2, 0);
1070 }
1071 
1072 int
1073 hammer_blockmap_dedup(hammer_transaction_t trans,
1074 		     hammer_off_t zone_offset, int bytes)
1075 {
1076 	hammer_mount_t hmp;
1077 	hammer_volume_t root_volume;
1078 	hammer_blockmap_t freemap;
1079 	struct hammer_blockmap_layer1 *layer1;
1080 	struct hammer_blockmap_layer2 *layer2;
1081 	hammer_buffer_t buffer1 = NULL;
1082 	hammer_buffer_t buffer2 = NULL;
1083 	hammer_off_t layer1_offset;
1084 	hammer_off_t layer2_offset;
1085 	int32_t temp;
1086 	int error;
1087 	int zone;
1088 
1089 	if (bytes == 0)
1090 		return (0);
1091 	hmp = trans->hmp;
1092 
1093 	/*
1094 	 * Alignment
1095 	 */
1096 	bytes = (bytes + 15) & ~15;
1097 	KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
1098 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1099 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
1100 
1101 	/*
1102 	 * Basic zone validation & locking
1103 	 */
1104 	zone = HAMMER_ZONE_DECODE(zone_offset);
1105 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1106 	root_volume = trans->rootvol;
1107 	error = 0;
1108 
1109 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1110 
1111 	/*
1112 	 * Dive layer 1.
1113 	 */
1114 	layer1_offset = freemap->phys_offset +
1115 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1116 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1117 	if (error)
1118 		goto failed;
1119 	KKASSERT(layer1->phys_offset &&
1120 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1121 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1122 		hammer_lock_ex(&hmp->blkmap_lock);
1123 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1124 			panic("CRC FAILED: LAYER1");
1125 		hammer_unlock(&hmp->blkmap_lock);
1126 	}
1127 
1128 	/*
1129 	 * Dive layer 2, each entry represents a large-block.
1130 	 */
1131 	layer2_offset = layer1->phys_offset +
1132 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1133 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1134 	if (error)
1135 		goto failed;
1136 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1137 		hammer_lock_ex(&hmp->blkmap_lock);
1138 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1139 			panic("CRC FAILED: LAYER2");
1140 		hammer_unlock(&hmp->blkmap_lock);
1141 	}
1142 
1143 	hammer_lock_ex(&hmp->blkmap_lock);
1144 
1145 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1146 
1147 	/*
1148 	 * De-dup accounting: unlike blockmap_free(), this path decrements
1149 	 * bytes_free rather than incrementing it, after an underflow check.
1150 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1151 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1152 	 */
1153 	KKASSERT(layer2->zone == zone);
1154 	temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
1155 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1156 	if (temp > layer2->bytes_free) {
1157 		error = ERANGE;
1158 		goto underflow;
1159 	}
1160 	layer2->bytes_free -= bytes;
1161 
1162 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1163 
1164 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1165 underflow:
1166 	hammer_modify_buffer_done(buffer2);
1167 	hammer_unlock(&hmp->blkmap_lock);
1168 
1169 failed:
1170 	if (buffer1)
1171 		hammer_rel_buffer(buffer1, 0);
1172 	if (buffer2)
1173 		hammer_rel_buffer(buffer2, 0);
1174 	return (error);
1175 }
1176 
1177 /*
1178  * Backend function - finalize (offset, bytes) in a zone.
1179  *
1180  * Allocate space that was previously reserved by the frontend.
1181  */
1182 int
1183 hammer_blockmap_finalize(hammer_transaction_t trans,
1184 			 hammer_reserve_t resv,
1185 			 hammer_off_t zone_offset, int bytes)
1186 {
1187 	hammer_mount_t hmp;
1188 	hammer_volume_t root_volume;
1189 	hammer_blockmap_t freemap;
1190 	struct hammer_blockmap_layer1 *layer1;
1191 	struct hammer_blockmap_layer2 *layer2;
1192 	hammer_buffer_t buffer1 = NULL;
1193 	hammer_buffer_t buffer2 = NULL;
1194 	hammer_off_t layer1_offset;
1195 	hammer_off_t layer2_offset;
1196 	int error;
1197 	int zone;
1198 	int offset;
1199 
1200 	if (bytes == 0)
1201 		return(0);
1202 	hmp = trans->hmp;
1203 
1204 	/*
1205 	 * Alignment
1206 	 */
1207 	bytes = (bytes + 15) & ~15;
1208 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1209 
1210 	/*
1211 	 * Basic zone validation & locking
1212 	 */
1213 	zone = HAMMER_ZONE_DECODE(zone_offset);
1214 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1215 	root_volume = trans->rootvol;
1216 	error = 0;
1217 
1218 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1219 
1220 	/*
1221 	 * Dive layer 1.
1222 	 */
1223 	layer1_offset = freemap->phys_offset +
1224 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1225 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1226 	if (error)
1227 		goto failed;
1228 	KKASSERT(layer1->phys_offset &&
1229 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1230 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1231 		hammer_lock_ex(&hmp->blkmap_lock);
1232 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1233 			panic("CRC FAILED: LAYER1");
1234 		hammer_unlock(&hmp->blkmap_lock);
1235 	}
1236 
1237 	/*
1238 	 * Dive layer 2, each entry represents a large-block.
1239 	 */
1240 	layer2_offset = layer1->phys_offset +
1241 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1242 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1243 	if (error)
1244 		goto failed;
1245 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1246 		hammer_lock_ex(&hmp->blkmap_lock);
1247 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1248 			panic("CRC FAILED: LAYER2");
1249 		hammer_unlock(&hmp->blkmap_lock);
1250 	}
1251 
1252 	hammer_lock_ex(&hmp->blkmap_lock);
1253 
1254 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1255 
1256 	/*
1257 	 * Finalize some or all of the space covered by a current
1258 	 * reservation.  An allocation in the same layer may have
1259 	 * already assigned ownership.
1260 	 */
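	/*
	 * A zero layer2->zone means no allocation or earlier finalization
	 * has claimed this big-block yet, so claim it now and update the
	 * layer1 free-block count and the volume's free big-block
	 * statistics, just as hammer_blockmap_alloc() does when it takes
	 * ownership on the backend allocation path.
	 */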
1261 	if (layer2->zone == 0) {
1262 		hammer_modify_buffer(trans, buffer1,
1263 				     layer1, sizeof(*layer1));
1264 		--layer1->blocks_free;
1265 		layer1->layer1_crc = crc32(layer1,
1266 					   HAMMER_LAYER1_CRCSIZE);
1267 		hammer_modify_buffer_done(buffer1);
1268 		layer2->zone = zone;
1269 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1270 		KKASSERT(layer2->append_off == 0);
1271 		hammer_modify_volume_field(trans,
1272 				trans->rootvol,
1273 				vol0_stat_freebigblocks);
1274 		--root_volume->ondisk->vol0_stat_freebigblocks;
1275 		hmp->copy_stat_freebigblocks =
1276 		   root_volume->ondisk->vol0_stat_freebigblocks;
1277 		hammer_modify_volume_done(trans->rootvol);
1278 	}
1279 	if (layer2->zone != zone)
1280 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1281 	KKASSERT(layer2->zone == zone);
1282 	KKASSERT(bytes != 0);
1283 	layer2->bytes_free -= bytes;
1284 
1285 	if (resv) {
1286 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1287 	}
1288 
1289 	/*
1290 	 * Finalizations can occur out of order, or combined with allocations.
1291 	 * append_off must be set to the highest allocated offset.
1292 	 */
1293 	offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1294 	if (layer2->append_off < offset)
1295 		layer2->append_off = offset;
1296 
1297 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1298 	hammer_modify_buffer_done(buffer2);
1299 	hammer_unlock(&hmp->blkmap_lock);
1300 
1301 failed:
1302 	if (buffer1)
1303 		hammer_rel_buffer(buffer1, 0);
1304 	if (buffer2)
1305 		hammer_rel_buffer(buffer2, 0);
1306 	return(error);
1307 }
1308 
1309 /*
1310  * Return the approximate number of free bytes in the big-block
1311  * containing the specified blockmap offset.
1312  *
1313  * WARNING: A negative number can be returned if data de-dup exists,
1314  *	    and the result will also not represent the actual number
1315  *	    of free bytes in this case.
1316  *
1317  *	    This code is used only by the reblocker.
1318  */
1319 int
1320 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1321 			int *curp, int *errorp)
1322 {
1323 	hammer_volume_t root_volume;
1324 	hammer_blockmap_t blockmap;
1325 	hammer_blockmap_t freemap;
1326 	struct hammer_blockmap_layer1 *layer1;
1327 	struct hammer_blockmap_layer2 *layer2;
1328 	hammer_buffer_t buffer = NULL;
1329 	hammer_off_t layer1_offset;
1330 	hammer_off_t layer2_offset;
1331 	int32_t bytes;
1332 	int zone;
1333 
1334 	zone = HAMMER_ZONE_DECODE(zone_offset);
1335 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1336 	root_volume = hammer_get_root_volume(hmp, errorp);
1337 	if (*errorp) {
1338 		*curp = 0;
1339 		return(0);
1340 	}
1341 	blockmap = &hmp->blockmap[zone];
1342 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1343 
1344 	/*
1345 	 * Dive layer 1.
1346 	 */
1347 	layer1_offset = freemap->phys_offset +
1348 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1349 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1350 	if (*errorp) {
1351 		bytes = 0;
1352 		goto failed;
1353 	}
1354 	KKASSERT(layer1->phys_offset);
1355 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1356 		hammer_lock_ex(&hmp->blkmap_lock);
1357 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1358 			panic("CRC FAILED: LAYER1");
1359 		hammer_unlock(&hmp->blkmap_lock);
1360 	}
1361 
1362 	/*
1363 	 * Dive layer 2, each entry represents a large-block.
1364 	 *
1365 	 * (reuse buffer, layer1 pointer becomes invalid)
1366 	 */
1367 	layer2_offset = layer1->phys_offset +
1368 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1369 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1370 	if (*errorp) {
1371 		bytes = 0;
1372 		goto failed;
1373 	}
1374 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1375 		hammer_lock_ex(&hmp->blkmap_lock);
1376 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1377 			panic("CRC FAILED: LAYER2");
1378 		hammer_unlock(&hmp->blkmap_lock);
1379 	}
1380 	KKASSERT(layer2->zone == zone);
1381 
1382 	bytes = layer2->bytes_free;
1383 
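	/*
	 * *curp reports whether the queried offset lies within the big-block
	 * that the zone's allocation iterator (next_offset) currently
	 * points into.
	 */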
1384 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1385 		*curp = 0;
1386 	else
1387 		*curp = 1;
1388 failed:
1389 	if (buffer)
1390 		hammer_rel_buffer(buffer, 0);
1391 	hammer_rel_volume(root_volume, 0);
1392 	if (hammer_debug_general & 0x0800) {
1393 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1394 			(long long)zone_offset, bytes);
1395 	}
1396 	return(bytes);
1397 }
1398 
1399 
1400 /*
1401  * Lookup a blockmap offset.
1402  */
1403 hammer_off_t
1404 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1405 		       int *errorp)
1406 {
1407 	hammer_volume_t root_volume;
1408 	hammer_blockmap_t freemap;
1409 	struct hammer_blockmap_layer1 *layer1;
1410 	struct hammer_blockmap_layer2 *layer2;
1411 	hammer_buffer_t buffer = NULL;
1412 	hammer_off_t layer1_offset;
1413 	hammer_off_t layer2_offset;
1414 	hammer_off_t result_offset;
1415 	hammer_off_t base_off;
1416 	hammer_reserve_t resv;
1417 	int zone;
1418 
1419 	/*
1420 	 * Calculate the zone-2 offset.
1421 	 */
1422 	zone = HAMMER_ZONE_DECODE(zone_offset);
1423 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1424 
1425 	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1426 			HAMMER_ZONE_RAW_BUFFER;
1427 
1428 	/*
1429 	 * We can actually stop here, normal blockmaps are now direct-mapped
1430 	 * onto the freemap and so represent zone-2 addresses.
1431 	 */
1432 	if (hammer_verify_zone == 0) {
1433 		*errorp = 0;
1434 		return(result_offset);
1435 	}
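	/*
	 * Everything below is a consistency pass enabled by the
	 * hammer_verify_zone debug setting: walk the freemap layers and
	 * verify that the big-block is owned by the expected zone, or is
	 * covered by a reservation when layer2->zone is still 0.
	 */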
1436 
1437 	/*
1438 	 * Validate the allocation zone
1439 	 */
1440 	root_volume = hammer_get_root_volume(hmp, errorp);
1441 	if (*errorp)
1442 		return(0);
1443 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1444 	KKASSERT(freemap->phys_offset != 0);
1445 
1446 	/*
1447 	 * Dive layer 1.
1448 	 */
1449 	layer1_offset = freemap->phys_offset +
1450 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1451 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1452 	if (*errorp)
1453 		goto failed;
1454 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1455 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1456 		hammer_lock_ex(&hmp->blkmap_lock);
1457 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1458 			panic("CRC FAILED: LAYER1");
1459 		hammer_unlock(&hmp->blkmap_lock);
1460 	}
1461 
1462 	/*
1463 	 * Dive layer 2, each entry represents a large-block.
1464 	 */
1465 	layer2_offset = layer1->phys_offset +
1466 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1467 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1468 
1469 	if (*errorp)
1470 		goto failed;
1471 	if (layer2->zone == 0) {
1472 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1473 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1474 				 base_off);
1475 		KKASSERT(resv && resv->zone == zone);
1476 
1477 	} else if (layer2->zone != zone) {
1478 		panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1479 			layer2->zone, zone);
1480 	}
1481 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1482 		hammer_lock_ex(&hmp->blkmap_lock);
1483 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1484 			panic("CRC FAILED: LAYER2");
1485 		hammer_unlock(&hmp->blkmap_lock);
1486 	}
1487 
1488 failed:
1489 	if (buffer)
1490 		hammer_rel_buffer(buffer, 0);
1491 	hammer_rel_volume(root_volume, 0);
1492 	if (hammer_debug_general & 0x0800) {
1493 		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1494 			(long long)zone_offset, (long long)result_offset);
1495 	}
1496 	return(result_offset);
1497 }
1498 
1499 
1500 /*
1501  * Check space availability
1502  *
1503  * MPSAFE - does not require fs_token
1504  */
1505 int
1506 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1507 {
1508 	const int in_size = sizeof(struct hammer_inode_data) +
1509 			    sizeof(union hammer_btree_elm);
1510 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1511 	int64_t usedbytes;
1512 
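	/*
	 * Conservatively estimate the storage still owed to in-flight
	 * operations: reserved inodes and records priced at their B-Tree
	 * element overhead, reserved raw data bytes, big-blocks parked on
	 * the reservation delay list, a multiple of the dirty buffer space
	 * limit, and the caller-supplied slop (in units of big-blocks).
	 */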
1513 	usedbytes = hmp->rsv_inodes * in_size +
1514 		    hmp->rsv_recs * rec_size +
1515 		    hmp->rsv_databytes +
1516 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1517 		    ((int64_t)hidirtybufspace << 2) +
1518 		    (slop << HAMMER_LARGEBLOCK_BITS);
1519 
1520 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1521 	if (resp)
1522 		*resp = usedbytes;
1523 
1524 	if (hmp->copy_stat_freebigblocks >=
1525 	    (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1526 		return(0);
1527 	}
1528 	return (ENOSPC);
1529 }
1530 
1531