xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision be09fc23)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
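/*
 * Overview (editor's summary; see hammer_disk.h for the authoritative
 * layout): the freemap is a two-level radix structure.  Each layer1
 * entry points at an array of layer2 entries, and each layer2 entry
 * describes one big-block (owning zone, bytes_free, append_off, CRC).
 * The per-zone blockmaps in this file walk that structure to allocate,
 * reserve, free, dedup and finalize space.
 */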
38 #include "hammer.h"
39 
40 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
41 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
42 				    hammer_off_t base_offset, int zone,
43 				    struct hammer_blockmap_layer2 *layer2);
44 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
45 static int update_bytes_free(hammer_reserve_t resv, int bytes);
46 
47 /*
48  * Reserved big-blocks red-black tree support
49  */
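/*
 * The tree is keyed on the big-block's zone-2 (raw buffer) offset
 * stored in hammer_reserve->zone_offset; the comparator below simply
 * orders reservations by that key.
 */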
50 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
51 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
52 
53 static int
54 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
55 {
56 	if (res1->zone_offset < res2->zone_offset)
57 		return(-1);
58 	if (res1->zone_offset > res2->zone_offset)
59 		return(1);
60 	return(0);
61 }
62 
63 /*
64  * Allocate bytes from a zone
65  */
66 hammer_off_t
67 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
68 		      hammer_off_t hint, int *errorp)
69 {
70 	hammer_mount_t hmp;
71 	hammer_volume_t root_volume;
72 	hammer_blockmap_t blockmap;
73 	hammer_blockmap_t freemap;
74 	hammer_reserve_t resv;
75 	struct hammer_blockmap_layer1 *layer1;
76 	struct hammer_blockmap_layer2 *layer2;
77 	hammer_buffer_t buffer1 = NULL;
78 	hammer_buffer_t buffer2 = NULL;
79 	hammer_buffer_t buffer3 = NULL;
80 	hammer_off_t tmp_offset;
81 	hammer_off_t next_offset;
82 	hammer_off_t result_offset;
83 	hammer_off_t layer1_offset;
84 	hammer_off_t layer2_offset;
85 	hammer_off_t base_off;
86 	int loops = 0;
87 	int offset;		/* offset within big-block */
88 	int use_hint;
89 
90 	hmp = trans->hmp;
91 
92 	/*
93 	 * Deal with alignment and buffer-boundary issues.
94 	 *
95 	 * Be careful, certain primary alignments are used below to allocate
96 	 * new blockmap blocks.
97 	 */
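	/* e.g. a 1-byte request becomes 16 bytes below, 17 becomes 32 */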
98 	bytes = (bytes + 15) & ~15;
99 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
100 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
101 
102 	/*
103 	 * Setup
104 	 */
105 	root_volume = trans->rootvol;
106 	*errorp = 0;
107 	blockmap = &hmp->blockmap[zone];
108 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
109 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
110 
111 	/*
112 	 * Use the hint if we have one.
113 	 */
114 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
115 		next_offset = (hint + 15) & ~(hammer_off_t)15;
116 		use_hint = 1;
117 	} else {
118 		next_offset = blockmap->next_offset;
119 		use_hint = 0;
120 	}
121 again:
122 
123 	/*
124 	 * use_hint is turned off if we leave the hinted big-block.
125 	 */
126 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
127 		next_offset = blockmap->next_offset;
128 		use_hint = 0;
129 	}
130 
131 	/*
132 	 * Check for wrap
133 	 */
134 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
135 		if (++loops == 2) {
136 			result_offset = 0;
137 			*errorp = ENOSPC;
138 			goto failed;
139 		}
140 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
141 	}
142 
143 	/*
144 	 * The allocation request may not cross a buffer boundary.  Special
145 	 * large allocations must not cross a big-block boundary.
146 	 */
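	/*
	 * tmp_offset is the offset of the last byte of the request.  The
	 * XOR test below is non-zero only when the first and last byte
	 * land in different HAMMER_BUFSIZE buffers (or different
	 * big-blocks for the large case); next_offset is then bumped to
	 * the start of the next boundary and the scan restarted.
	 */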
147 	tmp_offset = next_offset + bytes - 1;
148 	if (bytes <= HAMMER_BUFSIZE) {
149 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
150 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
151 			goto again;
152 		}
153 	} else {
154 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
155 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
156 			goto again;
157 		}
158 	}
159 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
160 
161 	/*
162 	 * Dive layer 1.
163 	 */
164 	layer1_offset = freemap->phys_offset +
165 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
166 
167 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
168 	if (*errorp) {
169 		result_offset = 0;
170 		goto failed;
171 	}
172 
173 	/*
174 	 * Check CRC.
175 	 */
176 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
177 		hammer_lock_ex(&hmp->blkmap_lock);
178 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
179 			panic("CRC FAILED: LAYER1");
180 		hammer_unlock(&hmp->blkmap_lock);
181 	}
182 
183 	/*
184 	 * If we are at a big-block boundary and layer1 indicates no
185 	 * free big-blocks, then we cannot allocate a new big-block in
186 	 * layer2, skip to the next layer1 entry.
187 	 */
188 	if (offset == 0 && layer1->blocks_free == 0) {
189 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
190 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
191 		goto again;
192 	}
193 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
194 
195 	/*
196 	 * Skip this layer1 entry if it is pointing to a layer2 big-block
197 	 * on a volume that we are currently trying to remove from the
198 	 * file-system. This is used by the volume-del code together with
199 	 * the reblocker to free up a volume.
200 	 */
201 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
202 	    hmp->volume_to_remove) {
203 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
204 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
205 		goto again;
206 	}
207 
208 	/*
209 	 * Dive layer 2, each entry represents a big-block.
210 	 */
211 	layer2_offset = layer1->phys_offset +
212 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
213 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
214 	if (*errorp) {
215 		result_offset = 0;
216 		goto failed;
217 	}
218 
219 	/*
220 	 * Check CRC.  This can race another thread holding the lock
221 	 * and in the middle of modifying layer2.
222 	 */
223 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
224 		hammer_lock_ex(&hmp->blkmap_lock);
225 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
226 			panic("CRC FAILED: LAYER2");
227 		hammer_unlock(&hmp->blkmap_lock);
228 	}
229 
230 	/*
231 	 * Skip the layer if the zone is owned by someone other than us.
232 	 */
233 	if (layer2->zone && layer2->zone != zone) {
234 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
235 		goto again;
236 	}
237 	if (offset < layer2->append_off) {
238 		next_offset += layer2->append_off - offset;
239 		goto again;
240 	}
241 
242 #if 0
243 	/*
244 	 * If operating in the current non-hint blockmap block, do not
245 	 * allow it to get over-full.  Also drop any active hinting so
246 	 * blockmap->next_offset is updated at the end.
247 	 *
248 	 * We do this for B-Tree and meta-data allocations to provide
249 	 * localization for updates.
250 	 */
251 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
252 	     zone == HAMMER_ZONE_META_INDEX) &&
253 	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
254 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)
255 	) {
256 		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
257 			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
258 			use_hint = 0;
259 			goto again;
260 		}
261 	}
262 #endif
263 
264 	/*
265 	 * We need the lock from this point on.  We have to re-check zone
266 	 * ownership after acquiring the lock and also check for reservations.
267 	 */
268 	hammer_lock_ex(&hmp->blkmap_lock);
269 
270 	if (layer2->zone && layer2->zone != zone) {
271 		hammer_unlock(&hmp->blkmap_lock);
272 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
273 		goto again;
274 	}
275 	if (offset < layer2->append_off) {
276 		hammer_unlock(&hmp->blkmap_lock);
277 		next_offset += layer2->append_off - offset;
278 		goto again;
279 	}
280 
281 	/*
282 	 * The big-block might be reserved by another zone.  If it is reserved
283 	 * by our zone we may have to move next_offset past the append_off.
284 	 */
285 	base_off = hammer_xlate_to_zone2(next_offset &
286 					~HAMMER_BIGBLOCK_MASK64);
287 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
288 	if (resv) {
289 		if (resv->zone != zone) {
290 			hammer_unlock(&hmp->blkmap_lock);
291 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
292 				      ~HAMMER_BIGBLOCK_MASK64;
293 			goto again;
294 		}
295 		if (offset < resv->append_off) {
296 			hammer_unlock(&hmp->blkmap_lock);
297 			next_offset += resv->append_off - offset;
298 			goto again;
299 		}
300 		++resv->refs;
301 	}
302 
303 	/*
304 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
305 	 * of the layer for real.  At this point we've validated any
306 	 * reservation that might exist and can just ignore resv.
307 	 */
308 	if (layer2->zone == 0) {
309 		/*
310 		 * Assign the big-block to our zone
311 		 */
312 		hammer_modify_buffer(trans, buffer1,
313 				     layer1, sizeof(*layer1));
314 		--layer1->blocks_free;
315 		layer1->layer1_crc = crc32(layer1,
316 					   HAMMER_LAYER1_CRCSIZE);
317 		hammer_modify_buffer_done(buffer1);
318 		hammer_modify_buffer(trans, buffer2,
319 				     layer2, sizeof(*layer2));
320 		layer2->zone = zone;
321 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
322 		KKASSERT(layer2->append_off == 0);
323 		hammer_modify_volume_field(trans, trans->rootvol,
324 					   vol0_stat_freebigblocks);
325 		--root_volume->ondisk->vol0_stat_freebigblocks;
326 		hmp->copy_stat_freebigblocks =
327 			root_volume->ondisk->vol0_stat_freebigblocks;
328 		hammer_modify_volume_done(trans->rootvol);
329 	} else {
330 		hammer_modify_buffer(trans, buffer2,
331 				     layer2, sizeof(*layer2));
332 	}
333 	KKASSERT(layer2->zone == zone);
334 
335 	/*
336 	 * NOTE: bytes_free can legally go negative due to de-dup.
337 	 */
338 	layer2->bytes_free -= bytes;
339 	KKASSERT(layer2->append_off <= offset);
340 	layer2->append_off = offset + bytes;
341 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
342 	hammer_modify_buffer_done(buffer2);
343 
344 	/*
345 	 * We hold the blockmap lock and should be the only ones
346 	 * capable of modifying resv->append_off.  Track the allocation
347 	 * as appropriate.
348 	 */
349 	KKASSERT(bytes != 0);
350 	if (resv) {
351 		KKASSERT(resv->append_off <= offset);
352 		resv->append_off = offset + bytes;
353 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
354 		hammer_blockmap_reserve_complete(hmp, resv);
355 	}
356 
357 	/*
358 	 * If we are allocating from the base of a new buffer we can avoid
359 	 * a disk read by calling hammer_bnew_ext().
360 	 */
361 	if ((next_offset & HAMMER_BUFMASK) == 0) {
362 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
363 				errorp, &buffer3);
364 	}
365 	result_offset = next_offset;
366 
367 	/*
368 	 * If we weren't supplied with a hint or could not use the hint
369 	 * then we wound up using blockmap->next_offset as the hint and
370 	 * need to save it.
371 	 */
372 	if (use_hint == 0) {
373 		hammer_modify_volume_noundo(NULL, root_volume);
374 		blockmap->next_offset = next_offset + bytes;
375 		hammer_modify_volume_done(root_volume);
376 	}
377 	hammer_unlock(&hmp->blkmap_lock);
378 failed:
379 
380 	/*
381 	 * Cleanup
382 	 */
383 	if (buffer1)
384 		hammer_rel_buffer(buffer1, 0);
385 	if (buffer2)
386 		hammer_rel_buffer(buffer2, 0);
387 	if (buffer3)
388 		hammer_rel_buffer(buffer3, 0);
389 
390 	return(result_offset);
391 }
392 
393 /*
394  * Frontend function - Reserve bytes in a zone.
395  *
396  * This code reserves bytes out of a blockmap without committing to any
397  * meta-data modifications, allowing the front-end to directly issue disk
398  * write I/O for big-blocks of data.
399  *
400  * The backend later finalizes the reservation with hammer_blockmap_finalize()
401  * upon committing the related record.
402  */
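/*
 * A rough usage sketch (editor's illustration - the zone constant and
 * call ordering are examples, not taken from this file):
 *
 *	resv = hammer_blockmap_reserve(hmp, HAMMER_ZONE_LARGE_DATA_INDEX,
 *				       bytes, &zone_off, &error);
 *	... frontend issues direct write I/O against zone_off ...
 *	hammer_blockmap_finalize(trans, resv, zone_off, bytes);
 *	hammer_blockmap_reserve_complete(hmp, resv);
 */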
403 hammer_reserve_t
404 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
405 			hammer_off_t *zone_offp, int *errorp)
406 {
407 	hammer_volume_t root_volume;
408 	hammer_blockmap_t blockmap;
409 	hammer_blockmap_t freemap;
410 	struct hammer_blockmap_layer1 *layer1;
411 	struct hammer_blockmap_layer2 *layer2;
412 	hammer_buffer_t buffer1 = NULL;
413 	hammer_buffer_t buffer2 = NULL;
414 	hammer_buffer_t buffer3 = NULL;
415 	hammer_off_t tmp_offset;
416 	hammer_off_t next_offset;
417 	hammer_off_t layer1_offset;
418 	hammer_off_t layer2_offset;
419 	hammer_off_t base_off;
420 	hammer_reserve_t resv;
421 	hammer_reserve_t resx;
422 	int loops = 0;
423 	int offset;
424 
425 	/*
426 	 * Setup
427 	 */
428 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
429 	root_volume = hammer_get_root_volume(hmp, errorp);
430 	if (*errorp)
431 		return(NULL);
432 	blockmap = &hmp->blockmap[zone];
433 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
434 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
435 
436 	/*
437 	 * Deal with alignment and buffer-boundary issues.
438 	 *
439 	 * Be careful, certain primary alignments are used below to allocate
440 	 * new blockmap blocks.
441 	 */
442 	bytes = (bytes + 15) & ~15;
443 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
444 
445 	next_offset = blockmap->next_offset;
446 again:
447 	resv = NULL;
448 	/*
449 	 * Check for wrap
450 	 */
451 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
452 		if (++loops == 2) {
453 			*errorp = ENOSPC;
454 			goto failed;
455 		}
456 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
457 	}
458 
459 	/*
460 	 * The allocation request may not cross a buffer boundary.  Special
461 	 * large allocations must not cross a big-block boundary.
462 	 */
463 	tmp_offset = next_offset + bytes - 1;
464 	if (bytes <= HAMMER_BUFSIZE) {
465 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
466 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
467 			goto again;
468 		}
469 	} else {
470 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
471 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
472 			goto again;
473 		}
474 	}
475 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
476 
477 	/*
478 	 * Dive layer 1.
479 	 */
480 	layer1_offset = freemap->phys_offset +
481 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
482 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
483 	if (*errorp)
484 		goto failed;
485 
486 	/*
487 	 * Check CRC.
488 	 */
489 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
490 		hammer_lock_ex(&hmp->blkmap_lock);
491 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
492 			panic("CRC FAILED: LAYER1");
493 		hammer_unlock(&hmp->blkmap_lock);
494 	}
495 
496 	/*
497 	 * If we are at a big-block boundary and layer1 indicates no
498 	 * free big-blocks, then we cannot allocate a new big-block in
499 	 * layer2, skip to the next layer1 entry.
500 	 */
501 	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
502 	    layer1->blocks_free == 0) {
503 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
504 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
505 		goto again;
506 	}
507 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
508 
509 	/*
510 	 * Dive layer 2, each entry represents a big-block.
511 	 */
512 	layer2_offset = layer1->phys_offset +
513 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
514 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
515 	if (*errorp)
516 		goto failed;
517 
518 	/*
519 	 * Check CRC if not allocating into uninitialized space (which we
520 	 * aren't when reserving space).
521 	 */
522 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
523 		hammer_lock_ex(&hmp->blkmap_lock);
524 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
525 			panic("CRC FAILED: LAYER2");
526 		hammer_unlock(&hmp->blkmap_lock);
527 	}
528 
529 	/*
530 	 * Skip the layer if the zone is owned by someone other then us.
531 	 * Skip the layer if the zone is owned by someone other than us.
532 	if (layer2->zone && layer2->zone != zone) {
533 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
534 		goto again;
535 	}
536 	if (offset < layer2->append_off) {
537 		next_offset += layer2->append_off - offset;
538 		goto again;
539 	}
540 
541 	/*
542 	 * We need the lock from this point on.  We have to re-check zone
543 	 * ownership after acquiring the lock and also check for reservations.
544 	 */
545 	hammer_lock_ex(&hmp->blkmap_lock);
546 
547 	if (layer2->zone && layer2->zone != zone) {
548 		hammer_unlock(&hmp->blkmap_lock);
549 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
550 		goto again;
551 	}
552 	if (offset < layer2->append_off) {
553 		hammer_unlock(&hmp->blkmap_lock);
554 		next_offset += layer2->append_off - offset;
555 		goto again;
556 	}
557 
558 	/*
559 	 * The big-block might be reserved by another zone.  If it is reserved
560 	 * by our zone we may have to move next_offset past the append_off.
561 	 */
562 	base_off = hammer_xlate_to_zone2(next_offset &
563 					~HAMMER_BIGBLOCK_MASK64);
564 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
565 	if (resv) {
566 		if (resv->zone != zone) {
567 			hammer_unlock(&hmp->blkmap_lock);
568 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
569 				      ~HAMMER_BIGBLOCK_MASK64;
570 			goto again;
571 		}
572 		if (offset < resv->append_off) {
573 			hammer_unlock(&hmp->blkmap_lock);
574 			next_offset += resv->append_off - offset;
575 			goto again;
576 		}
577 		++resv->refs;
578 		resx = NULL;
579 	} else {
580 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
581 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
582 		resx->refs = 1;
583 		resx->zone = zone;
584 		resx->zone_offset = base_off;
585 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
586 			resx->flags |= HAMMER_RESF_LAYER2FREE;
587 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
588 		KKASSERT(resv == NULL);
589 		resv = resx;
590 		++hammer_count_reservations;
591 	}
592 	resv->append_off = offset + bytes;
593 
594 	/*
595 	 * If we are not reserving a whole buffer but are at the start of
596 	 * a new block, call hammer_bnew() to avoid a disk read.
597 	 *
598 	 * If we are reserving a whole buffer (or more), the caller will
599 	 * probably use a direct read, so do nothing.
600 	 *
601 	 * If we do not have a whole lot of system memory we really can't
602 	 * afford to block while holding the blkmap_lock!
603 	 */
604 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
605 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE))
606 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
607 	}
608 
609 	/*
610 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
611 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
612 	 * be big-block aligned.
613 	 */
614 	blockmap->next_offset = next_offset + bytes;
615 	hammer_unlock(&hmp->blkmap_lock);
616 
617 failed:
618 	if (buffer1)
619 		hammer_rel_buffer(buffer1, 0);
620 	if (buffer2)
621 		hammer_rel_buffer(buffer2, 0);
622 	if (buffer3)
623 		hammer_rel_buffer(buffer3, 0);
624 	hammer_rel_volume(root_volume, 0);
625 	*zone_offp = next_offset;
626 
627 	return(resv);
628 }
629 
630 /*
631  * Frontend function - Dedup bytes in a zone.
632  *
633  * Dedup reservations work exactly the same as normal write reservations
634  * except we only adjust the bytes_free field and don't touch the append
635  * offset.  The finalization mechanism for dedup reservations is also the
636  * same as for normal write ones - the backend finalizes the reservation with
637  * hammer_blockmap_finalize().
638  */
639 hammer_reserve_t
640 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
641 			      hammer_off_t zone_offset, int *errorp)
642 {
643 	hammer_volume_t root_volume;
644 	hammer_blockmap_t freemap;
645 	struct hammer_blockmap_layer1 *layer1;
646 	struct hammer_blockmap_layer2 *layer2;
647 	hammer_buffer_t buffer1 = NULL;
648 	hammer_buffer_t buffer2 = NULL;
649 	hammer_off_t layer1_offset;
650 	hammer_off_t layer2_offset;
651 	hammer_off_t base_off;
652 	hammer_reserve_t resv = NULL;
653 	hammer_reserve_t resx = NULL;
654 
655 	/*
656 	 * Setup
657 	 */
658 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
659 	root_volume = hammer_get_root_volume(hmp, errorp);
660 	if (*errorp)
661 		return (NULL);
662 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
663 	KKASSERT(freemap->phys_offset != 0);
664 
665 	bytes = (bytes + 15) & ~15;
666 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
667 
668 	/*
669 	 * Dive layer 1.
670 	 */
671 	layer1_offset = freemap->phys_offset +
672 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
673 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
674 	if (*errorp)
675 		goto failed;
676 
677 	/*
678 	 * Check CRC.
679 	 */
680 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
681 		hammer_lock_ex(&hmp->blkmap_lock);
682 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
683 			panic("CRC FAILED: LAYER1");
684 		hammer_unlock(&hmp->blkmap_lock);
685 	}
686 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
687 
688 	/*
689 	 * Dive layer 2, each entry represents a big-block.
690 	 */
691 	layer2_offset = layer1->phys_offset +
692 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
693 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
694 	if (*errorp)
695 		goto failed;
696 
697 	/*
698 	 * Check CRC.
699 	 */
700 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
701 		hammer_lock_ex(&hmp->blkmap_lock);
702 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
703 			panic("CRC FAILED: LAYER2");
704 		hammer_unlock(&hmp->blkmap_lock);
705 	}
706 
707 	/*
708 	 * Fail if the zone is owned by someone other than us.
709 	 */
710 	if (layer2->zone && layer2->zone != zone)
711 		goto failed;
712 
713 	/*
714 	 * We need the lock from this point on.  We have to re-check zone
715 	 * ownership after acquiring the lock and also check for reservations.
716 	 */
717 	hammer_lock_ex(&hmp->blkmap_lock);
718 
719 	if (layer2->zone && layer2->zone != zone) {
720 		hammer_unlock(&hmp->blkmap_lock);
721 		goto failed;
722 	}
723 
724 	base_off = hammer_xlate_to_zone2(zone_offset &
725 					~HAMMER_BIGBLOCK_MASK64);
726 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
727 	if (resv) {
728 		if (resv->zone != zone) {
729 			hammer_unlock(&hmp->blkmap_lock);
730 			resv = NULL;
731 			goto failed;
732 		}
733 		/*
734 		 * Due to possible big-block underflow we can't simply
735 		 * subtract bytes from bytes_free.
736 		 */
737 		if (update_bytes_free(resv, bytes) == 0) {
738 			hammer_unlock(&hmp->blkmap_lock);
739 			resv = NULL;
740 			goto failed;
741 		}
742 		++resv->refs;
743 		resx = NULL;
744 	} else {
745 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
746 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
747 		resx->refs = 1;
748 		resx->zone = zone;
749 		resx->bytes_free = layer2->bytes_free;
750 		/*
751 		 * Due to possible big-block underflow we can't simply
752 		 * subtract bytes from bytes_free.
753 		 */
754 		if (update_bytes_free(resx, bytes) == 0) {
755 			hammer_unlock(&hmp->blkmap_lock);
756 			kfree(resx, hmp->m_misc);
757 			goto failed;
758 		}
759 		resx->zone_offset = base_off;
760 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
761 		KKASSERT(resv == NULL);
762 		resv = resx;
763 		++hammer_count_reservations;
764 	}
765 
766 	hammer_unlock(&hmp->blkmap_lock);
767 
768 failed:
769 	if (buffer1)
770 		hammer_rel_buffer(buffer1, 0);
771 	if (buffer2)
772 		hammer_rel_buffer(buffer2, 0);
773 	hammer_rel_volume(root_volume, 0);
774 
775 	return(resv);
776 }
777 
778 static int
779 update_bytes_free(hammer_reserve_t resv, int bytes)
780 {
781 	int32_t temp;
782 
783 	/*
784 	 * Big-block underflow check
785 	 */
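	/*
	 * (the comparison below can only be true if the 32-bit subtraction
	 * wrapped, i.e. bytes_free is already absurdly negative; refuse the
	 * dedup in that case rather than pushing the counter further down)
	 */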
786 	temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
787 	cpu_ccfence(); /* XXX do we really need it? */
788 	if (temp > resv->bytes_free) {
789 		kprintf("BIGBLOCK UNDERFLOW\n");
790 		return (0);
791 	}
792 
793 	resv->bytes_free -= bytes;
794 	return (1);
795 }
796 
797 /*
798  * Dereference a reservation structure.  Upon the final release the
799  * underlying big-block is checked and if it is entirely free we delete
800  * any related HAMMER buffers to avoid potential conflicts with future
801  * reuse of the big-block.
802  */
803 void
804 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
805 {
806 	hammer_off_t base_offset;
807 	int error;
808 
809 	KKASSERT(resv->refs > 0);
810 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
811 		 HAMMER_ZONE_RAW_BUFFER);
812 
813 	/*
814 	 * Setting append_off to the max prevents any new allocations
815 	 * from occurring while we are trying to dispose of the reservation,
816 	 * allowing us to safely delete any related HAMMER buffers.
817 	 *
818 	 * If we are unable to clean out all related HAMMER buffers we
819 	 * requeue the delay.
820 	 */
821 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
822 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
823 		base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
824 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
825 			hammer_dedup_cache_inval(hmp, base_offset);
826 		error = hammer_del_buffers(hmp, base_offset,
827 					   resv->zone_offset,
828 					   HAMMER_BIGBLOCK_SIZE,
829 					   1);
830 		if (hammer_debug_general & 0x20000) {
831 			kprintf("hammer: delbgblk %016jx error %d\n",
832 				(intmax_t)base_offset, error);
833 		}
834 		if (error)
835 			hammer_reserve_setdelay(hmp, resv);
836 	}
837 	if (--resv->refs == 0) {
838 		if (hammer_debug_general & 0x20000) {
839 			kprintf("hammer: delresvr %016jx zone %02x\n",
840 				(intmax_t)resv->zone_offset, resv->zone);
841 		}
842 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
843 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
844 		kfree(resv, hmp->m_misc);
845 		--hammer_count_reservations;
846 	}
847 }
848 
849 /*
850  * Prevent a potentially free big-block from being reused until after
851  * the related flushes have completely cycled, otherwise crash recovery
852  * could resurrect a data block that was already reused and overwritten.
853  *
854  * The caller might reset the underlying layer2 entry's append_off to 0, so
855  * our covering append_off must be set to max to prevent any reallocation
856  * until after the flush delays complete, not to mention proper invalidation
857  * of any underlying cached blocks.
858  */
859 static void
860 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
861 			int zone, struct hammer_blockmap_layer2 *layer2)
862 {
863 	hammer_reserve_t resv;
864 
865 	/*
866 	 * Allocate the reservation if necessary.
867 	 *
868 	 * NOTE: need lock in future around resv lookup/allocation and
869 	 * the setdelay call, currently refs is not bumped until the call.
870 	 */
871 again:
872 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
873 	if (resv == NULL) {
874 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
875 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
876 		resv->zone = zone;
877 		resv->zone_offset = base_offset;
878 		resv->refs = 0;
879 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
880 
881 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
882 			resv->flags |= HAMMER_RESF_LAYER2FREE;
883 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
884 			kfree(resv, hmp->m_misc);
885 			goto again;
886 		}
887 		++hammer_count_reservations;
888 	} else {
889 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
890 			resv->flags |= HAMMER_RESF_LAYER2FREE;
891 	}
892 	hammer_reserve_setdelay(hmp, resv);
893 }
894 
895 /*
896  * Enter the reservation on the on-delay list, or move it if it
897  * is already on the list.
898  */
899 static void
900 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
901 {
902 	if (resv->flags & HAMMER_RESF_ONDELAY) {
903 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
904 		resv->flush_group = hmp->flusher.next + 1;
905 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
906 	} else {
907 		++resv->refs;
908 		++hmp->rsv_fromdelay;
909 		resv->flags |= HAMMER_RESF_ONDELAY;
910 		resv->flush_group = hmp->flusher.next + 1;
911 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
912 	}
913 }
914 
915 /*
916  * Reserve has reached its flush point, remove it from the delay list
917  * and finish it off.  hammer_blockmap_reserve_complete() inherits
918  * the ondelay reference.
919  */
920 void
921 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
922 {
923 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
924 	resv->flags &= ~HAMMER_RESF_ONDELAY;
925 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
926 	--hmp->rsv_fromdelay;
927 	hammer_blockmap_reserve_complete(hmp, resv);
928 }
929 
930 /*
931  * Backend function - free (offset, bytes) in a zone.
932  *
933  * XXX error return
934  */
935 void
936 hammer_blockmap_free(hammer_transaction_t trans,
937 		     hammer_off_t zone_offset, int bytes)
938 {
939 	hammer_mount_t hmp;
940 	hammer_volume_t root_volume;
941 	hammer_blockmap_t freemap;
942 	struct hammer_blockmap_layer1 *layer1;
943 	struct hammer_blockmap_layer2 *layer2;
944 	hammer_buffer_t buffer1 = NULL;
945 	hammer_buffer_t buffer2 = NULL;
946 	hammer_off_t layer1_offset;
947 	hammer_off_t layer2_offset;
948 	hammer_off_t base_off;
949 	int error;
950 	int zone;
951 
952 	if (bytes == 0)
953 		return;
954 	hmp = trans->hmp;
955 
956 	/*
957 	 * Alignment
958 	 */
959 	bytes = (bytes + 15) & ~15;
960 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
961 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
962 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
963 
964 	/*
965 	 * Basic zone validation & locking
966 	 */
967 	zone = HAMMER_ZONE_DECODE(zone_offset);
968 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
969 	root_volume = trans->rootvol;
970 	error = 0;
971 
972 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
973 
974 	/*
975 	 * Dive layer 1.
976 	 */
977 	layer1_offset = freemap->phys_offset +
978 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
979 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
980 	if (error)
981 		goto failed;
982 	KKASSERT(layer1->phys_offset &&
983 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
984 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
985 		hammer_lock_ex(&hmp->blkmap_lock);
986 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
987 			panic("CRC FAILED: LAYER1");
988 		hammer_unlock(&hmp->blkmap_lock);
989 	}
990 
991 	/*
992 	 * Dive layer 2, each entry represents a big-block.
993 	 */
994 	layer2_offset = layer1->phys_offset +
995 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
996 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
997 	if (error)
998 		goto failed;
999 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1000 		hammer_lock_ex(&hmp->blkmap_lock);
1001 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1002 			panic("CRC FAILED: LAYER2");
1003 		hammer_unlock(&hmp->blkmap_lock);
1004 	}
1005 
1006 	hammer_lock_ex(&hmp->blkmap_lock);
1007 
1008 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1009 
1010 	/*
1011 	 * Free space previously allocated via blockmap_alloc().
1012 	 *
1013 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1014 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1015 	 */
1016 	KKASSERT(layer2->zone == zone);
1017 	layer2->bytes_free += bytes;
1018 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1019 
1020 	/*
1021 	 * If a big-block becomes entirely free we must create a covering
1022 	 * reservation to prevent premature reuse.  Note, however, that
1023 	 * the big-block and/or reservation may still have an append_off
1024 	 * that allows further (non-reused) allocations.
1025 	 *
1026 	 * Once the reservation has been made we re-check layer2 and if
1027 	 * the big-block is still entirely free we reset the layer2 entry.
1028 	 * The reservation will prevent premature reuse.
1029 	 *
1030 	 * NOTE: hammer_buffer's are only invalidated when the reservation
1031 	 * is completed, if the layer2 entry is still completely free at
1032 	 * that time.  Any allocations from the reservation that may have
1033 	 * occurred in the meantime, or active references on the reservation
1034 	 * from new pending allocations, will prevent the invalidation from
1035 	 * occurring.
1036 	 */
1037 	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1038 		base_off = hammer_xlate_to_zone2(zone_offset &
1039 						~HAMMER_BIGBLOCK_MASK64);
1040 
1041 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1042 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1043 			layer2->zone = 0;
1044 			layer2->append_off = 0;
1045 			hammer_modify_buffer(trans, buffer1,
1046 					     layer1, sizeof(*layer1));
1047 			++layer1->blocks_free;
1048 			layer1->layer1_crc = crc32(layer1,
1049 						   HAMMER_LAYER1_CRCSIZE);
1050 			hammer_modify_buffer_done(buffer1);
1051 			hammer_modify_volume_field(trans,
1052 					trans->rootvol,
1053 					vol0_stat_freebigblocks);
1054 			++root_volume->ondisk->vol0_stat_freebigblocks;
1055 			hmp->copy_stat_freebigblocks =
1056 			   root_volume->ondisk->vol0_stat_freebigblocks;
1057 			hammer_modify_volume_done(trans->rootvol);
1058 		}
1059 	}
1060 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1061 	hammer_modify_buffer_done(buffer2);
1062 	hammer_unlock(&hmp->blkmap_lock);
1063 
1064 failed:
1065 	if (buffer1)
1066 		hammer_rel_buffer(buffer1, 0);
1067 	if (buffer2)
1068 		hammer_rel_buffer(buffer2, 0);
1069 }
1070 
1071 int
1072 hammer_blockmap_dedup(hammer_transaction_t trans,
1073 		     hammer_off_t zone_offset, int bytes)
1074 {
1075 	hammer_mount_t hmp;
1076 	hammer_blockmap_t freemap;
1077 	struct hammer_blockmap_layer1 *layer1;
1078 	struct hammer_blockmap_layer2 *layer2;
1079 	hammer_buffer_t buffer1 = NULL;
1080 	hammer_buffer_t buffer2 = NULL;
1081 	hammer_off_t layer1_offset;
1082 	hammer_off_t layer2_offset;
1083 	int32_t temp;
1084 	int error;
1085 	int zone __debugvar;
1086 
1087 	if (bytes == 0)
1088 		return (0);
1089 	hmp = trans->hmp;
1090 
1091 	/*
1092 	 * Alignment
1093 	 */
1094 	bytes = (bytes + 15) & ~15;
1095 	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
1096 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1097 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
1098 
1099 	/*
1100 	 * Basic zone validation & locking
1101 	 */
1102 	zone = HAMMER_ZONE_DECODE(zone_offset);
1103 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1104 	error = 0;
1105 
1106 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1107 
1108 	/*
1109 	 * Dive layer 1.
1110 	 */
1111 	layer1_offset = freemap->phys_offset +
1112 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1113 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1114 	if (error)
1115 		goto failed;
1116 	KKASSERT(layer1->phys_offset &&
1117 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1118 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1119 		hammer_lock_ex(&hmp->blkmap_lock);
1120 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1121 			panic("CRC FAILED: LAYER1");
1122 		hammer_unlock(&hmp->blkmap_lock);
1123 	}
1124 
1125 	/*
1126 	 * Dive layer 2, each entry represents a big-block.
1127 	 */
1128 	layer2_offset = layer1->phys_offset +
1129 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1130 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1131 	if (error)
1132 		goto failed;
1133 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1134 		hammer_lock_ex(&hmp->blkmap_lock);
1135 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1136 			panic("CRC FAILED: LAYER2");
1137 		hammer_unlock(&hmp->blkmap_lock);
1138 	}
1139 
1140 	hammer_lock_ex(&hmp->blkmap_lock);
1141 
1142 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1143 
1144 	/*
1145 	 * Free space previously allocated via blockmap_alloc().
1146 	 *
1147 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1148 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1149 	 */
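	/*
	 * Same wrap-around guard as update_bytes_free(): the temp
	 * comparison below can only be true if the 32-bit subtraction
	 * wrapped, in which case the dedup is refused with ERANGE.
	 */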
1150 	KKASSERT(layer2->zone == zone);
1151 	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
1152 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1153 	if (temp > layer2->bytes_free) {
1154 		error = ERANGE;
1155 		goto underflow;
1156 	}
1157 	layer2->bytes_free -= bytes;
1158 
1159 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1160 
1161 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1162 underflow:
1163 	hammer_modify_buffer_done(buffer2);
1164 	hammer_unlock(&hmp->blkmap_lock);
1165 
1166 failed:
1167 	if (buffer1)
1168 		hammer_rel_buffer(buffer1, 0);
1169 	if (buffer2)
1170 		hammer_rel_buffer(buffer2, 0);
1171 	return (error);
1172 }
1173 
1174 /*
1175  * Backend function - finalize (offset, bytes) in a zone.
1176  *
1177  * Allocate space that was previously reserved by the frontend.
1178  */
1179 int
1180 hammer_blockmap_finalize(hammer_transaction_t trans,
1181 			 hammer_reserve_t resv,
1182 			 hammer_off_t zone_offset, int bytes)
1183 {
1184 	hammer_mount_t hmp;
1185 	hammer_volume_t root_volume;
1186 	hammer_blockmap_t freemap;
1187 	struct hammer_blockmap_layer1 *layer1;
1188 	struct hammer_blockmap_layer2 *layer2;
1189 	hammer_buffer_t buffer1 = NULL;
1190 	hammer_buffer_t buffer2 = NULL;
1191 	hammer_off_t layer1_offset;
1192 	hammer_off_t layer2_offset;
1193 	int error;
1194 	int zone;
1195 	int offset;
1196 
1197 	if (bytes == 0)
1198 		return(0);
1199 	hmp = trans->hmp;
1200 
1201 	/*
1202 	 * Alignment
1203 	 */
1204 	bytes = (bytes + 15) & ~15;
1205 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1206 
1207 	/*
1208 	 * Basic zone validation & locking
1209 	 */
1210 	zone = HAMMER_ZONE_DECODE(zone_offset);
1211 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1212 	root_volume = trans->rootvol;
1213 	error = 0;
1214 
1215 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1216 
1217 	/*
1218 	 * Dive layer 1.
1219 	 */
1220 	layer1_offset = freemap->phys_offset +
1221 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1222 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1223 	if (error)
1224 		goto failed;
1225 	KKASSERT(layer1->phys_offset &&
1226 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1227 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1228 		hammer_lock_ex(&hmp->blkmap_lock);
1229 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1230 			panic("CRC FAILED: LAYER1");
1231 		hammer_unlock(&hmp->blkmap_lock);
1232 	}
1233 
1234 	/*
1235 	 * Dive layer 2, each entry represents a big-block.
1236 	 */
1237 	layer2_offset = layer1->phys_offset +
1238 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1239 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1240 	if (error)
1241 		goto failed;
1242 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1243 		hammer_lock_ex(&hmp->blkmap_lock);
1244 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1245 			panic("CRC FAILED: LAYER2");
1246 		hammer_unlock(&hmp->blkmap_lock);
1247 	}
1248 
1249 	hammer_lock_ex(&hmp->blkmap_lock);
1250 
1251 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1252 
1253 	/*
1254 	 * Finalize some or all of the space covered by a current
1255 	 * reservation.  An allocation in the same layer may have
1256 	 * already assigned ownership.
1257 	 */
1258 	if (layer2->zone == 0) {
1259 		hammer_modify_buffer(trans, buffer1,
1260 				     layer1, sizeof(*layer1));
1261 		--layer1->blocks_free;
1262 		layer1->layer1_crc = crc32(layer1,
1263 					   HAMMER_LAYER1_CRCSIZE);
1264 		hammer_modify_buffer_done(buffer1);
1265 		layer2->zone = zone;
1266 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1267 		KKASSERT(layer2->append_off == 0);
1268 		hammer_modify_volume_field(trans,
1269 				trans->rootvol,
1270 				vol0_stat_freebigblocks);
1271 		--root_volume->ondisk->vol0_stat_freebigblocks;
1272 		hmp->copy_stat_freebigblocks =
1273 		   root_volume->ondisk->vol0_stat_freebigblocks;
1274 		hammer_modify_volume_done(trans->rootvol);
1275 	}
1276 	if (layer2->zone != zone)
1277 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1278 	KKASSERT(layer2->zone == zone);
1279 	KKASSERT(bytes != 0);
1280 	layer2->bytes_free -= bytes;
1281 
1282 	if (resv) {
1283 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1284 	}
1285 
1286 	/*
1287 	 * Finalizations can occur out of order, or combined with allocations.
1288 	 * append_off must be set to the highest allocated offset.
1289 	 */
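	/*
	 * (e.g. if 4K at big-block offset 8K finalizes first, append_off
	 * becomes 12K; a later finalize of 4K at offset 4K computes 8K,
	 * which is smaller, and leaves append_off alone)
	 */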
1290 	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1291 	if (layer2->append_off < offset)
1292 		layer2->append_off = offset;
1293 
1294 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1295 	hammer_modify_buffer_done(buffer2);
1296 	hammer_unlock(&hmp->blkmap_lock);
1297 
1298 failed:
1299 	if (buffer1)
1300 		hammer_rel_buffer(buffer1, 0);
1301 	if (buffer2)
1302 		hammer_rel_buffer(buffer2, 0);
1303 	return(error);
1304 }
1305 
1306 /*
1307  * Return the approximate number of free bytes in the big-block
1308  * containing the specified blockmap offset.
1309  *
1310  * WARNING: A negative number can be returned if data de-dup exists,
1311  *	    and the result will also not represent the actual number
1312  *	    of free bytes in this case.
1313  *
1314  *	    This code is used only by the reblocker.
1315  */
1316 int
1317 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1318 			int *curp, int *errorp)
1319 {
1320 	hammer_volume_t root_volume;
1321 	hammer_blockmap_t blockmap;
1322 	hammer_blockmap_t freemap;
1323 	struct hammer_blockmap_layer1 *layer1;
1324 	struct hammer_blockmap_layer2 *layer2;
1325 	hammer_buffer_t buffer = NULL;
1326 	hammer_off_t layer1_offset;
1327 	hammer_off_t layer2_offset;
1328 	int32_t bytes;
1329 	int zone;
1330 
1331 	zone = HAMMER_ZONE_DECODE(zone_offset);
1332 	KKASSERT(zone >= HAMMER_ZONE2_MAPPED_INDEX && zone < HAMMER_MAX_ZONES);
1333 	root_volume = hammer_get_root_volume(hmp, errorp);
1334 	if (*errorp) {
1335 		*curp = 0;
1336 		return(0);
1337 	}
1338 	blockmap = &hmp->blockmap[zone];
1339 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1340 
1341 	/*
1342 	 * Dive layer 1.
1343 	 */
1344 	layer1_offset = freemap->phys_offset +
1345 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1346 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1347 	if (*errorp) {
1348 		*curp = 0;
1349 		bytes = 0;
1350 		goto failed;
1351 	}
1352 	KKASSERT(layer1->phys_offset);
1353 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1354 		hammer_lock_ex(&hmp->blkmap_lock);
1355 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1356 			panic("CRC FAILED: LAYER1");
1357 		hammer_unlock(&hmp->blkmap_lock);
1358 	}
1359 
1360 	/*
1361 	 * Dive layer 2, each entry represents a big-block.
1362 	 *
1363 	 * (reuse buffer, layer1 pointer becomes invalid)
1364 	 */
1365 	layer2_offset = layer1->phys_offset +
1366 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1367 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1368 	if (*errorp) {
1369 		*curp = 0;
1370 		bytes = 0;
1371 		goto failed;
1372 	}
1373 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1374 		hammer_lock_ex(&hmp->blkmap_lock);
1375 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1376 			panic("CRC FAILED: LAYER2");
1377 		hammer_unlock(&hmp->blkmap_lock);
1378 	}
1379 	KKASSERT(layer2->zone == zone);
1380 
1381 	bytes = layer2->bytes_free;
1382 
1383 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1384 		*curp = 0;
1385 	else
1386 		*curp = 1;
1387 failed:
1388 	if (buffer)
1389 		hammer_rel_buffer(buffer, 0);
1390 	hammer_rel_volume(root_volume, 0);
1391 	if (hammer_debug_general & 0x0800) {
1392 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1393 			(long long)zone_offset, bytes);
1394 	}
1395 	return(bytes);
1396 }
1397 
1398 
1399 /*
1400  * Lookup a blockmap offset and verify blockmap layers.
1401  */
1402 hammer_off_t
1403 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1404 			int *errorp)
1405 {
1406 	hammer_volume_t root_volume;
1407 	hammer_blockmap_t freemap;
1408 	struct hammer_blockmap_layer1 *layer1;
1409 	struct hammer_blockmap_layer2 *layer2;
1410 	hammer_buffer_t buffer = NULL;
1411 	hammer_off_t layer1_offset;
1412 	hammer_off_t layer2_offset;
1413 	hammer_off_t result_offset;
1414 	hammer_off_t base_off;
1415 	hammer_reserve_t resv __debugvar;
1416 	int zone;
1417 
1418 	/*
1419 	 * Calculate the zone-2 offset.
1420 	 */
1421 	zone = HAMMER_ZONE_DECODE(zone_offset);
1422 	result_offset = hammer_xlate_to_zone2(zone_offset);
1423 
1424 	/*
1425 	 * Validate the allocation zone
1426 	 */
1427 	root_volume = hammer_get_root_volume(hmp, errorp);
1428 	if (*errorp)
1429 		return(0);
1430 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1431 	KKASSERT(freemap->phys_offset != 0);
1432 
1433 	/*
1434 	 * Dive layer 1.
1435 	 */
1436 	layer1_offset = freemap->phys_offset +
1437 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1438 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1439 	if (*errorp)
1440 		goto failed;
1441 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1442 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1443 		hammer_lock_ex(&hmp->blkmap_lock);
1444 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1445 			panic("CRC FAILED: LAYER1");
1446 		hammer_unlock(&hmp->blkmap_lock);
1447 	}
1448 
1449 	/*
1450 	 * Dive layer 2, each entry represents a big-block.
1451 	 */
1452 	layer2_offset = layer1->phys_offset +
1453 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1454 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1455 
1456 	if (*errorp)
1457 		goto failed;
1458 	if (layer2->zone == 0) {
1459 		base_off = hammer_xlate_to_zone2(zone_offset &
1460 						~HAMMER_BIGBLOCK_MASK64);
1461 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1462 				 base_off);
1463 		KKASSERT(resv && resv->zone == zone);
1464 
1465 	} else if (layer2->zone != zone) {
1466 		panic("hammer_blockmap_lookup_verify: bad zone %d/%d",
1467 			layer2->zone, zone);
1468 	}
1469 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1470 		hammer_lock_ex(&hmp->blkmap_lock);
1471 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1472 			panic("CRC FAILED: LAYER2");
1473 		hammer_unlock(&hmp->blkmap_lock);
1474 	}
1475 
1476 failed:
1477 	if (buffer)
1478 		hammer_rel_buffer(buffer, 0);
1479 	hammer_rel_volume(root_volume, 0);
1480 	if (hammer_debug_general & 0x0800) {
1481 		kprintf("hammer_blockmap_lookup_verify: %016llx -> %016llx\n",
1482 			(long long)zone_offset, (long long)result_offset);
1483 	}
1484 	return(result_offset);
1485 }
1486 
1487 
1488 /*
1489  * Check space availability
1490  *
1491  * MPSAFE - does not require fs_token
1492  */
1493 int
1494 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1495 {
1496 	const int in_size = sizeof(struct hammer_inode_data) +
1497 			    sizeof(union hammer_btree_elm);
1498 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1499 	int64_t usedbytes;
1500 
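	/*
	 * usedbytes is a pessimistic byte-estimate of outstanding frontend
	 * reservations plus the dirty buffer limit plus the caller's slop
	 * (given in big-blocks); it is compared against the cached free
	 * big-block count after shifting down to big-block units.
	 */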
1501 	usedbytes = hmp->rsv_inodes * in_size +
1502 		    hmp->rsv_recs * rec_size +
1503 		    hmp->rsv_databytes +
1504 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1505 		    ((int64_t)hammer_limit_dirtybufspace) +
1506 		    (slop << HAMMER_BIGBLOCK_BITS);
1507 
1508 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1509 	if (resp)
1510 		*resp = usedbytes;
1511 
1512 	if (hmp->copy_stat_freebigblocks >=
1513 	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1514 		return(0);
1515 	}
1516 	return (ENOSPC);
1517 }
1518 
1519