xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision cfd1aba3)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
38 #include "hammer.h"
39 
40 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
41 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
42 				    hammer_off_t base_offset, int zone,
43 				    struct hammer_blockmap_layer2 *layer2);
44 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
45 static int update_bytes_free(hammer_reserve_t resv, int bytes);
46 
47 /*
48  * Reserved big-blocks red-black tree support
49  */
50 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
51 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
52 
53 static int
54 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
55 {
56 	if (res1->zone_offset < res2->zone_offset)
57 		return(-1);
58 	if (res1->zone_offset > res2->zone_offset)
59 		return(1);
60 	return(0);
61 }
62 
63 /*
64  * Allocate bytes from a zone
65  */
66 hammer_off_t
67 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
68 		      hammer_off_t hint, int *errorp)
69 {
70 	hammer_mount_t hmp;
71 	hammer_volume_t root_volume;
72 	hammer_blockmap_t blockmap;
73 	hammer_blockmap_t freemap;
74 	hammer_reserve_t resv;
75 	struct hammer_blockmap_layer1 *layer1;
76 	struct hammer_blockmap_layer2 *layer2;
77 	hammer_buffer_t buffer1 = NULL;
78 	hammer_buffer_t buffer2 = NULL;
79 	hammer_buffer_t buffer3 = NULL;
80 	hammer_off_t tmp_offset;
81 	hammer_off_t next_offset;
82 	hammer_off_t result_offset;
83 	hammer_off_t layer1_offset;
84 	hammer_off_t layer2_offset;
85 	hammer_off_t base_off;
86 	int loops = 0;
87 	int offset;		/* offset within big-block */
88 	int use_hint;
89 
90 	hmp = trans->hmp;
91 
92 	/*
93 	 * Deal with alignment and buffer-boundary issues.
94 	 *
95 	 * Be careful, certain primary alignments are used below to allocate
96 	 * new blockmap blocks.
97 	 */
98 	bytes = (bytes + 15) & ~15;
99 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
100 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
101 
102 	/*
103 	 * Setup
104 	 */
105 	root_volume = trans->rootvol;
106 	*errorp = 0;
107 	blockmap = &hmp->blockmap[zone];
108 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
109 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
110 
111 	/*
112 	 * Use the hint if we have one.
113 	 */
114 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
115 		next_offset = (hint + 15) & ~(hammer_off_t)15;
116 		use_hint = 1;
117 	} else {
118 		next_offset = blockmap->next_offset;
119 		use_hint = 0;
120 	}
121 again:
122 
123 	/*
124 	 * use_hint is turned off if we leave the hinted big-block.
125 	 */
126 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
127 		next_offset = blockmap->next_offset;
128 		use_hint = 0;
129 	}
130 
131 	/*
132 	 * Check for wrap
133 	 */
134 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
135 		if (++loops == 2) {
136 			result_offset = 0;
137 			*errorp = ENOSPC;
138 			goto failed;
139 		}
140 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
141 	}
142 
143 	/*
144 	 * The allocation request may not cross a buffer boundary.  Special
145 	 * large allocations must not cross a large-block boundary.
146 	 */
147 	tmp_offset = next_offset + bytes - 1;
148 	if (bytes <= HAMMER_BUFSIZE) {
149 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
150 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
151 			goto again;
152 		}
153 	} else {
154 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
155 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
156 			goto again;
157 		}
158 	}
159 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
160 
161 	/*
162 	 * Dive layer 1.
163 	 */
164 	layer1_offset = freemap->phys_offset +
165 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
166 
167 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
168 	if (*errorp) {
169 		result_offset = 0;
170 		goto failed;
171 	}
172 
173 	/*
174 	 * Check CRC.
175 	 */
176 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
177 		hammer_lock_ex(&hmp->blkmap_lock);
178 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
179 			panic("CRC FAILED: LAYER1");
180 		hammer_unlock(&hmp->blkmap_lock);
181 	}
182 
183 	/*
184 	 * If we are at a big-block boundary and layer1 indicates no
185 	 * free big-blocks, then we cannot allocate a new bigblock in
186 	 * layer2, skip to the next layer1 entry.
187 	 */
188 	if (offset == 0 && layer1->blocks_free == 0) {
189 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
190 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
191 		goto again;
192 	}
193 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
194 
195 	/*
196 	 * Skip this layer1 entry if it is pointing to a layer2 big-block
197 	 * on a volume that we are currently trying to remove from the
198 	 * file-system. This is used by the volume-del code together with
199 	 * the reblocker to free up a volume.
200 	 */
201 	if ((int)HAMMER_VOL_DECODE(layer1->phys_offset) ==
202 	    hmp->volume_to_remove) {
203 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
204 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
205 		goto again;
206 	}
207 
208 	/*
209 	 * Dive layer 2, each entry represents a large-block.
210 	 */
211 	layer2_offset = layer1->phys_offset +
212 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
213 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
214 	if (*errorp) {
215 		result_offset = 0;
216 		goto failed;
217 	}
218 
219 	/*
220 	 * Check CRC.  This can race another thread holding the lock
221 	 * and in the middle of modifying layer2.
222 	 */
223 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
224 		hammer_lock_ex(&hmp->blkmap_lock);
225 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
226 			panic("CRC FAILED: LAYER2");
227 		hammer_unlock(&hmp->blkmap_lock);
228 	}
229 
230 	/*
231 	 * Skip the layer if the zone is owned by someone other then us.
232 	 */
233 	if (layer2->zone && layer2->zone != zone) {
234 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
235 		goto again;
236 	}
237 	if (offset < layer2->append_off) {
238 		next_offset += layer2->append_off - offset;
239 		goto again;
240 	}
241 
242 #if 0
243 	/*
244 	 * If operating in the current non-hint blockmap block, do not
245 	 * allow it to get over-full.  Also drop any active hinting so
246 	 * blockmap->next_offset is updated at the end.
247 	 *
248 	 * We do this for B-Tree and meta-data allocations to provide
249 	 * localization for updates.
250 	 */
251 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
252 	     zone == HAMMER_ZONE_META_INDEX) &&
253 	    offset >= HAMMER_LARGEBLOCK_OVERFILL &&
254 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_LARGEBLOCK_MASK64)
255 	) {
256 		if (offset >= HAMMER_LARGEBLOCK_OVERFILL) {
257 			next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
258 			use_hint = 0;
259 			goto again;
260 		}
261 	}
262 #endif
263 
264 	/*
265 	 * We need the lock from this point on.  We have to re-check zone
266 	 * ownership after acquiring the lock and also check for reservations.
267 	 */
268 	hammer_lock_ex(&hmp->blkmap_lock);
269 
270 	if (layer2->zone && layer2->zone != zone) {
271 		hammer_unlock(&hmp->blkmap_lock);
272 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
273 		goto again;
274 	}
275 	if (offset < layer2->append_off) {
276 		hammer_unlock(&hmp->blkmap_lock);
277 		next_offset += layer2->append_off - offset;
278 		goto again;
279 	}
280 
281 	/*
282 	 * The bigblock might be reserved by another zone.  If it is reserved
283 	 * by our zone we may have to move next_offset past the append_off.
284 	 */
285 	base_off = (next_offset &
286 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
287 		    HAMMER_ZONE_RAW_BUFFER;
288 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
289 	if (resv) {
290 		if (resv->zone != zone) {
291 			hammer_unlock(&hmp->blkmap_lock);
292 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
293 				      ~HAMMER_LARGEBLOCK_MASK64;
294 			goto again;
295 		}
296 		if (offset < resv->append_off) {
297 			hammer_unlock(&hmp->blkmap_lock);
298 			next_offset += resv->append_off - offset;
299 			goto again;
300 		}
301 		++resv->refs;
302 	}
303 
304 	/*
305 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
306 	 * of the layer for real.  At this point we've validated any
307 	 * reservation that might exist and can just ignore resv.
308 	 */
309 	if (layer2->zone == 0) {
310 		/*
311 		 * Assign the bigblock to our zone
312 		 */
313 		hammer_modify_buffer(trans, buffer1,
314 				     layer1, sizeof(*layer1));
315 		--layer1->blocks_free;
316 		layer1->layer1_crc = crc32(layer1,
317 					   HAMMER_LAYER1_CRCSIZE);
318 		hammer_modify_buffer_done(buffer1);
319 		hammer_modify_buffer(trans, buffer2,
320 				     layer2, sizeof(*layer2));
321 		layer2->zone = zone;
322 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
323 		KKASSERT(layer2->append_off == 0);
324 		hammer_modify_volume_field(trans, trans->rootvol,
325 					   vol0_stat_freebigblocks);
326 		--root_volume->ondisk->vol0_stat_freebigblocks;
327 		hmp->copy_stat_freebigblocks =
328 			root_volume->ondisk->vol0_stat_freebigblocks;
329 		hammer_modify_volume_done(trans->rootvol);
330 	} else {
331 		hammer_modify_buffer(trans, buffer2,
332 				     layer2, sizeof(*layer2));
333 	}
334 	KKASSERT(layer2->zone == zone);
335 
336 	/*
337 	 * NOTE: bytes_free can legally go negative due to de-dup.
338 	 */
339 	layer2->bytes_free -= bytes;
340 	KKASSERT(layer2->append_off <= offset);
341 	layer2->append_off = offset + bytes;
342 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
343 	hammer_modify_buffer_done(buffer2);
344 
345 	/*
346 	 * We hold the blockmap lock and should be the only ones
347 	 * capable of modifying resv->append_off.  Track the allocation
348 	 * as appropriate.
349 	 */
350 	KKASSERT(bytes != 0);
351 	if (resv) {
352 		KKASSERT(resv->append_off <= offset);
353 		resv->append_off = offset + bytes;
354 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
355 		hammer_blockmap_reserve_complete(hmp, resv);
356 	}
357 
358 	/*
359 	 * If we are allocating from the base of a new buffer we can avoid
360 	 * a disk read by calling hammer_bnew().
361 	 */
362 	if ((next_offset & HAMMER_BUFMASK) == 0) {
363 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
364 				errorp, &buffer3);
365 	}
366 	result_offset = next_offset;
367 
368 	/*
369 	 * If we weren't supplied with a hint or could not use the hint
370 	 * then we wound up using blockmap->next_offset as the hint and
371 	 * need to save it.
372 	 */
373 	if (use_hint == 0) {
374 		hammer_modify_volume(NULL, root_volume, NULL, 0);
375 		blockmap->next_offset = next_offset + bytes;
376 		hammer_modify_volume_done(root_volume);
377 	}
378 	hammer_unlock(&hmp->blkmap_lock);
379 failed:
380 
381 	/*
382 	 * Cleanup
383 	 */
384 	if (buffer1)
385 		hammer_rel_buffer(buffer1, 0);
386 	if (buffer2)
387 		hammer_rel_buffer(buffer2, 0);
388 	if (buffer3)
389 		hammer_rel_buffer(buffer3, 0);
390 
391 	return(result_offset);
392 }
393 
394 /*
395  * Frontend function - Reserve bytes in a zone.
396  *
397  * This code reserves bytes out of a blockmap without committing to any
398  * meta-data modifications, allowing the front-end to directly issue disk
399  * write I/O for large blocks of data
400  *
401  * The backend later finalizes the reservation with hammer_blockmap_finalize()
402  * upon committing the related record.
403  */
404 hammer_reserve_t
405 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
406 			hammer_off_t *zone_offp, int *errorp)
407 {
408 	hammer_volume_t root_volume;
409 	hammer_blockmap_t blockmap;
410 	hammer_blockmap_t freemap;
411 	struct hammer_blockmap_layer1 *layer1;
412 	struct hammer_blockmap_layer2 *layer2;
413 	hammer_buffer_t buffer1 = NULL;
414 	hammer_buffer_t buffer2 = NULL;
415 	hammer_buffer_t buffer3 = NULL;
416 	hammer_off_t tmp_offset;
417 	hammer_off_t next_offset;
418 	hammer_off_t layer1_offset;
419 	hammer_off_t layer2_offset;
420 	hammer_off_t base_off;
421 	hammer_reserve_t resv;
422 	hammer_reserve_t resx;
423 	int loops = 0;
424 	int offset;
425 
426 	/*
427 	 * Setup
428 	 */
429 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
430 	root_volume = hammer_get_root_volume(hmp, errorp);
431 	if (*errorp)
432 		return(NULL);
433 	blockmap = &hmp->blockmap[zone];
434 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
435 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
436 
437 	/*
438 	 * Deal with alignment and buffer-boundary issues.
439 	 *
440 	 * Be careful, certain primary alignments are used below to allocate
441 	 * new blockmap blocks.
442 	 */
443 	bytes = (bytes + 15) & ~15;
444 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
445 
446 	next_offset = blockmap->next_offset;
447 again:
448 	resv = NULL;
449 	/*
450 	 * Check for wrap
451 	 */
452 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
453 		if (++loops == 2) {
454 			*errorp = ENOSPC;
455 			goto failed;
456 		}
457 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
458 	}
459 
460 	/*
461 	 * The allocation request may not cross a buffer boundary.  Special
462 	 * large allocations must not cross a large-block boundary.
463 	 */
464 	tmp_offset = next_offset + bytes - 1;
465 	if (bytes <= HAMMER_BUFSIZE) {
466 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
467 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
468 			goto again;
469 		}
470 	} else {
471 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
472 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
473 			goto again;
474 		}
475 	}
476 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
477 
478 	/*
479 	 * Dive layer 1.
480 	 */
481 	layer1_offset = freemap->phys_offset +
482 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
483 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
484 	if (*errorp)
485 		goto failed;
486 
487 	/*
488 	 * Check CRC.
489 	 */
490 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
491 		hammer_lock_ex(&hmp->blkmap_lock);
492 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
493 			panic("CRC FAILED: LAYER1");
494 		hammer_unlock(&hmp->blkmap_lock);
495 	}
496 
497 	/*
498 	 * If we are at a big-block boundary and layer1 indicates no
499 	 * free big-blocks, then we cannot allocate a new bigblock in
500 	 * layer2, skip to the next layer1 entry.
501 	 */
502 	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
503 	    layer1->blocks_free == 0) {
504 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
505 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
506 		goto again;
507 	}
508 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
509 
510 	/*
511 	 * Dive layer 2, each entry represents a large-block.
512 	 */
513 	layer2_offset = layer1->phys_offset +
514 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
515 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
516 	if (*errorp)
517 		goto failed;
518 
519 	/*
520 	 * Check CRC if not allocating into uninitialized space (which we
521 	 * aren't when reserving space).
522 	 */
523 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
524 		hammer_lock_ex(&hmp->blkmap_lock);
525 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
526 			panic("CRC FAILED: LAYER2");
527 		hammer_unlock(&hmp->blkmap_lock);
528 	}
529 
530 	/*
531 	 * Skip the layer if the zone is owned by someone other then us.
532 	 */
533 	if (layer2->zone && layer2->zone != zone) {
534 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
535 		goto again;
536 	}
537 	if (offset < layer2->append_off) {
538 		next_offset += layer2->append_off - offset;
539 		goto again;
540 	}
541 
542 	/*
543 	 * We need the lock from this point on.  We have to re-check zone
544 	 * ownership after acquiring the lock and also check for reservations.
545 	 */
546 	hammer_lock_ex(&hmp->blkmap_lock);
547 
548 	if (layer2->zone && layer2->zone != zone) {
549 		hammer_unlock(&hmp->blkmap_lock);
550 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
551 		goto again;
552 	}
553 	if (offset < layer2->append_off) {
554 		hammer_unlock(&hmp->blkmap_lock);
555 		next_offset += layer2->append_off - offset;
556 		goto again;
557 	}
558 
559 	/*
560 	 * The bigblock might be reserved by another zone.  If it is reserved
561 	 * by our zone we may have to move next_offset past the append_off.
562 	 */
563 	base_off = (next_offset &
564 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
565 		    HAMMER_ZONE_RAW_BUFFER;
566 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
567 	if (resv) {
568 		if (resv->zone != zone) {
569 			hammer_unlock(&hmp->blkmap_lock);
570 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
571 				      ~HAMMER_LARGEBLOCK_MASK64;
572 			goto again;
573 		}
574 		if (offset < resv->append_off) {
575 			hammer_unlock(&hmp->blkmap_lock);
576 			next_offset += resv->append_off - offset;
577 			goto again;
578 		}
579 		++resv->refs;
580 		resx = NULL;
581 	} else {
582 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
583 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
584 		resx->refs = 1;
585 		resx->zone = zone;
586 		resx->zone_offset = base_off;
587 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
588 			resx->flags |= HAMMER_RESF_LAYER2FREE;
589 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
590 		KKASSERT(resv == NULL);
591 		resv = resx;
592 		++hammer_count_reservations;
593 	}
594 	resv->append_off = offset + bytes;
595 
596 	/*
597 	 * If we are not reserving a whole buffer but are at the start of
598 	 * a new block, call hammer_bnew() to avoid a disk read.
599 	 *
600 	 * If we are reserving a whole buffer (or more), the caller will
601 	 * probably use a direct read, so do nothing.
602 	 *
603 	 * If we do not have a whole lot of system memory we really can't
604 	 * afford to block while holding the blkmap_lock!
605 	 */
606 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
607 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE))
608 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
609 	}
610 
611 	/*
612 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
613 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
614 	 * be big-block aligned.
615 	 */
616 	blockmap->next_offset = next_offset + bytes;
617 	hammer_unlock(&hmp->blkmap_lock);
618 
619 failed:
620 	if (buffer1)
621 		hammer_rel_buffer(buffer1, 0);
622 	if (buffer2)
623 		hammer_rel_buffer(buffer2, 0);
624 	if (buffer3)
625 		hammer_rel_buffer(buffer3, 0);
626 	hammer_rel_volume(root_volume, 0);
627 	*zone_offp = next_offset;
628 
629 	return(resv);
630 }
631 
632 /*
633  * Frontend function - Dedup bytes in a zone.
634  *
635  * Dedup reservations work exactly the same as normal write reservations
636  * except we only adjust bytes_free field and don't touch append offset.
637  * Finalization mechanic for dedup reservations is also the same as for
638  * normal write ones - the backend finalizes the reservation with
639  * hammer_blockmap_finalize().
640  */
641 hammer_reserve_t
642 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
643 			      hammer_off_t zone_offset, int *errorp)
644 {
645 	hammer_volume_t root_volume;
646 	hammer_blockmap_t freemap;
647 	struct hammer_blockmap_layer1 *layer1;
648 	struct hammer_blockmap_layer2 *layer2;
649 	hammer_buffer_t buffer1 = NULL;
650 	hammer_buffer_t buffer2 = NULL;
651 	hammer_off_t layer1_offset;
652 	hammer_off_t layer2_offset;
653 	hammer_off_t base_off;
654 	hammer_reserve_t resv = NULL;
655 	hammer_reserve_t resx = NULL;
656 
657 	/*
658 	 * Setup
659 	 */
660 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
661 	root_volume = hammer_get_root_volume(hmp, errorp);
662 	if (*errorp)
663 		return (NULL);
664 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
665 	KKASSERT(freemap->phys_offset != 0);
666 
667 	bytes = (bytes + 15) & ~15;
668 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
669 
670 	/*
671 	 * Dive layer 1.
672 	 */
673 	layer1_offset = freemap->phys_offset +
674 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
675 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
676 	if (*errorp)
677 		goto failed;
678 
679 	/*
680 	 * Check CRC.
681 	 */
682 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
683 		hammer_lock_ex(&hmp->blkmap_lock);
684 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
685 			panic("CRC FAILED: LAYER1");
686 		hammer_unlock(&hmp->blkmap_lock);
687 	}
688 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
689 
690 	/*
691 	 * Dive layer 2, each entry represents a large-block.
692 	 */
693 	layer2_offset = layer1->phys_offset +
694 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
695 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
696 	if (*errorp)
697 		goto failed;
698 
699 	/*
700 	 * Check CRC.
701 	 */
702 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
703 		hammer_lock_ex(&hmp->blkmap_lock);
704 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
705 			panic("CRC FAILED: LAYER2");
706 		hammer_unlock(&hmp->blkmap_lock);
707 	}
708 
709 	/*
710 	 * Fail if the zone is owned by someone other than us.
711 	 */
712 	if (layer2->zone && layer2->zone != zone)
713 		goto failed;
714 
715 	/*
716 	 * We need the lock from this point on.  We have to re-check zone
717 	 * ownership after acquiring the lock and also check for reservations.
718 	 */
719 	hammer_lock_ex(&hmp->blkmap_lock);
720 
721 	if (layer2->zone && layer2->zone != zone) {
722 		hammer_unlock(&hmp->blkmap_lock);
723 		goto failed;
724 	}
725 
726 	base_off = (zone_offset &
727 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
728 		    HAMMER_ZONE_RAW_BUFFER;
729 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
730 	if (resv) {
731 		if (resv->zone != zone) {
732 			hammer_unlock(&hmp->blkmap_lock);
733 			resv = NULL;
734 			goto failed;
735 		}
736 		/*
737 		 * Due to possible big block underflow we can't simply
738 		 * subtract bytes from bytes_free.
739 		 */
740 		if (update_bytes_free(resv, bytes) == 0) {
741 			hammer_unlock(&hmp->blkmap_lock);
742 			resv = NULL;
743 			goto failed;
744 		}
745 		++resv->refs;
746 		resx = NULL;
747 	} else {
748 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
749 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
750 		resx->refs = 1;
751 		resx->zone = zone;
752 		resx->bytes_free = layer2->bytes_free;
753 		/*
754 		 * Due to possible big block underflow we can't simply
755 		 * subtract bytes from bytes_free.
756 		 */
757 		if (update_bytes_free(resx, bytes) == 0) {
758 			hammer_unlock(&hmp->blkmap_lock);
759 			kfree(resx, hmp->m_misc);
760 			goto failed;
761 		}
762 		resx->zone_offset = base_off;
763 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
764 		KKASSERT(resv == NULL);
765 		resv = resx;
766 		++hammer_count_reservations;
767 	}
768 
769 	hammer_unlock(&hmp->blkmap_lock);
770 
771 failed:
772 	if (buffer1)
773 		hammer_rel_buffer(buffer1, 0);
774 	if (buffer2)
775 		hammer_rel_buffer(buffer2, 0);
776 	hammer_rel_volume(root_volume, 0);
777 
778 	return(resv);
779 }
780 
781 static int
782 update_bytes_free(hammer_reserve_t resv, int bytes)
783 {
784 	int32_t temp;
785 
786 	/*
787 	 * Big-block underflow check
788 	 */
789 	temp = resv->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
790 	cpu_ccfence(); /* XXX do we really need it ? */
791 	if (temp > resv->bytes_free) {
792 		kprintf("BIGBLOCK UNDERFLOW\n");
793 		return (0);
794 	}
795 
796 	resv->bytes_free -= bytes;
797 	return (1);
798 }
799 
800 /*
801  * Dereference a reservation structure.  Upon the final release the
802  * underlying big-block is checked and if it is entirely free we delete
803  * any related HAMMER buffers to avoid potential conflicts with future
804  * reuse of the big-block.
805  */
806 void
807 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
808 {
809 	hammer_off_t base_offset;
810 	int error;
811 
812 	KKASSERT(resv->refs > 0);
813 	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
814 		 HAMMER_ZONE_RAW_BUFFER);
815 
816 	/*
817 	 * Setting append_off to the max prevents any new allocations
818 	 * from occuring while we are trying to dispose of the reservation,
819 	 * allowing us to safely delete any related HAMMER buffers.
820 	 *
821 	 * If we are unable to clean out all related HAMMER buffers we
822 	 * requeue the delay.
823 	 */
824 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
825 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
826 		base_offset = resv->zone_offset & ~HAMMER_OFF_ZONE_MASK;
827 		base_offset = HAMMER_ZONE_ENCODE(resv->zone, base_offset);
828 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
829 			hammer_dedup_cache_inval(hmp, base_offset);
830 		error = hammer_del_buffers(hmp, base_offset,
831 					   resv->zone_offset,
832 					   HAMMER_LARGEBLOCK_SIZE,
833 					   1);
834 		if (hammer_debug_general & 0x20000) {
835 			kprintf("hammer: dellgblk %016jx error %d\n",
836 				(intmax_t)base_offset, error);
837 		}
838 		if (error)
839 			hammer_reserve_setdelay(hmp, resv);
840 	}
841 	if (--resv->refs == 0) {
842 		if (hammer_debug_general & 0x20000) {
843 			kprintf("hammer: delresvr %016jx zone %02x\n",
844 				(intmax_t)resv->zone_offset, resv->zone);
845 		}
846 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
847 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
848 		kfree(resv, hmp->m_misc);
849 		--hammer_count_reservations;
850 	}
851 }
852 
853 /*
854  * Prevent a potentially free big-block from being reused until after
855  * the related flushes have completely cycled, otherwise crash recovery
856  * could resurrect a data block that was already reused and overwritten.
857  *
858  * The caller might reset the underlying layer2 entry's append_off to 0, so
859  * our covering append_off must be set to max to prevent any reallocation
860  * until after the flush delays complete, not to mention proper invalidation
861  * of any underlying cached blocks.
862  */
863 static void
864 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
865 			int zone, struct hammer_blockmap_layer2 *layer2)
866 {
867 	hammer_reserve_t resv;
868 
869 	/*
870 	 * Allocate the reservation if necessary.
871 	 *
872 	 * NOTE: need lock in future around resv lookup/allocation and
873 	 * the setdelay call, currently refs is not bumped until the call.
874 	 */
875 again:
876 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
877 	if (resv == NULL) {
878 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
879 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
880 		resv->zone = zone;
881 		resv->zone_offset = base_offset;
882 		resv->refs = 0;
883 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
884 
885 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
886 			resv->flags |= HAMMER_RESF_LAYER2FREE;
887 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
888 			kfree(resv, hmp->m_misc);
889 			goto again;
890 		}
891 		++hammer_count_reservations;
892 	} else {
893 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
894 			resv->flags |= HAMMER_RESF_LAYER2FREE;
895 	}
896 	hammer_reserve_setdelay(hmp, resv);
897 }
898 
899 /*
900  * Enter the reservation on the on-delay list, or move it if it
901  * is already on the list.
902  */
903 static void
904 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
905 {
906 	if (resv->flags & HAMMER_RESF_ONDELAY) {
907 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
908 		resv->flush_group = hmp->flusher.next + 1;
909 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
910 	} else {
911 		++resv->refs;
912 		++hmp->rsv_fromdelay;
913 		resv->flags |= HAMMER_RESF_ONDELAY;
914 		resv->flush_group = hmp->flusher.next + 1;
915 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
916 	}
917 }
918 
919 /*
920  * Reserve has reached its flush point, remove it from the delay list
921  * and finish it off.  hammer_blockmap_reserve_complete() inherits
922  * the ondelay reference.
923  */
924 void
925 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
926 {
927 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
928 	resv->flags &= ~HAMMER_RESF_ONDELAY;
929 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
930 	--hmp->rsv_fromdelay;
931 	hammer_blockmap_reserve_complete(hmp, resv);
932 }
933 
934 /*
935  * Backend function - free (offset, bytes) in a zone.
936  *
937  * XXX error return
938  */
939 void
940 hammer_blockmap_free(hammer_transaction_t trans,
941 		     hammer_off_t zone_offset, int bytes)
942 {
943 	hammer_mount_t hmp;
944 	hammer_volume_t root_volume;
945 	hammer_blockmap_t freemap;
946 	struct hammer_blockmap_layer1 *layer1;
947 	struct hammer_blockmap_layer2 *layer2;
948 	hammer_buffer_t buffer1 = NULL;
949 	hammer_buffer_t buffer2 = NULL;
950 	hammer_off_t layer1_offset;
951 	hammer_off_t layer2_offset;
952 	hammer_off_t base_off;
953 	int error;
954 	int zone;
955 
956 	if (bytes == 0)
957 		return;
958 	hmp = trans->hmp;
959 
960 	/*
961 	 * Alignment
962 	 */
963 	bytes = (bytes + 15) & ~15;
964 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
965 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
966 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
967 
968 	/*
969 	 * Basic zone validation & locking
970 	 */
971 	zone = HAMMER_ZONE_DECODE(zone_offset);
972 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
973 	root_volume = trans->rootvol;
974 	error = 0;
975 
976 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
977 
978 	/*
979 	 * Dive layer 1.
980 	 */
981 	layer1_offset = freemap->phys_offset +
982 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
983 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
984 	if (error)
985 		goto failed;
986 	KKASSERT(layer1->phys_offset &&
987 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
988 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
989 		hammer_lock_ex(&hmp->blkmap_lock);
990 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
991 			panic("CRC FAILED: LAYER1");
992 		hammer_unlock(&hmp->blkmap_lock);
993 	}
994 
995 	/*
996 	 * Dive layer 2, each entry represents a large-block.
997 	 */
998 	layer2_offset = layer1->phys_offset +
999 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1000 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1001 	if (error)
1002 		goto failed;
1003 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1004 		hammer_lock_ex(&hmp->blkmap_lock);
1005 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1006 			panic("CRC FAILED: LAYER2");
1007 		hammer_unlock(&hmp->blkmap_lock);
1008 	}
1009 
1010 	hammer_lock_ex(&hmp->blkmap_lock);
1011 
1012 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1013 
1014 	/*
1015 	 * Free space previously allocated via blockmap_alloc().
1016 	 *
1017 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1018 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1019 	 */
1020 	KKASSERT(layer2->zone == zone);
1021 	layer2->bytes_free += bytes;
1022 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1023 
1024 	/*
1025 	 * If a big-block becomes entirely free we must create a covering
1026 	 * reservation to prevent premature reuse.  Note, however, that
1027 	 * the big-block and/or reservation may still have an append_off
1028 	 * that allows further (non-reused) allocations.
1029 	 *
1030 	 * Once the reservation has been made we re-check layer2 and if
1031 	 * the big-block is still entirely free we reset the layer2 entry.
1032 	 * The reservation will prevent premature reuse.
1033 	 *
1034 	 * NOTE: hammer_buffer's are only invalidated when the reservation
1035 	 * is completed, if the layer2 entry is still completely free at
1036 	 * that time.  Any allocations from the reservation that may have
1037 	 * occured in the mean time, or active references on the reservation
1038 	 * from new pending allocations, will prevent the invalidation from
1039 	 * occuring.
1040 	 */
1041 	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1042 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1043 
1044 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1045 		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
1046 			layer2->zone = 0;
1047 			layer2->append_off = 0;
1048 			hammer_modify_buffer(trans, buffer1,
1049 					     layer1, sizeof(*layer1));
1050 			++layer1->blocks_free;
1051 			layer1->layer1_crc = crc32(layer1,
1052 						   HAMMER_LAYER1_CRCSIZE);
1053 			hammer_modify_buffer_done(buffer1);
1054 			hammer_modify_volume_field(trans,
1055 					trans->rootvol,
1056 					vol0_stat_freebigblocks);
1057 			++root_volume->ondisk->vol0_stat_freebigblocks;
1058 			hmp->copy_stat_freebigblocks =
1059 			   root_volume->ondisk->vol0_stat_freebigblocks;
1060 			hammer_modify_volume_done(trans->rootvol);
1061 		}
1062 	}
1063 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1064 	hammer_modify_buffer_done(buffer2);
1065 	hammer_unlock(&hmp->blkmap_lock);
1066 
1067 failed:
1068 	if (buffer1)
1069 		hammer_rel_buffer(buffer1, 0);
1070 	if (buffer2)
1071 		hammer_rel_buffer(buffer2, 0);
1072 }
1073 
1074 int
1075 hammer_blockmap_dedup(hammer_transaction_t trans,
1076 		     hammer_off_t zone_offset, int bytes)
1077 {
1078 	hammer_mount_t hmp;
1079 	hammer_blockmap_t freemap;
1080 	struct hammer_blockmap_layer1 *layer1;
1081 	struct hammer_blockmap_layer2 *layer2;
1082 	hammer_buffer_t buffer1 = NULL;
1083 	hammer_buffer_t buffer2 = NULL;
1084 	hammer_off_t layer1_offset;
1085 	hammer_off_t layer2_offset;
1086 	int32_t temp;
1087 	int error;
1088 	int zone __debugvar;
1089 
1090 	if (bytes == 0)
1091 		return (0);
1092 	hmp = trans->hmp;
1093 
1094 	/*
1095 	 * Alignment
1096 	 */
1097 	bytes = (bytes + 15) & ~15;
1098 	KKASSERT(bytes <= HAMMER_LARGEBLOCK_SIZE);
1099 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1100 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
1101 
1102 	/*
1103 	 * Basic zone validation & locking
1104 	 */
1105 	zone = HAMMER_ZONE_DECODE(zone_offset);
1106 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1107 	error = 0;
1108 
1109 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1110 
1111 	/*
1112 	 * Dive layer 1.
1113 	 */
1114 	layer1_offset = freemap->phys_offset +
1115 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1116 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1117 	if (error)
1118 		goto failed;
1119 	KKASSERT(layer1->phys_offset &&
1120 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1121 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1122 		hammer_lock_ex(&hmp->blkmap_lock);
1123 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1124 			panic("CRC FAILED: LAYER1");
1125 		hammer_unlock(&hmp->blkmap_lock);
1126 	}
1127 
1128 	/*
1129 	 * Dive layer 2, each entry represents a large-block.
1130 	 */
1131 	layer2_offset = layer1->phys_offset +
1132 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1133 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1134 	if (error)
1135 		goto failed;
1136 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1137 		hammer_lock_ex(&hmp->blkmap_lock);
1138 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1139 			panic("CRC FAILED: LAYER2");
1140 		hammer_unlock(&hmp->blkmap_lock);
1141 	}
1142 
1143 	hammer_lock_ex(&hmp->blkmap_lock);
1144 
1145 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1146 
1147 	/*
1148 	 * Free space previously allocated via blockmap_alloc().
1149 	 *
1150 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1151 	 *	 but can never become larger than HAMMER_LARGEBLOCK_SIZE.
1152 	 */
1153 	KKASSERT(layer2->zone == zone);
1154 	temp = layer2->bytes_free - HAMMER_LARGEBLOCK_SIZE * 2;
1155 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1156 	if (temp > layer2->bytes_free) {
1157 		error = ERANGE;
1158 		goto underflow;
1159 	}
1160 	layer2->bytes_free -= bytes;
1161 
1162 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
1163 
1164 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1165 underflow:
1166 	hammer_modify_buffer_done(buffer2);
1167 	hammer_unlock(&hmp->blkmap_lock);
1168 
1169 failed:
1170 	if (buffer1)
1171 		hammer_rel_buffer(buffer1, 0);
1172 	if (buffer2)
1173 		hammer_rel_buffer(buffer2, 0);
1174 	return (error);
1175 }
1176 
1177 /*
1178  * Backend function - finalize (offset, bytes) in a zone.
1179  *
1180  * Allocate space that was previously reserved by the frontend.
1181  */
1182 int
1183 hammer_blockmap_finalize(hammer_transaction_t trans,
1184 			 hammer_reserve_t resv,
1185 			 hammer_off_t zone_offset, int bytes)
1186 {
1187 	hammer_mount_t hmp;
1188 	hammer_volume_t root_volume;
1189 	hammer_blockmap_t freemap;
1190 	struct hammer_blockmap_layer1 *layer1;
1191 	struct hammer_blockmap_layer2 *layer2;
1192 	hammer_buffer_t buffer1 = NULL;
1193 	hammer_buffer_t buffer2 = NULL;
1194 	hammer_off_t layer1_offset;
1195 	hammer_off_t layer2_offset;
1196 	int error;
1197 	int zone;
1198 	int offset;
1199 
1200 	if (bytes == 0)
1201 		return(0);
1202 	hmp = trans->hmp;
1203 
1204 	/*
1205 	 * Alignment
1206 	 */
1207 	bytes = (bytes + 15) & ~15;
1208 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1209 
1210 	/*
1211 	 * Basic zone validation & locking
1212 	 */
1213 	zone = HAMMER_ZONE_DECODE(zone_offset);
1214 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1215 	root_volume = trans->rootvol;
1216 	error = 0;
1217 
1218 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1219 
1220 	/*
1221 	 * Dive layer 1.
1222 	 */
1223 	layer1_offset = freemap->phys_offset +
1224 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1225 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1226 	if (error)
1227 		goto failed;
1228 	KKASSERT(layer1->phys_offset &&
1229 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1230 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1231 		hammer_lock_ex(&hmp->blkmap_lock);
1232 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1233 			panic("CRC FAILED: LAYER1");
1234 		hammer_unlock(&hmp->blkmap_lock);
1235 	}
1236 
1237 	/*
1238 	 * Dive layer 2, each entry represents a large-block.
1239 	 */
1240 	layer2_offset = layer1->phys_offset +
1241 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1242 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1243 	if (error)
1244 		goto failed;
1245 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1246 		hammer_lock_ex(&hmp->blkmap_lock);
1247 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1248 			panic("CRC FAILED: LAYER2");
1249 		hammer_unlock(&hmp->blkmap_lock);
1250 	}
1251 
1252 	hammer_lock_ex(&hmp->blkmap_lock);
1253 
1254 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1255 
1256 	/*
1257 	 * Finalize some or all of the space covered by a current
1258 	 * reservation.  An allocation in the same layer may have
1259 	 * already assigned ownership.
1260 	 */
1261 	if (layer2->zone == 0) {
1262 		hammer_modify_buffer(trans, buffer1,
1263 				     layer1, sizeof(*layer1));
1264 		--layer1->blocks_free;
1265 		layer1->layer1_crc = crc32(layer1,
1266 					   HAMMER_LAYER1_CRCSIZE);
1267 		hammer_modify_buffer_done(buffer1);
1268 		layer2->zone = zone;
1269 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
1270 		KKASSERT(layer2->append_off == 0);
1271 		hammer_modify_volume_field(trans,
1272 				trans->rootvol,
1273 				vol0_stat_freebigblocks);
1274 		--root_volume->ondisk->vol0_stat_freebigblocks;
1275 		hmp->copy_stat_freebigblocks =
1276 		   root_volume->ondisk->vol0_stat_freebigblocks;
1277 		hammer_modify_volume_done(trans->rootvol);
1278 	}
1279 	if (layer2->zone != zone)
1280 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1281 	KKASSERT(layer2->zone == zone);
1282 	KKASSERT(bytes != 0);
1283 	layer2->bytes_free -= bytes;
1284 
1285 	if (resv) {
1286 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1287 	}
1288 
1289 	/*
1290 	 * Finalizations can occur out of order, or combined with allocations.
1291 	 * append_off must be set to the highest allocated offset.
1292 	 */
1293 	offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
1294 	if (layer2->append_off < offset)
1295 		layer2->append_off = offset;
1296 
1297 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
1298 	hammer_modify_buffer_done(buffer2);
1299 	hammer_unlock(&hmp->blkmap_lock);
1300 
1301 failed:
1302 	if (buffer1)
1303 		hammer_rel_buffer(buffer1, 0);
1304 	if (buffer2)
1305 		hammer_rel_buffer(buffer2, 0);
1306 	return(error);
1307 }
1308 
1309 /*
1310  * Return the approximate number of free bytes in the big-block
1311  * containing the specified blockmap offset.
1312  *
1313  * WARNING: A negative number can be returned if data de-dup exists,
1314  *	    and the result will also not represent he actual number
1315  *	    of free bytes in this case.
1316  *
1317  *	    This code is used only by the reblocker.
1318  */
1319 int
1320 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1321 			int *curp, int *errorp)
1322 {
1323 	hammer_volume_t root_volume;
1324 	hammer_blockmap_t blockmap;
1325 	hammer_blockmap_t freemap;
1326 	struct hammer_blockmap_layer1 *layer1;
1327 	struct hammer_blockmap_layer2 *layer2;
1328 	hammer_buffer_t buffer = NULL;
1329 	hammer_off_t layer1_offset;
1330 	hammer_off_t layer2_offset;
1331 	int32_t bytes;
1332 	int zone;
1333 
1334 	zone = HAMMER_ZONE_DECODE(zone_offset);
1335 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1336 	root_volume = hammer_get_root_volume(hmp, errorp);
1337 	if (*errorp) {
1338 		*curp = 0;
1339 		return(0);
1340 	}
1341 	blockmap = &hmp->blockmap[zone];
1342 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1343 
1344 	/*
1345 	 * Dive layer 1.
1346 	 */
1347 	layer1_offset = freemap->phys_offset +
1348 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1349 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1350 	if (*errorp) {
1351 		bytes = 0;
1352 		goto failed;
1353 	}
1354 	KKASSERT(layer1->phys_offset);
1355 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1356 		hammer_lock_ex(&hmp->blkmap_lock);
1357 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1358 			panic("CRC FAILED: LAYER1");
1359 		hammer_unlock(&hmp->blkmap_lock);
1360 	}
1361 
1362 	/*
1363 	 * Dive layer 2, each entry represents a large-block.
1364 	 *
1365 	 * (reuse buffer, layer1 pointer becomes invalid)
1366 	 */
1367 	layer2_offset = layer1->phys_offset +
1368 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1369 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1370 	if (*errorp) {
1371 		bytes = 0;
1372 		goto failed;
1373 	}
1374 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1375 		hammer_lock_ex(&hmp->blkmap_lock);
1376 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1377 			panic("CRC FAILED: LAYER2");
1378 		hammer_unlock(&hmp->blkmap_lock);
1379 	}
1380 	KKASSERT(layer2->zone == zone);
1381 
1382 	bytes = layer2->bytes_free;
1383 
1384 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
1385 		*curp = 0;
1386 	else
1387 		*curp = 1;
1388 failed:
1389 	if (buffer)
1390 		hammer_rel_buffer(buffer, 0);
1391 	hammer_rel_volume(root_volume, 0);
1392 	if (hammer_debug_general & 0x0800) {
1393 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1394 			(long long)zone_offset, bytes);
1395 	}
1396 	return(bytes);
1397 }
1398 
1399 
1400 /*
1401  * Lookup a blockmap offset.
1402  */
1403 hammer_off_t
1404 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1405 		       int *errorp)
1406 {
1407 	hammer_volume_t root_volume;
1408 	hammer_blockmap_t freemap;
1409 	struct hammer_blockmap_layer1 *layer1;
1410 	struct hammer_blockmap_layer2 *layer2;
1411 	hammer_buffer_t buffer = NULL;
1412 	hammer_off_t layer1_offset;
1413 	hammer_off_t layer2_offset;
1414 	hammer_off_t result_offset;
1415 	hammer_off_t base_off;
1416 	hammer_reserve_t resv __debugvar;
1417 	int zone;
1418 
1419 	/*
1420 	 * Calculate the zone-2 offset.
1421 	 */
1422 	zone = HAMMER_ZONE_DECODE(zone_offset);
1423 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1424 
1425 	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1426 			HAMMER_ZONE_RAW_BUFFER;
1427 
1428 	/*
1429 	 * We can actually stop here, normal blockmaps are now direct-mapped
1430 	 * onto the freemap and so represent zone-2 addresses.
1431 	 */
1432 	if (hammer_verify_zone == 0) {
1433 		*errorp = 0;
1434 		return(result_offset);
1435 	}
1436 
1437 	/*
1438 	 * Validate the allocation zone
1439 	 */
1440 	root_volume = hammer_get_root_volume(hmp, errorp);
1441 	if (*errorp)
1442 		return(0);
1443 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1444 	KKASSERT(freemap->phys_offset != 0);
1445 
1446 	/*
1447 	 * Dive layer 1.
1448 	 */
1449 	layer1_offset = freemap->phys_offset +
1450 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1451 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1452 	if (*errorp)
1453 		goto failed;
1454 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1455 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1456 		hammer_lock_ex(&hmp->blkmap_lock);
1457 		if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE))
1458 			panic("CRC FAILED: LAYER1");
1459 		hammer_unlock(&hmp->blkmap_lock);
1460 	}
1461 
1462 	/*
1463 	 * Dive layer 2, each entry represents a large-block.
1464 	 */
1465 	layer2_offset = layer1->phys_offset +
1466 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1467 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1468 
1469 	if (*errorp)
1470 		goto failed;
1471 	if (layer2->zone == 0) {
1472 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1473 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1474 				 base_off);
1475 		KKASSERT(resv && resv->zone == zone);
1476 
1477 	} else if (layer2->zone != zone) {
1478 		panic("hammer_blockmap_lookup: bad zone %d/%d",
1479 			layer2->zone, zone);
1480 	}
1481 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1482 		hammer_lock_ex(&hmp->blkmap_lock);
1483 		if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE))
1484 			panic("CRC FAILED: LAYER2");
1485 		hammer_unlock(&hmp->blkmap_lock);
1486 	}
1487 
1488 failed:
1489 	if (buffer)
1490 		hammer_rel_buffer(buffer, 0);
1491 	hammer_rel_volume(root_volume, 0);
1492 	if (hammer_debug_general & 0x0800) {
1493 		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1494 			(long long)zone_offset, (long long)result_offset);
1495 	}
1496 	return(result_offset);
1497 }
1498 
1499 
1500 /*
1501  * Check space availability
1502  *
1503  * MPSAFE - does not require fs_token
1504  */
1505 int
1506 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1507 {
1508 	const int in_size = sizeof(struct hammer_inode_data) +
1509 			    sizeof(union hammer_btree_elm);
1510 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1511 	int64_t usedbytes;
1512 
1513 	usedbytes = hmp->rsv_inodes * in_size +
1514 		    hmp->rsv_recs * rec_size +
1515 		    hmp->rsv_databytes +
1516 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1517 		    ((int64_t)hammer_limit_dirtybufspace) +
1518 		    (slop << HAMMER_LARGEBLOCK_BITS);
1519 
1520 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1521 	if (resp)
1522 		*resp = usedbytes;
1523 
1524 	if (hmp->copy_stat_freebigblocks >=
1525 	    (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1526 		return(0);
1527 	}
1528 	return (ENOSPC);
1529 }
1530 
1531