xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision 44753b81)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 /*
36  * HAMMER blockmap
37  */
38 #include <vm/vm_page2.h>
39 
40 #include "hammer.h"
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static void hammer_reserve_setdelay_offset(hammer_mount_t hmp,
44 				    hammer_off_t base_offset, int zone,
45 				    hammer_blockmap_layer2_t layer2);
46 static void hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv);
47 static int update_bytes_free(hammer_reserve_t resv, int bytes);
48 static int hammer_check_volume(hammer_mount_t, hammer_off_t*);
49 static void hammer_skip_volume(hammer_off_t *offsetp);
50 
51 /*
52  * Reserved big-blocks red-black tree support
53  */
54 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
55 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
56 
57 static int
58 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
59 {
60 	if (res1->zone_offset < res2->zone_offset)
61 		return(-1);
62 	if (res1->zone_offset > res2->zone_offset)
63 		return(1);
64 	return(0);
65 }
66 
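/*
 * Reservations are keyed by the zone-2 (raw buffer) base offset of the
 * big-block they cover; the allocators below look them up with
 * RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
 *	     hammer_xlate_to_zone2(offset & ~HAMMER_BIGBLOCK_MASK64)).
 */
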
67 /*
68  * Allocate bytes from a zone
69  */
70 hammer_off_t
71 hammer_blockmap_alloc(hammer_transaction_t trans, int zone, int bytes,
72 		      hammer_off_t hint, int *errorp)
73 {
74 	hammer_mount_t hmp;
75 	hammer_volume_t root_volume;
76 	hammer_blockmap_t blockmap;
77 	hammer_blockmap_t freemap;
78 	hammer_reserve_t resv;
79 	hammer_blockmap_layer1_t layer1;
80 	hammer_blockmap_layer2_t layer2;
81 	hammer_buffer_t buffer1 = NULL;
82 	hammer_buffer_t buffer2 = NULL;
83 	hammer_buffer_t buffer3 = NULL;
84 	hammer_off_t tmp_offset;
85 	hammer_off_t next_offset;
86 	hammer_off_t result_offset;
87 	hammer_off_t layer1_offset;
88 	hammer_off_t layer2_offset;
89 	hammer_off_t base_off;
90 	int loops = 0;
91 	int offset;		/* offset within big-block */
92 	int use_hint;
93 
94 	hmp = trans->hmp;
95 
96 	/*
97 	 * Deal with alignment and buffer-boundary issues.
98 	 *
99 	 * Be careful, certain primary alignments are used below to allocate
100 	 * new blockmap blocks.
101 	 */
102 	bytes = (bytes + 15) & ~15;
103 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
104 	KKASSERT(hammer_is_zone2_mapped_index(zone));
105 
106 	/*
107 	 * Setup
108 	 */
109 	root_volume = trans->rootvol;
110 	*errorp = 0;
111 	blockmap = &hmp->blockmap[zone];
112 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
113 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
114 
115 	/*
116 	 * Use the hint if we have one.
117 	 */
118 	if (hint && HAMMER_ZONE_DECODE(hint) == zone) {
119 		next_offset = (hint + 15) & ~(hammer_off_t)15;
120 		use_hint = 1;
121 	} else {
122 		next_offset = blockmap->next_offset;
123 		use_hint = 0;
124 	}
125 again:
126 
127 	/*
128 	 * use_hint is turned off if we leave the hinted big-block.
129 	 */
130 	if (use_hint && ((next_offset ^ hint) & ~HAMMER_HINTBLOCK_MASK64)) {
131 		next_offset = blockmap->next_offset;
132 		use_hint = 0;
133 	}
134 
135 	/*
136 	 * Check for wrap
137 	 */
138 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
139 		if (++loops == 2) {
140 			hmkprintf(hmp, "No space left for zone %d "
141 				"allocation\n", zone);
142 			result_offset = 0;
143 			*errorp = ENOSPC;
144 			goto failed;
145 		}
146 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
147 	}
148 
149 	/*
150 	 * The allocation request may not cross a buffer boundary.  Special
151 	 * large allocations must not cross a big-block boundary.
152 	 */
153 	tmp_offset = next_offset + bytes - 1;
154 	if (bytes <= HAMMER_BUFSIZE) {
155 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
156 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
157 			goto again;
158 		}
159 	} else {
160 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
161 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
162 			goto again;
163 		}
164 	}
165 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
166 
167 	/*
168 	 * Dive layer 1.
169 	 */
170 	layer1_offset = freemap->phys_offset +
171 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
172 
173 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
174 	if (*errorp) {
175 		result_offset = 0;
176 		goto failed;
177 	}
178 
179 	/*
180 	 * Check CRC.
181 	 */
182 	if (!hammer_crc_test_layer1(layer1)) {
183 		hammer_lock_ex(&hmp->blkmap_lock);
184 		if (!hammer_crc_test_layer1(layer1))
185 			hpanic("CRC FAILED: LAYER1");
186 		hammer_unlock(&hmp->blkmap_lock);
187 	}
188 
189 	/*
190 	 * If we are at a big-block boundary and layer1 indicates no
191 	 * free big-blocks, then we cannot allocate a new big-block in
192 	 * layer2; skip to the next layer1 entry.
193 	 */
194 	if (offset == 0 && layer1->blocks_free == 0) {
195 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
196 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
197 		if (hammer_check_volume(hmp, &next_offset)) {
198 			result_offset = 0;
199 			goto failed;
200 		}
201 		goto again;
202 	}
203 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
204 
205 	/*
206 	 * Skip the whole volume if it is pointing to a layer2 big-block
207 	 * on a volume that we are currently trying to remove from the
208 	 * file-system. This is used by the volume-del code together with
209 	 * the reblocker to free up a volume.
210 	 */
211 	if (HAMMER_VOL_DECODE(layer1->phys_offset) == hmp->volume_to_remove) {
212 		hammer_skip_volume(&next_offset);
213 		goto again;
214 	}
215 
216 	/*
217 	 * Dive layer 2, each entry represents a big-block.
218 	 */
219 	layer2_offset = layer1->phys_offset +
220 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
221 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
222 	if (*errorp) {
223 		result_offset = 0;
224 		goto failed;
225 	}
226 
227 	/*
228 	 * Check CRC.  This can race another thread holding the lock
229 	 * and in the middle of modifying layer2.
230 	 */
231 	if (!hammer_crc_test_layer2(layer2)) {
232 		hammer_lock_ex(&hmp->blkmap_lock);
233 		if (!hammer_crc_test_layer2(layer2))
234 			hpanic("CRC FAILED: LAYER2");
235 		hammer_unlock(&hmp->blkmap_lock);
236 	}
237 
238 	/*
239 	 * Skip the layer if the zone is owned by someone other than us.
240 	 */
241 	if (layer2->zone && layer2->zone != zone) {
242 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
243 		goto again;
244 	}
245 	if (offset < layer2->append_off) {
246 		next_offset += layer2->append_off - offset;
247 		goto again;
248 	}
249 
250 #if 0
251 	/*
252 	 * If operating in the current non-hint blockmap block, do not
253 	 * allow it to get over-full.  Also drop any active hinting so
254 	 * blockmap->next_offset is updated at the end.
255 	 *
256 	 * We do this for B-Tree and meta-data allocations to provide
257 	 * localization for updates.
258 	 */
259 	if ((zone == HAMMER_ZONE_BTREE_INDEX ||
260 	     zone == HAMMER_ZONE_META_INDEX) &&
261 	    offset >= HAMMER_BIGBLOCK_OVERFILL &&
262 	    !((next_offset ^ blockmap->next_offset) & ~HAMMER_BIGBLOCK_MASK64)) {
263 		if (offset >= HAMMER_BIGBLOCK_OVERFILL) {
264 			next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
265 			use_hint = 0;
266 			goto again;
267 		}
268 	}
269 #endif
270 
271 	/*
272 	 * We need the lock from this point on.  We have to re-check zone
273 	 * ownership after acquiring the lock and also check for reservations.
274 	 */
275 	hammer_lock_ex(&hmp->blkmap_lock);
276 
277 	if (layer2->zone && layer2->zone != zone) {
278 		hammer_unlock(&hmp->blkmap_lock);
279 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
280 		goto again;
281 	}
282 	if (offset < layer2->append_off) {
283 		hammer_unlock(&hmp->blkmap_lock);
284 		next_offset += layer2->append_off - offset;
285 		goto again;
286 	}
287 
288 	/*
289 	 * The big-block might be reserved by another zone.  If it is reserved
290 	 * by our zone we may have to move next_offset past the append_off.
291 	 */
292 	base_off = hammer_xlate_to_zone2(next_offset &
293 					~HAMMER_BIGBLOCK_MASK64);
294 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
295 	if (resv) {
296 		if (resv->zone != zone) {
297 			hammer_unlock(&hmp->blkmap_lock);
298 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
299 				      ~HAMMER_BIGBLOCK_MASK64;
300 			goto again;
301 		}
302 		if (offset < resv->append_off) {
303 			hammer_unlock(&hmp->blkmap_lock);
304 			next_offset += resv->append_off - offset;
305 			goto again;
306 		}
307 		++resv->refs;
308 	}
309 
310 	/*
311 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
312 	 * of the layer for real.  At this point we've validated any
313 	 * reservation that might exist and can just ignore resv.
314 	 */
315 	if (layer2->zone == 0) {
316 		/*
317 		 * Assign the big-block to our zone
318 		 */
319 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
320 		--layer1->blocks_free;
321 		hammer_crc_set_layer1(layer1);
322 		hammer_modify_buffer_done(buffer1);
323 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
324 		layer2->zone = zone;
325 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
326 		KKASSERT(layer2->append_off == 0);
327 		hammer_modify_volume_field(trans, trans->rootvol,
328 					   vol0_stat_freebigblocks);
329 		--root_volume->ondisk->vol0_stat_freebigblocks;
330 		hmp->copy_stat_freebigblocks =
331 			root_volume->ondisk->vol0_stat_freebigblocks;
332 		hammer_modify_volume_done(trans->rootvol);
333 	} else {
334 		hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
335 	}
336 	KKASSERT(layer2->zone == zone);
337 
338 	/*
339 	 * NOTE: bytes_free can legally go negative due to de-dup.
340 	 */
341 	layer2->bytes_free -= bytes;
342 	KKASSERT(layer2->append_off <= offset);
343 	layer2->append_off = offset + bytes;
344 	hammer_crc_set_layer2(layer2);
345 	hammer_modify_buffer_done(buffer2);
346 
347 	/*
348 	 * We hold the blockmap lock and should be the only ones
349 	 * capable of modifying resv->append_off.  Track the allocation
350 	 * as appropriate.
351 	 */
352 	KKASSERT(bytes != 0);
353 	if (resv) {
354 		KKASSERT(resv->append_off <= offset);
355 		resv->append_off = offset + bytes;
356 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
357 		hammer_blockmap_reserve_complete(hmp, resv);
358 	}
359 
360 	/*
361 	 * If we are allocating from the base of a new buffer we can avoid
362 	 * a disk read by calling hammer_bnew_ext().
363 	 */
364 	if ((next_offset & HAMMER_BUFMASK) == 0) {
365 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
366 				errorp, &buffer3);
367 		if (*errorp) {
368 			result_offset = 0;
369 			goto failed;
370 		}
371 	}
372 	result_offset = next_offset;
373 
374 	/*
375 	 * If we weren't supplied with a hint or could not use the hint
376 	 * then we wound up using blockmap->next_offset as the hint and
377 	 * need to save it.
378 	 */
379 	if (use_hint == 0) {
380 		hammer_modify_volume_noundo(NULL, root_volume);
381 		blockmap->next_offset = next_offset + bytes;
382 		hammer_modify_volume_done(root_volume);
383 	}
384 	hammer_unlock(&hmp->blkmap_lock);
385 failed:
386 
387 	/*
388 	 * Cleanup
389 	 */
390 	if (buffer1)
391 		hammer_rel_buffer(buffer1, 0);
392 	if (buffer2)
393 		hammer_rel_buffer(buffer2, 0);
394 	if (buffer3)
395 		hammer_rel_buffer(buffer3, 0);
396 
397 	return(result_offset);
398 }
399 
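/*
 * Illustrative sketch only: how the 16-byte rounding and the XOR
 * boundary-crossing test used by hammer_blockmap_alloc() (and by the
 * reservation path below) behave.  This is a standalone userland
 * snippet, not driver code; the 16KB buffer size and the EX_* names
 * are assumptions for the example, the real values come from
 * HAMMER_BUFSIZE / HAMMER_BUFMASK64.
 */
#if 0
#include <stdint.h>
#include <stdio.h>

#define EX_BUFSIZE	16384ULL		/* assumed buffer size */
#define EX_BUFMASK64	(EX_BUFSIZE - 1)	/* offset bits within a buffer */

int
main(void)
{
	uint64_t next_offset = 16368;		/* 16 bytes shy of a boundary */
	int bytes = 100;

	bytes = (bytes + 15) & ~15;		/* round up to 16 -> 112 */

	uint64_t tmp_offset = next_offset + bytes - 1;
	if ((next_offset ^ tmp_offset) & ~EX_BUFMASK64) {
		/* request straddles a buffer: bump to the next buffer */
		next_offset = tmp_offset & ~EX_BUFMASK64;
	}
	printf("bytes=%d next_offset=%llu\n",
	       bytes, (unsigned long long)next_offset);	/* 112, 16384 */
	return(0);
}
#endif
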
400 /*
401  * Frontend function - Reserve bytes in a zone.
402  *
403  * This code reserves bytes out of a blockmap without committing to any
404  * meta-data modifications, allowing the front-end to directly issue disk
405  * write I/O for big-blocks of data.
406  *
407  * The backend later finalizes the reservation with hammer_blockmap_finalize()
408  * upon committing the related record.
409  */
410 hammer_reserve_t
411 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
412 			hammer_off_t *zone_offp, int *errorp)
413 {
414 	hammer_volume_t root_volume;
415 	hammer_blockmap_t blockmap;
416 	hammer_blockmap_t freemap;
417 	hammer_blockmap_layer1_t layer1;
418 	hammer_blockmap_layer2_t layer2;
419 	hammer_buffer_t buffer1 = NULL;
420 	hammer_buffer_t buffer2 = NULL;
421 	hammer_buffer_t buffer3 = NULL;
422 	hammer_off_t tmp_offset;
423 	hammer_off_t next_offset;
424 	hammer_off_t layer1_offset;
425 	hammer_off_t layer2_offset;
426 	hammer_off_t base_off;
427 	hammer_reserve_t resv;
428 	hammer_reserve_t resx = NULL;
429 	int loops = 0;
430 	int offset;
431 
432 	/*
433 	 * Setup
434 	 */
435 	KKASSERT(hammer_is_zone2_mapped_index(zone));
436 	root_volume = hammer_get_root_volume(hmp, errorp);
437 	if (*errorp)
438 		return(NULL);
439 	blockmap = &hmp->blockmap[zone];
440 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
441 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
442 
443 	/*
444 	 * Deal with alignment and buffer-boundary issues.
445 	 *
446 	 * Be careful, certain primary alignments are used below to allocate
447 	 * new blockmap blocks.
448 	 */
449 	bytes = (bytes + 15) & ~15;
450 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
451 
452 	next_offset = blockmap->next_offset;
453 again:
454 	resv = NULL;
455 	/*
456 	 * Check for wrap
457 	 */
458 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
459 		if (++loops == 2) {
460 			hmkprintf(hmp, "No space left for zone %d "
461 				"reservation\n", zone);
462 			*errorp = ENOSPC;
463 			goto failed;
464 		}
465 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
466 	}
467 
468 	/*
469 	 * The allocation request may not cross a buffer boundary.  Special
470 	 * large allocations must not cross a big-block boundary.
471 	 */
472 	tmp_offset = next_offset + bytes - 1;
473 	if (bytes <= HAMMER_BUFSIZE) {
474 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
475 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
476 			goto again;
477 		}
478 	} else {
479 		if ((next_offset ^ tmp_offset) & ~HAMMER_BIGBLOCK_MASK64) {
480 			next_offset = tmp_offset & ~HAMMER_BIGBLOCK_MASK64;
481 			goto again;
482 		}
483 	}
484 	offset = (int)next_offset & HAMMER_BIGBLOCK_MASK;
485 
486 	/*
487 	 * Dive layer 1.
488 	 */
489 	layer1_offset = freemap->phys_offset +
490 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
491 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
492 	if (*errorp)
493 		goto failed;
494 
495 	/*
496 	 * Check CRC.
497 	 */
498 	if (!hammer_crc_test_layer1(layer1)) {
499 		hammer_lock_ex(&hmp->blkmap_lock);
500 		if (!hammer_crc_test_layer1(layer1))
501 			hpanic("CRC FAILED: LAYER1");
502 		hammer_unlock(&hmp->blkmap_lock);
503 	}
504 
505 	/*
506 	 * If we are at a big-block boundary and layer1 indicates no
507 	 * free big-blocks, then we cannot allocate a new big-block in
508 	 * layer2; skip to the next layer1 entry.
509 	 */
510 	if ((next_offset & HAMMER_BIGBLOCK_MASK) == 0 &&
511 	    layer1->blocks_free == 0) {
512 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
513 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
514 		if (hammer_check_volume(hmp, &next_offset))
515 			goto failed;
516 		goto again;
517 	}
518 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
519 
520 	/*
521 	 * Dive layer 2, each entry represents a big-block.
522 	 */
523 	layer2_offset = layer1->phys_offset +
524 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
525 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
526 	if (*errorp)
527 		goto failed;
528 
529 	/*
530 	 * Check CRC if not allocating into uninitialized space (which we
531 	 * aren't when reserving space).
532 	 */
533 	if (!hammer_crc_test_layer2(layer2)) {
534 		hammer_lock_ex(&hmp->blkmap_lock);
535 		if (!hammer_crc_test_layer2(layer2))
536 			hpanic("CRC FAILED: LAYER2");
537 		hammer_unlock(&hmp->blkmap_lock);
538 	}
539 
540 	/*
541 	 * Skip the layer if the zone is owned by someone other than us.
542 	 */
543 	if (layer2->zone && layer2->zone != zone) {
544 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
545 		goto again;
546 	}
547 	if (offset < layer2->append_off) {
548 		next_offset += layer2->append_off - offset;
549 		goto again;
550 	}
551 
552 	/*
553 	 * We need the lock from this point on.  We have to re-check zone
554 	 * ownership after acquiring the lock and also check for reservations.
555 	 */
556 	hammer_lock_ex(&hmp->blkmap_lock);
557 
558 	if (layer2->zone && layer2->zone != zone) {
559 		hammer_unlock(&hmp->blkmap_lock);
560 		next_offset += (HAMMER_BIGBLOCK_SIZE - offset);
561 		goto again;
562 	}
563 	if (offset < layer2->append_off) {
564 		hammer_unlock(&hmp->blkmap_lock);
565 		next_offset += layer2->append_off - offset;
566 		goto again;
567 	}
568 
569 	/*
570 	 * The big-block might be reserved by another zone.  If it is reserved
571 	 * by our zone we may have to move next_offset past the append_off.
572 	 */
573 	base_off = hammer_xlate_to_zone2(next_offset &
574 					~HAMMER_BIGBLOCK_MASK64);
575 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
576 	if (resv) {
577 		if (resv->zone != zone) {
578 			hammer_unlock(&hmp->blkmap_lock);
579 			next_offset = (next_offset + HAMMER_BIGBLOCK_SIZE) &
580 				      ~HAMMER_BIGBLOCK_MASK64;
581 			goto again;
582 		}
583 		if (offset < resv->append_off) {
584 			hammer_unlock(&hmp->blkmap_lock);
585 			next_offset += resv->append_off - offset;
586 			goto again;
587 		}
588 		++resv->refs;
589 	} else {
590 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
591 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
592 		resx->refs = 1;
593 		resx->zone = zone;
594 		resx->zone_offset = base_off;
595 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
596 			resx->flags |= HAMMER_RESF_LAYER2FREE;
597 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
598 		KKASSERT(resv == NULL);
599 		resv = resx;
600 		++hammer_count_reservations;
601 	}
602 	resv->append_off = offset + bytes;
603 
604 	/*
605 	 * If we are not reserving a whole buffer but are at the start of
606 	 * a new block, call hammer_bnew() to avoid a disk read.
607 	 *
608 	 * If we are reserving a whole buffer (or more), the caller will
609 	 * probably use a direct read, so do nothing.
610 	 *
611 	 * If we do not have a whole lot of system memory we really can't
612 	 * afford to block while holding the blkmap_lock!
613 	 */
614 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
615 		if (!vm_page_count_min(HAMMER_BUFSIZE / PAGE_SIZE)) {
616 			hammer_bnew(hmp, next_offset, errorp, &buffer3);
617 			if (*errorp)
618 				goto failed;
619 		}
620 	}
621 
622 	/*
623 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
624 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
625 	 * be big-block aligned.
626 	 */
627 	blockmap->next_offset = next_offset + bytes;
628 	hammer_unlock(&hmp->blkmap_lock);
629 
630 failed:
631 	if (buffer1)
632 		hammer_rel_buffer(buffer1, 0);
633 	if (buffer2)
634 		hammer_rel_buffer(buffer2, 0);
635 	if (buffer3)
636 		hammer_rel_buffer(buffer3, 0);
637 	hammer_rel_volume(root_volume, 0);
638 	*zone_offp = next_offset;
639 
640 	return(resv);
641 }
642 
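/*
 * Rough usage sketch of the frontend/backend hand-off described above;
 * it is illustrative only and compresses steps that normally happen in
 * different threads.  The function name and the data zone index are
 * placeholders; only the three hammer_blockmap_*() calls are real.
 */
#if 0
static void
example_reserve_cycle(hammer_mount_t hmp, hammer_transaction_t trans,
		      int bytes)
{
	hammer_off_t zone_offset;
	hammer_reserve_t resv;
	int error;

	/* Frontend: reserve space, no meta-data is modified yet */
	resv = hammer_blockmap_reserve(hmp, HAMMER_ZONE_LARGE_DATA_INDEX,
				       bytes, &zone_offset, &error);
	if (resv == NULL)
		return;

	/* ... frontend issues direct write I/O against zone_offset ... */

	/* Backend: finalize once the related record is committed */
	error = hammer_blockmap_finalize(trans, resv, zone_offset, bytes);

	/* Drop the reservation reference when the I/O has completed */
	hammer_blockmap_reserve_complete(hmp, resv);
}
#endif
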
643 /*
644  * Frontend function - Dedup bytes in a zone.
645  *
646  * Dedup reservations work exactly the same as normal write reservations
647  * except that we only adjust the bytes_free field and do not touch the
648  * append offset.  The finalization mechanism is also the same as for
649  * normal write ones - the backend finalizes the reservation with
650  * hammer_blockmap_finalize().
651  */
652 hammer_reserve_t
653 hammer_blockmap_reserve_dedup(hammer_mount_t hmp, int zone, int bytes,
654 			      hammer_off_t zone_offset, int *errorp)
655 {
656 	hammer_volume_t root_volume;
657 	hammer_blockmap_t freemap;
658 	hammer_blockmap_layer1_t layer1;
659 	hammer_blockmap_layer2_t layer2;
660 	hammer_buffer_t buffer1 = NULL;
661 	hammer_buffer_t buffer2 = NULL;
662 	hammer_off_t layer1_offset;
663 	hammer_off_t layer2_offset;
664 	hammer_off_t base_off;
665 	hammer_reserve_t resv = NULL;
666 	hammer_reserve_t resx = NULL;
667 
668 	/*
669 	 * Setup
670 	 */
671 	KKASSERT(hammer_is_zone2_mapped_index(zone));
672 	root_volume = hammer_get_root_volume(hmp, errorp);
673 	if (*errorp)
674 		return (NULL);
675 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
676 	KKASSERT(freemap->phys_offset != 0);
677 
678 	bytes = (bytes + 15) & ~15;
679 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
680 
681 	/*
682 	 * Dive layer 1.
683 	 */
684 	layer1_offset = freemap->phys_offset +
685 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
686 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
687 	if (*errorp)
688 		goto failed;
689 
690 	/*
691 	 * Check CRC.
692 	 */
693 	if (!hammer_crc_test_layer1(layer1)) {
694 		hammer_lock_ex(&hmp->blkmap_lock);
695 		if (!hammer_crc_test_layer1(layer1))
696 			hpanic("CRC FAILED: LAYER1");
697 		hammer_unlock(&hmp->blkmap_lock);
698 	}
699 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
700 
701 	/*
702 	 * Dive layer 2, each entry represents a big-block.
703 	 */
704 	layer2_offset = layer1->phys_offset +
705 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
706 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
707 	if (*errorp)
708 		goto failed;
709 
710 	/*
711 	 * Check CRC.
712 	 */
713 	if (!hammer_crc_test_layer2(layer2)) {
714 		hammer_lock_ex(&hmp->blkmap_lock);
715 		if (!hammer_crc_test_layer2(layer2))
716 			hpanic("CRC FAILED: LAYER2");
717 		hammer_unlock(&hmp->blkmap_lock);
718 	}
719 
720 	/*
721 	 * Fail if the zone is owned by someone other than us.
722 	 */
723 	if (layer2->zone && layer2->zone != zone)
724 		goto failed;
725 
726 	/*
727 	 * We need the lock from this point on.  We have to re-check zone
728 	 * ownership after acquiring the lock and also check for reservations.
729 	 */
730 	hammer_lock_ex(&hmp->blkmap_lock);
731 
732 	if (layer2->zone && layer2->zone != zone) {
733 		hammer_unlock(&hmp->blkmap_lock);
734 		goto failed;
735 	}
736 
737 	base_off = hammer_xlate_to_zone2(zone_offset &
738 					~HAMMER_BIGBLOCK_MASK64);
739 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
740 	if (resv) {
741 		if (resv->zone != zone) {
742 			hammer_unlock(&hmp->blkmap_lock);
743 			resv = NULL;
744 			goto failed;
745 		}
746 		/*
747 		 * Due to possible big-block underflow we can't simply
748 		 * subtract bytes from bytes_free.
749 		 */
750 		if (update_bytes_free(resv, bytes) == 0) {
751 			hammer_unlock(&hmp->blkmap_lock);
752 			resv = NULL;
753 			goto failed;
754 		}
755 		++resv->refs;
756 	} else {
757 		resx = kmalloc(sizeof(*resv), hmp->m_misc,
758 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
759 		resx->refs = 1;
760 		resx->zone = zone;
761 		resx->bytes_free = layer2->bytes_free;
762 		/*
763 		 * Due to possible big-block underflow we can't simply
764 		 * subtract bytes from bytes_free.
765 		 */
766 		if (update_bytes_free(resx, bytes) == 0) {
767 			hammer_unlock(&hmp->blkmap_lock);
768 			kfree(resx, hmp->m_misc);
769 			goto failed;
770 		}
771 		resx->zone_offset = base_off;
772 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
773 		KKASSERT(resv == NULL);
774 		resv = resx;
775 		++hammer_count_reservations;
776 	}
777 
778 	hammer_unlock(&hmp->blkmap_lock);
779 
780 failed:
781 	if (buffer1)
782 		hammer_rel_buffer(buffer1, 0);
783 	if (buffer2)
784 		hammer_rel_buffer(buffer2, 0);
785 	hammer_rel_volume(root_volume, 0);
786 
787 	return(resv);
788 }
789 
790 static int
791 update_bytes_free(hammer_reserve_t resv, int bytes)
792 {
793 	int32_t temp;
794 
795 	/*
796 	 * Big-block underflow check
797 	 */
798 	temp = resv->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
799 	cpu_ccfence(); /* XXX do we really need it ? */
800 	if (temp > resv->bytes_free) {
801 		hdkprintf("BIGBLOCK UNDERFLOW\n");
802 		return (0);
803 	}
804 
805 	resv->bytes_free -= bytes;
806 	return (1);
807 }
808 
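/*
 * Note on the check above: bytes_free may legally be negative because
 * of dedup, so update_bytes_free() does not compare against zero.
 * Instead it subtracts two big-blocks and tests whether the 32-bit
 * result wrapped.  Illustrative values (assuming an 8MB big-block):
 *
 *	bytes_free = INT32_MIN + (8 << 20)	close to wrapping
 *	temp = bytes_free - 2 * (8 << 20)	wraps to a large positive
 *	temp > bytes_free			-> reservation refused
 *
 * A healthy counter (|bytes_free| on the order of one big-block) always
 * yields temp < bytes_free and the dedup bytes are subtracted normally.
 */
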
809 /*
810  * Dereference a reservation structure.  Upon the final release the
811  * underlying big-block is checked and if it is entirely free we delete
812  * any related HAMMER buffers to avoid potential conflicts with future
813  * reuse of the big-block.
814  */
815 void
816 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
817 {
818 	hammer_off_t base_offset;
819 	int error;
820 
821 	KKASSERT(resv->refs > 0);
822 	KKASSERT(hammer_is_zone_raw_buffer(resv->zone_offset));
823 
824 	/*
825 	 * Setting append_off to the max prevents any new allocations
826 	 * from occurring while we are trying to dispose of the reservation,
827 	 * allowing us to safely delete any related HAMMER buffers.
828 	 *
829 	 * If we are unable to clean out all related HAMMER buffers we
830 	 * requeue the delay.
831 	 */
832 	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
833 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
834 		base_offset = hammer_xlate_to_zoneX(resv->zone, resv->zone_offset);
835 		if (!TAILQ_EMPTY(&hmp->dedup_lru_list))
836 			hammer_dedup_cache_inval(hmp, base_offset);
837 		error = hammer_del_buffers(hmp, base_offset,
838 					   resv->zone_offset,
839 					   HAMMER_BIGBLOCK_SIZE,
840 					   1);
841 		if (hammer_debug_general & 0x20000) {
842 			hkprintf("delbgblk %016jx error %d\n",
843 				(intmax_t)base_offset, error);
844 		}
845 		if (error)
846 			hammer_reserve_setdelay(hmp, resv);
847 	}
848 	if (--resv->refs == 0) {
849 		if (hammer_debug_general & 0x20000) {
850 			hkprintf("delresvr %016jx zone %02x\n",
851 				(intmax_t)resv->zone_offset, resv->zone);
852 		}
853 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
854 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
855 		kfree(resv, hmp->m_misc);
856 		--hammer_count_reservations;
857 	}
858 }
859 
860 /*
861  * Prevent a potentially free big-block from being reused until after
862  * the related flushes have completely cycled; otherwise crash recovery
863  * could resurrect a data block that was already reused and overwritten.
864  *
865  * The caller might reset the underlying layer2 entry's append_off to 0, so
866  * our covering append_off must be set to max to prevent any reallocation
867  * until after the flush delays complete, not to mention proper invalidation
868  * of any underlying cached blocks.
869  */
870 static void
871 hammer_reserve_setdelay_offset(hammer_mount_t hmp, hammer_off_t base_offset,
872 			int zone, hammer_blockmap_layer2_t layer2)
873 {
874 	hammer_reserve_t resv;
875 
876 	/*
877 	 * Allocate the reservation if necessary.
878 	 *
879 	 * NOTE: need lock in future around resv lookup/allocation and
880 	 * the setdelay call, currently refs is not bumped until the call.
881 	 */
882 again:
883 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
884 	if (resv == NULL) {
885 		resv = kmalloc(sizeof(*resv), hmp->m_misc,
886 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
887 		resv->zone = zone;
888 		resv->zone_offset = base_offset;
889 		resv->refs = 0;
890 		resv->append_off = HAMMER_BIGBLOCK_SIZE;
891 
892 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
893 			resv->flags |= HAMMER_RESF_LAYER2FREE;
894 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
895 			kfree(resv, hmp->m_misc);
896 			goto again;
897 		}
898 		++hammer_count_reservations;
899 	} else {
900 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE)
901 			resv->flags |= HAMMER_RESF_LAYER2FREE;
902 	}
903 	hammer_reserve_setdelay(hmp, resv);
904 }
905 
906 /*
907  * Enter the reservation on the on-delay list, or move it if it
908  * is already on the list.
909  */
910 static void
911 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv)
912 {
913 	if (resv->flags & HAMMER_RESF_ONDELAY) {
914 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
915 		resv->flg_no = hmp->flusher.next + 1;
916 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
917 	} else {
918 		++resv->refs;
919 		++hmp->rsv_fromdelay;
920 		resv->flags |= HAMMER_RESF_ONDELAY;
921 		resv->flg_no = hmp->flusher.next + 1;
922 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
923 	}
924 }
925 
926 /*
927  * Reserve has reached its flush point; remove it from the delay list
928  * and finish it off.  hammer_blockmap_reserve_complete() inherits
929  * the ondelay reference.
930  */
931 void
932 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
933 {
934 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
935 	resv->flags &= ~HAMMER_RESF_ONDELAY;
936 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
937 	--hmp->rsv_fromdelay;
938 	hammer_blockmap_reserve_complete(hmp, resv);
939 }
940 
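/*
 * Summary of the delay machinery above: hammer_reserve_setdelay*()
 * takes (or re-queues) a reference on the covering reservation and
 * stamps it with the next flusher sequence number.  Once the flusher
 * has cycled past that sequence, hammer_reserve_clrdelay() removes the
 * reservation from delay_list and hands the reference to
 * hammer_blockmap_reserve_complete(), which may then invalidate the
 * related HAMMER buffers and let the big-block be reused safely.
 */
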
941 /*
942  * Backend function - free (offset, bytes) in a zone.
943  *
944  * XXX error return
945  */
946 void
947 hammer_blockmap_free(hammer_transaction_t trans,
948 		     hammer_off_t zone_offset, int bytes)
949 {
950 	hammer_mount_t hmp;
951 	hammer_volume_t root_volume;
952 	hammer_blockmap_t freemap;
953 	hammer_blockmap_layer1_t layer1;
954 	hammer_blockmap_layer2_t layer2;
955 	hammer_buffer_t buffer1 = NULL;
956 	hammer_buffer_t buffer2 = NULL;
957 	hammer_off_t layer1_offset;
958 	hammer_off_t layer2_offset;
959 	hammer_off_t base_off;
960 	int error;
961 	int zone;
962 
963 	if (bytes == 0)
964 		return;
965 	hmp = trans->hmp;
966 
967 	/*
968 	 * Alignment
969 	 */
970 	bytes = (bytes + 15) & ~15;
971 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
972 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
973 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
974 
975 	/*
976 	 * Basic zone validation & locking
977 	 */
978 	zone = HAMMER_ZONE_DECODE(zone_offset);
979 	KKASSERT(hammer_is_zone2_mapped_index(zone));
980 	root_volume = trans->rootvol;
981 	error = 0;
982 
983 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
984 
985 	/*
986 	 * Dive layer 1.
987 	 */
988 	layer1_offset = freemap->phys_offset +
989 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
990 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
991 	if (error)
992 		goto failed;
993 	KKASSERT(layer1->phys_offset &&
994 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
995 	if (!hammer_crc_test_layer1(layer1)) {
996 		hammer_lock_ex(&hmp->blkmap_lock);
997 		if (!hammer_crc_test_layer1(layer1))
998 			hpanic("CRC FAILED: LAYER1");
999 		hammer_unlock(&hmp->blkmap_lock);
1000 	}
1001 
1002 	/*
1003 	 * Dive layer 2, each entry represents a big-block.
1004 	 */
1005 	layer2_offset = layer1->phys_offset +
1006 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1007 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1008 	if (error)
1009 		goto failed;
1010 	if (!hammer_crc_test_layer2(layer2)) {
1011 		hammer_lock_ex(&hmp->blkmap_lock);
1012 		if (!hammer_crc_test_layer2(layer2))
1013 			hpanic("CRC FAILED: LAYER2");
1014 		hammer_unlock(&hmp->blkmap_lock);
1015 	}
1016 
1017 	hammer_lock_ex(&hmp->blkmap_lock);
1018 
1019 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1020 
1021 	/*
1022 	 * Free space previously allocated via blockmap_alloc().
1023 	 *
1024 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1025 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1026 	 */
1027 	KKASSERT(layer2->zone == zone);
1028 	layer2->bytes_free += bytes;
1029 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1030 
1031 	/*
1032 	 * If a big-block becomes entirely free we must create a covering
1033 	 * reservation to prevent premature reuse.  Note, however, that
1034 	 * the big-block and/or reservation may still have an append_off
1035 	 * that allows further (non-reused) allocations.
1036 	 *
1037 	 * Once the reservation has been made we re-check layer2 and if
1038 	 * the big-block is still entirely free we reset the layer2 entry.
1039 	 * The reservation will prevent premature reuse.
1040 	 *
1041 	 * NOTE: hammer_buffer's are only invalidated when the reservation
1042 	 * is completed, if the layer2 entry is still completely free at
1043 	 * that time.  Any allocations from the reservation that may have
1044 	 * occurred in the meantime, or active references on the reservation
1045 	 * from new pending allocations, will prevent the invalidation from
1046 	 * occurring.
1047 	 */
1048 	if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1049 		base_off = hammer_xlate_to_zone2(zone_offset &
1050 						~HAMMER_BIGBLOCK_MASK64);
1051 
1052 		hammer_reserve_setdelay_offset(hmp, base_off, zone, layer2);
1053 		if (layer2->bytes_free == HAMMER_BIGBLOCK_SIZE) {
1054 			layer2->zone = 0;
1055 			layer2->append_off = 0;
1056 			hammer_modify_buffer(trans, buffer1,
1057 					     layer1, sizeof(*layer1));
1058 			++layer1->blocks_free;
1059 			hammer_crc_set_layer1(layer1);
1060 			hammer_modify_buffer_done(buffer1);
1061 			hammer_modify_volume_field(trans,
1062 					trans->rootvol,
1063 					vol0_stat_freebigblocks);
1064 			++root_volume->ondisk->vol0_stat_freebigblocks;
1065 			hmp->copy_stat_freebigblocks =
1066 			   root_volume->ondisk->vol0_stat_freebigblocks;
1067 			hammer_modify_volume_done(trans->rootvol);
1068 		}
1069 	}
1070 	hammer_crc_set_layer2(layer2);
1071 	hammer_modify_buffer_done(buffer2);
1072 	hammer_unlock(&hmp->blkmap_lock);
1073 
1074 failed:
1075 	if (buffer1)
1076 		hammer_rel_buffer(buffer1, 0);
1077 	if (buffer2)
1078 		hammer_rel_buffer(buffer2, 0);
1079 }
1080 
1081 int
1082 hammer_blockmap_dedup(hammer_transaction_t trans,
1083 		     hammer_off_t zone_offset, int bytes)
1084 {
1085 	hammer_mount_t hmp;
1086 	hammer_blockmap_t freemap;
1087 	hammer_blockmap_layer1_t layer1;
1088 	hammer_blockmap_layer2_t layer2;
1089 	hammer_buffer_t buffer1 = NULL;
1090 	hammer_buffer_t buffer2 = NULL;
1091 	hammer_off_t layer1_offset;
1092 	hammer_off_t layer2_offset;
1093 	int32_t temp;
1094 	int error;
1095 	int zone __debugvar;
1096 
1097 	if (bytes == 0)
1098 		return (0);
1099 	hmp = trans->hmp;
1100 
1101 	/*
1102 	 * Alignment
1103 	 */
1104 	bytes = (bytes + 15) & ~15;
1105 	KKASSERT(bytes <= HAMMER_BIGBLOCK_SIZE);
1106 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
1107 		  ~HAMMER_BIGBLOCK_MASK64) == 0);
1108 
1109 	/*
1110 	 * Basic zone validation & locking
1111 	 */
1112 	zone = HAMMER_ZONE_DECODE(zone_offset);
1113 	KKASSERT(hammer_is_zone2_mapped_index(zone));
1114 	error = 0;
1115 
1116 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1117 
1118 	/*
1119 	 * Dive layer 1.
1120 	 */
1121 	layer1_offset = freemap->phys_offset +
1122 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1123 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1124 	if (error)
1125 		goto failed;
1126 	KKASSERT(layer1->phys_offset &&
1127 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1128 	if (!hammer_crc_test_layer1(layer1)) {
1129 		hammer_lock_ex(&hmp->blkmap_lock);
1130 		if (!hammer_crc_test_layer1(layer1))
1131 			hpanic("CRC FAILED: LAYER1");
1132 		hammer_unlock(&hmp->blkmap_lock);
1133 	}
1134 
1135 	/*
1136 	 * Dive layer 2, each entry represents a big-block.
1137 	 */
1138 	layer2_offset = layer1->phys_offset +
1139 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1140 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1141 	if (error)
1142 		goto failed;
1143 	if (!hammer_crc_test_layer2(layer2)) {
1144 		hammer_lock_ex(&hmp->blkmap_lock);
1145 		if (!hammer_crc_test_layer2(layer2))
1146 			hpanic("CRC FAILED: LAYER2");
1147 		hammer_unlock(&hmp->blkmap_lock);
1148 	}
1149 
1150 	hammer_lock_ex(&hmp->blkmap_lock);
1151 
1152 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1153 
1154 	/*
1155 	 * Free space previously allocated via blockmap_alloc().
1156 	 *
1157 	 * NOTE: bytes_free can be and remain negative due to de-dup ops
1158 	 *	 but can never become larger than HAMMER_BIGBLOCK_SIZE.
1159 	 */
1160 	KKASSERT(layer2->zone == zone);
1161 	temp = layer2->bytes_free - HAMMER_BIGBLOCK_SIZE * 2;
1162 	cpu_ccfence(); /* prevent gcc from optimizing temp out */
1163 	if (temp > layer2->bytes_free) {
1164 		error = ERANGE;
1165 		goto underflow;
1166 	}
1167 	layer2->bytes_free -= bytes;
1168 
1169 	KKASSERT(layer2->bytes_free <= HAMMER_BIGBLOCK_SIZE);
1170 
1171 	hammer_crc_set_layer2(layer2);
1172 underflow:
1173 	hammer_modify_buffer_done(buffer2);
1174 	hammer_unlock(&hmp->blkmap_lock);
1175 
1176 failed:
1177 	if (buffer1)
1178 		hammer_rel_buffer(buffer1, 0);
1179 	if (buffer2)
1180 		hammer_rel_buffer(buffer2, 0);
1181 	return (error);
1182 }
1183 
1184 /*
1185  * Backend function - finalize (offset, bytes) in a zone.
1186  *
1187  * Allocate space that was previously reserved by the frontend.
1188  */
1189 int
1190 hammer_blockmap_finalize(hammer_transaction_t trans,
1191 			 hammer_reserve_t resv,
1192 			 hammer_off_t zone_offset, int bytes)
1193 {
1194 	hammer_mount_t hmp;
1195 	hammer_volume_t root_volume;
1196 	hammer_blockmap_t freemap;
1197 	hammer_blockmap_layer1_t layer1;
1198 	hammer_blockmap_layer2_t layer2;
1199 	hammer_buffer_t buffer1 = NULL;
1200 	hammer_buffer_t buffer2 = NULL;
1201 	hammer_off_t layer1_offset;
1202 	hammer_off_t layer2_offset;
1203 	int error;
1204 	int zone;
1205 	int offset;
1206 
1207 	if (bytes == 0)
1208 		return(0);
1209 	hmp = trans->hmp;
1210 
1211 	/*
1212 	 * Alignment
1213 	 */
1214 	bytes = (bytes + 15) & ~15;
1215 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
1216 
1217 	/*
1218 	 * Basic zone validation & locking
1219 	 */
1220 	zone = HAMMER_ZONE_DECODE(zone_offset);
1221 	KKASSERT(hammer_is_zone2_mapped_index(zone));
1222 	root_volume = trans->rootvol;
1223 	error = 0;
1224 
1225 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1226 
1227 	/*
1228 	 * Dive layer 1.
1229 	 */
1230 	layer1_offset = freemap->phys_offset +
1231 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1232 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1233 	if (error)
1234 		goto failed;
1235 	KKASSERT(layer1->phys_offset &&
1236 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1237 	if (!hammer_crc_test_layer1(layer1)) {
1238 		hammer_lock_ex(&hmp->blkmap_lock);
1239 		if (!hammer_crc_test_layer1(layer1))
1240 			hpanic("CRC FAILED: LAYER1");
1241 		hammer_unlock(&hmp->blkmap_lock);
1242 	}
1243 
1244 	/*
1245 	 * Dive layer 2, each entry represents a big-block.
1246 	 */
1247 	layer2_offset = layer1->phys_offset +
1248 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1249 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
1250 	if (error)
1251 		goto failed;
1252 	if (!hammer_crc_test_layer2(layer2)) {
1253 		hammer_lock_ex(&hmp->blkmap_lock);
1254 		if (!hammer_crc_test_layer2(layer2))
1255 			hpanic("CRC FAILED: LAYER2");
1256 		hammer_unlock(&hmp->blkmap_lock);
1257 	}
1258 
1259 	hammer_lock_ex(&hmp->blkmap_lock);
1260 
1261 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
1262 
1263 	/*
1264 	 * Finalize some or all of the space covered by a current
1265 	 * reservation.  An allocation in the same layer may have
1266 	 * already assigned ownership.
1267 	 */
1268 	if (layer2->zone == 0) {
1269 		hammer_modify_buffer(trans, buffer1, layer1, sizeof(*layer1));
1270 		--layer1->blocks_free;
1271 		hammer_crc_set_layer1(layer1);
1272 		hammer_modify_buffer_done(buffer1);
1273 		layer2->zone = zone;
1274 		KKASSERT(layer2->bytes_free == HAMMER_BIGBLOCK_SIZE);
1275 		KKASSERT(layer2->append_off == 0);
1276 		hammer_modify_volume_field(trans,
1277 				trans->rootvol,
1278 				vol0_stat_freebigblocks);
1279 		--root_volume->ondisk->vol0_stat_freebigblocks;
1280 		hmp->copy_stat_freebigblocks =
1281 		   root_volume->ondisk->vol0_stat_freebigblocks;
1282 		hammer_modify_volume_done(trans->rootvol);
1283 	}
1284 	if (layer2->zone != zone)
1285 		hdkprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
1286 	KKASSERT(layer2->zone == zone);
1287 	KKASSERT(bytes != 0);
1288 	layer2->bytes_free -= bytes;
1289 
1290 	if (resv) {
1291 		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
1292 	}
1293 
1294 	/*
1295 	 * Finalizations can occur out of order, or be combined with allocations.
1296 	 * append_off must be set to the highest allocated offset.
1297 	 */
1298 	offset = ((int)zone_offset & HAMMER_BIGBLOCK_MASK) + bytes;
1299 	if (layer2->append_off < offset)
1300 		layer2->append_off = offset;
1301 
1302 	hammer_crc_set_layer2(layer2);
1303 	hammer_modify_buffer_done(buffer2);
1304 	hammer_unlock(&hmp->blkmap_lock);
1305 
1306 failed:
1307 	if (buffer1)
1308 		hammer_rel_buffer(buffer1, 0);
1309 	if (buffer2)
1310 		hammer_rel_buffer(buffer2, 0);
1311 	return(error);
1312 }
1313 
1314 /*
1315  * Return the approximate number of free bytes in the big-block
1316  * containing the specified blockmap offset.
1317  *
1318  * WARNING: A negative number can be returned if data de-dup exists,
1319  *	    and the result will also not represent the actual number
1320  *	    of free bytes in this case.
1321  *
1322  *	    This code is used only by the reblocker.
1323  */
1324 int
1325 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
1326 			int *curp, int *errorp)
1327 {
1328 	hammer_volume_t root_volume;
1329 	hammer_blockmap_t blockmap;
1330 	hammer_blockmap_t freemap;
1331 	hammer_blockmap_layer1_t layer1;
1332 	hammer_blockmap_layer2_t layer2;
1333 	hammer_buffer_t buffer = NULL;
1334 	hammer_off_t layer1_offset;
1335 	hammer_off_t layer2_offset;
1336 	int32_t bytes;
1337 	int zone;
1338 
1339 	zone = HAMMER_ZONE_DECODE(zone_offset);
1340 	KKASSERT(hammer_is_zone2_mapped_index(zone));
1341 	root_volume = hammer_get_root_volume(hmp, errorp);
1342 	if (*errorp) {
1343 		*curp = 0;
1344 		return(0);
1345 	}
1346 	blockmap = &hmp->blockmap[zone];
1347 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1348 
1349 	/*
1350 	 * Dive layer 1.
1351 	 */
1352 	layer1_offset = freemap->phys_offset +
1353 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1354 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1355 	if (*errorp) {
1356 		*curp = 0;
1357 		bytes = 0;
1358 		goto failed;
1359 	}
1360 	KKASSERT(layer1->phys_offset);
1361 	if (!hammer_crc_test_layer1(layer1)) {
1362 		hammer_lock_ex(&hmp->blkmap_lock);
1363 		if (!hammer_crc_test_layer1(layer1))
1364 			hpanic("CRC FAILED: LAYER1");
1365 		hammer_unlock(&hmp->blkmap_lock);
1366 	}
1367 
1368 	/*
1369 	 * Dive layer 2, each entry represents a big-block.
1370 	 *
1371 	 * (reuse buffer, layer1 pointer becomes invalid)
1372 	 */
1373 	layer2_offset = layer1->phys_offset +
1374 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1375 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1376 	if (*errorp) {
1377 		*curp = 0;
1378 		bytes = 0;
1379 		goto failed;
1380 	}
1381 	if (!hammer_crc_test_layer2(layer2)) {
1382 		hammer_lock_ex(&hmp->blkmap_lock);
1383 		if (!hammer_crc_test_layer2(layer2))
1384 			hpanic("CRC FAILED: LAYER2");
1385 		hammer_unlock(&hmp->blkmap_lock);
1386 	}
1387 	KKASSERT(layer2->zone == zone);
1388 
1389 	bytes = layer2->bytes_free;
1390 
1391 	/*
1392 	 * *curp becomes 1 only when there is no error and next_offset and
1393 	 * zone_offset are in the same big-block.
1394 	 */
1395 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_BIGBLOCK_MASK64)
1396 		*curp = 0;  /* not same */
1397 	else
1398 		*curp = 1;
1399 failed:
1400 	if (buffer)
1401 		hammer_rel_buffer(buffer, 0);
1402 	hammer_rel_volume(root_volume, 0);
1403 	if (hammer_debug_general & 0x4000) {
1404 		hdkprintf("%016jx -> %d\n", (intmax_t)zone_offset, bytes);
1405 	}
1406 	return(bytes);
1407 }
1408 
1409 
1410 /*
1411  * Lookup a blockmap offset and verify blockmap layers.
1412  */
1413 hammer_off_t
1414 hammer_blockmap_lookup_verify(hammer_mount_t hmp, hammer_off_t zone_offset,
1415 			int *errorp)
1416 {
1417 	hammer_volume_t root_volume;
1418 	hammer_blockmap_t freemap;
1419 	hammer_blockmap_layer1_t layer1;
1420 	hammer_blockmap_layer2_t layer2;
1421 	hammer_buffer_t buffer = NULL;
1422 	hammer_off_t layer1_offset;
1423 	hammer_off_t layer2_offset;
1424 	hammer_off_t result_offset;
1425 	hammer_off_t base_off;
1426 	hammer_reserve_t resv __debugvar;
1427 	int zone;
1428 
1429 	/*
1430 	 * Calculate the zone-2 offset.
1431 	 */
1432 	zone = HAMMER_ZONE_DECODE(zone_offset);
1433 	result_offset = hammer_xlate_to_zone2(zone_offset);
1434 
1435 	/*
1436 	 * Validate the allocation zone
1437 	 */
1438 	root_volume = hammer_get_root_volume(hmp, errorp);
1439 	if (*errorp)
1440 		return(0);
1441 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1442 	KKASSERT(freemap->phys_offset != 0);
1443 
1444 	/*
1445 	 * Dive layer 1.
1446 	 */
1447 	layer1_offset = freemap->phys_offset +
1448 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1449 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1450 	if (*errorp)
1451 		goto failed;
1452 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1453 	if (!hammer_crc_test_layer1(layer1)) {
1454 		hammer_lock_ex(&hmp->blkmap_lock);
1455 		if (!hammer_crc_test_layer1(layer1))
1456 			hpanic("CRC FAILED: LAYER1");
1457 		hammer_unlock(&hmp->blkmap_lock);
1458 	}
1459 
1460 	/*
1461 	 * Dive layer 2, each entry represents a big-block.
1462 	 */
1463 	layer2_offset = layer1->phys_offset +
1464 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1465 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1466 
1467 	if (*errorp)
1468 		goto failed;
1469 	if (layer2->zone == 0) {
1470 		base_off = hammer_xlate_to_zone2(zone_offset &
1471 						~HAMMER_BIGBLOCK_MASK64);
1472 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1473 				 base_off);
1474 		KKASSERT(resv && resv->zone == zone);
1475 
1476 	} else if (layer2->zone != zone) {
1477 		hpanic("bad zone %d/%d", layer2->zone, zone);
1478 	}
1479 	if (!hammer_crc_test_layer2(layer2)) {
1480 		hammer_lock_ex(&hmp->blkmap_lock);
1481 		if (!hammer_crc_test_layer2(layer2))
1482 			hpanic("CRC FAILED: LAYER2");
1483 		hammer_unlock(&hmp->blkmap_lock);
1484 	}
1485 
1486 failed:
1487 	if (buffer)
1488 		hammer_rel_buffer(buffer, 0);
1489 	hammer_rel_volume(root_volume, 0);
1490 	if (hammer_debug_general & 0x0800) {
1491 		hdkprintf("%016jx -> %016jx\n",
1492 			(intmax_t)zone_offset, (intmax_t)result_offset);
1493 	}
1494 	return(result_offset);
1495 }
1496 
1497 
1498 /*
1499  * Check space availability
1500  *
1501  * MPSAFE - does not require fs_token
1502  */
1503 int
1504 _hammer_checkspace(hammer_mount_t hmp, int slop, int64_t *resp)
1505 {
1506 	const int in_size = sizeof(struct hammer_inode_data) +
1507 			    sizeof(union hammer_btree_elm);
1508 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1509 	int64_t usedbytes;
1510 
1511 	usedbytes = hmp->rsv_inodes * in_size +
1512 		    hmp->rsv_recs * rec_size +
1513 		    hmp->rsv_databytes +
1514 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_BIGBLOCK_BITS) +
1515 		    ((int64_t)hammer_limit_dirtybufspace) +
1516 		    (slop << HAMMER_BIGBLOCK_BITS);
1517 
1518 	if (resp)
1519 		*resp = usedbytes;
1520 
1521 	if (hmp->copy_stat_freebigblocks >=
1522 	    (usedbytes >> HAMMER_BIGBLOCK_BITS)) {
1523 		return(0);
1524 	}
1525 
1526 	return (ENOSPC);
1527 }
1528 
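/*
 * Worked example for the computation above (illustrative numbers,
 * assuming an 8MB big-block so HAMMER_BIGBLOCK_BITS == 23, slop == 0
 * and a 64MB hammer_limit_dirtybufspace):
 *
 *	usedbytes = 150MB (inode/record/data reservations)
 *		  + 16MB  (2 delayed-reuse big-blocks, rsv_fromdelay)
 *		  + 64MB  (dirty buffer limit)
 *		  = 230MB
 *
 * The check then passes only if copy_stat_freebigblocks >=
 * (230MB >> 23), i.e. at least 28 big-blocks are still free.
 */
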
1529 static int
1530 hammer_check_volume(hammer_mount_t hmp, hammer_off_t *offsetp)
1531 {
1532 	hammer_blockmap_t freemap;
1533 	hammer_blockmap_layer1_t layer1;
1534 	hammer_buffer_t buffer1 = NULL;
1535 	hammer_off_t layer1_offset;
1536 	int error = 0;
1537 
1538 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1539 
1540 	layer1_offset = freemap->phys_offset +
1541 			HAMMER_BLOCKMAP_LAYER1_OFFSET(*offsetp);
1542 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
1543 	if (error)
1544 		goto end;
1545 
1546 	/*
1547 	 * No more physically available space in layer1s
1548 	 * of the current volume; go to the next volume.
1549 	 */
1550 	if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL)
1551 		hammer_skip_volume(offsetp);
1552 end:
1553 	if (buffer1)
1554 		hammer_rel_buffer(buffer1, 0);
1555 	return(error);
1556 }
1557 
1558 static void
1559 hammer_skip_volume(hammer_off_t *offsetp)
1560 {
1561 	hammer_off_t offset;
1562 	int zone, vol_no;
1563 
1564 	offset = *offsetp;
1565 	zone = HAMMER_ZONE_DECODE(offset);
1566 	vol_no = HAMMER_VOL_DECODE(offset) + 1;
1567 	KKASSERT(vol_no <= HAMMER_MAX_VOLUMES);
1568 
1569 	if (vol_no == HAMMER_MAX_VOLUMES) {  /* wrap */
1570 		vol_no = 0;
1571 		++zone;
1572 	}
1573 
1574 	*offsetp = HAMMER_ENCODE(zone, vol_no, 0);
1575 }
1576