xref: /dragonfly/sys/vfs/hammer/hammer_blockmap.c (revision 927da715)
1 /*
2  * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
35  */
36 
37 /*
38  * HAMMER blockmap
39  */
40 #include "hammer.h"
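
/*
 * A blockmap (zone-X) offset is translated to a zone-2 (raw buffer)
 * offset through two layers stored in the freemap:
 * HAMMER_BLOCKMAP_LAYER1_OFFSET() indexes a layer1 entry whose
 * phys_offset points at an array of layer2 entries indexed by
 * HAMMER_BLOCKMAP_LAYER2_OFFSET(), one entry per big-block
 * (HAMMER_LARGEBLOCK_SIZE, nominally 8MB).  Each layer2 entry tracks
 * the owning zone, bytes_free, and append_off within its big-block.
 */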
41 
42 static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
43 static int hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
44                         hammer_off_t zone2_offset);
45 
46 
47 /*
48  * Reserved big-blocks red-black tree support
49  */
50 RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
51 	     hammer_res_rb_compare, hammer_off_t, zone_offset);
52 
53 static int
54 hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
55 {
56 	if (res1->zone_offset < res2->zone_offset)
57 		return(-1);
58 	if (res1->zone_offset > res2->zone_offset)
59 		return(1);
60 	return(0);
61 }
62 
63 /*
64  * Allocate bytes from a zone
65  */
66 hammer_off_t
67 hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
68 		      int bytes, int *errorp)
69 {
70 	hammer_mount_t hmp;
71 	hammer_volume_t root_volume;
72 	hammer_blockmap_t blockmap;
73 	hammer_blockmap_t freemap;
74 	hammer_reserve_t resv;
75 	struct hammer_blockmap_layer1 *layer1;
76 	struct hammer_blockmap_layer2 *layer2;
77 	hammer_buffer_t buffer1 = NULL;
78 	hammer_buffer_t buffer2 = NULL;
79 	hammer_buffer_t buffer3 = NULL;
80 	hammer_off_t tmp_offset;
81 	hammer_off_t next_offset;
82 	hammer_off_t result_offset;
83 	hammer_off_t layer1_offset;
84 	hammer_off_t layer2_offset;
85 	hammer_off_t base_off;
86 	int loops = 0;
87 	int offset;		/* offset within big-block */
88 
89 	hmp = trans->hmp;
90 
91 	/*
92 	 * Deal with alignment and buffer-boundary issues.
93 	 *
94 	 * Be careful, certain primary alignments are used below to allocate
95 	 * new blockmap blocks.
96 	 */
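	/* round up to the 16 byte allocation granularity (e.g. 100 -> 112) */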
97 	bytes = (bytes + 15) & ~15;
98 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
99 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
100 
101 	/*
102 	 * Setup
103 	 */
104 	root_volume = trans->rootvol;
105 	*errorp = 0;
106 	blockmap = &hmp->blockmap[zone];
107 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
108 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
109 
110 	next_offset = blockmap->next_offset;
111 again:
112 	/*
113 	 * Check for wrap
114 	 */
115 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
116 		if (++loops == 2) {
117 			result_offset = 0;
118 			*errorp = ENOSPC;
119 			goto failed;
120 		}
121 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
122 	}
123 
124 	/*
125 	 * The allocation request may not cross a buffer boundary.  Special
126 	 * large allocations must not cross a large-block boundary.
127 	 */
128 	tmp_offset = next_offset + bytes - 1;
129 	if (bytes <= HAMMER_BUFSIZE) {
130 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
131 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
132 			goto again;
133 		}
134 	} else {
135 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
136 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
137 			goto again;
138 		}
139 	}
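	/*
	 * Example, assuming the usual 16K HAMMER buffers: a 4K request
	 * starting 2K before a buffer boundary would straddle it, so
	 * next_offset is advanced to the start of the next buffer and
	 * the iteration restarts.
	 */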
140 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
141 
142 	/*
143 	 * Dive layer 1.
144 	 */
145 	layer1_offset = freemap->phys_offset +
146 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
147 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
148 	if (*errorp) {
149 		result_offset = 0;
150 		goto failed;
151 	}
152 
153 	/*
154 	 * Check CRC.
155 	 */
156 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
157 		Debugger("CRC FAILED: LAYER1");
158 	}
159 
160 	/*
161 	 * If we are at a big-block boundary and layer1 indicates no
162 	 * free big-blocks, then we cannot allocate a new big-block in
163 	 * layer2, so skip to the next layer1 entry.
164 	 */
165 	if (offset == 0 && layer1->blocks_free == 0) {
166 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
167 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
168 		goto again;
169 	}
170 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
171 
172 	/*
173 	 * Dive layer 2, each entry represents a large-block.
174 	 */
175 	layer2_offset = layer1->phys_offset +
176 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
177 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
178 	if (*errorp) {
179 		result_offset = 0;
180 		goto failed;
181 	}
182 
183 	/*
184 	 * Check CRC.
185 	 */
186 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
187 		Debugger("CRC FAILED: LAYER2");
188 	}
189 
190 	/*
191 	 * Skip the layer if the zone is owned by someone other than us.
192 	 */
193 	if (layer2->zone && layer2->zone != zone) {
194 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
195 		goto again;
196 	}
197 	if (offset < layer2->append_off) {
198 		next_offset += layer2->append_off - offset;
199 		goto again;
200 	}
201 
202 	/*
203 	 * We need the lock from this point on.  We have to re-check zone
204 	 * ownership after acquiring the lock and also check for reservations.
205 	 */
206 	hammer_lock_ex(&hmp->blkmap_lock);
207 
208 	if (layer2->zone && layer2->zone != zone) {
209 		hammer_unlock(&hmp->blkmap_lock);
210 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
211 		goto again;
212 	}
213 	if (offset < layer2->append_off) {
214 		hammer_unlock(&hmp->blkmap_lock);
215 		next_offset += layer2->append_off - offset;
216 		goto again;
217 	}
218 
219 	/*
220 	 * The bigblock might be reserved by another zone.  If it is reserved
221 	 * by our zone we may have to move next_offset past the append_off.
222 	 */
223 	base_off = (next_offset &
224 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
225 		    HAMMER_ZONE_RAW_BUFFER;
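	/*
	 * base_off is the zone-2 address of the big-block containing
	 * next_offset; reservations are indexed by this address in the
	 * rb_resv_root tree.
	 */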
226 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
227 	if (resv) {
228 		if (resv->zone != zone) {
229 			hammer_unlock(&hmp->blkmap_lock);
230 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
231 				      ~HAMMER_LARGEBLOCK_MASK64;
232 			goto again;
233 		}
234 		if (offset < resv->append_off) {
235 			hammer_unlock(&hmp->blkmap_lock);
236 			next_offset += resv->append_off - offset;
237 			goto again;
238 		}
239 	}
240 
241 	/*
242 	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
243 	 * of the layer for real.  At this point we've validated any
244 	 * reservation that might exist and can just ignore resv.
245 	 */
246 	if (layer2->zone == 0) {
247 		/*
248 		 * Assign the bigblock to our zone
249 		 */
250 		hammer_modify_buffer(trans, buffer1,
251 				     layer1, sizeof(*layer1));
252 		--layer1->blocks_free;
253 		layer1->layer1_crc = crc32(layer1,
254 					   HAMMER_LAYER1_CRCSIZE);
255 		hammer_modify_buffer_done(buffer1);
256 		hammer_modify_buffer(trans, buffer2,
257 				     layer2, sizeof(*layer2));
258 		layer2->zone = zone;
259 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
260 		KKASSERT(layer2->append_off == 0);
261 		hammer_modify_volume_field(trans, trans->rootvol,
262 					   vol0_stat_freebigblocks);
263 		--root_volume->ondisk->vol0_stat_freebigblocks;
264 		hmp->copy_stat_freebigblocks =
265 			root_volume->ondisk->vol0_stat_freebigblocks;
266 		hammer_modify_volume_done(trans->rootvol);
267 	} else {
268 		hammer_modify_buffer(trans, buffer2,
269 				     layer2, sizeof(*layer2));
270 	}
271 	KKASSERT(layer2->zone == zone);
272 
273 	layer2->bytes_free -= bytes;
274 	KKASSERT(layer2->append_off <= offset);
275 	layer2->append_off = offset + bytes;
276 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
277 	hammer_modify_buffer_done(buffer2);
278 	KKASSERT(layer2->bytes_free >= 0);
279 
280 	if (resv) {
281 		KKASSERT(resv->append_off <= offset);
282 		resv->append_off = offset + bytes;
283 	}
284 
285 	/*
286 	 * If we are allocating from the base of a new buffer we can avoid
287 	 * a disk read by calling hammer_bnew().
288 	 */
289 	if ((next_offset & HAMMER_BUFMASK) == 0) {
290 		hammer_bnew_ext(trans->hmp, next_offset, bytes,
291 				errorp, &buffer3);
292 	}
293 	result_offset = next_offset;
294 
295 	/*
296 	 * Process allocated result_offset
297 	 */
298 	hammer_modify_volume(NULL, root_volume, NULL, 0);
299 	blockmap->next_offset = next_offset + bytes;
300 	hammer_modify_volume_done(root_volume);
301 	hammer_unlock(&hmp->blkmap_lock);
302 failed:
303 
304 	/*
305 	 * Cleanup
306 	 */
307 	if (buffer1)
308 		hammer_rel_buffer(buffer1, 0);
309 	if (buffer2)
310 		hammer_rel_buffer(buffer2, 0);
311 	if (buffer3)
312 		hammer_rel_buffer(buffer3, 0);
313 
314 	return(result_offset);
315 }
316 
317 /*
318  * Frontend function - Reserve bytes in a zone.
319  *
320  * This code reserves bytes out of a blockmap without committing to any
321  * meta-data modifications, allowing the front-end to directly issue disk
322  * write I/O for large blocks of data.
323  *
324  * The backend later finalizes the reservation with hammer_blockmap_finalize()
325  * upon committing the related record.
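 *
 * The reservation is represented by a hammer_reserve structure, keyed
 * on the big-block's zone-2 base offset in hmp->rb_resv_root and
 * reference counted; hammer_blockmap_reserve_complete() drops the
 * reference when the related record is done with it.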
326  */
327 hammer_reserve_t
328 hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
329 			hammer_off_t *zone_offp, int *errorp)
330 {
331 	hammer_volume_t root_volume;
332 	hammer_blockmap_t blockmap;
333 	hammer_blockmap_t freemap;
334 	struct hammer_blockmap_layer1 *layer1;
335 	struct hammer_blockmap_layer2 *layer2;
336 	hammer_buffer_t buffer1 = NULL;
337 	hammer_buffer_t buffer2 = NULL;
338 	hammer_buffer_t buffer3 = NULL;
339 	hammer_off_t tmp_offset;
340 	hammer_off_t next_offset;
341 	hammer_off_t layer1_offset;
342 	hammer_off_t layer2_offset;
343 	hammer_off_t base_off;
344 	hammer_reserve_t resv;
345 	hammer_reserve_t resx;
346 	int loops = 0;
347 	int offset;
348 
349 	/*
350 	 * Setup
351 	 */
352 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
353 	root_volume = hammer_get_root_volume(hmp, errorp);
354 	if (*errorp)
355 		return(NULL);
356 	blockmap = &hmp->blockmap[zone];
357 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
358 	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);
359 
360 	/*
361 	 * Deal with alignment and buffer-boundary issues.
362 	 *
363 	 * Be careful, certain primary alignments are used below to allocate
364 	 * new blockmap blocks.
365 	 */
366 	bytes = (bytes + 15) & ~15;
367 	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
368 
369 	next_offset = blockmap->next_offset;
370 again:
371 	resv = NULL;
372 	/*
373 	 * Check for wrap
374 	 */
375 	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
376 		if (++loops == 2) {
377 			*errorp = ENOSPC;
378 			goto failed;
379 		}
380 		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
381 	}
382 
383 	/*
384 	 * The allocation request may not cross a buffer boundary.  Special
385 	 * large allocations must not cross a large-block boundary.
386 	 */
387 	tmp_offset = next_offset + bytes - 1;
388 	if (bytes <= HAMMER_BUFSIZE) {
389 		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
390 			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
391 			goto again;
392 		}
393 	} else {
394 		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
395 			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
396 			goto again;
397 		}
398 	}
399 	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;
400 
401 	/*
402 	 * Dive layer 1.
403 	 */
404 	layer1_offset = freemap->phys_offset +
405 			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
406 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
407 	if (*errorp)
408 		goto failed;
409 
410 	/*
411 	 * Check CRC.
412 	 */
413 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
414 		Debugger("CRC FAILED: LAYER1");
415 	}
416 
417 	/*
418 	 * If we are at a big-block boundary and layer1 indicates no
419 	 * free big-blocks, then we cannot allocate a new big-block in
420 	 * layer2, so skip to the next layer1 entry.
421 	 */
422 	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
423 	    layer1->blocks_free == 0) {
424 		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
425 			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
426 		goto again;
427 	}
428 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
429 
430 	/*
431 	 * Dive layer 2, each entry represents a large-block.
432 	 */
433 	layer2_offset = layer1->phys_offset +
434 			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
435 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
436 	if (*errorp)
437 		goto failed;
438 
439 	/*
440 	 * Check CRC if not allocating into uninitialized space (which we
441 	 * aren't when reserving space).
442 	 */
443 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
444 		Debugger("CRC FAILED: LAYER2");
445 	}
446 
447 	/*
448 	 * Skip the layer if the zone is owned by someone other than us.
449 	 */
450 	if (layer2->zone && layer2->zone != zone) {
451 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
452 		goto again;
453 	}
454 	if (offset < layer2->append_off) {
455 		next_offset += layer2->append_off - offset;
456 		goto again;
457 	}
458 
459 	/*
460 	 * We need the lock from this point on.  We have to re-check zone
461 	 * ownership after acquiring the lock and also check for reservations.
462 	 */
463 	hammer_lock_ex(&hmp->blkmap_lock);
464 
465 	if (layer2->zone && layer2->zone != zone) {
466 		hammer_unlock(&hmp->blkmap_lock);
467 		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
468 		goto again;
469 	}
470 	if (offset < layer2->append_off) {
471 		hammer_unlock(&hmp->blkmap_lock);
472 		next_offset += layer2->append_off - offset;
473 		goto again;
474 	}
475 
476 	/*
477 	 * The bigblock might be reserved by another zone.  If it is reserved
478 	 * by our zone we may have to move next_offset past the append_off.
479 	 */
480 	base_off = (next_offset &
481 		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
482 		    HAMMER_ZONE_RAW_BUFFER;
483 	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
484 	if (resv) {
485 		if (resv->zone != zone) {
486 			hammer_unlock(&hmp->blkmap_lock);
487 			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
488 				      ~HAMMER_LARGEBLOCK_MASK64;
489 			goto again;
490 		}
491 		if (offset < resv->append_off) {
492 			hammer_unlock(&hmp->blkmap_lock);
493 			next_offset += resv->append_off - offset;
494 			goto again;
495 		}
496 		++resv->refs;
497 		resx = NULL;
498 	} else {
499 		resx = kmalloc(sizeof(*resv), M_HAMMER,
500 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
501 		resx->refs = 1;
502 		resx->zone = zone;
503 		resx->zone_offset = base_off;
504 		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
505 		KKASSERT(resv == NULL);
506 		resv = resx;
507 		++hammer_count_reservations;
508 	}
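	/*
	 * Record the new high-water mark for this big-block so that later
	 * reservations and allocations begin past the space reserved here.
	 */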
509 	resv->append_off = offset + bytes;
510 
511 	/*
512 	 * If we are not reserving a whole buffer but are at the start of
513 	 * a new block, call hammer_bnew() to avoid a disk read.
514 	 *
515 	 * If we are reserving a whole buffer (or more), the caller will
516 	 * probably use a direct read, so do nothing.
517 	 */
518 	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
519 		hammer_bnew(hmp, next_offset, errorp, &buffer3);
520 	}
521 
522 	/*
523 	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
524 	 * space beyond alloc_offset is uninitialized.  alloc_offset must
525 	 * be big-block aligned.
526 	 */
527 	blockmap->next_offset = next_offset + bytes;
528 	hammer_unlock(&hmp->blkmap_lock);
529 
530 failed:
531 	if (buffer1)
532 		hammer_rel_buffer(buffer1, 0);
533 	if (buffer2)
534 		hammer_rel_buffer(buffer2, 0);
535 	if (buffer3)
536 		hammer_rel_buffer(buffer3, 0);
537 	hammer_rel_volume(root_volume, 0);
538 	*zone_offp = next_offset;
539 
540 	return(resv);
541 }
542 
543 /*
544  * Backend function - undo a portion of a reservation.
545  */
546 void
547 hammer_blockmap_reserve_undo(hammer_reserve_t resv,
548 			 hammer_off_t zone_offset, int bytes)
549 {
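	/*
	 * The space is not returned to the blockmap here.  bytes_freed is
	 * only accumulated so hammer_blockmap_reserve_complete() can tell
	 * whether the entire reserved range was undone (bytes_freed equals
	 * append_off), in which case the underlying buffers are discarded.
	 */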
550 	resv->bytes_freed += bytes;
551 }
552 
553 
554 /*
555  * A record with a storage reservation calls this function when it is
556  * being freed.  The storage may or may not have actually been allocated.
557  *
558  * This function removes the lock that prevented other entities from
559  * allocating out of the storage or removing the zone assignment.
560  */
561 void
562 hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
563 {
564 	hammer_off_t zone2_offset;
565 
566 	KKASSERT(resv->refs > 0);
567 	if (--resv->refs == 0) {
568 		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
569 
570 		zone2_offset = (resv->zone_offset & ~HAMMER_OFF_ZONE_MASK) |
571 				HAMMER_ZONE_RAW_BUFFER;
572 
573 		/*
574 		 * If we are releasing a zone and all of its reservations
575 		 * were undone we have to clean out all hammer and device
576 		 * buffers associated with the big block.  We do this
577 		 * primarily because the large-block may be reallocated
578 		 * from non-large-data to large-data or vice-versa, resulting
579 		 * in a different mix of 16K and 64K buffer cache buffers.
580 		 * XXX - this isn't fun and needs to be redone.
581 		 *
582 		 * Any direct allocations will cause this test to fail
583 		 * (bytes_freed will never reach append_off), which is
584 		 * the behavior we desire.  Once the zone has been assigned
585 		 * to the big-block the only way to allocate from it in the
586 		 * future is if the reblocker can completely clean it out,
587 		 * and that will also properly call hammer_del_buffers().
588 		 *
589 		 * If we don't we risk all sorts of buffer cache aliasing
590 		 * effects, including overlapping buffers with different
591 		 * sizes.
592 		 */
593 		if (resv->bytes_freed == resv->append_off) {
594 			hammer_del_buffers(hmp, resv->zone_offset,
595 					   zone2_offset,
596 					   HAMMER_LARGEBLOCK_SIZE);
597 		}
598 		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
599 		kfree(resv, M_HAMMER);
600 		--hammer_count_reservations;
601 	}
602 }
603 
604 /*
605  * This ensures that no data reallocations will take place at the specified
606  * zone2_offset (pointing to the base of a bigblock) for 2 flush cycles,
607  * preventing deleted data space, which has no UNDO, from being reallocated
608  * too quickly.
609  */
610 static int
611 hammer_reserve_setdelay(hammer_mount_t hmp, hammer_reserve_t resv,
612 			hammer_off_t zone2_offset)
613 {
614 	int error;
615 
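	/*
	 * Three cases: no reservation exists yet (create a placeholder
	 * covering the entire big-block), the reservation is already on
	 * the delay list (pull it off so it can be requeued below), or it
	 * exists but is not delayed (take an extra ref for the delay
	 * list).  On success the reservation is (re)queued for the next
	 * flush group.
	 */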
616 	if (resv == NULL) {
617 		resv = kmalloc(sizeof(*resv), M_HAMMER,
618 			       M_WAITOK | M_ZERO | M_USE_RESERVE);
619 		resv->refs = 1;	/* ref for on-delay list */
620 		resv->zone_offset = zone2_offset;
621 		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
622 		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
623 			error = EAGAIN;
624 			kfree(resv, M_HAMMER);
625 		} else {
626 			error = 0;
627 			++hammer_count_reservations;
628 		}
629 	} else if (resv->flags & HAMMER_RESF_ONDELAY) {
630 		--hmp->rsv_fromdelay;
631 		resv->flags &= ~HAMMER_RESF_ONDELAY;
632 		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
633 		resv->flush_group = hmp->flusher.next + 1;
634 		error = 0;
635 	} else {
636 		++resv->refs;	/* ref for on-delay list */
637 		error = 0;
638 	}
639 	if (error == 0) {
640 		++hmp->rsv_fromdelay;
641 		resv->flags |= HAMMER_RESF_ONDELAY;
642 		resv->flush_group = hmp->flusher.next + 1;
643 		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
644 	}
645 	return(error);
646 }
647 
648 void
649 hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
650 {
651 	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
652 	resv->flags &= ~HAMMER_RESF_ONDELAY;
653 	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
654 	--hmp->rsv_fromdelay;
655 	hammer_blockmap_reserve_complete(hmp, resv);
656 }
657 
658 /*
659  * Backend function - free (offset, bytes) in a zone.
660  *
661  * XXX error return
662  */
663 void
664 hammer_blockmap_free(hammer_transaction_t trans,
665 		     hammer_off_t zone_offset, int bytes)
666 {
667 	hammer_mount_t hmp;
668 	hammer_volume_t root_volume;
669 	hammer_reserve_t resv;
670 	hammer_blockmap_t blockmap;
671 	hammer_blockmap_t freemap;
672 	struct hammer_blockmap_layer1 *layer1;
673 	struct hammer_blockmap_layer2 *layer2;
674 	hammer_buffer_t buffer1 = NULL;
675 	hammer_buffer_t buffer2 = NULL;
676 	hammer_off_t layer1_offset;
677 	hammer_off_t layer2_offset;
678 	hammer_off_t base_off;
679 	int error;
680 	int zone;
681 
682 	if (bytes == 0)
683 		return;
684 	hmp = trans->hmp;
685 
686 	/*
687 	 * Alignment
688 	 */
689 	bytes = (bytes + 15) & ~15;
690 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
691 	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
692 		  ~HAMMER_LARGEBLOCK_MASK64) == 0);
693 
694 	/*
695 	 * Basic zone validation & locking
696 	 */
697 	zone = HAMMER_ZONE_DECODE(zone_offset);
698 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
699 	root_volume = trans->rootvol;
700 	error = 0;
701 
702 	blockmap = &hmp->blockmap[zone];
703 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
704 
705 	/*
706 	 * Dive layer 1.
707 	 */
708 	layer1_offset = freemap->phys_offset +
709 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
710 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
711 	if (error)
712 		goto failed;
713 	KKASSERT(layer1->phys_offset &&
714 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
715 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
716 		Debugger("CRC FAILED: LAYER1");
717 	}
718 
719 	/*
720 	 * Dive layer 2, each entry represents a large-block.
721 	 */
722 	layer2_offset = layer1->phys_offset +
723 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
724 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
725 	if (error)
726 		goto failed;
727 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
728 		Debugger("CRC FAILED: LAYER2");
729 	}
730 
731 	hammer_lock_ex(&hmp->blkmap_lock);
732 
733 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
734 
735 	/*
736 	 * Freeing previously allocated space
737 	 */
738 	KKASSERT(layer2->zone == zone);
739 	layer2->bytes_free += bytes;
740 	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);
741 	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
742 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
743 again:
744 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
745 				 base_off);
746 		if (resv) {
747 			/*
748 			 * Portions of this block have been reserved; do
749 			 * not free it.
750 			 *
751 			 * Make sure the reservation remains through
752 			 * the next flush cycle so potentially undoable
753 			 * data is not overwritten.
754 			 */
755 			KKASSERT(resv->zone == zone);
756 			hammer_reserve_setdelay(hmp, resv, base_off);
757 		} else if ((blockmap->next_offset ^ zone_offset) &
758 			    ~HAMMER_LARGEBLOCK_MASK64) {
759 			/*
760 			 * Our iterator is not in the now-free big-block
761 			 * and we can release it.
762 			 *
763 			 * Make sure the reservation remains through
764 			 * the next flush cycle so potentially undoable
765 			 * data is not overwritten.
766 			 */
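			/*
			 * A non-zero (EAGAIN) return means a reservation
			 * for this big-block already exists; redo the
			 * lookup so it is honored instead.
			 */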
767 			if (hammer_reserve_setdelay(hmp, NULL, base_off))
768 				goto again;
769 			KKASSERT(layer2->zone == zone);
770 			/*
771 			 * XXX maybe incorporate this del call in the
772 			 * release code by setting base_offset, bytes_freed,
773 			 * etc.
774 			 */
775 			hammer_del_buffers(hmp,
776 					   zone_offset &
777 					      ~HAMMER_LARGEBLOCK_MASK64,
778 					   base_off,
779 					   HAMMER_LARGEBLOCK_SIZE);
780 			layer2->zone = 0;
781 			layer2->append_off = 0;
782 			hammer_modify_buffer(trans, buffer1,
783 					     layer1, sizeof(*layer1));
784 			++layer1->blocks_free;
785 			layer1->layer1_crc = crc32(layer1,
786 						   HAMMER_LAYER1_CRCSIZE);
787 			hammer_modify_buffer_done(buffer1);
788 			hammer_modify_volume_field(trans,
789 					trans->rootvol,
790 					vol0_stat_freebigblocks);
791 			++root_volume->ondisk->vol0_stat_freebigblocks;
792 			hmp->copy_stat_freebigblocks =
793 			   root_volume->ondisk->vol0_stat_freebigblocks;
794 			hammer_modify_volume_done(trans->rootvol);
795 		}
796 	}
797 
798 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
799 	hammer_modify_buffer_done(buffer2);
800 	hammer_unlock(&hmp->blkmap_lock);
801 
802 failed:
803 	if (buffer1)
804 		hammer_rel_buffer(buffer1, 0);
805 	if (buffer2)
806 		hammer_rel_buffer(buffer2, 0);
807 }
808 
809 /*
810  * Backend function - finalize (offset, bytes) in a zone.
811  *
812  * Allocate space that was previously reserved by the frontend.
813  */
814 int
815 hammer_blockmap_finalize(hammer_transaction_t trans,
816 			 hammer_off_t zone_offset, int bytes)
817 {
818 	hammer_mount_t hmp;
819 	hammer_volume_t root_volume;
820 	hammer_blockmap_t blockmap;
821 	hammer_blockmap_t freemap;
822 	struct hammer_blockmap_layer1 *layer1;
823 	struct hammer_blockmap_layer2 *layer2;
824 	hammer_buffer_t buffer1 = NULL;
825 	hammer_buffer_t buffer2 = NULL;
826 	hammer_off_t layer1_offset;
827 	hammer_off_t layer2_offset;
828 	int error;
829 	int zone;
830 	int offset;
831 
832 	if (bytes == 0)
833 		return(0);
834 	hmp = trans->hmp;
835 
836 	/*
837 	 * Alignment
838 	 */
839 	bytes = (bytes + 15) & ~15;
840 	KKASSERT(bytes <= HAMMER_XBUFSIZE);
841 
842 	/*
843 	 * Basic zone validation & locking
844 	 */
845 	zone = HAMMER_ZONE_DECODE(zone_offset);
846 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
847 	root_volume = trans->rootvol;
848 	error = 0;
849 
850 	blockmap = &hmp->blockmap[zone];
851 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
852 
853 	/*
854 	 * Dive layer 1.
855 	 */
856 	layer1_offset = freemap->phys_offset +
857 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
858 	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
859 	if (error)
860 		goto failed;
861 	KKASSERT(layer1->phys_offset &&
862 		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
863 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
864 		Debugger("CRC FAILED: LAYER1");
865 	}
866 
867 	/*
868 	 * Dive layer 2, each entry represents a large-block.
869 	 */
870 	layer2_offset = layer1->phys_offset +
871 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
872 	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
873 	if (error)
874 		goto failed;
875 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
876 		Debugger("CRC FAILED: LAYER2");
877 	}
878 
879 	hammer_lock_ex(&hmp->blkmap_lock);
880 
881 	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));
882 
883 	/*
884 	 * Finalize some or all of the space covered by a current
885 	 * reservation.  An allocation in the same layer may have
886 	 * already assigned ownership.
887 	 */
888 	if (layer2->zone == 0) {
889 		hammer_modify_buffer(trans, buffer1,
890 				     layer1, sizeof(*layer1));
891 		--layer1->blocks_free;
892 		layer1->layer1_crc = crc32(layer1,
893 					   HAMMER_LAYER1_CRCSIZE);
894 		hammer_modify_buffer_done(buffer1);
895 		layer2->zone = zone;
896 		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
897 		KKASSERT(layer2->append_off == 0);
898 		hammer_modify_volume_field(trans,
899 				trans->rootvol,
900 				vol0_stat_freebigblocks);
901 		--root_volume->ondisk->vol0_stat_freebigblocks;
902 		hmp->copy_stat_freebigblocks =
903 		   root_volume->ondisk->vol0_stat_freebigblocks;
904 		hammer_modify_volume_done(trans->rootvol);
905 	}
906 	if (layer2->zone != zone)
907 		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
908 	KKASSERT(layer2->zone == zone);
909 	layer2->bytes_free -= bytes;
910 
911 	/*
912 	 * Finalizations can occur out of order, or combined with allocations.
913 	 * append_off must be set to the highest allocated offset.
914 	 */
915 	offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
916 	if (layer2->append_off < offset)
917 		layer2->append_off = offset;
918 
919 	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
920 	hammer_modify_buffer_done(buffer2);
921 	hammer_unlock(&hmp->blkmap_lock);
922 
923 failed:
924 	if (buffer1)
925 		hammer_rel_buffer(buffer1, 0);
926 	if (buffer2)
927 		hammer_rel_buffer(buffer2, 0);
928 	return(error);
929 }
930 
931 /*
932  * Return the number of free bytes in the big-block containing the
933  * specified blockmap offset.
934  */
935 int
936 hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
937 			int *curp, int *errorp)
938 {
939 	hammer_volume_t root_volume;
940 	hammer_blockmap_t blockmap;
941 	hammer_blockmap_t freemap;
942 	struct hammer_blockmap_layer1 *layer1;
943 	struct hammer_blockmap_layer2 *layer2;
944 	hammer_buffer_t buffer = NULL;
945 	hammer_off_t layer1_offset;
946 	hammer_off_t layer2_offset;
947 	int bytes;
948 	int zone;
949 
950 	zone = HAMMER_ZONE_DECODE(zone_offset);
951 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
952 	root_volume = hammer_get_root_volume(hmp, errorp);
953 	if (*errorp) {
954 		*curp = 0;
955 		return(0);
956 	}
957 	blockmap = &hmp->blockmap[zone];
958 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
959 
960 	/*
961 	 * Dive layer 1.
962 	 */
963 	layer1_offset = freemap->phys_offset +
964 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
965 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
966 	if (*errorp) {
967 		bytes = 0;
968 		goto failed;
969 	}
970 	KKASSERT(layer1->phys_offset);
971 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
972 		Debugger("CRC FAILED: LAYER1");
973 	}
974 
975 	/*
976 	 * Dive layer 2, each entry represents a large-block.
977 	 *
978 	 * (reuse buffer, layer1 pointer becomes invalid)
979 	 */
980 	layer2_offset = layer1->phys_offset +
981 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
982 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
983 	if (*errorp) {
984 		bytes = 0;
985 		goto failed;
986 	}
987 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
988 		Debugger("CRC FAILED: LAYER2");
989 	}
990 	KKASSERT(layer2->zone == zone);
991 
992 	bytes = layer2->bytes_free;
993 
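	/*
	 * *curp reports whether the zone's allocation iterator still
	 * points into this big-block (1) or has moved on (0).
	 */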
994 	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
995 		*curp = 0;
996 	else
997 		*curp = 1;
998 failed:
999 	if (buffer)
1000 		hammer_rel_buffer(buffer, 0);
1001 	hammer_rel_volume(root_volume, 0);
1002 	if (hammer_debug_general & 0x0800) {
1003 		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
1004 			zone_offset, bytes);
1005 	}
1006 	return(bytes);
1007 }
1008 
1009 
1010 /*
1011  * Lookup a blockmap offset.
1012  */
1013 hammer_off_t
1014 hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
1015 		       int *errorp)
1016 {
1017 	hammer_volume_t root_volume;
1018 	hammer_blockmap_t freemap;
1019 	struct hammer_blockmap_layer1 *layer1;
1020 	struct hammer_blockmap_layer2 *layer2;
1021 	hammer_buffer_t buffer = NULL;
1022 	hammer_off_t layer1_offset;
1023 	hammer_off_t layer2_offset;
1024 	hammer_off_t result_offset;
1025 	hammer_off_t base_off;
1026 	hammer_reserve_t resv;
1027 	int zone;
1028 
1029 	/*
1030 	 * Calculate the zone-2 offset.
1031 	 */
1032 	zone = HAMMER_ZONE_DECODE(zone_offset);
1033 	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
1034 
1035 	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
1036 			HAMMER_ZONE_RAW_BUFFER;
1037 
1038 	/*
1039 	 * We can actually stop here, normal blockmaps are now direct-mapped
1040 	 * onto the freemap and so represent zone-2 addresses.
1041 	 */
1042 	if (hammer_verify_zone == 0) {
1043 		*errorp = 0;
1044 		return(result_offset);
1045 	}
1046 
1047 	/*
1048 	 * Validate the allocation zone
1049 	 */
1050 	root_volume = hammer_get_root_volume(hmp, errorp);
1051 	if (*errorp)
1052 		return(0);
1053 	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
1054 	KKASSERT(freemap->phys_offset != 0);
1055 
1056 	/*
1057 	 * Dive layer 1.
1058 	 */
1059 	layer1_offset = freemap->phys_offset +
1060 			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
1061 	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
1062 	if (*errorp)
1063 		goto failed;
1064 	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
1065 	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
1066 		Debugger("CRC FAILED: LAYER1");
1067 	}
1068 
1069 	/*
1070 	 * Dive layer 2, each entry represents a large-block.
1071 	 */
1072 	layer2_offset = layer1->phys_offset +
1073 			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
1074 	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
1075 
1076 	if (*errorp)
1077 		goto failed;
1078 	if (layer2->zone == 0) {
1079 		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
1080 		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
1081 				 base_off);
1082 		KKASSERT(resv && resv->zone == zone);
1083 
1084 	} else if (layer2->zone != zone) {
1085 		panic("hammer_blockmap_lookup: bad zone %d/%d\n",
1086 			layer2->zone, zone);
1087 	}
1088 	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
1089 		Debugger("CRC FAILED: LAYER2");
1090 	}
1091 
1092 failed:
1093 	if (buffer)
1094 		hammer_rel_buffer(buffer, 0);
1095 	hammer_rel_volume(root_volume, 0);
1096 	if (hammer_debug_general & 0x0800) {
1097 		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
1098 			zone_offset, result_offset);
1099 	}
1100 	return(result_offset);
1101 }
1102 
1103 
1104 /*
1105  * Check space availability
1106  */
1107 int
1108 hammer_checkspace(hammer_mount_t hmp, int slop)
1109 {
1110 	const int in_size = sizeof(struct hammer_inode_data) +
1111 			    sizeof(union hammer_btree_elm);
1112 	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
1113 	int64_t usedbytes;
1114 
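	/*
	 * Estimate the worst-case space consumed by pending frontend
	 * activity: reserved inodes and records, reserved data bytes,
	 * big-blocks parked on the delay list, four times the dirty
	 * buffer space limit, and the caller-supplied slop (in
	 * big-blocks).  The result is compared, in big-blocks, against
	 * the cached free big-block count.
	 */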
1115 	usedbytes = hmp->rsv_inodes * in_size +
1116 		    hmp->rsv_recs * rec_size +
1117 		    hmp->rsv_databytes +
1118 		    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
1119 		    ((int64_t)hidirtybufspace << 2) +
1120 		    (slop << HAMMER_LARGEBLOCK_BITS);
1121 
1122 	hammer_count_extra_space_used = usedbytes;	/* debugging */
1123 
1124 	if (hmp->copy_stat_freebigblocks >=
1125 	    (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
1126 		return(0);
1127 	}
1128 	return (ENOSPC);
1129 }
1130 
1131