/*
 * Copyright (c) 2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_blockmap.c,v 1.27 2008/07/31 22:30:33 dillon Exp $
 */

/*
 * HAMMER blockmap
 */
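
/*
 * Overview: a per-zone blockmap translates zone-encoded offsets into
 * zone-2 (raw buffer) offsets; with the current layout this is a direct
 * mapping (see hammer_blockmap_lookup()).  Free space is tracked by the
 * freemap (HAMMER_ZONE_FREEMAP_INDEX) through two layers: a layer1 entry
 * points at a block of layer2 entries, and each layer2 entry describes
 * one big-block (nominally 8MB), recording its owning zone, bytes_free
 * and append_off.  Reservations (hammer_reserve) let the frontend stage
 * direct write I/O before the backend finalizes the blockmap space.
 */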
#include "hammer.h"

static int hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2);
static void hammer_reserve_setdelay(hammer_mount_t hmp,
				    hammer_off_t base_offset,
				    struct hammer_blockmap_layer2 *layer2);


/*
 * Reserved big-blocks red-black tree support
 */
RB_GENERATE2(hammer_res_rb_tree, hammer_reserve, rb_node,
	     hammer_res_rb_compare, hammer_off_t, zone_offset);

static int
hammer_res_rb_compare(hammer_reserve_t res1, hammer_reserve_t res2)
{
	if (res1->zone_offset < res2->zone_offset)
		return(-1);
	if (res1->zone_offset > res2->zone_offset)
		return(1);
	return(0);
}

/*
 * Allocate bytes from a zone
 */
hammer_off_t
hammer_blockmap_alloc(hammer_transaction_t trans, int zone,
		      int bytes, int *errorp)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	hammer_reserve_t resv;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_buffer_t buffer3 = NULL;
	hammer_off_t tmp_offset;
	hammer_off_t next_offset;
	hammer_off_t result_offset;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	int loops = 0;
	int offset;		/* offset within big-block */

	hmp = trans->hmp;

	/*
	 * Deal with alignment and buffer-boundary issues.
	 *
	 * Be careful, certain primary alignments are used below to allocate
	 * new blockmap blocks.
	 */
	bytes = (bytes + 15) & ~15;
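	/*
	 * The round-up above keeps every allocation a multiple of 16 bytes:
	 * for example, bytes = 1 becomes 16, bytes = 100 becomes 112, and an
	 * already-aligned 4096 is left unchanged.
	 */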
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

	/*
	 * Setup
	 */
	root_volume = trans->rootvol;
	*errorp = 0;
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

	next_offset = blockmap->next_offset;
again:
	/*
	 * Check for wrap
	 */
	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
		if (++loops == 2) {
			result_offset = 0;
			*errorp = ENOSPC;
			goto failed;
		}
		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
	}

	/*
	 * The allocation request may not cross a buffer boundary.  Special
	 * large allocations must not cross a large-block boundary.
	 */
	tmp_offset = next_offset + bytes - 1;
	if (bytes <= HAMMER_BUFSIZE) {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
			goto again;
		}
	} else {
		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
	}
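	/*
	 * The XOR test above is non-zero exactly when the first and last
	 * byte of the proposed allocation land in different buffers (or in
	 * different big-blocks for oversized allocations).  For example,
	 * with 16K buffers, a next_offset ending in 0x3ff8 and bytes = 32
	 * give a tmp_offset ending in 0x4017; the XOR sets bits above
	 * HAMMER_BUFMASK, so next_offset is advanced to the 0x4000 buffer
	 * boundary and the loop retries.
	 */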
	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

	/*
	 * Dive layer 1.
	 */
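	/*
	 * The translation used below is, roughly:
	 *
	 *	layer1 = freemap->phys_offset +
	 *		 HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset)
	 *	layer2 = layer1->phys_offset +
	 *		 HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset)
	 *
	 * Each layer1 entry covers HAMMER_BLOCKMAP_LAYER2 bytes of zone
	 * address space and caches a blocks_free count for its layer2
	 * entries, which is why a fully allocated entry can be skipped by
	 * rounding next_offset up to the next HAMMER_BLOCKMAP_LAYER2
	 * boundary.  Each layer2 entry describes a single big-block.
	 */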
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	if (*errorp) {
		result_offset = 0;
		goto failed;
	}

	/*
	 * Check CRC.
	 */
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * If we are at a big-block boundary and layer1 indicates no
	 * free big-blocks, then we cannot allocate a new big-block in
	 * layer2, so skip to the next layer1 entry.
	 */
	if (offset == 0 && layer1->blocks_free == 0) {
		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
		goto again;
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	if (*errorp) {
		result_offset = 0;
		goto failed;
	}

	/*
	 * Check CRC.
	 */
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	/*
	 * Skip the layer if the zone is owned by someone other than us.
	 */
	if (layer2->zone && layer2->zone != zone) {
		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * We need the lock from this point on.  We have to re-check zone
	 * ownership after acquiring the lock and also check for reservations.
	 */
	hammer_lock_ex(&hmp->blkmap_lock);

	if (layer2->zone && layer2->zone != zone) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * The big-block might be reserved by another zone.  If it is reserved
	 * by our zone we may have to move next_offset past the append_off.
	 */
	base_off = (next_offset &
		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
		    HAMMER_ZONE_RAW_BUFFER;
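	/*
	 * base_off strips the zone bits and the offset within the big-block,
	 * then re-tags the result as a zone-2 (raw buffer) address, which is
	 * the key the reservation tree is indexed by.  For example, assuming
	 * 8MB big-blocks and the usual 4-bit zone field, a zone-8 offset
	 * 0x8000000000c23456 keys as 0x2000000000800000.
	 */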
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if (resv->zone != zone) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
				      ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
		if (offset < resv->append_off) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset += resv->append_off - offset;
			goto again;
		}
	}

	/*
	 * Ok, we can allocate out of this layer2 big-block.  Assume ownership
	 * of the layer for real.  At this point we've validated any
	 * reservation that might exist and can just ignore resv.
	 */
	if (layer2->zone == 0) {
		/*
		 * Assign the big-block to our zone
		 */
		hammer_modify_buffer(trans, buffer1,
				     layer1, sizeof(*layer1));
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1,
					   HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(buffer1);
		hammer_modify_buffer(trans, buffer2,
				     layer2, sizeof(*layer2));
		layer2->zone = zone;
		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
		KKASSERT(layer2->append_off == 0);
		hammer_modify_volume_field(trans, trans->rootvol,
					   vol0_stat_freebigblocks);
		--root_volume->ondisk->vol0_stat_freebigblocks;
		hmp->copy_stat_freebigblocks =
			root_volume->ondisk->vol0_stat_freebigblocks;
		hammer_modify_volume_done(trans->rootvol);
	} else {
		hammer_modify_buffer(trans, buffer2,
				     layer2, sizeof(*layer2));
	}
	KKASSERT(layer2->zone == zone);

	layer2->bytes_free -= bytes;
	KKASSERT(layer2->append_off <= offset);
	layer2->append_off = offset + bytes;
	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	KKASSERT(layer2->bytes_free >= 0);

	if (resv) {
		KKASSERT(resv->append_off <= offset);
		resv->append_off = offset + bytes;
		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
	}

	/*
	 * If we are allocating from the base of a new buffer we can avoid
	 * a disk read by calling hammer_bnew().
	 */
	if ((next_offset & HAMMER_BUFMASK) == 0) {
		hammer_bnew_ext(trans->hmp, next_offset, bytes,
				errorp, &buffer3);
	}
	result_offset = next_offset;

	/*
	 * Process allocated result_offset
	 */
	hammer_modify_volume(NULL, root_volume, NULL, 0);
	blockmap->next_offset = next_offset + bytes;
	hammer_modify_volume_done(root_volume);
	hammer_unlock(&hmp->blkmap_lock);
failed:

	/*
	 * Cleanup
	 */
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	if (buffer3)
		hammer_rel_buffer(buffer3, 0);

	return(result_offset);
}

/*
 * Frontend function - Reserve bytes in a zone.
 *
 * This code reserves bytes out of a blockmap without committing to any
 * meta-data modifications, allowing the front-end to directly issue disk
 * write I/O for large blocks of data.
 *
 * The backend later finalizes the reservation with hammer_blockmap_finalize()
 * upon committing the related record.
 */
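/*
 * Roughly, the intended call sequence (paraphrased from the description
 * above rather than copied from an actual caller) is:
 *
 *	resv = hammer_blockmap_reserve(hmp, zone, bytes, &zone_off, &error);
 *	... frontend issues direct write I/O against zone_off ...
 *	hammer_blockmap_finalize(trans, resv, zone_off, bytes);
 *	hammer_blockmap_reserve_complete(hmp, resv);
 *
 * with hammer_blockmap_finalize() run by the backend when the record is
 * committed and hammer_blockmap_reserve_complete() dropping the reference
 * returned here.
 */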
hammer_reserve_t
hammer_blockmap_reserve(hammer_mount_t hmp, int zone, int bytes,
			hammer_off_t *zone_offp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_buffer_t buffer3 = NULL;
	hammer_off_t tmp_offset;
	hammer_off_t next_offset;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv;
	hammer_reserve_t resx;
	int loops = 0;
	int offset;

	/*
	 * Setup
	 */
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(NULL);
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(HAMMER_ZONE_DECODE(blockmap->next_offset) == zone);

	/*
	 * Deal with alignment and buffer-boundary issues.
	 *
	 * Be careful, certain primary alignments are used below to allocate
	 * new blockmap blocks.
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes > 0 && bytes <= HAMMER_XBUFSIZE);

	next_offset = blockmap->next_offset;
again:
	resv = NULL;
	/*
	 * Check for wrap
	 */
	if (next_offset == HAMMER_ZONE_ENCODE(zone + 1, 0)) {
		if (++loops == 2) {
			*errorp = ENOSPC;
			goto failed;
		}
		next_offset = HAMMER_ZONE_ENCODE(zone, 0);
	}

	/*
	 * The allocation request may not cross a buffer boundary.  Special
	 * large allocations must not cross a large-block boundary.
	 */
	tmp_offset = next_offset + bytes - 1;
	if (bytes <= HAMMER_BUFSIZE) {
		if ((next_offset ^ tmp_offset) & ~HAMMER_BUFMASK64) {
			next_offset = tmp_offset & ~HAMMER_BUFMASK64;
			goto again;
		}
	} else {
		if ((next_offset ^ tmp_offset) & ~HAMMER_LARGEBLOCK_MASK64) {
			next_offset = tmp_offset & ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
	}
	offset = (int)next_offset & HAMMER_LARGEBLOCK_MASK;

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(next_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer1);
	if (*errorp)
		goto failed;

	/*
	 * Check CRC.
	 */
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * If we are at a big-block boundary and layer1 indicates no
	 * free big-blocks, then we cannot allocate a new big-block in
	 * layer2, so skip to the next layer1 entry.
	 */
	if ((next_offset & HAMMER_LARGEBLOCK_MASK) == 0 &&
	    layer1->blocks_free == 0) {
		next_offset = (next_offset + HAMMER_BLOCKMAP_LAYER2) &
			      ~HAMMER_BLOCKMAP_LAYER2_MASK;
		goto again;
	}
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(next_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer2);
	if (*errorp)
		goto failed;

	/*
	 * Check CRC if not allocating into uninitialized space (which we
	 * aren't when reserving space).
	 */
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	/*
	 * Skip the layer if the zone is owned by someone other than us.
	 */
	if (layer2->zone && layer2->zone != zone) {
		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * We need the lock from this point on.  We have to re-check zone
	 * ownership after acquiring the lock and also check for reservations.
	 */
	hammer_lock_ex(&hmp->blkmap_lock);

	if (layer2->zone && layer2->zone != zone) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += (HAMMER_LARGEBLOCK_SIZE - offset);
		goto again;
	}
	if (offset < layer2->append_off) {
		hammer_unlock(&hmp->blkmap_lock);
		next_offset += layer2->append_off - offset;
		goto again;
	}

	/*
	 * The big-block might be reserved by another zone.  If it is reserved
	 * by our zone we may have to move next_offset past the append_off.
	 */
	base_off = (next_offset &
		    (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) |
		    HAMMER_ZONE_RAW_BUFFER;
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_off);
	if (resv) {
		if (resv->zone != zone) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset = (next_offset + HAMMER_LARGEBLOCK_SIZE) &
				      ~HAMMER_LARGEBLOCK_MASK64;
			goto again;
		}
		if (offset < resv->append_off) {
			hammer_unlock(&hmp->blkmap_lock);
			next_offset += resv->append_off - offset;
			goto again;
		}
		++resv->refs;
		resx = NULL;
	} else {
		resx = kmalloc(sizeof(*resv), hmp->m_misc,
			       M_WAITOK | M_ZERO | M_USE_RESERVE);
		resx->refs = 1;
		resx->zone = zone;
		resx->zone_offset = base_off;
		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
			resx->flags |= HAMMER_RESF_LAYER2FREE;
		resv = RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resx);
		KKASSERT(resv == NULL);
		resv = resx;
		++hammer_count_reservations;
	}
	resv->append_off = offset + bytes;

	/*
	 * If we are not reserving a whole buffer but are at the start of
	 * a new block, call hammer_bnew() to avoid a disk read.
	 *
	 * If we are reserving a whole buffer (or more), the caller will
	 * probably use a direct read, so do nothing.
	 */
	if (bytes < HAMMER_BUFSIZE && (next_offset & HAMMER_BUFMASK) == 0) {
		hammer_bnew(hmp, next_offset, errorp, &buffer3);
	}

	/*
	 * Adjust our iterator and alloc_offset.  The layer1 and layer2
	 * space beyond alloc_offset is uninitialized.  alloc_offset must
	 * be big-block aligned.
	 */
	blockmap->next_offset = next_offset + bytes;
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	if (buffer3)
		hammer_rel_buffer(buffer3, 0);
	hammer_rel_volume(root_volume, 0);
	*zone_offp = next_offset;

	return(resv);
}

#if 0
/*
 * Backend function - undo a portion of a reservation.
 */
void
hammer_blockmap_reserve_undo(hammer_mount_t hmp, hammer_reserve_t resv,
			 hammer_off_t zone_offset, int bytes)
{
	resv->bytes_freed += bytes;
}

#endif

/*
 * Dereference a reservation structure.  Upon the final release the
 * underlying big-block is checked and if it is entirely free we delete
 * any related HAMMER buffers to avoid potential conflicts with future
 * reuse of the big-block.
 */
void
hammer_blockmap_reserve_complete(hammer_mount_t hmp, hammer_reserve_t resv)
{
	hammer_off_t base_offset;

	KKASSERT(resv->refs > 0);
	KKASSERT((resv->zone_offset & HAMMER_OFF_ZONE_MASK) ==
		 HAMMER_ZONE_RAW_BUFFER);

	/*
	 * Setting append_off to the max prevents any new allocations
	 * from occurring while we are trying to dispose of the reservation,
	 * allowing us to safely delete any related HAMMER buffers.
	 */
	if (resv->refs == 1 && (resv->flags & HAMMER_RESF_LAYER2FREE)) {
		resv->append_off = HAMMER_LARGEBLOCK_SIZE;
		resv->flags &= ~HAMMER_RESF_LAYER2FREE;
		base_offset = resv->zone_offset & ~HAMMER_ZONE_RAW_BUFFER;
		base_offset = HAMMER_ZONE_ENCODE(base_offset, resv->zone);
		hammer_del_buffers(hmp, base_offset, resv->zone_offset,
				   HAMMER_LARGEBLOCK_SIZE);
	}
	if (--resv->refs == 0) {
		KKASSERT((resv->flags & HAMMER_RESF_ONDELAY) == 0);
		RB_REMOVE(hammer_res_rb_tree, &hmp->rb_resv_root, resv);
		kfree(resv, hmp->m_misc);
		--hammer_count_reservations;
	}
}

/*
 * Prevent a potentially free big-block from being reused until after
 * the related flushes have completely cycled, otherwise crash recovery
 * could resurrect a data block that was already reused and overwritten.
 *
 * The caller re-checks the layer2 entry after the reservation has been
 * entered to determine whether the big-block is still completely free.
 */
static void
hammer_reserve_setdelay(hammer_mount_t hmp, hammer_off_t base_offset,
			struct hammer_blockmap_layer2 *layer2)
{
	hammer_reserve_t resv;

	/*
	 * Allocate the reservation if necessary.
	 */
again:
	resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root, base_offset);
	if (resv == NULL) {
		resv = kmalloc(sizeof(*resv), hmp->m_misc,
			       M_WAITOK | M_ZERO | M_USE_RESERVE);
		resv->zone_offset = base_offset;
		resv->refs = 0;
		/* XXX inherent lock until refs bumped later on */
		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE)
			resv->flags |= HAMMER_RESF_LAYER2FREE;
		if (RB_INSERT(hammer_res_rb_tree, &hmp->rb_resv_root, resv)) {
			kfree(resv, hmp->m_misc);
			goto again;
		}
		++hammer_count_reservations;
	}

	/*
	 * Enter the reservation on the on-delay list, or move it if it
	 * is already on the list.
	 */
	if (resv->flags & HAMMER_RESF_ONDELAY) {
		TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
		resv->flush_group = hmp->flusher.next + 1;
		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
	} else {
		++resv->refs;
		++hmp->rsv_fromdelay;
		resv->flags |= HAMMER_RESF_ONDELAY;
		resv->flush_group = hmp->flusher.next + 1;
		TAILQ_INSERT_TAIL(&hmp->delay_list, resv, delay_entry);
	}
}

void
hammer_reserve_clrdelay(hammer_mount_t hmp, hammer_reserve_t resv)
{
	KKASSERT(resv->flags & HAMMER_RESF_ONDELAY);
	resv->flags &= ~HAMMER_RESF_ONDELAY;
	TAILQ_REMOVE(&hmp->delay_list, resv, delay_entry);
	--hmp->rsv_fromdelay;
	hammer_blockmap_reserve_complete(hmp, resv);
}

/*
 * Backend function - free (offset, bytes) in a zone.
 *
 * XXX error return
 */
void
hammer_blockmap_free(hammer_transaction_t trans,
		     hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t base_off;
	int error;
	int zone;

	if (bytes == 0)
		return;
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes <= HAMMER_XBUFSIZE);
	KKASSERT(((zone_offset ^ (zone_offset + (bytes - 1))) &
		  ~HAMMER_LARGEBLOCK_MASK64) == 0);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = trans->rootvol;
	error = 0;

	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	if (error)
		goto failed;
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	if (error)
		goto failed;
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	hammer_lock_ex(&hmp->blkmap_lock);

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Free space previously allocated via blockmap_alloc().
	 */
	KKASSERT(layer2->zone == zone);
	layer2->bytes_free += bytes;
	KKASSERT(layer2->bytes_free <= HAMMER_LARGEBLOCK_SIZE);

	/*
	 * If a big-block becomes entirely free we must create a covering
	 * reservation to prevent premature reuse.  Note, however, that
	 * the big-block and/or reservation may still have an append_off
	 * that allows further (non-reused) allocations.
	 *
	 * Once the reservation has been made we re-check layer2 and if
	 * the big-block is still entirely free we reset the layer2 entry.
	 * The reservation will prevent premature reuse.
	 *
	 * NOTE: hammer_buffers are only invalidated when the reservation
	 * is completed, if the layer2 entry is still completely free at
	 * that time.  Any allocations from the reservation that may have
	 * occurred in the meantime, or active references on the reservation
	 * from new pending allocations, will prevent the invalidation from
	 * occurring.
	 */
	if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;

		hammer_reserve_setdelay(hmp, base_off, layer2);
		if (layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE) {
			layer2->zone = 0;
			layer2->append_off = 0;
			hammer_modify_buffer(trans, buffer1,
					     layer1, sizeof(*layer1));
			++layer1->blocks_free;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
			hammer_modify_buffer_done(buffer1);
			hammer_modify_volume_field(trans,
					trans->rootvol,
					vol0_stat_freebigblocks);
			++root_volume->ondisk->vol0_stat_freebigblocks;
			hmp->copy_stat_freebigblocks =
			   root_volume->ondisk->vol0_stat_freebigblocks;
			hammer_modify_volume_done(trans->rootvol);
		}
	}
	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
}

/*
 * Backend function - finalize (offset, bytes) in a zone.
 *
 * Allocate space that was previously reserved by the frontend.
 */
int
hammer_blockmap_finalize(hammer_transaction_t trans,
			 hammer_reserve_t resv,
			 hammer_off_t zone_offset, int bytes)
{
	hammer_mount_t hmp;
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer1 = NULL;
	hammer_buffer_t buffer2 = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int error;
	int zone;
	int offset;

	if (bytes == 0)
		return(0);
	hmp = trans->hmp;

	/*
	 * Alignment
	 */
	bytes = (bytes + 15) & ~15;
	KKASSERT(bytes <= HAMMER_XBUFSIZE);

	/*
	 * Basic zone validation & locking
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = trans->rootvol;
	error = 0;

	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, &error, &buffer1);
	if (error)
		goto failed;
	KKASSERT(layer1->phys_offset &&
		 layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, &error, &buffer2);
	if (error)
		goto failed;
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

	hammer_lock_ex(&hmp->blkmap_lock);

	hammer_modify_buffer(trans, buffer2, layer2, sizeof(*layer2));

	/*
	 * Finalize some or all of the space covered by a current
	 * reservation.  An allocation in the same layer may have
	 * already assigned ownership.
	 */
	if (layer2->zone == 0) {
		hammer_modify_buffer(trans, buffer1,
				     layer1, sizeof(*layer1));
		--layer1->blocks_free;
		layer1->layer1_crc = crc32(layer1,
					   HAMMER_LAYER1_CRCSIZE);
		hammer_modify_buffer_done(buffer1);
		layer2->zone = zone;
		KKASSERT(layer2->bytes_free == HAMMER_LARGEBLOCK_SIZE);
		KKASSERT(layer2->append_off == 0);
		hammer_modify_volume_field(trans,
				trans->rootvol,
				vol0_stat_freebigblocks);
		--root_volume->ondisk->vol0_stat_freebigblocks;
		hmp->copy_stat_freebigblocks =
		   root_volume->ondisk->vol0_stat_freebigblocks;
		hammer_modify_volume_done(trans->rootvol);
	}
	if (layer2->zone != zone)
		kprintf("layer2 zone mismatch %d %d\n", layer2->zone, zone);
	KKASSERT(layer2->zone == zone);
	layer2->bytes_free -= bytes;
	if (resv)
		resv->flags &= ~HAMMER_RESF_LAYER2FREE;

	/*
	 * Finalizations can occur out of order, or combined with allocations.
	 * append_off must be set to the highest allocated offset.
	 */
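	/*
	 * For example, if two reservations were handed out at big-block
	 * offsets 0 and 4096 and the second record happens to be finalized
	 * first, append_off is already 4096 + bytes when the first record
	 * arrives here; taking the maximum below keeps append_off at the
	 * high-water mark instead of moving it backwards.
	 */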
	offset = ((int)zone_offset & HAMMER_LARGEBLOCK_MASK) + bytes;
	if (layer2->append_off < offset)
		layer2->append_off = offset;

	layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
	hammer_modify_buffer_done(buffer2);
	hammer_unlock(&hmp->blkmap_lock);

failed:
	if (buffer1)
		hammer_rel_buffer(buffer1, 0);
	if (buffer2)
		hammer_rel_buffer(buffer2, 0);
	return(error);
}

/*
 * Return the number of free bytes in the big-block containing the
 * specified blockmap offset.
 */
int
hammer_blockmap_getfree(hammer_mount_t hmp, hammer_off_t zone_offset,
			int *curp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t blockmap;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	int bytes;
	int zone;

	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp) {
		*curp = 0;
		return(0);
	}
	blockmap = &hmp->blockmap[zone];
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
	if (*errorp) {
		bytes = 0;
		goto failed;
	}
	KKASSERT(layer1->phys_offset);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 *
	 * (reuse buffer, layer1 pointer becomes invalid)
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);
	if (*errorp) {
		bytes = 0;
		goto failed;
	}
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}
	KKASSERT(layer2->zone == zone);

	bytes = layer2->bytes_free;

	if ((blockmap->next_offset ^ zone_offset) & ~HAMMER_LARGEBLOCK_MASK64)
		*curp = 0;
	else
		*curp = 1;
failed:
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	hammer_rel_volume(root_volume, 0);
	if (hammer_debug_general & 0x0800) {
		kprintf("hammer_blockmap_getfree: %016llx -> %d\n",
			zone_offset, bytes);
	}
	return(bytes);
}


/*
 * Lookup a blockmap offset.
 */
hammer_off_t
hammer_blockmap_lookup(hammer_mount_t hmp, hammer_off_t zone_offset,
		       int *errorp)
{
	hammer_volume_t root_volume;
	hammer_blockmap_t freemap;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_buffer_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t result_offset;
	hammer_off_t base_off;
	hammer_reserve_t resv;
	int zone;

	/*
	 * Calculate the zone-2 offset.
	 */
	zone = HAMMER_ZONE_DECODE(zone_offset);
	KKASSERT(zone >= HAMMER_ZONE_BTREE_INDEX && zone < HAMMER_MAX_ZONES);

	result_offset = (zone_offset & ~HAMMER_OFF_ZONE_MASK) |
			HAMMER_ZONE_RAW_BUFFER;
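	/*
	 * Only the zone field changes: for example, a zone-8 (B-tree) offset
	 * such as 0x8000000012345678 becomes the zone-2 raw-buffer offset
	 * 0x2000000012345678, assuming the usual 4-bit zone field in the
	 * top bits of a hammer_off_t.
	 */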

	/*
	 * We can actually stop here, normal blockmaps are now direct-mapped
	 * onto the freemap and so represent zone-2 addresses.
	 */
	if (hammer_verify_zone == 0) {
		*errorp = 0;
		return(result_offset);
	}

	/*
	 * Validate the allocation zone
	 */
	root_volume = hammer_get_root_volume(hmp, errorp);
	if (*errorp)
		return(0);
	freemap = &hmp->blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	KKASSERT(freemap->phys_offset != 0);

	/*
	 * Dive layer 1.
	 */
	layer1_offset = freemap->phys_offset +
			HAMMER_BLOCKMAP_LAYER1_OFFSET(zone_offset);
	layer1 = hammer_bread(hmp, layer1_offset, errorp, &buffer);
	if (*errorp)
		goto failed;
	KKASSERT(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);
	if (layer1->layer1_crc != crc32(layer1, HAMMER_LAYER1_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER1");
	}

	/*
	 * Dive layer 2, each entry represents a large-block.
	 */
	layer2_offset = layer1->phys_offset +
			HAMMER_BLOCKMAP_LAYER2_OFFSET(zone_offset);
	layer2 = hammer_bread(hmp, layer2_offset, errorp, &buffer);

	if (*errorp)
		goto failed;
	if (layer2->zone == 0) {
		base_off = (zone_offset & (~HAMMER_LARGEBLOCK_MASK64 & ~HAMMER_OFF_ZONE_MASK)) | HAMMER_ZONE_RAW_BUFFER;
		resv = RB_LOOKUP(hammer_res_rb_tree, &hmp->rb_resv_root,
				 base_off);
		KKASSERT(resv && resv->zone == zone);

	} else if (layer2->zone != zone) {
		panic("hammer_blockmap_lookup: bad zone %d/%d\n",
			layer2->zone, zone);
	}
	if (layer2->entry_crc != crc32(layer2, HAMMER_LAYER2_CRCSIZE)) {
		Debugger("CRC FAILED: LAYER2");
	}

failed:
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	hammer_rel_volume(root_volume, 0);
	if (hammer_debug_general & 0x0800) {
		kprintf("hammer_blockmap_lookup: %016llx -> %016llx\n",
			zone_offset, result_offset);
	}
	return(result_offset);
}


/*
 * Check space availability
 */
int
hammer_checkspace(hammer_mount_t hmp, int slop)
{
	const int in_size = sizeof(struct hammer_inode_data) +
			    sizeof(union hammer_btree_elm);
	const int rec_size = (sizeof(union hammer_btree_elm) * 2);
	int64_t usedbytes;

	usedbytes = hmp->rsv_inodes * in_size +
		    hmp->rsv_recs * rec_size +
		    hmp->rsv_databytes +
		    ((int64_t)hmp->rsv_fromdelay << HAMMER_LARGEBLOCK_BITS) +
		    ((int64_t)hidirtybufspace << 2) +
		    (slop << HAMMER_LARGEBLOCK_BITS);
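	/*
	 * Rough worked example, assuming 8MB big-blocks
	 * (HAMMER_LARGEBLOCK_BITS == 23): with rsv_fromdelay = 4 and
	 * slop = 8 those two terms alone contribute 32MB + 64MB of pending
	 * usage.  The comparison below converts usedbytes back into
	 * big-blocks (>> HAMMER_LARGEBLOCK_BITS) so it can be checked
	 * against the cached free big-block count.
	 */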

	hammer_count_extra_space_used = usedbytes;	/* debugging */

	if (hmp->copy_stat_freebigblocks >=
	    (usedbytes >> HAMMER_LARGEBLOCK_BITS)) {
		return(0);
	}
	return (ENOSPC);
}