xref: /dragonfly/sys/vfs/hammer2/hammer2_freemap.c (revision dc71b7ab)
1 /*
2  * Copyright (c) 2011-2013 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@dragonflybsd.org>
6  * by Venkatesh Srinivas <vsrinivas@dragonflybsd.org>
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  *
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in
16  *    the documentation and/or other materials provided with the
17  *    distribution.
18  * 3. Neither the name of The DragonFly Project nor the names of its
19  *    contributors may be used to endorse or promote products derived
20  *    from this software without specific, prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
23  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
24  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
25  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
26  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
27  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
28  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
29  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
30  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
31  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
32  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33  * SUCH DAMAGE.
34  */
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/fcntl.h>
39 #include <sys/buf.h>
40 #include <sys/proc.h>
41 #include <sys/namei.h>
42 #include <sys/mount.h>
43 #include <sys/vnode.h>
44 #include <sys/mountctl.h>
45 
46 #include "hammer2.h"
47 
48 #define USE_SIMPLE_ALLOC	0
49 
50 #if !USE_SIMPLE_ALLOC
51 
52 static int hammer2_freemap_try_alloc(hammer2_trans_t *trans,
53 			hammer2_chain_t **parentp, hammer2_blockref_t *bref,
54 			int radix, hammer2_off_t bpref, hammer2_off_t *bnext);
55 static int hammer2_freemap_iterate(hammer2_trans_t *trans,
56 			hammer2_chain_t **parentp, hammer2_chain_t **chainp,
57 			hammer2_off_t bpref, hammer2_off_t *bnextp);
58 
59 #endif
60 
61 /*
62  * Calculate the device offset for the specified FREEMAP_NODE or FREEMAP_LEAF
63  * bref.  Return a combined media offset and physical size radix.  Freemap
64  * chains use fixed storage offsets in the 4MB reserved area at the
65  * beginning of each 2GB zone
66  *
67  * Rotate between four possibilities.  Theoretically this means we have three
68  * good freemaps in case of a crash which we can use as a base for the fixup
69  * scan at mount-time.
70  */
71 #define H2FMBASE(key, radix)	((key) & ~(((hammer2_off_t)1 << (radix)) - 1))
72 #define H2FMSHIFT(radix)	((hammer2_off_t)1 << (radix))
73 
74 static
75 int
76 hammer2_freemap_reserve(hammer2_mount_t *hmp, hammer2_blockref_t *bref,
77 			int radix)
78 {
79 	hammer2_off_t off;
80 	size_t bytes;
81 
82 	/*
83 	 * Physical allocation size -> radix.  Typically either 256 for
84 	 * a level 0 freemap leaf or 65536 for a level N freemap node.
85 	 *
86 	 * NOTE: A 256 byte bitmap represents 256 x 8 x 1024 = 2MB of storage.
87 	 *	 Do not use hammer2_allocsize() here as it has a min cap.
88 	 */
89 	bytes = 1 << radix;
90 
91 	/*
92 	 * Adjust by HAMMER2_ZONE_FREEMAP_{A,B,C,D} using the existing
93 	 * offset as a basis.
94 	 */
95 	if ((bref->data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) {
96 		off = HAMMER2_ZONE_FREEMAP_A;
97 	} else {
98 		off = HAMMER2_ZONE_FREEMAP_A;
99 #if 0
100 		off = bref->data_off & ~HAMMER2_OFF_MASK_RADIX &
101 		      (((hammer2_off_t)1 << HAMMER2_FREEMAP_LEVEL1_RADIX) - 1);
102 		off = off / HAMMER2_PBUFSIZE;
103 		KKASSERT(off >= HAMMER2_ZONE_FREEMAP_A);
104 		KKASSERT(off < HAMMER2_ZONE_FREEMAP_D + 8);
105 
106 		if (off >= HAMMER2_ZONE_FREEMAP_D)
107 			off = HAMMER2_ZONE_FREEMAP_A;
108 		else if (off >= HAMMER2_ZONE_FREEMAP_C)
109 			off = HAMMER2_ZONE_FREEMAP_D;
110 		else if (off >= HAMMER2_ZONE_FREEMAP_B)
111 			off = HAMMER2_ZONE_FREEMAP_C;
112 		else
113 			off = HAMMER2_ZONE_FREEMAP_B;
114 #endif
115 	}
116 	off = off * HAMMER2_PBUFSIZE;
117 
118 	/*
119 	 * Calculate the block offset of the reserved block.  This will
120 	 * point into the 4MB reserved area at the base of the appropriate
121 	 * 2GB zone, once added to the FREEMAP_x selection above.
122 	 */
123 	switch(bref->keybits) {
124 	/* case HAMMER2_FREEMAP_LEVEL5_RADIX: not applicable */
125 	case HAMMER2_FREEMAP_LEVEL4_RADIX:	/* 2EB */
126 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
127 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
128 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL4_RADIX) +
129 		       HAMMER2_ZONEFM_LEVEL4 * HAMMER2_PBUFSIZE;
130 		break;
131 	case HAMMER2_FREEMAP_LEVEL3_RADIX:	/* 2PB */
132 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
133 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
134 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL3_RADIX) +
135 		       HAMMER2_ZONEFM_LEVEL3 * HAMMER2_PBUFSIZE;
136 		break;
137 	case HAMMER2_FREEMAP_LEVEL2_RADIX:	/* 2TB */
138 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
139 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
140 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL2_RADIX) +
141 		       HAMMER2_ZONEFM_LEVEL2 * HAMMER2_PBUFSIZE;
142 		break;
143 	case HAMMER2_FREEMAP_LEVEL1_RADIX:	/* 2GB */
144 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE);
145 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVELN_PSIZE);
146 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
147 		       HAMMER2_ZONEFM_LEVEL1 * HAMMER2_PBUFSIZE;
148 		break;
149 	case HAMMER2_FREEMAP_LEVEL0_RADIX:	/* 2MB (256 byte bitmap) */
150 		/*
151 		 * Terminal bitmap, start with 2GB base, then offset by
152 		 * 256 bytes of device offset per 2MB of logical space
153 		 * (8 bits per byte, 1024 byte allocation chunking).
154 		 */
155 		KKASSERT(bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF);
156 		KKASSERT(bytes == HAMMER2_FREEMAP_LEVEL0_PSIZE);
157 		off += H2FMBASE(bref->key, HAMMER2_FREEMAP_LEVEL1_RADIX) +
158 		       HAMMER2_ZONEFM_LEVEL0 * HAMMER2_PBUFSIZE;
159 
160 		off += ((bref->key >> HAMMER2_FREEMAP_LEVEL0_RADIX) &
161 		        ((1 << (HAMMER2_FREEMAP_LEVEL1_RADIX -
162 			       HAMMER2_FREEMAP_LEVEL0_RADIX)) - 1)) *
163 			HAMMER2_FREEMAP_LEVEL0_PSIZE;
164 		break;
165 	default:
166 		panic("freemap: bad radix(2) %p %d\n", bref, bref->keybits);
167 		/* NOT REACHED */
168 		break;
169 	}
170 	bref->data_off = off | radix;
171 	return (0);
172 }
173 
174 /*
175  * Allocate media space, returning a combined data offset and radix.
176  * THIS ROUTINE IS USE FOR DEBUGGING ONLY.
177  *
178  * This version of the routine is ONLY usable for testing and debug
179  * purposes and only if the filesystem never instantiated an actual
180  * freemap.  It uses the initial allocation iterator that newfs_hammer2
181  * used to build the filesystem to allocate new space and is not capable
182  * of dealing with freed space.
183  */
184 #if USE_SIMPLE_ALLOC
185 
186 static
187 int
188 hammer2_freemap_simple_alloc(hammer2_mount_t *hmp, hammer2_blockref_t *bref,
189 			     int radix)
190 {
191 	hammer2_off_t data_off;
192 	hammer2_off_t data_next;
193 	hammer2_freecache_t *fc;
194 	/*struct buf *bp;*/
195 	size_t bytes;
196 	int fctype;
197 
198 	bytes = (size_t)(1 << radix);
199 	KKASSERT(bytes >= HAMMER2_MIN_ALLOC &&
200 		 bytes <= HAMMER2_MAX_ALLOC);
201 
202 	/*
203 	 * Must not be used if the filesystem is using a real freemap.
204 	 */
205 	KKASSERT(hmp->voldata.freemap_blockset.blockref[0].data_off == 0);
206 
207 	switch(bref->type) {
208 	case HAMMER2_BREF_TYPE_INODE:
209 		fctype = HAMMER2_FREECACHE_INODE;
210 		break;
211 	case HAMMER2_BREF_TYPE_INDIRECT:
212 		fctype = HAMMER2_FREECACHE_INODE;
213 		break;
214 	case HAMMER2_BREF_TYPE_DATA:
215 		fctype = HAMMER2_FREECACHE_DATA;
216 		break;
217 	default:
218 		fctype = HAMMER2_FREECACHE_DATA;
219 		break;
220 	}
221 
222 	if (radix <= HAMMER2_MAX_RADIX)
223 		fc = &hmp->freecache[fctype][radix];
224 	else
225 		fc = NULL;
226 
227 	lockmgr(&hmp->alloclk, LK_EXCLUSIVE);
228 	if (fc && fc->single) {
229 		/*
230 		 * Allocate from our single-block cache.
231 		 */
232 		data_off = fc->single;
233 		fc->single = 0;
234 	} else if (fc && fc->bulk) {
235 		/*
236 		 * Allocate from our packing cache.
237 		 */
238 		data_off = fc->bulk;
239 		fc->bulk += bytes;
240 		if ((fc->bulk & HAMMER2_SEGMASK) == 0)
241 			fc->bulk = 0;
242 	} else {
243 		/*
244 		 * Allocate from the allocation iterator using a SEGSIZE
245 		 * aligned block and reload the packing cache if possible.
246 		 *
247 		 * Skip reserved areas at the beginning of each zone.
248 		 */
249 		hammer2_voldata_lock(hmp);
250 		data_off = hmp->voldata.allocator_beg;
251 		data_off = (data_off + HAMMER2_SEGMASK64) & ~HAMMER2_SEGMASK64;
252 		if ((data_off & HAMMER2_ZONE_MASK64) < HAMMER2_ZONE_SEG) {
253 			KKASSERT((data_off & HAMMER2_ZONE_MASK64) == 0);
254 			data_off += HAMMER2_ZONE_SEG64;
255 		}
256 		data_next = data_off + bytes;
257 
258 		if ((data_next & HAMMER2_SEGMASK) == 0) {
259 			hmp->voldata.allocator_beg = data_next;
260 		} else {
261 			KKASSERT(radix <= HAMMER2_MAX_RADIX);
262 			hmp->voldata.allocator_beg =
263 					(data_next + HAMMER2_SEGMASK64) &
264 					~HAMMER2_SEGMASK64;
265 			fc->bulk = data_next;
266 		}
267 		hammer2_voldata_unlock(hmp, 1);
268 	}
269 	lockmgr(&hmp->alloclk, LK_RELEASE);
270 
271 	bref->data_off = data_off | radix;
272 	return (0);
273 }
274 
275 #endif
276 
277 /*
278  * Normal freemap allocator
279  *
280  * Use available hints to allocate space using the freemap.  Create missing
281  * freemap infrastructure on-the-fly as needed (including marking initial
282  * allocations using the iterator as allocated, instantiating new 2GB zones,
283  * and dealing with the end-of-media edge case).
284  *
285  * ip and bpref are only used as a heuristic to determine locality of
286  * reference.  bref->key may also be used heuristically.
287  */
288 int
289 hammer2_freemap_alloc(hammer2_trans_t *trans,
290 		      hammer2_blockref_t *bref, size_t bytes)
291 {
292 	hammer2_mount_t *hmp = trans->hmp;
293 	hammer2_chain_t *parent;
294 	hammer2_off_t bpref;
295 	hammer2_off_t bnext;
296 	int radix;
297 	int error;
298 
299 	/*
300 	 * Validate the allocation size.  It must be a power of 2.
301 	 */
302 	radix = hammer2_getradix(bytes);
303 	KKASSERT((size_t)1 << radix == bytes);
304 
305 	/*
306 	 * Freemap elements are assigned from the reserve area.
307 	 * Note that FREEMAP_LEVEL0_PSIZE is 256 bytes which is
308 	 * allowed for this case.
309 	 */
310 	if (bref->type == HAMMER2_BREF_TYPE_FREEMAP_NODE ||
311 	    bref->type == HAMMER2_BREF_TYPE_FREEMAP_LEAF) {
312 		return(hammer2_freemap_reserve(hmp, bref, radix));
313 	}
314 #if USE_SIMPLE_ALLOC
315 	return (hammer2_freemap_simple_alloc(hmp, bref, radix));
316 #else
317 
318 	/*
319 	 * Calculate actual allocation in bytes, and radix.  This ensures
320 	 * a minimum 1KB allocation.
321 	 */
322 	KKASSERT(bytes >= HAMMER2_MIN_ALLOC &&
323 		 bytes <= HAMMER2_MAX_ALLOC);
324 
325 #if 0
326 	/*
327 	 * Calculate starting point
328 	 */
329 	if (bref->data_off & ~HAMMER2_OFF_MASK_RADIX)
330 		bpref = bref->data_off & ~HAMMER2_OFF_MASK_RADIX;
331 	else if (trans->tmp_bpref)
332 		bpref = trans->tmp_bpref;
333 	else if (trans->tmp_ip)
334 		bpref = trans->tmp_ip->chain->bref.data_off;
335 	else
336 #endif
337 		bpref = hmp->heur_last_alloc;	/* SMP race ok, heuristic */
338 
339 	/*
340 	 * Make sure bpref is in-bounds.  It's ok if bpref covers a zone's
341 	 * reserved area, the try code will iterate past it.
342 	 */
343 	if (bpref > hmp->voldata.volu_size)
344 		bpref = hmp->voldata.volu_size - 1;
345 
346 	/*
347 	 * Iterate the freemap looking for free space before and after.
348 	 */
349 	parent = &hmp->fchain;
350 	hammer2_chain_lock(parent, HAMMER2_RESOLVE_ALWAYS);
351 	error = EAGAIN;
352 	bnext = bpref;
353 
354 	while (error == EAGAIN) {
355 		error = hammer2_freemap_try_alloc(trans, &parent, bref,
356 						  radix, bpref, &bnext);
357 	}
358 	hmp->heur_last_alloc = bnext;	/* XXX */
359 	hammer2_chain_unlock(parent);
360 
361 	return (error);
362 #endif
363 }
364 
365 #if !USE_SIMPLE_ALLOC
366 
367 /*
368  * Attempt to allocate (1 << radix) bytes from the freemap at bnext.
369  * Return 0 on success with the bref appropriately updated, non-zero
370  * on failure.  Updates bnextp and returns EAGAIN to continue the
371  * iteration.
372  *
373  * This function will create missing freemap infrastructure as well as
374  * properly initialize reserved areas as already having been allocated.
375  */
376 static __inline
377 int
378 countbits(uint64_t *data)
379 {
380 	int i;
381 	int r = 0;
382 	uint64_t v;
383 
384 	for (i = 0; i < 32; ++i) {
385 		v = data[i];
386 		while (v) {
387 			if (v & 1)
388 				++r;
389 			v >>= 1;
390 		}
391 	}
392 	return(r);
393 }
394 
395 static int
396 hammer2_freemap_try_alloc(hammer2_trans_t *trans, hammer2_chain_t **parentp,
397 			  hammer2_blockref_t *bref, int radix,
398 			  hammer2_off_t bpref, hammer2_off_t *bnextp)
399 {
400 	hammer2_mount_t *hmp = trans->hmp;
401 	hammer2_off_t l0mask;
402 	hammer2_off_t l0size;
403 	hammer2_chain_t *chain;
404 	hammer2_off_t key;
405 	hammer2_off_t tmp;
406 	size_t bytes;
407 	uint64_t mask;
408 	uint64_t tmp_mask;
409 	uint64_t *data;
410 	int error = 0;
411 	int bits;
412 	int index;
413 	int count;
414 	int subindex;
415 
416 	/*
417 	 * Calculate the number of bytes being allocated, the number
418 	 * of contiguous bits of bitmap being allocated, and the bitmap
419 	 * mask.
420 	 *
421 	 * WARNING! cpu hardware may mask bits == 64 -> 0 and blow up the
422 	 *	    mask calculation.
423 	 */
424 	bytes = (size_t)1 << radix;
425 	bits = 1 << (radix - HAMMER2_MIN_RADIX);
426 	mask = (bits == 64) ? (uint64_t)-1 : (((uint64_t)1 << bits) - 1);
427 
428 	/*
429 	 * Lookup the level0 freemap chain, creating and initializing one
430 	 * if necessary.  Intermediate levels will be created automatically
431 	 * when necessary by hammer2_chain_create().
432 	 */
433 	key = H2FMBASE(*bnextp, HAMMER2_FREEMAP_LEVEL0_RADIX);
434 	l0mask = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX) - 1;
435 	l0size = H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
436 
437 	chain = hammer2_chain_lookup(parentp, key, key + l0mask,
438 				     HAMMER2_LOOKUP_FREEMAP |
439 				     HAMMER2_LOOKUP_ALWAYS |
440 				     HAMMER2_LOOKUP_MATCHIND/*XXX*/);
441 	if (chain == NULL) {
442 		/*
443 		 * Create the missing leaf, be sure to initialize
444 		 * the auxillary freemap tracking information in
445 		 * the bref.check.freemap structure.
446 		 */
447 #if 0
448 		kprintf("freemap create L0 @ %016jx bpref %016jx\n",
449 			key, bpref);
450 #endif
451 		error = hammer2_chain_create(trans, parentp, &chain,
452 				     key, HAMMER2_FREEMAP_LEVEL0_RADIX,
453 				     HAMMER2_BREF_TYPE_FREEMAP_LEAF,
454 				     HAMMER2_FREEMAP_LEVEL0_PSIZE);
455 		if (error == 0) {
456 			hammer2_chain_modify(trans, &chain, 0);
457 			bzero(chain->data->bmdata.array,
458 			      HAMMER2_FREEMAP_LEVEL0_PSIZE);
459 			chain->bref.check.freemap.biggest =
460 					HAMMER2_FREEMAP_LEVEL0_RADIX;
461 			chain->bref.check.freemap.avail = l0size;
462 
463 			/*
464 			 * Preset bitmap for existing static allocations.
465 			 * 64-bit-align so we don't have to worry about
466 			 * endian for the memset().
467 			 */
468 			tmp = (hmp->voldata.allocator_beg +
469 			       HAMMER2_MAX_ALLOC - 1) &
470 			      ~(hammer2_off_t)(HAMMER2_MAX_ALLOC - 1);
471 			if (key < tmp) {
472 				if (key + l0size <= tmp) {
473 					memset(chain->data->bmdata.array, -1,
474 					       l0size / HAMMER2_MIN_ALLOC / 8);
475 					chain->bref.check.freemap.avail = 0;
476 				} else {
477 					count = (tmp - key) / HAMMER2_MIN_ALLOC;
478 					kprintf("Init L0 BASE %d\n", count);
479 					memset(chain->data->bmdata.array, -1,
480 					       count / 8);
481 					chain->bref.check.freemap.avail -=
482 						count * HAMMER2_MIN_ALLOC;
483 				}
484 			}
485 
486 			/*
487 			 * Preset bitmap for reserved area.  Calculate
488 			 * 2GB base.
489 			 */
490 			tmp = H2FMBASE(key, HAMMER2_FREEMAP_LEVEL1_RADIX);
491 			if (key - tmp < HAMMER2_ZONE_SEG) {
492 				memset(chain->data->bmdata.array, -1,
493 				       l0size / HAMMER2_MIN_ALLOC / 8);
494 				chain->bref.check.freemap.avail = 0;
495 			}
496 
497 			/*
498 			 * Preset bitmap for end of media
499 			 */
500 			if (key >= trans->hmp->voldata.volu_size) {
501 				memset(chain->data->bmdata.array, -1,
502 				       l0size / HAMMER2_MIN_ALLOC / 8);
503 				chain->bref.check.freemap.avail = 0;
504 			}
505 		}
506 	} else if (chain->bref.check.freemap.biggest < radix) {
507 		/*
508 		 * Already flagged as not having enough space
509 		 */
510 		error = ENOSPC;
511 	} else {
512 		/*
513 		 * Modify existing chain to setup for adjustment.
514 		 */
515 		hammer2_chain_modify(trans, &chain, 0);
516 	}
517 	if (error)
518 		goto skip;
519 
520 	/*
521 	 * Calculate mask and count.  Each bit represents 1KB and (currently)
522 	 * the maximum allocation is 65536 bytes.  Allocations are always
523 	 * natively aligned.
524 	 */
525 	count = HAMMER2_FREEMAP_LEVEL0_PSIZE / sizeof(uint64_t); /* 32 */
526 	data = &chain->data->bmdata.array[0];
527 
528 	tmp_mask = 0; /* avoid compiler warnings */
529 	subindex = 0; /* avoid compiler warnings */
530 
531 	/*
532 	 * Allocate data and meta-data from the beginning and inodes
533 	 * from the end.
534 	 */
535 	if (bref->type != HAMMER2_BREF_TYPE_INODE) {
536 		for (index = 0; index < count; ++index) {
537 			if (data[index] == (uint64_t)-1) /* all allocated */
538 				continue;
539 			tmp_mask = mask;		 /* iterate */
540 			for (subindex = 0; subindex < 64; subindex += bits) {
541 				if ((data[index] & tmp_mask) == 0)
542 					break;
543 				tmp_mask <<= bits;
544 			}
545 			if (subindex != 64) {
546 				key += HAMMER2_MIN_ALLOC * 64 * index;
547 				key += HAMMER2_MIN_ALLOC * subindex;
548 				break;
549 			}
550 		}
551 		if (index == count)
552 			error = ENOSPC;
553 	} else {
554 		for (index = count - 1; index >= 0; --index) {
555 			if (data[index] == (uint64_t)-1) /* all allocated */
556 				continue;
557 			tmp_mask = mask << (64 - bits);
558 			for (subindex = 64 - bits;
559 			     subindex >= 0;
560 			     subindex -= bits) {
561 				if ((data[index] & tmp_mask) == 0)
562 					break;
563 				tmp_mask >>= bits;
564 			}
565 			if (subindex != -bits) {
566 				key += HAMMER2_MIN_ALLOC * 64 * index;
567 				key += HAMMER2_MIN_ALLOC * subindex;
568 				break;
569 			}
570 		}
571 		if (index == -1)
572 			error = ENOSPC;
573 	}
574 
575 skip:
576 	if (error == 0) {
577 		/*
578 		 * Assert validity.  Must be beyond the static allocator used
579 		 * by newfs_hammer2 (and thus also beyond the aux area),
580 		 * not go past the volume size, and must not be in the
581 		 * reserved segment area for a zone.
582 		 */
583 		int prebuf;
584 
585 		KKASSERT(key >= hmp->voldata.allocator_beg &&
586 			 key + bytes <= hmp->voldata.volu_size);
587 		KKASSERT((key & HAMMER2_ZONE_MASK64) >= HAMMER2_ZONE_SEG);
588 
589 		/*
590 		 * Modify the chain and set the bitmap appropriately.
591 		 *
592 		 * Determine if we can massage the buffer cache buffer
593 		 * to avoid a read.  If the allocation is smaller than
594 		 * the minimum IO size we look at the bitmap mask covering
595 		 * the allocation at the minimum IO size.  If it is
596 		 * unallocated we instantiate and clear the buffer which
597 		 * marks it B_CACHE and validates it without issuing a read.
598 		 *
599 		 * For allocation requests >= MINIOSIZE other code will deal
600 		 * with the read-avoidance when the chain is locked.
601 		 */
602 		prebuf = 0;
603 		hammer2_chain_modify(trans, &chain, 0);
604 		data = &chain->data->bmdata.array[0];
605 		if (radix < HAMMER2_MINIORADIX) {
606 			uint64_t iomask;
607 			int iobmradix = HAMMER2_MINIORADIX - HAMMER2_MIN_RADIX;
608 			int ioindex;
609 			int iobmskip = 1 << iobmradix;
610 
611 			iomask = ((uint64_t)1 << iobmskip) - 1;
612 			for (ioindex = 0; ioindex < 64; ioindex += iobmskip) {
613 				if (tmp_mask & iomask) {
614 					if ((data[index] & iomask) == 0)
615 						prebuf = 1;
616 					break;
617 				}
618 				iomask <<= iobmskip;
619 			}
620 		}
621 
622 		KKASSERT((data[index] & tmp_mask) == 0);
623 		data[index] |= tmp_mask;
624 
625 		/*
626 		 * We return the allocated space in bref->data_off.
627 		 */
628 		*bnextp = key;
629 		bref->data_off = key | radix;
630 
631 		if (prebuf) {
632 			struct buf *bp;
633 			hammer2_off_t pbase;
634 
635 			pbase = key & ~(hammer2_off_t)(HAMMER2_MINIOSIZE - 1);
636 
637 			bp = getblk(hmp->devvp, pbase,
638 				    HAMMER2_MINIOSIZE, GETBLK_NOWAIT, 0);
639 			if (bp) {
640 				if ((bp->b_flags & B_CACHE) == 0)
641 					vfs_bio_clrbuf(bp);
642 				bqrelse(bp);
643 			}
644 		}
645 
646 #if 0
647 		kprintf("alloc cp=%p %016jx %016jx using %016jx chain->data %d\n",
648 			chain,
649 			bref->key, bref->data_off, chain->bref.data_off,
650 			countbits(data));
651 #endif
652 	} else if (error == ENOSPC) {
653 		/*
654 		 * Return EAGAIN with next iteration in *bnextp, or
655 		 * return ENOSPC if the allocation map has been exhausted.
656 		 */
657 		if (chain->bref.check.freemap.biggest > radix)
658 			chain->bref.check.freemap.biggest = radix;
659 		error = hammer2_freemap_iterate(trans, parentp, &chain,
660 						bpref, bnextp);
661 	}
662 
663 	/*
664 	 * Cleanup
665 	 */
666 	if (chain)
667 		hammer2_chain_unlock(chain);
668 	return (error);
669 }
670 
671 #if 0
672 	/*
673 	 * When making meta-data allocations smaller than LBUFSIZE we will
674 	 * use a LBUFSIZE'd buffer.  The first chunk allocated from such a
675 	 * buffer instantiates a device buffer and marks it clean to avoid
676 	 * unnecessary read-before-write ops.  XXX buffer cache buffer
677 	 * sharing.  XXX mixed data/meta-data issues.
678 	 */
679 	if (bytes < HAMMER2_MINIOSIZE &&
680 	    (data_off & (HAMMER2_MINIOSIZE - 1)) == 0 &&
681 	    (bitmap shows this is the initial allocation)) {
682 		bp = getblk(hmp->devvp, data_off, HAMMER2_MINIOSIZE, 0, 0);
683 		bp->b_flags |= B_CACHE;
684 		bp->b_resid = 0;
685 		bqrelse(bp);
686 	}
687 #endif
688 
689 static int
690 hammer2_freemap_iterate(hammer2_trans_t *trans, hammer2_chain_t **parentp,
691 			hammer2_chain_t **chainp,
692 			hammer2_off_t bpref, hammer2_off_t *bnextp)
693 {
694 	*bnextp += H2FMSHIFT(HAMMER2_FREEMAP_LEVEL0_RADIX);
695 	if (*bnextp >= trans->hmp->voldata.volu_size)
696 		return (ENOSPC);
697 	return(EAGAIN);
698 }
699 
700 #endif
701 
702 #if 0
703 
704 void
705 hammer2_freemap_free(hammer2_mount_t *hmp, hammer2_off_t data_off, int type)
706 {
707 	hammer2_freecache_t *fc;
708 	int radix;
709 	int fctype;
710 
711 	switch(type) {
712 	case HAMMER2_BREF_TYPE_INODE:
713 		fctype = HAMMER2_FREECACHE_INODE;
714 		break;
715 	case HAMMER2_BREF_TYPE_INDIRECT:
716 		fctype = HAMMER2_FREECACHE_INODE;
717 		break;
718 	case HAMMER2_BREF_TYPE_DATA:
719 		fctype = HAMMER2_FREECACHE_DATA;
720 		break;
721 	default:
722 		fctype = HAMMER2_FREECACHE_DATA;
723 		break;
724 	}
725 	radix = (int)data_off & HAMMER2_OFF_MASK_RADIX;
726 	data_off &= ~HAMMER2_OFF_MASK_RADIX;
727 	if (radix >= HAMMER2_MAX_RADIX)
728 		return;
729 
730 	fc = &hmp->freecache[fctype][radix];
731 	if (fc->single == 0) {
732 		lockmgr(&hmp->alloclk, LK_EXCLUSIVE);
733 		fc->single = data_off;
734 		lockmgr(&hmp->alloclk, LK_RELEASE);
735 	}
736 }
737 
738 #endif
739 
740 #if 0
741 /*
742  * Allocate media space, returning a combined data offset and radix.
743  * Also return the related (device) buffer cache buffer.
744  */
745 hammer2_off_t
746 hammer2_freemap_alloc_bp(hammer2_mount_t *hmp, size_t bytes, struct buf **bpp)
747 {
748 }
749 
750 #endif
751