/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/diskslice.h>
#include <sys/diskmbr.h>

#include "hammer_util.h"

static void check_volume(volume_info_t volume);
static void get_buffer_readahead(buffer_info_t base);
static __inline int readhammervol(volume_info_t volume);
static __inline int readhammerbuf(buffer_info_t buffer);
static __inline int writehammervol(volume_info_t volume);
static __inline int writehammerbuf(buffer_info_t buffer);

uuid_t Hammer_FSType;
uuid_t Hammer_FSId;
int UseReadBehind = -4;
int UseReadAhead = 4;
int DebugOpt;
uint32_t HammerVersion = -1;

TAILQ_HEAD(volume_list, volume_info);
static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
static int valid_hammer_volumes;

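/*
 * Hash a zone-2 buffer offset into an index for the per-volume
 * buffer_lists[] array.
 */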
static __inline
int
buffer_hash(hammer_off_t zone2_offset)
{
	int hi;

	hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
	return(hi);
}

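/*
 * Locate a cached buffer structure for the given zone-2 offset.
 * Returns NULL if the buffer is not currently cached.
 */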
static
buffer_info_t
find_buffer(hammer_off_t zone2_offset)
{
	volume_info_t volume;
	buffer_info_t buffer;
	int hi;

	volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
	assert(volume);

	hi = buffer_hash(zone2_offset);
	TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry) {
		if (buffer->zone2_offset == zone2_offset)
			return(buffer);
	}
	return(NULL);
}

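/*
 * Allocate a volume structure, open the backing file or device and
 * run basic checks on it.  The ondisk header is allocated but not
 * yet read in or initialized.
 */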
static
volume_info_t
__alloc_volume(const char *volname, int oflags)
{
	volume_info_t volume;
	int i;

	volume = calloc(1, sizeof(*volume));
	volume->vol_no = -1;
	volume->rdonly = (oflags == O_RDONLY);
	volume->name = strdup(volname);
	volume->fd = open(volume->name, oflags);
	if (volume->fd < 0) {
		err(1, "alloc_volume: Failed to open %s", volume->name);
		/* not reached */
	}
	check_volume(volume);

	volume->ondisk = calloc(1, HAMMER_BUFSIZE);

	for (i = 0; i < HAMMER_BUFLISTS; ++i)
		TAILQ_INIT(&volume->buffer_lists[i]);

	return(volume);
}

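/*
 * Add a volume to the global volume list, guarding against duplicate
 * volume numbers and against the same file or device being specified
 * more than once.
 */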
static
void
__add_volume(const volume_info_t volume)
{
	volume_info_t scan;
	struct stat st1, st2;

	if (fstat(volume->fd, &st1) != 0) {
		errx(1, "add_volume: %s: Failed to stat", volume->name);
		/* not reached */
	}

	TAILQ_FOREACH(scan, &VolList, entry) {
		if (scan->vol_no == volume->vol_no) {
			errx(1, "add_volume: %s: Duplicate volume number %d "
				"against %s",
				volume->name, volume->vol_no, scan->name);
			/* not reached */
		}
		if (fstat(scan->fd, &st2) != 0) {
			errx(1, "add_volume: %s: Failed to stat %s",
				volume->name, scan->name);
			/* not reached */
		}
		if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
			errx(1, "add_volume: %s: Specified more than once",
				volume->name);
			/* not reached */
		}
	}

	TAILQ_INSERT_TAIL(&VolList, volume, entry);
}

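/*
 * Verify that the ondisk header describes a valid HAMMER volume
 * belonging to this filesystem.
 */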
static
void
__verify_volume(const volume_info_t volume)
{
	hammer_volume_ondisk_t ondisk = volume->ondisk;

	if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
		errx(1, "verify_volume: Invalid volume signature %016jx",
			ondisk->vol_signature);
		/* not reached */
	}
	if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) {
		errx(1, "verify_volume: Invalid root volume# %d",
			ondisk->vol_rootvol);
		/* not reached */
	}
	if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType))) {
		errx(1, "verify_volume: %s: Header does not indicate "
			"that this is a HAMMER volume", volume->name);
		/* not reached */
	}
	if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId))) {
		errx(1, "verify_volume: %s: FSId does not match other volumes!",
			volume->name);
		/* not reached */
	}
	if (ondisk->vol_version < HAMMER_VOL_VERSION_MIN ||
	    ondisk->vol_version >= HAMMER_VOL_VERSION_WIP) {
		errx(1, "verify_volume: %s: Invalid volume version %u",
			volume->name, ondisk->vol_version);
		/* not reached */
	}
}

/*
 * Initialize a volume structure and ondisk vol_no field.
 */
volume_info_t
init_volume(const char *filename, int oflags, int32_t vol_no)
{
	volume_info_t volume;

	volume = __alloc_volume(filename, oflags);
	volume->vol_no = volume->ondisk->vol_no = vol_no;

	__add_volume(volume);

	return(volume);
}

/*
 * Initialize a volume structure and read ondisk volume header.
 */
volume_info_t
load_volume(const char *filename, int oflags, int verify_volume)
{
	volume_info_t volume;
	int n;

	volume = __alloc_volume(filename, oflags);

	n = readhammervol(volume);
	if (n == -1) {
		err(1, "load_volume: %s: Read failed at offset 0",
		    volume->name);
		/* not reached */
	}
	volume->vol_no = volume->ondisk->vol_no;
	if (volume->vol_no == HAMMER_ROOT_VOLNO)
		HammerVersion = volume->ondisk->vol_version;

	if (valid_hammer_volumes++ == 0)
		Hammer_FSId = volume->ondisk->vol_fsid;
	if (verify_volume)
		__verify_volume(volume);

	__add_volume(volume);

	return(volume);
}

/*
 * Check basic volume characteristics.
 */
static
void
check_volume(volume_info_t volume)
{
	struct partinfo pinfo;
	struct stat st;

	/*
	 * Allow the formatting of block devices or regular files
	 */
	if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) {
		if (fstat(volume->fd, &st) < 0) {
			err(1, "Unable to stat %s", volume->name);
			/* not reached */
		}
		if (S_ISREG(st.st_mode)) {
			volume->size = st.st_size;
			volume->type = "REGFILE";
		} else {
			errx(1, "Unsupported file type for %s", volume->name);
			/* not reached */
		}
	} else {
		/*
		 * When formatting a block device as a HAMMER volume the
		 * sector size must be compatible.  HAMMER uses 16384 byte
		 * filesystem buffers.
		 */
		if (pinfo.reserved_blocks) {
			errx(1, "HAMMER cannot be placed in a partition "
				"which overlaps the disklabel or MBR");
			/* not reached */
		}
		if (pinfo.media_blksize > HAMMER_BUFSIZE ||
		    HAMMER_BUFSIZE % pinfo.media_blksize) {
			errx(1, "A media sector size of %d is not supported",
			     pinfo.media_blksize);
			/* not reached */
		}

		volume->size = pinfo.media_size;
		volume->device_offset = pinfo.media_offset;
		volume->type = "DEVICE";
	}
}

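/*
 * Returns non-zero if the volume is backed by a regular file rather
 * than a raw device.
 */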
int
is_regfile(const volume_info_t volume)
{
	return(strcmp(volume->type, "REGFILE") ? 0 : 1);
}

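/*
 * Sanity check the volume's free space offsets and abort if the
 * volume has no room left to allocate from.
 */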
void
assert_volume_offset(const volume_info_t volume)
{
	assert(hammer_is_zone_raw_buffer(volume->vol_free_off));
	assert(hammer_is_zone_raw_buffer(volume->vol_free_end));
	if (volume->vol_free_off >= volume->vol_free_end) {
		errx(1, "Ran out of room, filesystem too small");
		/* not reached */
	}
}

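/*
 * Return the volume with the given volume number, or NULL if it is
 * not on the volume list.
 */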
volume_info_t
get_volume(int32_t vol_no)
{
	volume_info_t volume;

	TAILQ_FOREACH(volume, &VolList, entry) {
		if (volume->vol_no == vol_no)
			break;
	}

	return(volume);
}

volume_info_t
get_root_volume(void)
{
	return(get_volume(HAMMER_ROOT_VOLNO));
}

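/*
 * Translate a zone-X buffer offset to its zone-2 (raw buffer) offset
 * using the blockmap.  Returns HAMMER_OFF_BAD if the lookup fails.
 */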
static
hammer_off_t
__blockmap_xlate_to_zone2(hammer_off_t buf_offset)
{
	hammer_off_t zone2_offset;
	int error = 0;

	if (hammer_is_zone_raw_buffer(buf_offset))
		zone2_offset = buf_offset;
	else
		zone2_offset = blockmap_lookup(buf_offset, &error);

	if (error)
		return(HAMMER_OFF_BAD);
	assert(hammer_is_zone_raw_buffer(zone2_offset));

	return(zone2_offset);
}

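/*
 * Allocate a buffer structure for the given zone-2 offset and enter
 * it into the volume's buffer hash list.  Existing buffers (isnew <= 0)
 * are read in from the media.
 */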
static
buffer_info_t
__alloc_buffer(hammer_off_t zone2_offset, int isnew)
{
	volume_info_t volume;
	buffer_info_t buffer;
	int hi;

	volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
	assert(volume != NULL);

	buffer = calloc(1, sizeof(*buffer));
	buffer->zone2_offset = zone2_offset;
	buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset);
	buffer->volume = volume;
	buffer->ondisk = calloc(1, HAMMER_BUFSIZE);

	if (isnew <= 0) {
		if (readhammerbuf(buffer) == -1) {
			err(1, "Failed to read %s:%016jx at %016jx",
			    volume->name,
			    (intmax_t)buffer->zone2_offset,
			    (intmax_t)buffer->raw_offset);
			/* not reached */
		}
	}

	hi = buffer_hash(zone2_offset);
	TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry);
	hammer_cache_add(&buffer->cache);

	return(buffer);
}

/*
 * Acquire the 16KB buffer for the specified zone offset.
 */
static
buffer_info_t
get_buffer(hammer_off_t buf_offset, int isnew)
{
	buffer_info_t buffer;
	hammer_off_t zone2_offset;
	int dora = 0;

	zone2_offset = __blockmap_xlate_to_zone2(buf_offset);
	if (zone2_offset == HAMMER_OFF_BAD)
		return(NULL);

	zone2_offset &= ~HAMMER_BUFMASK64;
	buffer = find_buffer(zone2_offset);

	if (buffer == NULL) {
		buffer = __alloc_buffer(zone2_offset, isnew);
		dora = (isnew == 0);
	} else {
		assert(isnew != -1);
		hammer_cache_used(&buffer->cache);
	}
	assert(buffer->ondisk != NULL);

	++buffer->cache.refs;
	hammer_cache_flush();

	if (isnew > 0) {
		assert(buffer->cache.modified == 0);
		bzero(buffer->ondisk, HAMMER_BUFSIZE);
		buffer->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buffer);
	return(buffer);
}

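/*
 * Issue read-behind/read-ahead for the buffers surrounding the base
 * buffer, within the window given by UseReadBehind and UseReadAhead.
 * Offsets outside the volume's buffer area are skipped.
 */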
static
void
get_buffer_readahead(const buffer_info_t base)
{
	buffer_info_t buffer;
	volume_info_t volume;
	hammer_off_t zone2_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;
	int re = UseReadAhead;

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	volume = base->volume;

	while (ri < re) {
		if (raw_offset >= volume->ondisk->vol_buf_end)
			break;
		if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
			raw_offset - volume->ondisk->vol_buf_beg);
		buffer = find_buffer(zone2_offset);
		if (buffer == NULL) {
			/* call with -1 to prevent another readahead */
			buffer = get_buffer(zone2_offset, -1);
			rel_buffer(buffer);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}

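/*
 * Drop a reference on a buffer.  When the last reference is released
 * a buffer marked for deletion is flushed if modified, removed from
 * the cache and freed.
 */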
void
rel_buffer(buffer_info_t buffer)
{
	volume_info_t volume;
	int hi;

	if (buffer == NULL)
		return;
	assert(buffer->cache.refs > 0);
	if (--buffer->cache.refs == 0) {
		if (buffer->cache.delete) {
			hi = buffer_hash(buffer->zone2_offset);
			volume = buffer->volume;
			if (buffer->cache.modified)
				flush_buffer(buffer);
			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
			hammer_cache_del(&buffer->cache);
			free(buffer->ondisk);
			free(buffer);
		}
	}
}

/*
 * Retrieve a pointer to buffer data given a zone-X buffer offset.
 * The buffer referenced by bufferp is released if isnew is set or if
 * the corresponding zone-2 offset falls outside the currently cached
 * buffer, in which case a newly referenced buffer is loaded into it.
 */
void *
get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew)
{
	hammer_off_t xor = 0;
	hammer_volume_ondisk_t ondisk;

	if (*bufferp != NULL) {
		if (hammer_is_zone_undo(buf_offset)) {
			ondisk = (*bufferp)->volume->ondisk;
			xor = hammer_xlate_to_undo(ondisk, buf_offset) ^
				(*bufferp)->zone2_offset;
		} else if (hammer_is_zone_direct_xlated(buf_offset)) {
			xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^
			      HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset);
		} else {
			assert(0);
		}
		if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) {
			rel_buffer(*bufferp);
			*bufferp = NULL;
		} else {
			hammer_cache_used(&(*bufferp)->cache);
		}
	}

	if (*bufferp == NULL) {
		*bufferp = get_buffer(buf_offset, isnew);
		if (*bufferp == NULL)
			return(NULL);
	}

	return((char *)(*bufferp)->ondisk +
		((int32_t)buf_offset & HAMMER_BUFMASK));
}

/*
 * Allocate HAMMER elements - B-Tree nodes
 */
hammer_node_ondisk_t
alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp)
{
	hammer_node_ondisk_t node;

	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
			      offp, data_bufferp);
	bzero(node, sizeof(*node));
	return(node);
}

/*
 * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc)
 */
void *
alloc_meta_element(hammer_off_t *offp, int32_t data_len,
		   buffer_info_t *data_bufferp)
{
	void *data;

	data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
			      offp, data_bufferp);
	bzero(data, data_len);
	return(data);
}

/*
 * Format a new blockmap.  This is mostly a degenerate case because
 * all allocations are now actually done from the freemap.
 */
void
format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset)
{
	hammer_blockmap_t blockmap;
	hammer_off_t zone_base;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);

	assert(hammer_is_index_record(zone));

	blockmap = &root_vol->ondisk->vol0_blockmap[zone];
	zone_base = HAMMER_ZONE_ENCODE(zone, offset);

	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = 0;
	blockmap->first_offset = zone_base;
	blockmap->next_offset = zone_base;
	blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
	hammer_crc_set_blockmap(HammerVersion, blockmap);
}

/*
 * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
 * code will load each volume's freemap.
 */
void
format_freemap(volume_info_t root_vol)
{
	buffer_info_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_blockmap_t blockmap;
	hammer_blockmap_layer1_t layer1;
	int i, isnew;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);

	layer1_offset = bootstrap_bigblock(root_vol);
	for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
		isnew = ((i % HAMMER_BUFSIZE) == 0);
		layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		hammer_crc_set_layer1(HammerVersion, layer1);
	}
	assert(i == HAMMER_BIGBLOCK_SIZE);
	rel_buffer(buffer);

	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = layer1_offset;
	blockmap->first_offset = 0;
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	hammer_crc_set_blockmap(HammerVersion, blockmap);
}

/*
 * Load the volume's remaining free space into the freemap.
 *
 * Returns the number of big-blocks available.
 */
int64_t
initialize_freemap(volume_info_t volume)
{
	volume_info_t root_vol;
	buffer_info_t buffer1 = NULL;
	buffer_info_t buffer2 = NULL;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t block_offset;
	hammer_off_t aligned_vol_free_end;
	hammer_blockmap_t freemap;
	int64_t count = 0;
	int64_t layer1_count = 0;

	root_vol = get_root_volume();

	assert_volume_offset(volume);
	aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);

	printf("initialize freemap volume %d\n", volume->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the big-blocks required
	 * to implement layer2.   This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = freemap->phys_offset +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = bootstrap_bigblock(volume);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			hammer_crc_set_layer1(HammerVersion, layer1);
		}
	}

	/*
	 * Now fill everything in.
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_count = 0;
		layer1_offset = freemap->phys_offset +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

		for (block_offset = 0;
		     block_offset < HAMMER_BLOCKMAP_LAYER2;
		     block_offset += HAMMER_BIGBLOCK_SIZE) {
			layer2_offset = layer1->phys_offset +
				        HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
			layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
			bzero(layer2, sizeof(*layer2));

			if (phys_offset + block_offset < volume->vol_free_off) {
				/*
				 * Big-blocks already allocated as part
				 * of the freemap bootstrap.
				 */
				layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			} else if (phys_offset + block_offset < volume->vol_free_end) {
				layer2->zone = 0;
				layer2->append_off = 0;
				layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
				++count;
				++layer1_count;
			} else {
				layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
			hammer_crc_set_layer2(HammerVersion, layer2);
			buffer2->cache.modified = 1;
		}

		layer1->blocks_free += layer1_count;
		hammer_crc_set_layer1(HammerVersion, layer1);
		buffer1->cache.modified = 1;
	}

	rel_buffer(buffer1);
	rel_buffer(buffer2);
	return(count);
}

/*
 * Returns the number of big-blocks available for filesystem data and undos
 * without formatting.
 */
int64_t
count_freemap(const volume_info_t volume)
{
	hammer_off_t phys_offset;
	hammer_off_t vol_free_off;
	hammer_off_t aligned_vol_free_end;
	int64_t count = 0;

	vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);

	assert_volume_offset(volume);
	aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);

	if (volume->vol_no == HAMMER_ROOT_VOLNO)
		vol_free_off += HAMMER_BIGBLOCK_SIZE;

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		vol_free_off += HAMMER_BIGBLOCK_SIZE;
	}

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
		if (phys_offset < vol_free_off)
			;
		else if (phys_offset < volume->vol_free_end)
			++count;
	}

	return(count);
}

/*
 * Format the undomap for the root volume.
 */
void
format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size)
{
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	hammer_volume_ondisk_t ondisk;
	buffer_info_t buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	uint32_t seqno;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
	ondisk = root_vol->ondisk;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks.
	 * Size to approximately 0.1% of the disk.
	 *
	 * The minimum UNDO fifo size is 512MB, or approximately 1% of
	 * the recommended 50G disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
	 */
	undo_limit = *undo_buffer_size;
	if (undo_limit == 0) {
		undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000;
		if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS)
			undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS;
	}
	undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit);
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS;
	*undo_buffer_size = undo_limit;

	blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ENCODE_UNDO(0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit);
	hammer_crc_set_blockmap(HammerVersion, blockmap);

	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
	assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS);

	for (n = 0; n < limit_index; ++n)
		ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol);
	while (n < HAMMER_MAX_UNDO_BIGBLOCKS)
		ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int bytes = HAMMER_UNDO_ALIGN;
		int isnew = ((scan & HAMMER_BUFMASK64) == 0);

		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		hammer_crc_set_fifo_head(HammerVersion, head, bytes);

		scan += bytes;
	}
	rel_buffer(buffer);
}

const char *zone_labels[] = {
	"",		/* 0 */
	"raw_volume",	/* 1 */
	"raw_buffer",	/* 2 */
	"undo",		/* 3 */
	"freemap",	/* 4 */
	"",		/* 5 */
	"",		/* 6 */
	"",		/* 7 */
	"btree",	/* 8 */
	"meta",		/* 9 */
	"large_data",	/* 10 */
	"small_data",	/* 11 */
	"",		/* 12 */
	"",		/* 13 */
	"",		/* 14 */
	"unavail",	/* 15 */
};

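/*
 * Print the volume header fields, undo FIFO usage and the per-zone
 * blockmap of the given volume.
 */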
void
print_blockmap(const volume_info_t volume)
{
	hammer_blockmap_t blockmap;
	hammer_volume_ondisk_t ondisk;
	int64_t size, used;
	int i;
#define INDENT ""

	ondisk = volume->ondisk;
	printf(INDENT"vol_label\t%s\n", ondisk->vol_label);
	printf(INDENT"vol_count\t%d\n", ondisk->vol_count);
	printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg));
	printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg));
	printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg));
	printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end));
	printf(INDENT"vol0_next_tid\t%016jx\n",
	       (uintmax_t)ondisk->vol0_next_tid);

	blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset);
	if (blockmap->first_offset <= blockmap->next_offset)
		used = blockmap->next_offset - blockmap->first_offset;
	else
		used = blockmap->alloc_offset - blockmap->first_offset +
			HAMMER_OFF_LONG_ENCODE(blockmap->next_offset);
	printf(INDENT"undo_size\t%s\n", sizetostr(size));
	printf(INDENT"undo_used\t%s\n", sizetostr(used));

	printf(INDENT"zone #             "
	       "phys             first            next             alloc\n");
	for (i = 0; i < HAMMER_MAX_ZONES; i++) {
		blockmap = &ondisk->vol0_blockmap[i];
		printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n",
			i, zone_labels[i],
			(uintmax_t)blockmap->phys_offset,
			(uintmax_t)blockmap->first_offset,
			(uintmax_t)blockmap->next_offset,
			(uintmax_t)blockmap->alloc_offset);
	}
}

/*
 * Flush various tracking structures to disk
 */
void
flush_all_volumes(void)
{
	volume_info_t volume;

	TAILQ_FOREACH(volume, &VolList, entry)
		flush_volume(volume);
}

void
flush_volume(volume_info_t volume)
{
	buffer_info_t buffer;
	int i;

	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
			flush_buffer(buffer);
	}
	if (writehammervol(volume) == -1) {
		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
		/* not reached */
	}
}

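/*
 * Write a single buffer back to its volume and clear its modified flag.
 */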
void
flush_buffer(buffer_info_t buffer)
{
	volume_info_t volume;

	volume = buffer->volume;
	if (writehammerbuf(buffer) == -1) {
		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
		/* not reached */
	}
	buffer->cache.modified = 0;
}

/*
 * Core I/O operations
 */
static
int
__read(volume_info_t volume, void *data, int64_t offset, int size)
{
	ssize_t n;

	n = pread(volume->fd, data, size, offset);
	if (n != size)
		return(-1);
	return(0);
}

static __inline
int
readhammervol(volume_info_t volume)
{
	return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
}

static __inline
int
readhammerbuf(buffer_info_t buffer)
{
	return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset,
		HAMMER_BUFSIZE));
}

static
int
__write(volume_info_t volume, const void *data, int64_t offset, int size)
{
	ssize_t n;

	if (volume->rdonly)
		return(0);

	n = pwrite(volume->fd, data, size, offset);
	if (n != size)
		return(-1);
	return(0);
}

static __inline
int
writehammervol(volume_info_t volume)
{
	return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
}

static __inline
int
writehammerbuf(buffer_info_t buffer)
{
	return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset,
		HAMMER_BUFSIZE));
}

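/*
 * Determine the boot area size.  A value of 0 selects the nominal
 * size, scaled down for small volumes, and the result is clamped to
 * the compile-time minimum and maximum.
 */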
int64_t
init_boot_area_size(int64_t value, off_t avg_vol_size)
{
	if (value == 0) {
		value = HAMMER_BOOT_NOMBYTES;
		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
			value >>= 1;
	}

	if (value < HAMMER_BOOT_MINBYTES)
		value = HAMMER_BOOT_MINBYTES;
	else if (value > HAMMER_BOOT_MAXBYTES)
		value = HAMMER_BOOT_MAXBYTES;

	return(value);
}

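/*
 * Determine the memory log size using the same policy as
 * init_boot_area_size(), but with the HAMMER_MEM_* limits.
 */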
int64_t
init_memory_log_size(int64_t value, off_t avg_vol_size)
{
	if (value == 0) {
		value = HAMMER_MEM_NOMBYTES;
		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
			value >>= 1;
	}

	if (value < HAMMER_MEM_MINBYTES)
		value = HAMMER_MEM_MINBYTES;
	else if (value > HAMMER_MEM_MAXBYTES)
		value = HAMMER_MEM_MAXBYTES;

	return(value);
}