xref: /dragonfly/sbin/hammer/ondisk.c (revision 78478697)
/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/diskslice.h>
#include <sys/diskmbr.h>

#include "hammer_util.h"

static void get_buffer_readahead(struct buffer_info *base);
static __inline void *get_ondisk(hammer_off_t buf_offset,
			struct buffer_info **bufferp, int isnew);
static int readhammerbuf(struct volume_info *vol, void *data, int64_t offset);
static int writehammerbuf(struct volume_info *vol, const void *data,
			int64_t offset);

int DebugOpt;

uuid_t Hammer_FSType;
uuid_t Hammer_FSId;
int64_t BootAreaSize;
int64_t MemAreaSize;
int64_t UndoBufferSize;
int	RootVolNo = -1;
int	UseReadBehind = -4;
int	UseReadAhead = 4;
int	AssertOnFailure = 1;
struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);

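/*
 * Hash a buffer offset into an index into a volume's buffer lists.
 * Buffers are bucketed by their 16KB buffer number.
 */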
static __inline
int
buffer_hash(hammer_off_t buf_offset)
{
	int hi;

	hi = (int)(buf_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
	return(hi);
}

static struct buffer_info*
find_buffer(struct volume_info *volume, hammer_off_t buf_offset)
{
	int hi;
	struct buffer_info *buf;

	hi = buffer_hash(buf_offset);
	TAILQ_FOREACH(buf, &volume->buffer_lists[hi], entry)
		if (buf->buf_offset == buf_offset)
			return(buf);
	return(NULL);
}

/*
 * Lookup the requested information structure and related on-disk buffer.
 * Missing structures are created.
 */
struct volume_info *
setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
{
	struct volume_info *vol;
	struct volume_info *scan;
	struct hammer_volume_ondisk *ondisk;
	int i, n;
	struct stat st1, st2;

	/*
	 * Allocate the volume structure
	 */
	vol = malloc(sizeof(*vol));
	bzero(vol, sizeof(*vol));
	for (i = 0; i < HAMMER_BUFLISTS; ++i)
		TAILQ_INIT(&vol->buffer_lists[i]);
	vol->name = strdup(filename);
	vol->fd = open(vol->name, oflags);
	if (vol->fd < 0) {
		err(1, "setup_volume: %s: Open failed", vol->name);
	}

	/*
	 * Read or initialize the volume header
	 */
	vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
	} else {
		n = readhammerbuf(vol, ondisk, 0);
		if (n == -1) {
			err(1, "setup_volume: %s: Read failed at offset 0",
			    vol->name);
		}
		vol_no = ondisk->vol_no;
		if (RootVolNo < 0) {
			RootVolNo = ondisk->vol_rootvol;
		} else if (RootVolNo != (int)ondisk->vol_rootvol) {
			errx(1, "setup_volume: %s: root volume disagreement: "
				"%d vs %d",
				vol->name, RootVolNo, ondisk->vol_rootvol);
		}

		if (bcmp(&Hammer_FSType, &ondisk->vol_fstype,
			 sizeof(Hammer_FSType)) != 0) {
			errx(1, "setup_volume: %s: Header does not indicate "
				"that this is a hammer volume", vol->name);
		}
		if (TAILQ_EMPTY(&VolList)) {
			Hammer_FSId = vol->ondisk->vol_fsid;
		} else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid,
				sizeof(Hammer_FSId)) != 0) {
			errx(1, "setup_volume: %s: FSId does not match other "
				"volumes!", vol->name);
		}
	}
	vol->vol_no = vol_no;

	if (isnew > 0) {
		vol->cache.modified = 1;
	}

	if (fstat(vol->fd, &st1) != 0) {
		errx(1, "setup_volume: %s: Failed to stat", vol->name);
	}

	/*
	 * Link the volume structure in
	 */
	TAILQ_FOREACH(scan, &VolList, entry) {
		if (scan->vol_no == vol_no) {
			errx(1, "setup_volume: %s: Duplicate volume number %d "
				"against %s", vol->name, vol_no, scan->name);
		}
		if (fstat(scan->fd, &st2) != 0) {
			errx(1, "setup_volume: %s: Failed to stat %s",
				vol->name, scan->name);
		}
		if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
			errx(1, "setup_volume: %s: Specified more than once",
				vol->name);
		}
	}
	TAILQ_INSERT_TAIL(&VolList, vol, entry);
	return(vol);
}
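
/*
 * Illustrative usage (a sketch, not from the original source): a HAMMER
 * utility typically opens every volume named on its command line before
 * touching any metadata, e.g.
 *
 *	struct volume_info *vol;
 *	int i;
 *
 *	for (i = 0; i < nvols; ++i)
 *		vol = setup_volume(-1, av[i], 0, O_RDONLY);
 *
 * Passing -1 as vol_no lets the volume number be taken from the header
 * read off the disk; nvols and av are hypothetical names.
 */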

/*
 * Check basic volume characteristics.
 */
void
check_volume(struct volume_info *vol)
{
	struct partinfo pinfo;
	struct stat st;

	/*
	 * Get basic information about the volume
	 */
	if (ioctl(vol->fd, DIOCGPART, &pinfo) < 0) {
		/*
		 * Allow the formatting of regular files as HAMMER volumes
		 */
		if (fstat(vol->fd, &st) < 0)
			err(1, "Unable to stat %s", vol->name);
		vol->size = st.st_size;
		vol->type = "REGFILE";
	} else {
		/*
		 * When formatting a block device as a HAMMER volume the
		 * sector size must be compatible.  HAMMER uses 16384 byte
		 * filesystem buffers.
		 */
		if (pinfo.reserved_blocks) {
			errx(1, "HAMMER cannot be placed in a partition "
				"which overlaps the disklabel or MBR");
		}
		if (pinfo.media_blksize > HAMMER_BUFSIZE ||
		    HAMMER_BUFSIZE % pinfo.media_blksize) {
			errx(1, "A media sector size of %d is not supported",
			     pinfo.media_blksize);
		}

		vol->size = pinfo.media_size;
		vol->device_offset = pinfo.media_offset;
		vol->type = "DEVICE";
	}
}

struct volume_info *
get_volume(int32_t vol_no)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry) {
		if (vol->vol_no == vol_no)
			break;
	}
	if (vol == NULL) {
		if (AssertOnFailure)
			errx(1, "get_volume: Volume %d does not exist!",
				vol_no);
		return(NULL);
	}
	++vol->cache.refs;
	/* not added to or removed from hammer cache */
	return(vol);
}

void
rel_volume(struct volume_info *volume)
{
	if (volume == NULL)
		return;
	/* not added to or removed from hammer cache */
	--volume->cache.refs;
}
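
/*
 * Illustrative pairing (a sketch): each successful get_volume() is
 * balanced by a rel_volume() once the caller is done with the volume,
 * e.g.
 *
 *	struct volume_info *volume = get_volume(vol_no);
 *	if (volume != NULL) {
 *		... use volume->ondisk ...
 *		rel_volume(volume);
 *	}
 */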

/*
 * Acquire the specified buffer.  isnew is -1 only when called
 * via get_buffer_readahead() to prevent another readahead.
 */
struct buffer_info *
get_buffer(hammer_off_t buf_offset, int isnew)
{
	void *ondisk;
	struct buffer_info *buf;
	struct volume_info *volume;
	hammer_off_t orig_offset = buf_offset;
	int vol_no;
	int zone;
	int hi, n;
	int dora = 0;

	zone = HAMMER_ZONE_DECODE(buf_offset);
	if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) {
		buf_offset = blockmap_lookup(buf_offset, NULL, NULL, NULL);
	}
	if (buf_offset == HAMMER_OFF_BAD)
		return(NULL);

	if (AssertOnFailure) {
		assert((buf_offset & HAMMER_OFF_ZONE_MASK) ==
		       HAMMER_ZONE_RAW_BUFFER);
	}
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = get_volume(vol_no);
	if (volume == NULL)
		return(NULL);

	buf_offset &= ~HAMMER_BUFMASK64;
	buf = find_buffer(volume, buf_offset);

	if (buf == NULL) {
		buf = malloc(sizeof(*buf));
		bzero(buf, sizeof(*buf));
		if (DebugOpt > 1) {
			fprintf(stderr, "get_buffer: %016llx %016llx at %p\n",
				(long long)orig_offset, (long long)buf_offset,
				buf);
		}
		buf->buf_offset = buf_offset;
		buf->raw_offset = hammer_xlate_to_phys(volume->ondisk,
							buf_offset);
		buf->volume = volume;
		hi = buffer_hash(buf_offset);
		TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buf, entry);
		++volume->cache.refs;
		buf->cache.u.buffer = buf;
		hammer_cache_add(&buf->cache, ISBUFFER);
		dora = (isnew == 0);
	} else {
		if (DebugOpt > 1) {
			fprintf(stderr, "get_buffer: %016llx %016llx at %p *\n",
				(long long)orig_offset, (long long)buf_offset,
				buf);
		}
		hammer_cache_used(&buf->cache);
		++buf->use_count;
	}
	++buf->cache.refs;
	hammer_cache_flush();
	if ((ondisk = buf->ondisk) == NULL) {
		buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
		if (isnew <= 0) {
			n = readhammerbuf(volume, ondisk, buf->raw_offset);
			if (n == -1) {
				if (AssertOnFailure)
					err(1, "get_buffer: %s:%016llx "
					    "Read failed at offset %016llx",
					    volume->name,
					    (long long)buf->buf_offset,
					    (long long)buf->raw_offset);
				bzero(ondisk, HAMMER_BUFSIZE);
			}
		}
	}
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
		buf->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buf);
	return(buf);
}

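/*
 * Warm the buffer cache around the given base buffer.  The window
 * spans UseReadBehind..UseReadAhead buffers relative to the base
 * (-4..+4 by default), clipped to the volume's buffer area; the base
 * buffer itself (ri == 0) is skipped, and already-cached buffers are
 * left alone.
 */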
static void
get_buffer_readahead(struct buffer_info *base)
{
	struct buffer_info *buf;
	struct volume_info *vol;
	hammer_off_t buf_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;
	int re = UseReadAhead;

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	vol = base->volume;

	while (ri < re) {
		if (raw_offset >= vol->ondisk->vol_buf_end)
			break;
		if (raw_offset < vol->ondisk->vol_buf_beg || ri == 0) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		buf_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no,
			raw_offset - vol->ondisk->vol_buf_beg);
		buf = find_buffer(vol, buf_offset);
		if (buf == NULL) {
			buf = get_buffer(buf_offset, -1);
			rel_buffer(buf);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}

void
rel_buffer(struct buffer_info *buffer)
{
	struct volume_info *volume;
	int hi;

	if (buffer == NULL)
		return;
	assert(buffer->cache.refs > 0);
	if (--buffer->cache.refs == 0) {
		if (buffer->cache.delete) {
			hi = buffer_hash(buffer->buf_offset);
			volume = buffer->volume;
			if (buffer->cache.modified)
				flush_buffer(buffer);
			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
			hammer_cache_del(&buffer->cache);
			free(buffer->ondisk);
			free(buffer);
			rel_volume(volume);
		}
	}
}

/*
 * Retrieve a pointer to buffer data given a buffer offset.  The buffer
 * referenced by *bufferp is released if isnew is set or if the offset
 * falls outside the buffer's cached range, in which case a referenced
 * buffer covering the new offset is loaded into *bufferp.
 */
void *
get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
		int isnew)
{
	if (*bufferp != NULL) {
		if (isnew > 0 ||
		    (((*bufferp)->buf_offset ^ buf_offset) & ~HAMMER_BUFMASK64)) {
			rel_buffer(*bufferp);
			*bufferp = NULL;
		}
	}
	return(get_ondisk(buf_offset, bufferp, isnew));
}
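
/*
 * Illustrative usage (a sketch): callers typically walk a range of
 * offsets through a single cached buffer pointer and release it when
 * done, e.g.
 *
 *	struct buffer_info *buffer = NULL;
 *	void *data;
 *
 *	data = get_buffer_data(buf_offset, &buffer, 0);
 *	if (data != NULL) {
 *		... examine or modify the 16KB buffer contents ...
 *	}
 *	rel_buffer(buffer);
 *
 * The underlying buffer is swapped out automatically whenever
 * buf_offset crosses a 16KB buffer boundary.
 */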

/*
 * Retrieve a pointer to a B-Tree node given a zone offset.  The buffer
 * referenced by *bufferp is released if non-NULL, and a referenced
 * buffer is loaded into it.
 */
hammer_node_ondisk_t
get_node(hammer_off_t node_offset, struct buffer_info **bufferp)
{
	if (*bufferp != NULL) {
		rel_buffer(*bufferp);
		*bufferp = NULL;
	}
	return(get_ondisk(node_offset, bufferp, 0));
}

/*
 * Return a pointer to buffer data given a buffer offset.
 * If *bufferp is NULL acquire the buffer, otherwise use that buffer.
 */
static __inline
void *
get_ondisk(hammer_off_t buf_offset, struct buffer_info **bufferp, int isnew)
{
	struct buffer_info *buffer;

	buffer = *bufferp;
	if (buffer == NULL) {
		buffer = *bufferp = get_buffer(buf_offset, isnew);
		if (buffer == NULL)
			return(NULL);
	}

	return((char *)buffer->ondisk +
		((int32_t)buf_offset & HAMMER_BUFMASK));
}

/*
 * Allocate HAMMER elements - btree nodes, meta data, data storage
 */
void *
alloc_btree_element(hammer_off_t *offp, struct buffer_info **data_bufferp)
{
	hammer_node_ondisk_t node;

	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
			      offp, data_bufferp);
	bzero(node, sizeof(*node));
	return (node);
}

void *
alloc_meta_element(hammer_off_t *offp, int32_t data_len,
		   struct buffer_info **data_bufferp)
{
	void *data;

	data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
			      offp, data_bufferp);
	bzero(data, data_len);
	return (data);
}

/*
 * The only data_len supported by HAMMER userspace for the large data
 * zone (zone 10) is HAMMER_BUFSIZE, which is 16KB.  Data larger than
 * 16KB does not fit in a buffer allocated by get_buffer(), and
 * alloc_blockmap() does not handle buffer sizes larger than 16KB
 * either.
 */
void *
alloc_data_element(hammer_off_t *offp, int32_t data_len,
		   struct buffer_info **data_bufferp)
{
	void *data;
	int zone;

	if (data_len == 0)
		return(NULL);

	zone = hammer_data_zone_index(data_len);
	assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */
	assert(zone == HAMMER_ZONE_LARGE_DATA_INDEX ||
	       zone == HAMMER_ZONE_SMALL_DATA_INDEX);

	data = alloc_blockmap(zone, data_len, offp, data_bufferp);
	bzero(data, data_len);
	return(data);
}

/*
 * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The
 * initialize_freemap() code will then load each volume's free space
 * into the freemap.
 */
void
format_freemap(struct volume_info *root_vol)
{
	struct buffer_info *buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_blockmap_t blockmap;
	struct hammer_blockmap_layer1 *layer1;
	int i, isnew;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == RootVolNo);

	layer1_offset = alloc_bigblock(root_vol, HAMMER_ZONE_FREEMAP_INDEX);
	for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
		isnew = ((i % HAMMER_BUFSIZE) == 0);
		layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
	}
	assert(i == HAMMER_BIGBLOCK_SIZE);
	rel_buffer(buffer);

	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = layer1_offset;
	blockmap->first_offset = 0;
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	root_vol->cache.modified = 1;
}
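
/*
 * Illustrative arithmetic (not from the original source): the layer1
 * array occupies exactly one big-block, so the loop above writes
 * HAMMER_BIGBLOCK_SIZE / sizeof(struct hammer_blockmap_layer1)
 * entries, crossing into a new 16KB buffer every
 * HAMMER_BUFSIZE / sizeof(*layer1) entries (hence the isnew test on
 * 16KB boundaries).
 */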

/*
 * Load the volume's remaining free space into the freemap.
 *
 * Returns the number of big-blocks available.
 */
int64_t
initialize_freemap(struct volume_info *vol)
{
	struct volume_info *root_vol;
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_off_t layer1_base;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t block_offset;
	hammer_off_t aligned_vol_free_end;
	hammer_blockmap_t freemap;
	int64_t count = 0;
	int64_t layer1_count = 0;

	root_vol = get_volume(RootVolNo);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	printf("initialize freemap volume %d\n", vol->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the big-blocks required
	 * to implement layer2.  This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	layer1_base = freemap->phys_offset;

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = layer1_base +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = alloc_bigblock(vol,
						HAMMER_ZONE_FREEMAP_INDEX);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
		}
	}

	/*
	 * Now fill everything in.
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_count = 0;
		layer1_offset = layer1_base +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

		for (block_offset = 0;
		     block_offset < HAMMER_BLOCKMAP_LAYER2;
		     block_offset += HAMMER_BIGBLOCK_SIZE) {
			layer2_offset = layer1->phys_offset +
				HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
			layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
			bzero(layer2, sizeof(*layer2));

			if (phys_offset + block_offset < vol->vol_free_off) {
				/*
				 * Fixups XXX - big-blocks already allocated
				 * as part of the freemap bootstrap.
				 */
				if (layer2->zone == 0) {
					layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
					layer2->append_off = HAMMER_BIGBLOCK_SIZE;
					layer2->bytes_free = 0;
				}
			} else if (phys_offset + block_offset < vol->vol_free_end) {
				layer2->zone = 0;
				layer2->append_off = 0;
				layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
				++count;
				++layer1_count;
			} else {
				layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
			layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
			buffer2->cache.modified = 1;
		}

		layer1->blocks_free += layer1_count;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		buffer1->cache.modified = 1;
	}

	rel_buffer(buffer1);
	rel_buffer(buffer2);
	rel_volume(root_vol);
	return(count);
}

/*
 * Returns the number of big-blocks available for filesystem data and undos
 * without formatting.
 */
int64_t
count_freemap(struct volume_info *vol)
{
	hammer_off_t phys_offset;
	hammer_off_t vol_free_off;
	hammer_off_t aligned_vol_free_end;
	int64_t count = 0;

	vol_free_off = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	if (vol->vol_no == RootVolNo)
		vol_free_off += HAMMER_BIGBLOCK_SIZE;

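	/*
	 * Reserve one big-block per layer2 chunk for the freemap's own
	 * layer2 bootstrap blocks (mirrors the preallocation done in
	 * initialize_freemap()).
	 */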
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		vol_free_off += HAMMER_BIGBLOCK_SIZE;
	}

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
		if (phys_offset < vol_free_off) {
			;
		} else if (phys_offset < vol->vol_free_end) {
			++count;
		}
	}

	return(count);
}


/*
 * Format the undomap for the root volume.
 */
void
format_undomap(struct volume_info *root_vol)
{
	const int undo_zone = HAMMER_ZONE_UNDO_INDEX;
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	struct hammer_volume_ondisk *ondisk;
	struct buffer_info *buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	uint32_t seqno;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == RootVolNo);
	ondisk = root_vol->ondisk;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_UNDO_LAYER2 big-blocks.  Size to approximately
	 * 0.1% of the disk.
	 *
	 * The minimum UNDO FIFO size is 500MB, or approximately 1% of
	 * the recommended 50G disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
	 */
	undo_limit = UndoBufferSize;
	if (undo_limit == 0) {
		undo_limit = (ondisk->vol_buf_end - ondisk->vol_buf_beg) / 1000;
		if (undo_limit < 500*1024*1024)
			undo_limit = 500*1024*1024;
	}
	undo_limit = (undo_limit + HAMMER_BIGBLOCK_MASK64) &
		     ~HAMMER_BIGBLOCK_MASK64;
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2;
	UndoBufferSize = undo_limit;
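
	/*
	 * Worked example (illustrative): with the 0.1% rule a 1TB volume
	 * yields roughly 1GB of UNDO FIFO; a 50GB volume computes ~50MB
	 * and is clamped up to the 500MB minimum.
	 */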

	blockmap = &ondisk->vol0_blockmap[undo_zone];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ZONE_ENCODE(undo_zone, 0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ZONE_ENCODE(undo_zone, undo_limit);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);

	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
	assert(limit_index <= HAMMER_UNDO_LAYER2);

	for (n = 0; n < limit_index; ++n) {
		ondisk->vol0_undo_array[n] = alloc_bigblock(NULL,
							HAMMER_ZONE_UNDO_INDEX);
	}
	while (n < HAMMER_UNDO_LAYER2) {
		ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
	}

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)(blockmap->alloc_offset & HAMMER_OFF_LONG_MASK) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int isnew;
		int bytes = HAMMER_UNDO_ALIGN;

		isnew = ((scan & HAMMER_BUFMASK64) == 0);
		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
				crc32(head + 1, bytes - sizeof(*head));

		scan += bytes;
	}
	rel_buffer(buffer);
}

/*
 * Format a new blockmap.  This is mostly a degenerate case because
 * all allocations are now actually done from the freemap.
 */
void
format_blockmap(hammer_blockmap_t blockmap, int zone, hammer_off_t offset)
{
	hammer_off_t zone_base = HAMMER_ZONE_ENCODE(zone, offset);

	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = 0;
	blockmap->first_offset = zone_base;
	blockmap->next_offset = zone_base;
	blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

/*
 * Flush various tracking structures to disk
 */
void
flush_all_volumes(void)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry)
		flush_volume(vol);
}

void
flush_volume(struct volume_info *volume)
{
	struct buffer_info *buffer;
	int i;

	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
			flush_buffer(buffer);
	}
	if (writehammerbuf(volume, volume->ondisk, 0) == -1)
		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
	volume->cache.modified = 0;
}

void
flush_buffer(struct buffer_info *buffer)
{
	struct volume_info *vol;

	vol = buffer->volume;
	if (writehammerbuf(vol, buffer->ondisk, buffer->raw_offset) == -1)
		err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
	buffer->cache.modified = 0;
}

/*
 * Core I/O operations
 */
static int
readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
{
	ssize_t n;

	n = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		return(-1);
	return(0);
}

static int
writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
{
	ssize_t n;

	n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		return(-1);
	return(0);
}

int64_t
init_boot_area_size(int64_t value, off_t avg_vol_size)
{
	if (value == 0) {
		value = HAMMER_BOOT_NOMBYTES;
		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
			value >>= 1;
		if (value < HAMMER_BOOT_MINBYTES)
			value = 0;
	} else if (value < HAMMER_BOOT_MINBYTES) {
		value = HAMMER_BOOT_MINBYTES;
	}

	return(value);
}
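
/*
 * Sizing note (illustrative): HAMMER_BOOT_NOMBYTES is halved until it
 * fits within avg_vol_size / HAMMER_MAX_VOLUMES, so small volumes get
 * a proportionally smaller boot area; if the result drops below
 * HAMMER_BOOT_MINBYTES the boot area is omitted entirely (value 0).
 * An explicitly requested size is only rounded up to the minimum.
 * init_mem_area_size() below applies the same policy to the memory
 * log area using the HAMMER_MEM_* constants.
 */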

int64_t
init_mem_area_size(int64_t value, off_t avg_vol_size)
{
	if (value == 0) {
		value = HAMMER_MEM_NOMBYTES;
		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
			value >>= 1;
		if (value < HAMMER_MEM_MINBYTES)
			value = 0;
	} else if (value < HAMMER_MEM_MINBYTES) {
		value = HAMMER_MEM_MINBYTES;
	}

	return(value);
}