xref: /dragonfly/sbin/hammer/ondisk.c (revision 0db87cb7)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/diskslice.h>
36 #include <sys/diskmbr.h>
37 
38 #include "hammer_util.h"
39 
40 static void get_buffer_readahead(struct buffer_info *base);
41 static __inline void *get_ondisk(hammer_off_t buf_offset,
42 			struct buffer_info **bufferp, int isnew);
43 static int readhammerbuf(struct volume_info *vol, void *data, int64_t offset);
44 static int writehammerbuf(struct volume_info *vol, const void *data,
45 			int64_t offset);
46 
47 int DebugOpt;
48 
49 uuid_t Hammer_FSType;
50 uuid_t Hammer_FSId;
51 int64_t BootAreaSize;
52 int64_t MemAreaSize;
53 int64_t UndoBufferSize;
54 int     NumVolumes;
55 int	RootVolNo = -1;
56 int	UseReadBehind = -4;
57 int	UseReadAhead = 4;
58 int	AssertOnFailure = 1;
59 struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
60 
61 static __inline
62 int
63 buffer_hash(hammer_off_t buf_offset)
64 {
65 	int hi;
66 
67 	hi = (int)(buf_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
68 	return(hi);
69 }
70 
71 static struct buffer_info*
72 find_buffer(struct volume_info *volume, hammer_off_t buf_offset)
73 {
74 	int hi;
75 	struct buffer_info *buf;
76 
77 	hi = buffer_hash(buf_offset);
78 	TAILQ_FOREACH(buf, &volume->buffer_lists[hi], entry)
79 		if (buf->buf_offset == buf_offset)
80 			return(buf);
81 	return(NULL);
82 }
83 
84 /*
85  * Lookup the requested information structure and related on-disk buffer.
86  * Missing structures are created.
87  */
88 struct volume_info *
89 setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
90 {
91 	struct volume_info *vol;
92 	struct volume_info *scan;
93 	struct hammer_volume_ondisk *ondisk;
94 	int i, n;
95 	struct stat st1, st2;
96 
97 	/*
98 	 * Allocate the volume structure
99 	 */
100 	vol = malloc(sizeof(*vol));
101 	bzero(vol, sizeof(*vol));
102 	for (i = 0; i < HAMMER_BUFLISTS; ++i)
103 		TAILQ_INIT(&vol->buffer_lists[i]);
104 	vol->name = strdup(filename);
105 	vol->fd = open(vol->name, oflags);
106 	if (vol->fd < 0) {
107 		err(1, "setup_volume: %s: Open failed", vol->name);
108 	}
109 
110 	/*
111 	 * Read or initialize the volume header
112 	 */
113 	vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
114 	if (isnew > 0) {
115 		bzero(ondisk, HAMMER_BUFSIZE);
116 	} else {
117 		n = readhammerbuf(vol, ondisk, 0);
118 		if (n == -1) {
119 			err(1, "setup_volume: %s: Read failed at offset 0",
120 			    vol->name);
121 		}
122 		vol_no = ondisk->vol_no;
123 		if (RootVolNo < 0) {
124 			RootVolNo = ondisk->vol_rootvol;
125 		} else if (RootVolNo != (int)ondisk->vol_rootvol) {
126 			errx(1, "setup_volume: %s: root volume disagreement: "
127 				"%d vs %d",
128 				vol->name, RootVolNo, ondisk->vol_rootvol);
129 		}
130 
131 		if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType)) != 0) {
132 			errx(1, "setup_volume: %s: Header does not indicate "
133 				"that this is a hammer volume", vol->name);
134 		}
135 		if (TAILQ_EMPTY(&VolList)) {
136 			Hammer_FSId = vol->ondisk->vol_fsid;
137 		} else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId)) != 0) {
138 			errx(1, "setup_volume: %s: FSId does match other "
139 				"volumes!", vol->name);
140 		}
141 	}
142 	vol->vol_no = vol_no;
143 
144 	if (isnew > 0) {
145 		vol->cache.modified = 1;
146         }
147 
148 	if (fstat(vol->fd, &st1) != 0){
149 		errx(1, "setup_volume: %s: Failed to stat", vol->name);
150 	}
151 
152 	/*
153 	 * Link the volume structure in
154 	 */
155 	TAILQ_FOREACH(scan, &VolList, entry) {
156 		if (scan->vol_no == vol_no) {
157 			errx(1, "setup_volume: %s: Duplicate volume number %d "
158 				"against %s", vol->name, vol_no, scan->name);
159 		}
160 		if (fstat(scan->fd, &st2) != 0){
161 			errx(1, "setup_volume: %s: Failed to stat %s",
162 				vol->name, scan->name);
163 		}
164 		if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
165 			errx(1, "setup_volume: %s: Specified more than once",
166 				vol->name);
167 		}
168 	}
169 	TAILQ_INSERT_TAIL(&VolList, vol, entry);
170 	return(vol);
171 }
172 
173 /*
174  * Check basic volume characteristics.
175  */
void
check_volume(struct volume_info *vol)
{
	struct partinfo pinfo;
	struct stat st;

	/*
	 * Get basic information about the volume.  DIOCGPART only works
	 * on block devices; failure means this is (or is treated as) a
	 * regular file.
	 */
	if (ioctl(vol->fd, DIOCGPART, &pinfo) < 0) {
		/*
		 * Allow the formatting of regular files as HAMMER volumes
		 */
		if (fstat(vol->fd, &st) < 0)
			err(1, "Unable to stat %s", vol->name);
		vol->size = st.st_size;
		vol->type = "REGFILE";
	} else {
		/*
		 * When formatting a block device as a HAMMER volume the
		 * sector size must be compatible.  HAMMER uses 16384 byte
		 * filesystem buffers.
		 */
		if (pinfo.reserved_blocks) {
			errx(1, "HAMMER cannot be placed in a partition "
				"which overlaps the disklabel or MBR");
		}
		/* Sector size must evenly divide the 16KB buffer size */
		if (pinfo.media_blksize > HAMMER_BUFSIZE ||
		    HAMMER_BUFSIZE % pinfo.media_blksize) {
			errx(1, "A media sector size of %d is not supported",
			     pinfo.media_blksize);
		}

		vol->size = pinfo.media_size;
		vol->device_offset = pinfo.media_offset;
		vol->type = "DEVICE";
	}

	/*
	 * Reserve space for (future) header junk, setup our poor-man's
	 * big-block allocator.
	 */
	vol->vol_alloc = HAMMER_BUFSIZE * 16;
}
220 
221 struct volume_info *
222 get_volume(int32_t vol_no)
223 {
224 	struct volume_info *vol;
225 
226 	TAILQ_FOREACH(vol, &VolList, entry) {
227 		if (vol->vol_no == vol_no)
228 			break;
229 	}
230 	if (vol == NULL) {
231 		if (AssertOnFailure)
232 			errx(1, "get_volume: Volume %d does not exist!",
233 				vol_no);
234 		return(NULL);
235 	}
236 	++vol->cache.refs;
237 	/* not added to or removed from hammer cache */
238 	return(vol);
239 }
240 
241 void
242 rel_volume(struct volume_info *volume)
243 {
244 	if (volume == NULL)
245 		return;
246 	/* not added to or removed from hammer cache */
247 	--volume->cache.refs;
248 }
249 
250 /*
251  * Acquire the specified buffer.  isnew is -1 only when called
252  * via get_buffer_readahead() to prevent another readahead.
253  */
struct buffer_info *
get_buffer(hammer_off_t buf_offset, int isnew)
{
	void *ondisk;
	struct buffer_info *buf;
	struct volume_info *volume;
	hammer_off_t orig_offset = buf_offset;
	int vol_no;
	int zone;
	int hi, n;
	int dora = 0;		/* whether to trigger readahead below */

	/*
	 * Translate a zone offset (zones above the raw-buffer zone) into
	 * a raw buffer offset via the blockmap.
	 */
	zone = HAMMER_ZONE_DECODE(buf_offset);
	if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) {
		buf_offset = blockmap_lookup(buf_offset, NULL, NULL, NULL);
	}
	if (buf_offset == HAMMER_OFF_BAD)
		return(NULL);

	if (AssertOnFailure) {
		assert((buf_offset & HAMMER_OFF_ZONE_MASK) ==
		       HAMMER_ZONE_RAW_BUFFER);
	}
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = get_volume(vol_no);
	if (volume == NULL)
		return(NULL);

	/* Align to the buffer boundary and check the per-volume cache */
	buf_offset &= ~HAMMER_BUFMASK64;
	buf = find_buffer(volume, buf_offset);

	if (buf == NULL) {
		/*
		 * Not cached: create a new tracking structure, hash it in,
		 * and register it with the hammer cache.  The volume gains
		 * a reference which rel_buffer() releases on teardown.
		 */
		buf = malloc(sizeof(*buf));
		bzero(buf, sizeof(*buf));
		if (DebugOpt > 1) {
			fprintf(stderr, "get_buffer: %016llx %016llx at %p\n",
				(long long)orig_offset, (long long)buf_offset,
				buf);
		}
		buf->buf_offset = buf_offset;
		buf->raw_offset = volume->ondisk->vol_buf_beg +
				  (buf_offset & HAMMER_OFF_SHORT_MASK);
		buf->volume = volume;
		hi = buffer_hash(buf_offset);
		TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buf, entry);
		++volume->cache.refs;
		buf->cache.u.buffer = buf;
		hammer_cache_add(&buf->cache, ISBUFFER);
		/* isnew == -1 comes from readahead itself; don't recurse */
		dora = (isnew == 0);
	} else {
		if (DebugOpt > 1) {
			fprintf(stderr, "get_buffer: %016llx %016llx at %p *\n",
				(long long)orig_offset, (long long)buf_offset,
				buf);
		}
		hammer_cache_used(&buf->cache);
		++buf->use_count;
	}
	++buf->cache.refs;
	hammer_cache_flush();

	/*
	 * Populate the buffer data: read from disk unless the caller is
	 * creating it (isnew > 0).  On read failure the data is zeroed
	 * (after err() when AssertOnFailure is set).
	 */
	if ((ondisk = buf->ondisk) == NULL) {
		buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
		if (isnew <= 0) {
			n = readhammerbuf(volume, ondisk, buf->raw_offset);
			if (n == -1) {
				if (AssertOnFailure)
					err(1, "get_buffer: %s:%016llx "
					    "Read failed at offset %016llx",
					    volume->name,
					    (long long)buf->buf_offset,
					    (long long)buf->raw_offset);
				bzero(ondisk, HAMMER_BUFSIZE);
			}
		}
	}
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
		buf->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buf);
	return(buf);
}
337 
/*
 * Issue readahead (and read-behind) around the given base buffer.
 * Scans from UseReadBehind buffers before the base to UseReadAhead
 * buffers after it, pulling uncached buffers through get_buffer() with
 * isnew == -1 so the readahead does not recurse.
 */
static void
get_buffer_readahead(struct buffer_info *base)
{
	struct buffer_info *buf;
	struct volume_info *vol;
	hammer_off_t buf_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;		/* window start, typically negative */
	int re = UseReadAhead;		/* window end (exclusive) */

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	vol = base->volume;

	while (ri < re) {
		/* Stop at the end of the volume's buffer area */
		if (raw_offset >= vol->ondisk->vol_buf_end)
			break;
		/* Skip offsets before the buffer area and the base itself */
		if (raw_offset < vol->ondisk->vol_buf_beg || ri == 0) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		buf_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no,
			raw_offset - vol->ondisk->vol_buf_beg);
		buf = find_buffer(vol, buf_offset);
		if (buf == NULL) {
			/* isnew == -1 prevents another readahead */
			buf = get_buffer(buf_offset, -1);
			rel_buffer(buf);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}
370 
371 void
372 rel_buffer(struct buffer_info *buffer)
373 {
374 	struct volume_info *volume;
375 	int hi;
376 
377 	if (buffer == NULL)
378 		return;
379 	assert(buffer->cache.refs > 0);
380 	if (--buffer->cache.refs == 0) {
381 		if (buffer->cache.delete) {
382 			hi = buffer_hash(buffer->buf_offset);
383 			volume = buffer->volume;
384 			if (buffer->cache.modified)
385 				flush_buffer(buffer);
386 			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
387 			hammer_cache_del(&buffer->cache);
388 			free(buffer->ondisk);
389 			free(buffer);
390 			rel_volume(volume);
391 		}
392 	}
393 }
394 
395 /*
396  * Retrieve a pointer to a buffer data given a buffer offset.  The underlying
397  * bufferp is freed if isnew or the offset is out of range of the cached data.
398  * If bufferp is freed a referenced buffer is loaded into it.
399  */
400 void *
401 get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
402 		int isnew)
403 {
404 	if (*bufferp != NULL) {
405 		if (isnew > 0 ||
406 		    (((*bufferp)->buf_offset ^ buf_offset) & ~HAMMER_BUFMASK64)) {
407 			rel_buffer(*bufferp);
408 			*bufferp = NULL;
409 		}
410 	}
411 	return(get_ondisk(buf_offset, bufferp, isnew));
412 }
413 
414 /*
415  * Retrieve a pointer to a B-Tree node given a zone offset.  The underlying
416  * bufferp is freed if non-NULL and a referenced buffer is loaded into it.
417  */
418 hammer_node_ondisk_t
419 get_node(hammer_off_t node_offset, struct buffer_info **bufferp)
420 {
421 	if (*bufferp != NULL) {
422 		rel_buffer(*bufferp);
423 		*bufferp = NULL;
424 	}
425 	return(get_ondisk(node_offset, bufferp, 0));
426 }
427 
428 /*
429  * Return a pointer to a buffer data given a buffer offset.
430  * If *bufferp is NULL acquire the buffer otherwise use that buffer.
431  */
432 static __inline
433 void *
434 get_ondisk(hammer_off_t buf_offset, struct buffer_info **bufferp, int isnew)
435 {
436 	struct buffer_info *buffer;
437 
438 	buffer = *bufferp;
439 	if (buffer == NULL) {
440 		buffer = *bufferp = get_buffer(buf_offset, isnew);
441 		if (buffer == NULL)
442 			return(NULL);
443 	}
444 
445 	return((char *)buffer->ondisk +
446 		((int32_t)buf_offset & HAMMER_BUFMASK));
447 }
448 
449 /*
450  * Allocate HAMMER elements - btree nodes, meta data, data storage
451  */
452 void *
453 alloc_btree_element(hammer_off_t *offp, struct buffer_info **data_bufferp)
454 {
455 	hammer_node_ondisk_t node;
456 
457 	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
458 			      offp, data_bufferp);
459 	bzero(node, sizeof(*node));
460 	return (node);
461 }
462 
463 void *
464 alloc_meta_element(hammer_off_t *offp, int32_t data_len,
465 		   struct buffer_info **data_bufferp)
466 {
467 	void *data;
468 
469 	data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
470 			      offp, data_bufferp);
471 	bzero(data, data_len);
472 	return (data);
473 }
474 
475 /*
476  * The only data_len supported by HAMMER userspace for large data zone
477  * (zone 10) is HAMMER_BUFSIZE which is 16KB.  >16KB data does not fit
478  * in a buffer allocated by get_buffer().  Also alloc_blockmap() does
479  * not consider >16KB buffer size.
480  */
481 void *
482 alloc_data_element(hammer_off_t *offp, int32_t data_len,
483 		   struct buffer_info **data_bufferp)
484 {
485 	void *data;
486 	int zone;
487 
488 	if (data_len == 0)
489 		return(NULL);
490 
491 	zone = hammer_data_zone_index(data_len);
492 	assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */
493 	assert(zone == HAMMER_ZONE_LARGE_DATA_INDEX ||
494 	       zone == HAMMER_ZONE_SMALL_DATA_INDEX);
495 
496 	data = alloc_blockmap(zone, data_len, offp, data_bufferp);
497 	bzero(data, data_len);
498 	return(data);
499 }
500 
501 /*
502  * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
503  * code will load each volume's freemap.
504  */
void
format_freemap(struct volume_info *root_vol)
{
	struct buffer_info *buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_blockmap_t blockmap;
	struct hammer_blockmap_layer1 *layer1;
	int i, isnew;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == RootVolNo);

	/*
	 * Allocate one big-block for layer1 and mark every layer1 entry
	 * UNAVAIL; initialize_freemap() fills them in per volume later.
	 */
	layer1_offset = alloc_bigblock(root_vol, HAMMER_ZONE_FREEMAP_INDEX);
	for (i = 0; i < (int)HAMMER_BLOCKMAP_RADIX1; ++i) {
		/* Request a fresh buffer at each buffer boundary */
		isnew = ((i % HAMMER_BLOCKMAP_RADIX1_PERBUFFER) == 0);
		layer1 = get_buffer_data(layer1_offset + i * sizeof(*layer1),
					 &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
	}
	rel_buffer(buffer);

	/*
	 * Hook the layer1 block into the root volume's freemap blockmap
	 * entry and CRC it.
	 */
	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = layer1_offset;
	blockmap->first_offset = 0;
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	root_vol->cache.modified = 1;
}
538 
539 /*
540  * Load the volume's remaining free space into the freemap.
541  *
542  * Returns the number of big-blocks available.
543  */
int64_t
initialize_freemap(struct volume_info *vol)
{
	struct volume_info *root_vol;
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_off_t layer1_base;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t block_offset;
	hammer_off_t aligned_vol_free_end;
	hammer_blockmap_t freemap;
	int64_t count = 0;		/* free big-blocks over whole volume */
	int64_t layer1_count = 0;	/* free big-blocks per layer1 entry */

	root_vol = get_volume(RootVolNo);
	/* Round the volume's free area up to a full layer2 boundary */
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	printf("initialize freemap volume %d\n", vol->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the big-blocks required
	 * to implement layer2.   This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	layer1_base = freemap->phys_offset;

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = layer1_base +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		/* Entries still UNAVAIL from format_freemap() get a block */
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = alloc_bigblock(vol,
						HAMMER_ZONE_FREEMAP_INDEX);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
		}
	}

	/*
	 * Now fill everything in.
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_count = 0;
		layer1_offset = layer1_base +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

		/* One layer2 entry per big-block under this layer1 entry */
		for (block_offset = 0;
		     block_offset < HAMMER_BLOCKMAP_LAYER2;
		     block_offset += HAMMER_BIGBLOCK_SIZE) {
			layer2_offset = layer1->phys_offset +
				        HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
			layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
			bzero(layer2, sizeof(*layer2));

			if (phys_offset + block_offset < vol->vol_free_off) {
				/*
				 * Fixups XXX - big-blocks already allocated as part
				 * of the freemap bootstrap.
				 */
				if (layer2->zone == 0) {
					layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
					layer2->append_off = HAMMER_BIGBLOCK_SIZE;
					layer2->bytes_free = 0;
				}
			} else if (phys_offset + block_offset < vol->vol_free_end) {
				/* A genuinely free big-block */
				layer2->zone = 0;
				layer2->append_off = 0;
				layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
				++count;
				++layer1_count;
			} else {
				/* Beyond the volume's usable area */
				layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
			layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
			buffer2->cache.modified = 1;
		}

		layer1->blocks_free += layer1_count;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		buffer1->cache.modified = 1;
	}

	rel_buffer(buffer1);
	rel_buffer(buffer2);
	rel_volume(root_vol);
	return(count);
}
647 
648 /*
649  * Returns the number of big-blocks available for filesystem data and undos
650  * without formatting.
651  */
652 int64_t
653 count_freemap(struct volume_info *vol)
654 {
655 	hammer_off_t phys_offset;
656 	hammer_off_t vol_free_off;
657 	hammer_off_t aligned_vol_free_end;
658 	int64_t count = 0;
659 
660 	vol_free_off = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
661 	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
662 				& ~HAMMER_BLOCKMAP_LAYER2_MASK;
663 
664 	if (vol->vol_no == RootVolNo)
665 		vol_free_off += HAMMER_BIGBLOCK_SIZE;
666 
667 	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
668 	     phys_offset < aligned_vol_free_end;
669 	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
670 		vol_free_off += HAMMER_BIGBLOCK_SIZE;
671 	}
672 
673 	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
674 	     phys_offset < aligned_vol_free_end;
675 	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
676 		if (phys_offset < vol_free_off) {
677 			;
678 		} else if (phys_offset < vol->vol_free_end) {
679 			++count;
680 		}
681 	}
682 
683 	return(count);
684 }
685 
686 /*
687  * Format the undomap for the root volume.
688  */
void
format_undomap(struct volume_info *root_vol)
{
	const int undo_zone = HAMMER_ZONE_UNDO_INDEX;
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	struct hammer_volume_ondisk *ondisk;
	struct buffer_info *buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	u_int32_t seqno;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == RootVolNo);
	ondisk = root_vol->ondisk;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_UNDO_LAYER2 big-blocks.  Size to approximately
	 * 0.1% of the disk.
	 *
	 * The minimum UNDO fifo size is 500MB, or approximately 1% of
	 * the recommended 50G disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
	 */
	undo_limit = UndoBufferSize;
	if (undo_limit == 0) {
		undo_limit = (ondisk->vol_buf_end - ondisk->vol_buf_beg) / 1000;
		if (undo_limit < 500*1024*1024)
			undo_limit = 500*1024*1024;
	}
	/* Round up to a big-block multiple and clamp to [1, LAYER2] blocks */
	undo_limit = (undo_limit + HAMMER_BIGBLOCK_MASK64) &
		     ~HAMMER_BIGBLOCK_MASK64;
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2;
	UndoBufferSize = undo_limit;

	/* Describe the undo zone in the root volume's blockmap array */
	blockmap = &ondisk->vol0_blockmap[undo_zone];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ZONE_ENCODE(undo_zone, 0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ZONE_ENCODE(undo_zone, undo_limit);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);

	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
	assert(limit_index <= HAMMER_UNDO_LAYER2);

	/*
	 * Back the in-use portion of the undo array with real big-blocks
	 * and mark the remainder UNAVAIL.
	 */
	for (n = 0; n < limit_index; ++n) {
		ondisk->vol0_undo_array[n] = alloc_bigblock(NULL,
							HAMMER_ZONE_UNDO_INDEX);
	}
	while (n < HAMMER_UNDO_LAYER2) {
		ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
	}

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)(blockmap->alloc_offset & HAMMER_OFF_LONG_MASK) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	/*
	 * Fill the whole undo area with DUMMY FIFO records: a head at the
	 * start and a tail at the end of each HAMMER_UNDO_ALIGN chunk,
	 * with an increasing sequence number and a combined head+body CRC.
	 */
	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int isnew;
		int bytes = HAMMER_UNDO_ALIGN;

		/* Only request a fresh (zeroed) buffer at buffer boundaries */
		isnew = ((scan & HAMMER_BUFMASK64) == 0);
		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
				crc32(head + 1, bytes - sizeof(*head));

		scan += bytes;
	}
	rel_buffer(buffer);
}
788 
789 /*
790  * Format a new blockmap.  This is mostly a degenerate case because
791  * all allocations are now actually done from the freemap.
792  */
793 void
794 format_blockmap(hammer_blockmap_t blockmap, int zone, hammer_off_t offset)
795 {
796 	hammer_off_t zone_base = HAMMER_ZONE_ENCODE(zone, offset);
797 
798 	bzero(blockmap, sizeof(*blockmap));
799 	blockmap->phys_offset = 0;
800 	blockmap->first_offset = zone_base;
801 	blockmap->next_offset = zone_base;
802 	blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
803 	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
804 }
805 
806 /*
807  * Flush various tracking structures to disk
808  */
809 void
810 flush_all_volumes(void)
811 {
812 	struct volume_info *vol;
813 
814 	TAILQ_FOREACH(vol, &VolList, entry)
815 		flush_volume(vol);
816 }
817 
818 void
819 flush_volume(struct volume_info *volume)
820 {
821 	struct buffer_info *buffer;
822 	int i;
823 
824 	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
825 		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
826 			flush_buffer(buffer);
827 	}
828 	if (writehammerbuf(volume, volume->ondisk, 0) == -1)
829 		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
830 	volume->cache.modified = 0;
831 }
832 
833 void
834 flush_buffer(struct buffer_info *buffer)
835 {
836 	struct volume_info *vol;
837 
838 	vol = buffer->volume;
839 	if (writehammerbuf(vol, buffer->ondisk, buffer->raw_offset) == -1)
840 		err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
841 	buffer->cache.modified = 0;
842 }
843 
844 /*
845  * Core I/O operations
846  */
847 static int
848 readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
849 {
850 	ssize_t n;
851 
852 	n = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
853 	if (n != HAMMER_BUFSIZE)
854 		return(-1);
855 	return(0);
856 }
857 
858 static int
859 writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
860 {
861 	ssize_t n;
862 
863 	n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
864 	if (n != HAMMER_BUFSIZE)
865 		return(-1);
866 	return(0);
867 }
868 
869 int64_t init_boot_area_size(int64_t value, off_t avg_vol_size)
870 {
871 	if (value == 0) {
872 		value = HAMMER_BOOT_NOMBYTES;
873 		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
874 			value >>= 1;
875 		if (value < HAMMER_BOOT_MINBYTES)
876 			value = 0;
877 	} else if (value < HAMMER_BOOT_MINBYTES) {
878 		value = HAMMER_BOOT_MINBYTES;
879 	}
880 
881 	return(value);
882 }
883 
884 int64_t init_mem_area_size(int64_t value, off_t avg_vol_size)
885 {
886 	if (value == 0) {
887 		value = HAMMER_MEM_NOMBYTES;
888 		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
889 			value >>= 1;
890 		if (value < HAMMER_MEM_MINBYTES)
891 			value = 0;
892 	} else if (value < HAMMER_MEM_MINBYTES) {
893 		value = HAMMER_MEM_MINBYTES;
894 	}
895 
896 	return(value);
897 }
898