/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/stat.h>
#include <sys/diskslice.h>
#include <sys/diskmbr.h>
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include <unistd.h>
#include <stddef.h>
#include <err.h>
#include <fcntl.h>

#include "hammer_util.h"

static void get_buffer_readahead(struct buffer_info *base);
static __inline void *get_ondisk(hammer_off_t buf_offset,
			struct buffer_info **bufferp, int isnew);
static int readhammerbuf(struct volume_info *vol, void *data, int64_t offset);
static int writehammerbuf(struct volume_info *vol, const void *data,
			int64_t offset);

int DebugOpt;

uuid_t Hammer_FSType;
uuid_t Hammer_FSId;
int64_t BootAreaSize;
int64_t MemAreaSize;
int64_t UndoBufferSize;
int     NumVolumes;
int	RootVolNo = -1;
int	UseReadBehind = -4;
int	UseReadAhead = 4;
int	AssertOnFailure = 1;
struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);

static __inline
int
buffer_hash(hammer_off_t buf_offset)
{
	int hi;

	hi = (int)(buf_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
	return(hi);
}
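
/*
 * Worked example (reader's note): with HAMMER_BUFSIZE = 16384 and an
 * assumed HAMMER_BUFLISTMASK of 0x3f, a buffer offset whose low bits
 * are 0x48000 hashes to (0x48000 / 16384) & 0x3f = 18 & 0x3f = bucket
 * 18.  The volume/zone bits in the top of the offset sit far above the
 * six quotient bits kept by the mask, so consecutive 16KB buffers land
 * in consecutive buckets.
 */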

static struct buffer_info*
find_buffer(struct volume_info *volume, hammer_off_t buf_offset)
{
	int hi;
	struct buffer_info *buf;

	hi = buffer_hash(buf_offset);
	TAILQ_FOREACH(buf, &volume->buffer_lists[hi], entry)
		if (buf->buf_offset == buf_offset)
			return(buf);
	return(NULL);
}

/*
 * Look up the requested information structure and its related on-disk
 * buffer.  Missing structures are created.
 */
struct volume_info *
setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
{
	struct volume_info *vol;
	struct volume_info *scan;
	struct hammer_volume_ondisk *ondisk;
	int i, n;
	struct stat st1, st2;

	/*
	 * Allocate the volume structure
	 */
	vol = malloc(sizeof(*vol));
	bzero(vol, sizeof(*vol));
	for (i = 0; i < HAMMER_BUFLISTS; ++i)
		TAILQ_INIT(&vol->buffer_lists[i]);
	vol->name = strdup(filename);
	vol->fd = open(vol->name, oflags);
	if (vol->fd < 0) {
		err(1, "setup_volume: %s: Open failed", vol->name);
	}

	/*
	 * Read or initialize the volume header
	 */
	vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
	} else {
		n = readhammerbuf(vol, ondisk, 0);
		if (n == -1) {
			err(1, "setup_volume: %s: Read failed at offset 0",
			    vol->name);
		}
		vol_no = ondisk->vol_no;
		if (RootVolNo < 0) {
			RootVolNo = ondisk->vol_rootvol;
		} else if (RootVolNo != (int)ondisk->vol_rootvol) {
			errx(1, "setup_volume: %s: root volume disagreement: "
				"%d vs %d",
				vol->name, RootVolNo, ondisk->vol_rootvol);
		}

		if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType)) != 0) {
			errx(1, "setup_volume: %s: Header does not indicate "
				"that this is a hammer volume", vol->name);
		}
		if (TAILQ_EMPTY(&VolList)) {
			Hammer_FSId = vol->ondisk->vol_fsid;
		} else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId)) != 0) {
			errx(1, "setup_volume: %s: FSId does not match other "
				"volumes!", vol->name);
		}
	}
	vol->vol_no = vol_no;

	if (isnew > 0) {
		vol->cache.modified = 1;
	}

	if (fstat(vol->fd, &st1) != 0) {
		err(1, "setup_volume: %s: Failed to stat", vol->name);
	}

	/*
	 * Link the volume structure in
	 */
	TAILQ_FOREACH(scan, &VolList, entry) {
		if (scan->vol_no == vol_no) {
			errx(1, "setup_volume: %s: Duplicate volume number %d "
				"against %s", vol->name, vol_no, scan->name);
		}
		if (fstat(scan->fd, &st2) != 0) {
			err(1, "setup_volume: %s: Failed to stat %s",
				vol->name, scan->name);
		}
		if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
			errx(1, "setup_volume: %s: Specified more than once",
				vol->name);
		}
	}
	TAILQ_INSERT_TAIL(&VolList, vol, entry);
	return(vol);
}

/*
 * Check basic volume characteristics.
 */
void
check_volume(struct volume_info *vol)
{
	struct partinfo pinfo;
	struct stat st;

	/*
	 * Get basic information about the volume
	 */
	if (ioctl(vol->fd, DIOCGPART, &pinfo) < 0) {
		/*
		 * Allow the formatting of regular files as HAMMER volumes
		 */
		if (fstat(vol->fd, &st) < 0)
			err(1, "Unable to stat %s", vol->name);
		vol->size = st.st_size;
		vol->type = "REGFILE";
	} else {
		/*
		 * When formatting a block device as a HAMMER volume the
		 * sector size must be compatible.  HAMMER uses 16384 byte
		 * filesystem buffers.
		 */
		if (pinfo.reserved_blocks) {
			errx(1, "HAMMER cannot be placed in a partition "
				"which overlaps the disklabel or MBR");
		}
		if (pinfo.media_blksize > HAMMER_BUFSIZE ||
		    HAMMER_BUFSIZE % pinfo.media_blksize) {
			errx(1, "A media sector size of %d is not supported",
			     pinfo.media_blksize);
		}
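
		/*
		 * Example (reader's note): media_blksize values of 512,
		 * 2048 and 4096 all divide 16384 evenly and pass the
		 * test above; a 32768-byte sector (larger than
		 * HAMMER_BUFSIZE) or an odd 4160-byte sector would be
		 * rejected.
		 */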

		vol->size = pinfo.media_size;
		vol->device_offset = pinfo.media_offset;
		vol->type = "DEVICE";
	}

	/*
	 * Reserve space for (future) header junk and set up our poor-man's
	 * big-block allocator.
	 */
	vol->vol_alloc = HAMMER_BUFSIZE * 16;
}

struct volume_info *
get_volume(int32_t vol_no)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry) {
		if (vol->vol_no == vol_no)
			break;
	}
	if (vol == NULL) {
		if (AssertOnFailure)
			errx(1, "get_volume: Volume %d does not exist!",
				vol_no);
		return(NULL);
	}
	++vol->cache.refs;
	/* not added to or removed from hammer cache */
	return(vol);
}

void
rel_volume(struct volume_info *volume)
{
	if (volume == NULL)
		return;
	/* not added to or removed from hammer cache */
	--volume->cache.refs;
}

/*
 * Acquire the specified buffer.  isnew is -1 only when called
 * via get_buffer_readahead() to prevent another readahead.
 */
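/*
 * isnew summary (reader's note, derived from the code below):
 *   isnew >  0  the buffer is zeroed and marked modified; no disk read.
 *   isnew == 0  the buffer is read from disk when first tracked, and a
 *               readahead window is primed via get_buffer_readahead().
 *   isnew == -1 the buffer is read from disk but no further readahead
 *               is issued.
 */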
struct buffer_info *
get_buffer(hammer_off_t buf_offset, int isnew)
{
	void *ondisk;
	struct buffer_info *buf;
	struct volume_info *volume;
	hammer_off_t orig_offset = buf_offset;
	int vol_no;
	int zone;
	int hi, n;
	int dora = 0;

	zone = HAMMER_ZONE_DECODE(buf_offset);
	if (zone > HAMMER_ZONE_RAW_BUFFER_INDEX) {
		buf_offset = blockmap_lookup(buf_offset, NULL, NULL, NULL);
	}
	if (buf_offset == HAMMER_OFF_BAD)
		return(NULL);

	if (AssertOnFailure) {
		assert((buf_offset & HAMMER_OFF_ZONE_MASK) ==
		       HAMMER_ZONE_RAW_BUFFER);
	}
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = get_volume(vol_no);
	if (volume == NULL)
		return(NULL);

	buf_offset &= ~HAMMER_BUFMASK64;
	buf = find_buffer(volume, buf_offset);

	if (buf == NULL) {
		buf = malloc(sizeof(*buf));
		bzero(buf, sizeof(*buf));
		if (DebugOpt > 1) {
			fprintf(stderr, "get_buffer: %016llx %016llx at %p\n",
				(long long)orig_offset, (long long)buf_offset,
				buf);
		}
		buf->buf_offset = buf_offset;
		buf->raw_offset = volume->ondisk->vol_buf_beg +
				  (buf_offset & HAMMER_OFF_SHORT_MASK);
		buf->volume = volume;
		hi = buffer_hash(buf_offset);
		TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buf, entry);
		++volume->cache.refs;
		buf->cache.u.buffer = buf;
		hammer_cache_add(&buf->cache, ISBUFFER);
		dora = (isnew == 0);
	} else {
		if (DebugOpt > 1) {
			fprintf(stderr, "get_buffer: %016llx %016llx at %p *\n",
				(long long)orig_offset, (long long)buf_offset,
				buf);
		}
		hammer_cache_used(&buf->cache);
		++buf->use_count;
	}
	++buf->cache.refs;
	hammer_cache_flush();
	if ((ondisk = buf->ondisk) == NULL) {
		buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
		if (isnew <= 0) {
			n = readhammerbuf(volume, ondisk, buf->raw_offset);
			if (n == -1) {
				if (AssertOnFailure)
					err(1, "get_buffer: %s:%016llx "
					    "Read failed at offset %016llx",
					    volume->name,
					    (long long)buf->buf_offset,
					    (long long)buf->raw_offset);
				bzero(ondisk, HAMMER_BUFSIZE);
			}
		}
	}
	if (isnew > 0) {
		bzero(ondisk, HAMMER_BUFSIZE);
		buf->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buf);
	return(buf);
}

static void
get_buffer_readahead(struct buffer_info *base)
{
	struct buffer_info *buf;
	struct volume_info *vol;
	hammer_off_t buf_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;
	int re = UseReadAhead;

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	vol = base->volume;

	while (ri < re) {
		if (raw_offset >= vol->ondisk->vol_buf_end)
			break;
		if (raw_offset < vol->ondisk->vol_buf_beg || ri == 0) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		buf_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no,
			raw_offset - vol->ondisk->vol_buf_beg);
		buf = find_buffer(vol, buf_offset);
		if (buf == NULL) {
			buf = get_buffer(buf_offset, -1);
			rel_buffer(buf);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}
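
/*
 * Worked example (reader's note): with the defaults UseReadBehind = -4
 * and UseReadAhead = 4, the loop above touches the seven surrounding
 * 16KB buffers from base-4 through base+3, skipping the base buffer
 * itself (ri == 0) and anything outside [vol_buf_beg, vol_buf_end).
 */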

void
rel_buffer(struct buffer_info *buffer)
{
	struct volume_info *volume;
	int hi;

	if (buffer == NULL)
		return;
	assert(buffer->cache.refs > 0);
	if (--buffer->cache.refs == 0) {
		if (buffer->cache.delete) {
			hi = buffer_hash(buffer->buf_offset);
			volume = buffer->volume;
			if (buffer->cache.modified)
				flush_buffer(buffer);
			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
			hammer_cache_del(&buffer->cache);
			free(buffer->ondisk);
			free(buffer);
			rel_volume(volume);
		}
	}
}

/*
 * Retrieve a pointer to buffer data given a buffer offset.  The underlying
 * *bufferp is released if isnew is set or if the offset falls outside the
 * data it currently caches; in that case a newly referenced buffer is
 * loaded into *bufferp.
 */
void *
get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
		int isnew)
{
	if (*bufferp != NULL) {
		if (isnew > 0 ||
		    (((*bufferp)->buf_offset ^ buf_offset) & ~HAMMER_BUFMASK64)) {
			rel_buffer(*bufferp);
			*bufferp = NULL;
		}
	}
	return(get_ondisk(buf_offset, bufferp, isnew));
}

/*
 * Retrieve a pointer to a B-Tree node given a zone offset.  The underlying
 * *bufferp is released if non-NULL and a newly referenced buffer is loaded
 * into it.
 */
hammer_node_ondisk_t
get_node(hammer_off_t node_offset, struct buffer_info **bufferp)
{
	if (*bufferp != NULL) {
		rel_buffer(*bufferp);
		*bufferp = NULL;
	}
	return(get_ondisk(node_offset, bufferp, 0));
}

/*
 * Return a pointer to buffer data given a buffer offset.
 * If *bufferp is NULL, acquire the buffer; otherwise use that buffer.
 */
static __inline
void *
get_ondisk(hammer_off_t buf_offset, struct buffer_info **bufferp, int isnew)
{
	struct buffer_info *buffer;

	buffer = *bufferp;
	if (buffer == NULL) {
		buffer = *bufferp = get_buffer(buf_offset, isnew);
		if (buffer == NULL)
			return(NULL);
	}

	return((char *)buffer->ondisk +
		((int32_t)buf_offset & HAMMER_BUFMASK));
}
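
/*
 * Example (reader's note): HAMMER_BUFMASK is HAMMER_BUFSIZE - 1, so a
 * buf_offset whose low bits are 0x4280 resolves to byte 0x280 within
 * the 16KB buffer cached for the aligned offset ending in 0x4000.  A
 * minimal usage sketch follows; it is illustrative only and not part
 * of the build:
 */
#if 0
static void
example_read(hammer_off_t zone_offset)
{
	struct buffer_info *buffer = NULL;
	void *data;

	/* translate the zone offset and load the backing 16KB buffer */
	data = get_buffer_data(zone_offset, &buffer, 0);
	/* ... inspect data ... */
	rel_buffer(buffer);	/* drop the reference when done */
}
#endif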

/*
 * Allocate HAMMER elements - btree nodes, meta data, data storage
 */
void *
alloc_btree_element(hammer_off_t *offp, struct buffer_info **data_bufferp)
{
	hammer_node_ondisk_t node;

	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
			      offp, data_bufferp);
	bzero(node, sizeof(*node));
	return (node);
}

void *
alloc_meta_element(hammer_off_t *offp, int32_t data_len,
		   struct buffer_info **data_bufferp)
{
	void *data;

	data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
			      offp, data_bufferp);
	bzero(data, data_len);
	return (data);
}

/*
 * The only data_len the HAMMER userspace code supports for the large
 * data zone (zone 10) is HAMMER_BUFSIZE, i.e. 16KB.  Data larger than
 * 16KB does not fit in a buffer allocated by get_buffer(), and
 * alloc_blockmap() does not handle buffer sizes above 16KB either.
 */
492 alloc_data_element(hammer_off_t *offp, int32_t data_len,
493 		   struct buffer_info **data_bufferp)
494 {
495 	void *data;
496 
497 	if (data_len >= HAMMER_BUFSIZE) {
498 		assert(data_len == HAMMER_BUFSIZE); /* just one buffer */
499 		data = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
500 				      offp, data_bufferp);
501 		bzero(data, data_len);
502 	} else if (data_len) {
503 		data = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
504 				      offp, data_bufferp);
505 		bzero(data, data_len);
506 	} else {
507 		data = NULL;
508 	}
509 	return (data);
510 }
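
/*
 * Example (reader's note): a small record payload such as a directory
 * entry comes from the small-data zone, a full 16KB buffer's worth of
 * file data comes from the large-data zone, and a data_len of 0 simply
 * returns NULL without allocating anything.
 */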

/*
 * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The
 * initialization code (initialize_freemap()) will load each volume's
 * freemap.
 */
void
format_freemap(struct volume_info *root_vol)
{
	struct buffer_info *buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_blockmap_t blockmap;
	struct hammer_blockmap_layer1 *layer1;
	int i, isnew;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == RootVolNo);

	layer1_offset = alloc_bigblock(root_vol, HAMMER_ZONE_FREEMAP_INDEX);
	for (i = 0; i < (int)HAMMER_BLOCKMAP_RADIX1; ++i) {
		isnew = ((i % HAMMER_BLOCKMAP_RADIX1_PERBUFFER) == 0);
		layer1 = get_buffer_data(layer1_offset + i * sizeof(*layer1),
					 &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
	}
	rel_buffer(buffer);

	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = layer1_offset;
	blockmap->first_offset = 0;
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	root_vol->cache.modified = 1;
}
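
/*
 * Layout sketch (reader's note; sizes assumed from hammer_disk.h): the
 * freemap is a two-layer radix tree.  Each layer1 entry points at a
 * big-block of layer2 entries, and each layer2 entry tracks the free
 * space of a single HAMMER_BIGBLOCK_SIZE (8MB) big-block.
 * format_freemap() above only creates layer1 with every entry UNAVAIL;
 * the per-volume layer2 detail is filled in by initialize_freemap()
 * below.
 */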

/*
 * Load the volume's remaining free space into the freemap.
 *
 * Returns the number of big-blocks available.
 */
int64_t
initialize_freemap(struct volume_info *vol)
{
	struct volume_info *root_vol;
	struct buffer_info *buffer1 = NULL;
	struct buffer_info *buffer2 = NULL;
	struct hammer_blockmap_layer1 *layer1;
	struct hammer_blockmap_layer2 *layer2;
	hammer_off_t layer1_base;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t block_offset;
	hammer_off_t aligned_vol_free_end;
	hammer_blockmap_t freemap;
	int64_t count = 0;
	int64_t layer1_count = 0;

	root_vol = get_volume(RootVolNo);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	printf("initialize freemap volume %d\n", vol->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the big-blocks required
	 * to implement layer2.  This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	layer1_base = freemap->phys_offset;

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = layer1_base +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = alloc_bigblock(vol,
						HAMMER_ZONE_FREEMAP_INDEX);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			layer1->layer1_crc = crc32(layer1,
						   HAMMER_LAYER1_CRCSIZE);
		}
	}

	/*
	 * Now fill everything in.
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_count = 0;
		layer1_offset = layer1_base +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

		for (block_offset = 0;
		     block_offset < HAMMER_BLOCKMAP_LAYER2;
		     block_offset += HAMMER_BIGBLOCK_SIZE) {
			layer2_offset = layer1->phys_offset +
					HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
			layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
			bzero(layer2, sizeof(*layer2));

			if (phys_offset + block_offset < vol->vol_free_off) {
				/*
				 * Fixups XXX - big-blocks already allocated as part
				 * of the freemap bootstrap.
				 */
				if (layer2->zone == 0) {
					layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
					layer2->append_off = HAMMER_BIGBLOCK_SIZE;
					layer2->bytes_free = 0;
				}
			} else if (phys_offset + block_offset < vol->vol_free_end) {
				layer2->zone = 0;
				layer2->append_off = 0;
				layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
				++count;
				++layer1_count;
			} else {
				layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
			layer2->entry_crc = crc32(layer2, HAMMER_LAYER2_CRCSIZE);
			buffer2->cache.modified = 1;
		}

		layer1->blocks_free += layer1_count;
		layer1->layer1_crc = crc32(layer1, HAMMER_LAYER1_CRCSIZE);
		buffer1->cache.modified = 1;
	}

	rel_buffer(buffer1);
	rel_buffer(buffer2);
	rel_volume(root_vol);
	return(count);
}

/*
 * Returns the number of big-blocks available for filesystem data and undos
 * without formatting.
 */
int64_t
count_freemap(struct volume_info *vol)
{
	hammer_off_t phys_offset;
	hammer_off_t vol_free_off;
	hammer_off_t aligned_vol_free_end;
	int64_t count = 0;

	vol_free_off = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	aligned_vol_free_end = (vol->vol_free_end + HAMMER_BLOCKMAP_LAYER2_MASK)
				& ~HAMMER_BLOCKMAP_LAYER2_MASK;

	if (vol->vol_no == RootVolNo)
		vol_free_off += HAMMER_BIGBLOCK_SIZE;

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		vol_free_off += HAMMER_BIGBLOCK_SIZE;
	}

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(vol->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
		if (phys_offset < vol_free_off) {
			;
		} else if (phys_offset < vol->vol_free_end) {
			++count;
		}
	}

	return(count);
}
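
/*
 * Accounting sketch (reader's note): the first loop above reserves one
 * big-block per HAMMER_BLOCKMAP_LAYER2 stride for the layer2 bootstrap
 * blocks that initialize_freemap() will allocate, plus one extra
 * big-block on the root volume (the layer1 block formatted by
 * format_freemap()), so this count matches what initialize_freemap()
 * later reports.
 */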

/*
 * Format the undomap for the root volume.
 */
void
format_undomap(struct volume_info *root_vol)
{
	const int undo_zone = HAMMER_ZONE_UNDO_INDEX;
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	struct hammer_volume_ondisk *ondisk;
	struct buffer_info *buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	u_int32_t seqno;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == RootVolNo);
	ondisk = root_vol->ondisk;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_UNDO_LAYER2 big-blocks.  Size to approximately
	 * 0.1% of the disk.
	 *
	 * The minimum UNDO FIFO size is 500MB, or approximately 1% of
	 * the recommended 50GB disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
	 */
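	/*
	 * Worked example (reader's note): a 100GB volume yields a raw
	 * 0.1% figure of roughly 100MB, which is below the floor and is
	 * bumped to 500MB; the result is then rounded up to a multiple
	 * of HAMMER_BIGBLOCK_SIZE and clamped to at most
	 * HAMMER_UNDO_LAYER2 big-blocks.
	 */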
	undo_limit = UndoBufferSize;
	if (undo_limit == 0) {
		undo_limit = (ondisk->vol_buf_end - ondisk->vol_buf_beg) / 1000;
		if (undo_limit < 500*1024*1024)
			undo_limit = 500*1024*1024;
	}
	undo_limit = (undo_limit + HAMMER_BIGBLOCK_MASK64) &
		     ~HAMMER_BIGBLOCK_MASK64;
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_UNDO_LAYER2;
	UndoBufferSize = undo_limit;

	blockmap = &ondisk->vol0_blockmap[undo_zone];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ZONE_ENCODE(undo_zone, 0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ZONE_ENCODE(undo_zone, undo_limit);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);

	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
	assert(limit_index <= HAMMER_UNDO_LAYER2);

	for (n = 0; n < limit_index; ++n) {
		ondisk->vol0_undo_array[n] = alloc_bigblock(NULL,
							HAMMER_ZONE_UNDO_INDEX);
	}
	while (n < HAMMER_UNDO_LAYER2) {
		ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;
	}

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)(blockmap->alloc_offset & HAMMER_OFF_LONG_MASK) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int isnew;
		int bytes = HAMMER_UNDO_ALIGN;

		isnew = ((scan & HAMMER_BUFMASK64) == 0);
		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		head->hdr_crc = crc32(head, HAMMER_FIFO_HEAD_CRCOFF) ^
				crc32(head + 1, bytes - sizeof(*head));

		scan += bytes;
	}
	rel_buffer(buffer);
}
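
/*
 * Record layout sketch (reader's note, assuming HAMMER_UNDO_ALIGN is
 * 512 bytes): each dummy FIFO record written above occupies one such
 * run, with a hammer_fifo_head at the front, a hammer_fifo_tail flush
 * against the end, and hdr_crc covering both the head (up to the CRC
 * field) and the remainder of the record.
 */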

/*
 * Format a new blockmap.  This is mostly a degenerate case because
 * all allocations are now actually done from the freemap.
 */
void
format_blockmap(hammer_blockmap_t blockmap, int zone, hammer_off_t offset)
{
	hammer_off_t zone_base = HAMMER_ZONE_ENCODE(zone, offset);

	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = 0;
	blockmap->first_offset = zone_base;
	blockmap->next_offset = zone_base;
	blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

/*
 * Flush various tracking structures to disk
 */
void
flush_all_volumes(void)
{
	struct volume_info *vol;

	TAILQ_FOREACH(vol, &VolList, entry)
		flush_volume(vol);
}

void
flush_volume(struct volume_info *volume)
{
	struct buffer_info *buffer;
	int i;

	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
			flush_buffer(buffer);
	}
	if (writehammerbuf(volume, volume->ondisk, 0) == -1)
		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
	volume->cache.modified = 0;
}

void
flush_buffer(struct buffer_info *buffer)
{
	struct volume_info *vol;

	vol = buffer->volume;
	if (writehammerbuf(vol, buffer->ondisk, buffer->raw_offset) == -1)
		err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
	buffer->cache.modified = 0;
}

/*
 * Core I/O operations
 */
static int
readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
{
	ssize_t n;

	n = pread(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		return(-1);
	return(0);
}

static int
writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
{
	ssize_t n;

	n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
	if (n != HAMMER_BUFSIZE)
		return(-1);
	return(0);
}

int64_t
init_boot_area_size(int64_t value, off_t avg_vol_size)
{
	if (value == 0) {
		value = HAMMER_BOOT_NOMBYTES;
		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
			value >>= 1;
		if (value < HAMMER_BOOT_MINBYTES)
			value = 0;
	} else if (value < HAMMER_BOOT_MINBYTES) {
		value = HAMMER_BOOT_MINBYTES;
	}

	return(value);
}
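
/*
 * Worked example (reader's note): with no explicit boot area size the
 * nominal HAMMER_BOOT_NOMBYTES is halved until it fits within
 * avg_vol_size / HAMMER_MAX_VOLUMES, and drops to 0 entirely if that
 * pushes it below HAMMER_BOOT_MINBYTES; an explicit but undersized
 * value is raised to HAMMER_BOOT_MINBYTES instead.  init_mem_area_size()
 * below applies the same policy to the memory log area.
 */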

int64_t
init_mem_area_size(int64_t value, off_t avg_vol_size)
{
	if (value == 0) {
		value = HAMMER_MEM_NOMBYTES;
		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
			value >>= 1;
		if (value < HAMMER_MEM_MINBYTES)
			value = 0;
	} else if (value < HAMMER_MEM_MINBYTES) {
		value = HAMMER_MEM_MINBYTES;
	}

	return(value);
}
909