xref: /dragonfly/sbin/hammer/ondisk.c (revision 62dc643e)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  */
34 
35 #include <sys/diskslice.h>
36 #include <sys/diskmbr.h>
37 
38 #include "hammer_util.h"
39 
static void check_volume(volume_info_t volume);
static void get_buffer_readahead(buffer_info_t base);
static __inline int readhammervol(volume_info_t volume);
static __inline int readhammerbuf(buffer_info_t buffer);
static __inline int writehammervol(volume_info_t volume);
static __inline int writehammerbuf(buffer_info_t buffer);

uuid_t Hammer_FSType;		/* HAMMER filesystem type uuid */
uuid_t Hammer_FSId;		/* fsid shared by all volumes of one filesystem */
int UseReadBehind = -4;		/* read-behind window, in 16KB buffers (negative) */
int UseReadAhead = 4;		/* read-ahead window, in 16KB buffers */
int DebugOpt;			/* NOTE(review): not consumed in this file */
uint32_t HammerVersion = -1;	/* wraps to UINT32_MAX == unknown until root volume read */

/* All volumes opened so far, in registration order */
TAILQ_HEAD(volume_list, volume_info);
static struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
static int valid_hammer_volumes;	/* count of volume headers read so far */
57 
58 static __inline
59 int
60 buffer_hash(hammer_off_t zone2_offset)
61 {
62 	int hi;
63 
64 	hi = (int)(zone2_offset / HAMMER_BUFSIZE) & HAMMER_BUFLISTMASK;
65 	return(hi);
66 }
67 
68 static
69 buffer_info_t
70 find_buffer(hammer_off_t zone2_offset)
71 {
72 	volume_info_t volume;
73 	buffer_info_t buffer;
74 	int hi;
75 
76 	volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
77 	assert(volume);
78 
79 	hi = buffer_hash(zone2_offset);
80 	TAILQ_FOREACH(buffer, &volume->buffer_lists[hi], entry) {
81 		if (buffer->zone2_offset == zone2_offset)
82 			return(buffer);
83 	}
84 	return(NULL);
85 }
86 
87 static
88 volume_info_t
89 __alloc_volume(const char *volname, int oflags)
90 {
91 	volume_info_t volume;
92 	int i;
93 
94 	volume = calloc(1, sizeof(*volume));
95 	volume->vol_no = -1;
96 	volume->rdonly = (oflags == O_RDONLY);
97 	volume->name = strdup(volname);
98 	volume->fd = open(volume->name, oflags);
99 	if (volume->fd < 0) {
100 		err(1, "alloc_volume: Failed to open %s", volume->name);
101 		/* not reached */
102 	}
103 	check_volume(volume);
104 
105 	volume->ondisk = calloc(1, HAMMER_BUFSIZE);
106 
107 	for (i = 0; i < HAMMER_BUFLISTS; ++i)
108 		TAILQ_INIT(&volume->buffer_lists[i]);
109 
110 	return(volume);
111 }
112 
/*
 * Register a volume on the global VolList after checking that it does
 * not collide with an already-registered volume.
 *
 * Exits via errx() on: fstat failure, a duplicate volume number, or
 * the same underlying file/device specified twice (detected by
 * matching st_dev/st_ino).
 */
static
void
__add_volume(const volume_info_t volume)
{
	volume_info_t scan;
	struct stat st1, st2;

	if (fstat(volume->fd, &st1) != 0) {
		errx(1, "add_volume: %s: Failed to stat", volume->name);
		/* not reached */
	}

	TAILQ_FOREACH(scan, &VolList, entry) {
		if (scan->vol_no == volume->vol_no) {
			errx(1, "add_volume: %s: Duplicate volume number %d "
				"against %s",
				volume->name, volume->vol_no, scan->name);
			/* not reached */
		}
		if (fstat(scan->fd, &st2) != 0) {
			errx(1, "add_volume: %s: Failed to stat %s",
				volume->name, scan->name);
			/* not reached */
		}
		/* same inode on same device == same volume given twice */
		if ((st1.st_ino == st2.st_ino) && (st1.st_dev == st2.st_dev)) {
			errx(1, "add_volume: %s: Specified more than once",
				volume->name);
			/* not reached */
		}
	}

	TAILQ_INSERT_TAIL(&VolList, volume, entry);
}
146 
147 static
148 void
149 __verify_volume(const volume_info_t volume)
150 {
151 	hammer_volume_ondisk_t ondisk = volume->ondisk;
152 
153 	if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) {
154 		errx(1, "verify_volume: Invalid volume signature %016jx",
155 			ondisk->vol_signature);
156 		/* not reached */
157 	}
158 	if (ondisk->vol_rootvol != HAMMER_ROOT_VOLNO) {
159 		errx(1, "verify_volume: Invalid root volume# %d",
160 			ondisk->vol_rootvol);
161 		/* not reached */
162 	}
163 	if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType))) {
164 		errx(1, "verify_volume: %s: Header does not indicate "
165 			"that this is a HAMMER volume", volume->name);
166 		/* not reached */
167 	}
168 	if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId))) {
169 		errx(1, "verify_volume: %s: FSId does not match other volumes!",
170 			volume->name);
171 		/* not reached */
172 	}
173 	if (ondisk->vol_version < HAMMER_VOL_VERSION_MIN ||
174 	    ondisk->vol_version >= HAMMER_VOL_VERSION_WIP) {
175 		errx(1, "verify_volume: %s: Invalid volume version %u",
176 			volume->name, ondisk->vol_version);
177 		/* not reached */
178 	}
179 }
180 
181 /*
182  * Initialize a volume structure and ondisk vol_no field.
183  */
184 volume_info_t
185 init_volume(const char *filename, int oflags, int32_t vol_no)
186 {
187 	volume_info_t volume;
188 
189 	volume = __alloc_volume(filename, oflags);
190 	volume->vol_no = volume->ondisk->vol_no = vol_no;
191 
192 	__add_volume(volume);
193 
194 	return(volume);
195 }
196 
/*
 * Initialize a volume structure and read ondisk volume header.
 *
 * The first successfully read volume donates its fsid as the expected
 * Hammer_FSId for all subsequent volumes.  If this is the root volume
 * its version becomes the global HammerVersion.  Header verification
 * can be skipped via verify_volume == 0 (used by callers that must
 * inspect damaged volumes).
 */
volume_info_t
load_volume(const char *filename, int oflags, int verify_volume)
{
	volume_info_t volume;
	int n;

	volume = __alloc_volume(filename, oflags);

	n = readhammervol(volume);
	if (n == -1) {
		err(1, "load_volume: %s: Read failed at offset 0",
		    volume->name);
		/* not reached */
	}
	volume->vol_no = volume->ondisk->vol_no;
	if (volume->vol_no == HAMMER_ROOT_VOLNO)
		HammerVersion = volume->ondisk->vol_version;

	/* first volume read establishes the expected fsid */
	if (valid_hammer_volumes++ == 0)
		Hammer_FSId = volume->ondisk->vol_fsid;
	if (verify_volume)
		__verify_volume(volume);

	__add_volume(volume);

	return(volume);
}
227 
/*
 * Check basic volume characteristics.
 *
 * Determines volume->size and volume->type ("REGFILE" or "DEVICE").
 * A failed DIOCGPART ioctl means the target is not a block device,
 * in which case only regular files are accepted.
 */
static
void
check_volume(volume_info_t volume)
{
	struct partinfo pinfo;
	struct stat st;

	/*
	 * Allow the formatting of block devices or regular files
	 */
	if (ioctl(volume->fd, DIOCGPART, &pinfo) < 0) {
		if (fstat(volume->fd, &st) < 0) {
			err(1, "Unable to stat %s", volume->name);
			/* not reached */
		}
		if (S_ISREG(st.st_mode)) {
			volume->size = st.st_size;
			volume->type = "REGFILE";
		} else {
			errx(1, "Unsupported file type for %s", volume->name);
			/* not reached */
		}
	} else {
		/*
		 * When formatting a block device as a HAMMER volume the
		 * sector size must be compatible.  HAMMER uses 16384 byte
		 * filesystem buffers.
		 */
		if (pinfo.reserved_blocks) {
			errx(1, "HAMMER cannot be placed in a partition "
				"which overlaps the disklabel or MBR");
			/* not reached */
		}
		/* sector size must evenly divide the 16KB buffer size */
		if (pinfo.media_blksize > HAMMER_BUFSIZE ||
		    HAMMER_BUFSIZE % pinfo.media_blksize) {
			errx(1, "A media sector size of %d is not supported",
			     pinfo.media_blksize);
			/* not reached */
		}

		volume->size = pinfo.media_size;
		volume->device_offset = pinfo.media_offset;
		volume->type = "DEVICE";
	}
}
276 
277 int
278 is_regfile(const volume_info_t volume)
279 {
280 	return(strcmp(volume->type, "REGFILE") ? 0 : 1);
281 }
282 
/*
 * Assert that the volume's bootstrap allocation cursor and limit are
 * both zone-2 (raw buffer) offsets, and bail out if the cursor has
 * reached the limit (volume full).
 */
void
assert_volume_offset(const volume_info_t volume)
{
	assert(hammer_is_zone_raw_buffer(volume->vol_free_off));
	assert(hammer_is_zone_raw_buffer(volume->vol_free_end));
	if (volume->vol_free_off >= volume->vol_free_end) {
		errx(1, "Ran out of room, filesystem too small");
		/* not reached */
	}
}
293 
294 volume_info_t
295 get_volume(int32_t vol_no)
296 {
297 	volume_info_t volume;
298 
299 	TAILQ_FOREACH(volume, &VolList, entry) {
300 		if (volume->vol_no == vol_no)
301 			break;
302 	}
303 
304 	return(volume);
305 }
306 
/*
 * Convenience wrapper returning the root volume, or NULL if it has
 * not been registered.
 */
volume_info_t
get_root_volume(void)
{
	return(get_volume(HAMMER_ROOT_VOLNO));
}
312 
/*
 * Translate an arbitrary zone offset to a zone-2 (raw buffer) offset.
 * Zone-2 offsets pass through unchanged; other zones go through the
 * blockmap.  Returns HAMMER_OFF_BAD if the blockmap lookup fails.
 */
static
hammer_off_t
__blockmap_xlate_to_zone2(hammer_off_t buf_offset)
{
	hammer_off_t zone2_offset;
	int error = 0;

	if (hammer_is_zone_raw_buffer(buf_offset))
		zone2_offset = buf_offset;
	else
		zone2_offset = blockmap_lookup(buf_offset, &error);

	if (error)
		return(HAMMER_OFF_BAD);
	assert(hammer_is_zone_raw_buffer(zone2_offset));

	return(zone2_offset);
}
331 
332 static
333 buffer_info_t
334 __alloc_buffer(hammer_off_t zone2_offset, int isnew)
335 {
336 	volume_info_t volume;
337 	buffer_info_t buffer;
338 	int hi;
339 
340 	volume = get_volume(HAMMER_VOL_DECODE(zone2_offset));
341 	assert(volume != NULL);
342 
343 	buffer = calloc(1, sizeof(*buffer));
344 	buffer->zone2_offset = zone2_offset;
345 	buffer->raw_offset = hammer_xlate_to_phys(volume->ondisk, zone2_offset);
346 	buffer->volume = volume;
347 	buffer->ondisk = calloc(1, HAMMER_BUFSIZE);
348 
349 	if (isnew <= 0) {
350 		if (readhammerbuf(buffer) == -1) {
351 			err(1, "Failed to read %s:%016jx at %016jx",
352 			    volume->name,
353 			    (intmax_t)buffer->zone2_offset,
354 			    (intmax_t)buffer->raw_offset);
355 			/* not reached */
356 		}
357 	}
358 
359 	hi = buffer_hash(zone2_offset);
360 	TAILQ_INSERT_TAIL(&volume->buffer_lists[hi], buffer, entry);
361 	hammer_cache_add(&buffer->cache);
362 
363 	return(buffer);
364 }
365 
/*
 * Acquire the 16KB buffer for specified zone offset.
 *
 * isnew semantics:
 *   > 0  brand new buffer: contents are zeroed and marked modified
 *   == 0 existing data: read from disk if not cached, then trigger
 *        readahead around the newly instantiated buffer
 *   -1   readahead request: read if not cached but do not recurse
 *        into another readahead
 *
 * Returns a referenced buffer, or NULL if the blockmap translation
 * fails.
 */
static
buffer_info_t
get_buffer(hammer_off_t buf_offset, int isnew)
{
	buffer_info_t buffer;
	hammer_off_t zone2_offset;
	int dora = 0;	/* "do readahead" */

	zone2_offset = __blockmap_xlate_to_zone2(buf_offset);
	if (zone2_offset == HAMMER_OFF_BAD)
		return(NULL);

	/* align to the 16KB buffer boundary */
	zone2_offset &= ~HAMMER_BUFMASK64;
	buffer = find_buffer(zone2_offset);

	if (buffer == NULL) {
		buffer = __alloc_buffer(zone2_offset, isnew);
		dora = (isnew == 0);
	} else {
		/* a readahead request (-1) never targets a cached buffer */
		assert(isnew != -1);
		hammer_cache_used(&buffer->cache);
	}
	assert(buffer->ondisk != NULL);

	++buffer->cache.refs;
	hammer_cache_flush();

	if (isnew > 0) {
		assert(buffer->cache.modified == 0);
		bzero(buffer->ondisk, HAMMER_BUFSIZE);
		buffer->cache.modified = 1;
	}
	if (dora)
		get_buffer_readahead(buffer);
	return(buffer);
}
405 
/*
 * Pre-instantiate buffers in a window around 'base', spanning
 * UseReadBehind..UseReadAhead 16KB buffers.  Buffers already cached,
 * the base buffer itself (ri == 0), and raw offsets outside the
 * volume's buffer area are skipped.
 */
static
void
get_buffer_readahead(const buffer_info_t base)
{
	buffer_info_t buffer;
	volume_info_t volume;
	hammer_off_t zone2_offset;
	int64_t raw_offset;
	int ri = UseReadBehind;		/* window start (negative) */
	int re = UseReadAhead;		/* window end */

	raw_offset = base->raw_offset + ri * HAMMER_BUFSIZE;
	volume = base->volume;

	while (ri < re) {
		if (raw_offset >= volume->ondisk->vol_buf_end)
			break;
		if (raw_offset < volume->ondisk->vol_buf_beg || ri == 0) {
			++ri;
			raw_offset += HAMMER_BUFSIZE;
			continue;
		}
		zone2_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no,
			raw_offset - volume->ondisk->vol_buf_beg);
		buffer = find_buffer(zone2_offset);
		if (buffer == NULL) {
			/* call with -1 to prevent another readahead */
			buffer = get_buffer(zone2_offset, -1);
			rel_buffer(buffer);
		}
		++ri;
		raw_offset += HAMMER_BUFSIZE;
	}
}
440 
/*
 * Drop one reference on a buffer.  When the last reference goes away
 * and the cache has flagged the buffer for deletion, flush it if
 * modified and then tear it down completely.  NULL is accepted as a
 * no-op.
 */
void
rel_buffer(buffer_info_t buffer)
{
	volume_info_t volume;
	int hi;

	if (buffer == NULL)
		return;
	assert(buffer->cache.refs > 0);
	if (--buffer->cache.refs == 0) {
		if (buffer->cache.delete) {
			hi = buffer_hash(buffer->zone2_offset);
			volume = buffer->volume;
			if (buffer->cache.modified)
				flush_buffer(buffer);
			TAILQ_REMOVE(&volume->buffer_lists[hi], buffer, entry);
			hammer_cache_del(&buffer->cache);
			free(buffer->ondisk);
			free(buffer);
		}
	}
}
463 
/*
 * Retrieve a pointer to a buffer data given a buffer offset.  The underlying
 * bufferp is freed if isnew or the offset is out of range of the cached data.
 * If bufferp is freed a referenced buffer is loaded into it.
 */
void *
get_buffer_data(hammer_off_t buf_offset, buffer_info_t *bufferp, int isnew)
{
	hammer_off_t xor;

	if (*bufferp != NULL) {
		/* XXX xor is always non zero for indirect zones */
		xor = HAMMER_OFF_LONG_ENCODE(buf_offset) ^
		      HAMMER_OFF_LONG_ENCODE((*bufferp)->zone2_offset);
		/* release if a fresh buffer is forced or offset left the 16KB window */
		if (isnew > 0 || (xor & ~HAMMER_BUFMASK64)) {
			rel_buffer(*bufferp);
			*bufferp = NULL;
		}
	}

	if (*bufferp == NULL) {
		*bufferp = get_buffer(buf_offset, isnew);
		if (*bufferp == NULL)
			return(NULL);
	}

	/* pointer into the 16KB buffer at the intra-buffer offset */
	return(((char *)(*bufferp)->ondisk) +
		((int32_t)buf_offset & HAMMER_BUFMASK));
}
493 
494 /*
495  * Allocate HAMMER elements - B-Tree nodes
496  */
497 hammer_node_ondisk_t
498 alloc_btree_node(hammer_off_t *offp, buffer_info_t *data_bufferp)
499 {
500 	hammer_node_ondisk_t node;
501 
502 	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
503 			      offp, data_bufferp);
504 	bzero(node, sizeof(*node));
505 	return(node);
506 }
507 
508 /*
509  * Allocate HAMMER elements - meta data (inode, direntry, PFS, etc)
510  */
511 void *
512 alloc_meta_element(hammer_off_t *offp, int32_t data_len,
513 		   buffer_info_t *data_bufferp)
514 {
515 	void *data;
516 
517 	data = alloc_blockmap(HAMMER_ZONE_META_INDEX, data_len,
518 			      offp, data_bufferp);
519 	bzero(data, data_len);
520 	return(data);
521 }
522 
/*
 * Format a new blockmap.  This is mostly a degenerate case because
 * all allocations are now actually done from the freemap.
 *
 * Only writes the blockmap entry in the root volume's header; no
 * layer1/layer2 structures are created for these zones.
 */
void
format_blockmap(volume_info_t root_vol, int zone, hammer_off_t offset)
{
	hammer_blockmap_t blockmap;
	hammer_off_t zone_base;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);

	assert(hammer_is_index_record(zone));

	blockmap = &root_vol->ondisk->vol0_blockmap[zone];
	zone_base = HAMMER_ZONE_ENCODE(zone, offset);

	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = 0;
	blockmap->first_offset = zone_base;
	blockmap->next_offset = zone_base;
	/* alloc_offset = end of the zone's addressable range */
	blockmap->alloc_offset = HAMMER_ENCODE(zone, 255, -1);
	hammer_crc_set_blockmap(HammerVersion, blockmap);
}
548 
/*
 * Format a new freemap.  Set all layer1 entries to UNAVAIL.  The initialize
 * code will load each volume's freemap.
 *
 * A single bootstrap big-block on the root volume holds the entire
 * layer1 array.
 */
void
format_freemap(volume_info_t root_vol)
{
	buffer_info_t buffer = NULL;
	hammer_off_t layer1_offset;
	hammer_blockmap_t blockmap;
	hammer_blockmap_layer1_t layer1;
	int i, isnew;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);

	layer1_offset = bootstrap_bigblock(root_vol);
	for (i = 0; i < HAMMER_BIGBLOCK_SIZE; i += sizeof(*layer1)) {
		/* a fresh 16KB buffer starts at each HAMMER_BUFSIZE boundary */
		isnew = ((i % HAMMER_BUFSIZE) == 0);
		layer1 = get_buffer_data(layer1_offset + i, &buffer, isnew);
		bzero(layer1, sizeof(*layer1));
		layer1->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
		layer1->blocks_free = 0;
		hammer_crc_set_layer1(HammerVersion, layer1);
	}
	assert(i == HAMMER_BIGBLOCK_SIZE);
	rel_buffer(buffer);

	blockmap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = layer1_offset;
	blockmap->first_offset = 0;
	blockmap->next_offset = HAMMER_ENCODE_RAW_BUFFER(0, 0);
	blockmap->alloc_offset = HAMMER_ENCODE_RAW_BUFFER(255, -1);
	hammer_crc_set_blockmap(HammerVersion, blockmap);
}
585 
/*
 * Load the volume's remaining free space into the freemap.
 *
 * Returns the number of big-blocks available.
 */
int64_t
initialize_freemap(volume_info_t volume)
{
	volume_info_t root_vol;
	buffer_info_t buffer1 = NULL;
	buffer_info_t buffer2 = NULL;
	hammer_blockmap_layer1_t layer1;
	hammer_blockmap_layer2_t layer2;
	hammer_off_t layer1_offset;
	hammer_off_t layer2_offset;
	hammer_off_t phys_offset;
	hammer_off_t block_offset;
	hammer_off_t aligned_vol_free_end;
	hammer_blockmap_t freemap;
	int64_t count = 0;
	int64_t layer1_count = 0;

	root_vol = get_root_volume();

	assert_volume_offset(volume);
	aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);

	printf("initialize freemap volume %d\n", volume->vol_no);

	/*
	 * Initialize the freemap.  First preallocate the big-blocks required
	 * to implement layer2.   This preallocation is a bootstrap allocation
	 * using blocks from the target volume.
	 */
	freemap = &root_vol->ondisk->vol0_blockmap[HAMMER_ZONE_FREEMAP_INDEX];

	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_offset = freemap->phys_offset +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		/* only claim entries still marked UNAVAIL by format_freemap() */
		if (layer1->phys_offset == HAMMER_BLOCKMAP_UNAVAIL) {
			layer1->phys_offset = bootstrap_bigblock(volume);
			layer1->blocks_free = 0;
			buffer1->cache.modified = 1;
			hammer_crc_set_layer1(HammerVersion, layer1);
		}
	}

	/*
	 * Now fill everything in.
	 */
	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
	     phys_offset < aligned_vol_free_end;
	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
		layer1_count = 0;
		layer1_offset = freemap->phys_offset +
				HAMMER_BLOCKMAP_LAYER1_OFFSET(phys_offset);
		layer1 = get_buffer_data(layer1_offset, &buffer1, 0);
		assert(layer1->phys_offset != HAMMER_BLOCKMAP_UNAVAIL);

		/* walk every layer2 entry (one per big-block) in this layer1 range */
		for (block_offset = 0;
		     block_offset < HAMMER_BLOCKMAP_LAYER2;
		     block_offset += HAMMER_BIGBLOCK_SIZE) {
			layer2_offset = layer1->phys_offset +
				        HAMMER_BLOCKMAP_LAYER2_OFFSET(block_offset);
			layer2 = get_buffer_data(layer2_offset, &buffer2, 0);
			bzero(layer2, sizeof(*layer2));

			if (phys_offset + block_offset < volume->vol_free_off) {
				/*
				 * Big-blocks already allocated as part
				 * of the freemap bootstrap.
				 */
				layer2->zone = HAMMER_ZONE_FREEMAP_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			} else if (phys_offset + block_offset < volume->vol_free_end) {
				/* genuinely free big-block */
				layer2->zone = 0;
				layer2->append_off = 0;
				layer2->bytes_free = HAMMER_BIGBLOCK_SIZE;
				++count;
				++layer1_count;
			} else {
				/* past the end of the volume (alignment padding) */
				layer2->zone = HAMMER_ZONE_UNAVAIL_INDEX;
				layer2->append_off = HAMMER_BIGBLOCK_SIZE;
				layer2->bytes_free = 0;
			}
			hammer_crc_set_layer2(HammerVersion, layer2);
			buffer2->cache.modified = 1;
		}

		layer1->blocks_free += layer1_count;
		hammer_crc_set_layer1(HammerVersion, layer1);
		buffer1->cache.modified = 1;
	}

	rel_buffer(buffer1);
	rel_buffer(buffer2);
	return(count);
}
688 
689 /*
690  * Returns the number of big-blocks available for filesystem data and undos
691  * without formatting.
692  */
693 int64_t
694 count_freemap(const volume_info_t volume)
695 {
696 	hammer_off_t phys_offset;
697 	hammer_off_t vol_free_off;
698 	hammer_off_t aligned_vol_free_end;
699 	int64_t count = 0;
700 
701 	vol_free_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
702 
703 	assert_volume_offset(volume);
704 	aligned_vol_free_end = HAMMER_BLOCKMAP_LAYER2_DOALIGN(volume->vol_free_end);
705 
706 	if (volume->vol_no == HAMMER_ROOT_VOLNO)
707 		vol_free_off += HAMMER_BIGBLOCK_SIZE;
708 
709 	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
710 	     phys_offset < aligned_vol_free_end;
711 	     phys_offset += HAMMER_BLOCKMAP_LAYER2) {
712 		vol_free_off += HAMMER_BIGBLOCK_SIZE;
713 	}
714 
715 	for (phys_offset = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 0);
716 	     phys_offset < aligned_vol_free_end;
717 	     phys_offset += HAMMER_BIGBLOCK_SIZE) {
718 		if (phys_offset < vol_free_off)
719 			;
720 		else if (phys_offset < volume->vol_free_end)
721 			++count;
722 	}
723 
724 	return(count);
725 }
726 
/*
 * Format the undomap for the root volume.
 *
 * On return *undo_buffer_size holds the actual (aligned and clamped)
 * UNDO fifo size chosen.
 */
void
format_undomap(volume_info_t root_vol, int64_t *undo_buffer_size)
{
	hammer_off_t undo_limit;
	hammer_blockmap_t blockmap;
	hammer_volume_ondisk_t ondisk;
	buffer_info_t buffer = NULL;
	hammer_off_t scan;
	int n;
	int limit_index;
	uint32_t seqno;

	/* Only root volume needs formatting */
	assert(root_vol->vol_no == HAMMER_ROOT_VOLNO);
	ondisk = root_vol->ondisk;

	/*
	 * Size the undo buffer in multiples of HAMMER_BIGBLOCK_SIZE,
	 * up to HAMMER_MAX_UNDO_BIGBLOCKS big-blocks.
	 * Size to approximately 0.1% of the disk.
	 *
	 * The minimum UNDO fifo size is 512MB, or approximately 1% of
	 * the recommended 50G disk.
	 *
	 * Changing this minimum is rather dangerous as complex filesystem
	 * operations can cause the UNDO FIFO to fill up otherwise.
	 */
	undo_limit = *undo_buffer_size;
	if (undo_limit == 0) {
		undo_limit = HAMMER_VOL_BUF_SIZE(ondisk) / 1000;
		if (undo_limit < HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS)
			undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MIN_UNDO_BIGBLOCKS;
	}
	/* round up to a big-block multiple, then clamp to [1, MAX] big-blocks */
	undo_limit = HAMMER_BIGBLOCK_DOALIGN(undo_limit);
	if (undo_limit < HAMMER_BIGBLOCK_SIZE)
		undo_limit = HAMMER_BIGBLOCK_SIZE;
	if (undo_limit > HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS)
		undo_limit = HAMMER_BIGBLOCK_SIZE * HAMMER_MAX_UNDO_BIGBLOCKS;
	*undo_buffer_size = undo_limit;

	blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	bzero(blockmap, sizeof(*blockmap));
	blockmap->phys_offset = HAMMER_BLOCKMAP_UNAVAIL;
	blockmap->first_offset = HAMMER_ENCODE_UNDO(0);
	blockmap->next_offset = blockmap->first_offset;
	blockmap->alloc_offset = HAMMER_ENCODE_UNDO(undo_limit);
	hammer_crc_set_blockmap(HammerVersion, blockmap);

	limit_index = undo_limit / HAMMER_BIGBLOCK_SIZE;
	assert(limit_index <= HAMMER_MAX_UNDO_BIGBLOCKS);

	/* allocate the fifo's backing big-blocks, mark the rest unavailable */
	for (n = 0; n < limit_index; ++n)
		ondisk->vol0_undo_array[n] = alloc_undo_bigblock(root_vol);
	while (n < HAMMER_MAX_UNDO_BIGBLOCKS)
		ondisk->vol0_undo_array[n++] = HAMMER_BLOCKMAP_UNAVAIL;

	/*
	 * Pre-initialize the UNDO blocks (HAMMER version 4+)
	 */
	printf("initializing the undo map (%jd MB)\n",
		(intmax_t)HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset) /
		(1024 * 1024));

	scan = blockmap->first_offset;
	seqno = 0;

	/* fill the fifo with sequenced DUMMY records, one per HAMMER_UNDO_ALIGN */
	while (scan < blockmap->alloc_offset) {
		hammer_fifo_head_t head;
		hammer_fifo_tail_t tail;
		int bytes = HAMMER_UNDO_ALIGN;
		int isnew = ((scan & HAMMER_BUFMASK64) == 0);

		head = get_buffer_data(scan, &buffer, isnew);
		buffer->cache.modified = 1;
		tail = (void *)((char *)head + bytes - sizeof(*tail));

		bzero(head, bytes);
		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = HAMMER_HEAD_TYPE_DUMMY;
		head->hdr_size = bytes;
		head->hdr_seq = seqno++;

		tail->tail_signature = HAMMER_TAIL_SIGNATURE;
		tail->tail_type = HAMMER_HEAD_TYPE_DUMMY;
		tail->tail_size = bytes;

		hammer_crc_set_fifo_head(HammerVersion, head, bytes);

		scan += bytes;
	}
	rel_buffer(buffer);
}
822 
/* Human-readable names for the HAMMER blockmap zones, indexed by zone number */
const char *zone_labels[] = {
	"",		/* 0 */
	"raw_volume",	/* 1 */
	"raw_buffer",	/* 2 */
	"undo",		/* 3 */
	"freemap",	/* 4 */
	"",		/* 5 */
	"",		/* 6 */
	"",		/* 7 */
	"btree",	/* 8 */
	"meta",		/* 9 */
	"large_data",	/* 10 */
	"small_data",	/* 11 */
	"",		/* 12 */
	"",		/* 13 */
	"",		/* 14 */
	"unavail",	/* 15 */
};
841 
/*
 * Dump a volume's ondisk header fields, the UNDO fifo usage, and the
 * full vol0 blockmap table to stdout.
 */
void
print_blockmap(const volume_info_t volume)
{
	hammer_blockmap_t blockmap;
	hammer_volume_ondisk_t ondisk;
	int64_t size, used;
	int i;
#define INDENT ""

	ondisk = volume->ondisk;
	printf(INDENT"vol_label\t%s\n", ondisk->vol_label);
	printf(INDENT"vol_count\t%d\n", ondisk->vol_count);
	printf(INDENT"vol_bot_beg\t%s\n", sizetostr(ondisk->vol_bot_beg));
	printf(INDENT"vol_mem_beg\t%s\n", sizetostr(ondisk->vol_mem_beg));
	printf(INDENT"vol_buf_beg\t%s\n", sizetostr(ondisk->vol_buf_beg));
	printf(INDENT"vol_buf_end\t%s\n", sizetostr(ondisk->vol_buf_end));
	printf(INDENT"vol0_next_tid\t%016jx\n",
	       (uintmax_t)ondisk->vol0_next_tid);

	blockmap = &ondisk->vol0_blockmap[HAMMER_ZONE_UNDO_INDEX];
	size = HAMMER_OFF_LONG_ENCODE(blockmap->alloc_offset);
	/* the UNDO fifo is circular: account for next wrapping behind first */
	if (blockmap->first_offset <= blockmap->next_offset)
		used = blockmap->next_offset - blockmap->first_offset;
	else
		used = blockmap->alloc_offset - blockmap->first_offset +
			HAMMER_OFF_LONG_ENCODE(blockmap->next_offset);
	printf(INDENT"undo_size\t%s\n", sizetostr(size));
	printf(INDENT"undo_used\t%s\n", sizetostr(used));

	printf(INDENT"zone #             "
	       "phys             first            next             alloc\n");
	for (i = 0; i < HAMMER_MAX_ZONES; i++) {
		blockmap = &ondisk->vol0_blockmap[i];
		printf(INDENT"zone %-2d %-10s %016jx %016jx %016jx %016jx\n",
			i, zone_labels[i],
			(uintmax_t)blockmap->phys_offset,
			(uintmax_t)blockmap->first_offset,
			(uintmax_t)blockmap->next_offset,
			(uintmax_t)blockmap->alloc_offset);
	}
}
883 
884 /*
885  * Flush various tracking structures to disk
886  */
887 void
888 flush_all_volumes(void)
889 {
890 	volume_info_t volume;
891 
892 	TAILQ_FOREACH(volume, &VolList, entry)
893 		flush_volume(volume);
894 }
895 
896 void
897 flush_volume(volume_info_t volume)
898 {
899 	buffer_info_t buffer;
900 	int i;
901 
902 	for (i = 0; i < HAMMER_BUFLISTS; ++i) {
903 		TAILQ_FOREACH(buffer, &volume->buffer_lists[i], entry)
904 			flush_buffer(buffer);
905 	}
906 	if (writehammervol(volume) == -1) {
907 		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
908 		/* not reached */
909 	}
910 }
911 
912 void
913 flush_buffer(buffer_info_t buffer)
914 {
915 	volume_info_t volume;
916 
917 	volume = buffer->volume;
918 	if (writehammerbuf(buffer) == -1) {
919 		err(1, "Write volume %d (%s)", volume->vol_no, volume->name);
920 		/* not reached */
921 	}
922 	buffer->cache.modified = 0;
923 }
924 
925 /*
926  * Core I/O operations
927  */
928 static
929 int
930 __read(volume_info_t volume, void *data, int64_t offset, int size)
931 {
932 	ssize_t n;
933 
934 	n = pread(volume->fd, data, size, offset);
935 	if (n != size)
936 		return(-1);
937 	return(0);
938 }
939 
/* Read the volume's 16KB ondisk header from byte offset 0 */
static __inline
int
readhammervol(volume_info_t volume)
{
	return(__read(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
}
946 
/* Read a buffer's 16KB of data from its raw (physical) offset */
static __inline
int
readhammerbuf(buffer_info_t buffer)
{
	return(__read(buffer->volume, buffer->ondisk, buffer->raw_offset,
		HAMMER_BUFSIZE));
}
954 
955 static
956 int
957 __write(volume_info_t volume, const void *data, int64_t offset, int size)
958 {
959 	ssize_t n;
960 
961 	if (volume->rdonly)
962 		return(0);
963 
964 	n = pwrite(volume->fd, data, size, offset);
965 	if (n != size)
966 		return(-1);
967 	return(0);
968 }
969 
/* Write the volume's 16KB ondisk header to byte offset 0 */
static __inline
int
writehammervol(volume_info_t volume)
{
	return(__write(volume, volume->ondisk, 0, HAMMER_BUFSIZE));
}
976 
/* Write a buffer's 16KB of data to its raw (physical) offset */
static __inline
int
writehammerbuf(buffer_info_t buffer)
{
	return(__write(buffer->volume, buffer->ondisk, buffer->raw_offset,
		HAMMER_BUFSIZE));
}
984 
985 int64_t
986 init_boot_area_size(int64_t value, off_t avg_vol_size)
987 {
988 	if (value == 0) {
989 		value = HAMMER_BOOT_NOMBYTES;
990 		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
991 			value >>= 1;
992 	}
993 
994 	if (value < HAMMER_BOOT_MINBYTES)
995 		value = HAMMER_BOOT_MINBYTES;
996 	else if (value > HAMMER_BOOT_MAXBYTES)
997 		value = HAMMER_BOOT_MAXBYTES;
998 
999 	return(value);
1000 }
1001 
1002 int64_t
1003 init_memory_log_size(int64_t value, off_t avg_vol_size)
1004 {
1005 	if (value == 0) {
1006 		value = HAMMER_MEM_NOMBYTES;
1007 		while (value > avg_vol_size / HAMMER_MAX_VOLUMES)
1008 			value >>= 1;
1009 	}
1010 
1011 	if (value < HAMMER_MEM_MINBYTES)
1012 		value = HAMMER_MEM_MINBYTES;
1013 	else if (value > HAMMER_MEM_MAXBYTES)
1014 		value = HAMMER_MEM_MAXBYTES;
1015 
1016 	return(value);
1017 }
1018