xref: /dragonfly/sbin/hammer/ondisk.c (revision 1558c73f)
1 /*
2  * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
3  *
4  * This code is derived from software contributed to The DragonFly Project
5  * by Matthew Dillon <dillon@backplane.com>
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in
15  *    the documentation and/or other materials provided with the
16  *    distribution.
17  * 3. Neither the name of The DragonFly Project nor the names of its
18  *    contributors may be used to endorse or promote products derived
19  *    from this software without specific, prior written permission.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
23  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
24  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
25  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
26  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
27  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
28  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
29  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
30  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
31  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  * $DragonFly: src/sbin/hammer/ondisk.c,v 1.11 2008/02/10 09:50:55 dillon Exp $
35  */
36 
37 #include <sys/types.h>
38 #include <assert.h>
39 #include <stdio.h>
40 #include <stdlib.h>
41 #include <stdarg.h>
42 #include <string.h>
43 #include <unistd.h>
44 #include <err.h>
45 #include <fcntl.h>
46 #include "hammer_util.h"
47 
48 static void *alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
49 		       struct buffer_info **bufferp);
50 static hammer_off_t alloc_bigblock(void);
51 #if 0
52 static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type);
53 static hammer_off_t hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
54 			struct buffer_info **bufp, u_int16_t hdr_type);
55 static void readhammerbuf(struct volume_info *vol, void *data,
56 			int64_t offset);
57 #endif
58 static void writehammerbuf(struct volume_info *vol, const void *data,
59 			int64_t offset);
60 
61 
62 uuid_t Hammer_FSType;
63 uuid_t Hammer_FSId;
64 int64_t BootAreaSize;
65 int64_t MemAreaSize;
66 int     UsingSuperClusters;
67 int     NumVolumes;
68 int	RootVolNo = -1;
69 struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList);
70 
71 /*
72  * Lookup the requested information structure and related on-disk buffer.
73  * Missing structures are created.
74  */
75 struct volume_info *
76 setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags)
77 {
78 	struct volume_info *vol;
79 	struct volume_info *scan;
80 	struct hammer_volume_ondisk *ondisk;
81 	int n;
82 
83 	/*
84 	 * Allocate the volume structure
85 	 */
86 	vol = malloc(sizeof(*vol));
87 	bzero(vol, sizeof(*vol));
88 	TAILQ_INIT(&vol->buffer_list);
89 	vol->name = strdup(filename);
90 	vol->fd = open(filename, oflags);
91 	if (vol->fd < 0) {
92 		free(vol->name);
93 		free(vol);
94 		err(1, "setup_volume: %s: Open failed", filename);
95 	}
96 
97 	/*
98 	 * Read or initialize the volume header
99 	 */
100 	vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
101 	if (isnew) {
102 		bzero(ondisk, HAMMER_BUFSIZE);
103 	} else {
104 		n = pread(vol->fd, ondisk, HAMMER_BUFSIZE, 0);
105 		if (n != HAMMER_BUFSIZE) {
106 			err(1, "setup_volume: %s: Read failed at offset 0",
107 			    filename);
108 		}
109 		vol_no = ondisk->vol_no;
110 		if (RootVolNo < 0) {
111 			RootVolNo = ondisk->vol_rootvol;
112 		} else if (RootVolNo != (int)ondisk->vol_rootvol) {
113 			errx(1, "setup_volume: %s: root volume disagreement: "
114 				"%d vs %d",
115 				vol->name, RootVolNo, ondisk->vol_rootvol);
116 		}
117 
118 		if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType)) != 0) {
119 			errx(1, "setup_volume: %s: Header does not indicate "
120 				"that this is a hammer volume", vol->name);
121 		}
122 		if (TAILQ_EMPTY(&VolList)) {
123 			Hammer_FSId = vol->ondisk->vol_fsid;
124 		} else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId)) != 0) {
125 			errx(1, "setup_volume: %s: FSId does match other "
126 				"volumes!", vol->name);
127 		}
128 	}
129 	vol->vol_no = vol_no;
130 
131 	if (isnew) {
132 		/*init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);*/
133 		vol->cache.modified = 1;
134         }
135 
136 	/*
137 	 * Link the volume structure in
138 	 */
139 	TAILQ_FOREACH(scan, &VolList, entry) {
140 		if (scan->vol_no == vol_no) {
141 			errx(1, "setup_volume %s: Duplicate volume number %d "
142 				"against %s", filename, vol_no, scan->name);
143 		}
144 	}
145 	TAILQ_INSERT_TAIL(&VolList, vol, entry);
146 	return(vol);
147 }
148 
149 struct volume_info *
150 get_volume(int32_t vol_no)
151 {
152 	struct volume_info *vol;
153 
154 	TAILQ_FOREACH(vol, &VolList, entry) {
155 		if (vol->vol_no == vol_no)
156 			break;
157 	}
158 	if (vol == NULL)
159 		errx(1, "get_volume: Volume %d does not exist!", vol_no);
160 	++vol->cache.refs;
161 	/* not added to or removed from hammer cache */
162 	return(vol);
163 }
164 
165 void
166 rel_volume(struct volume_info *volume)
167 {
168 	/* not added to or removed from hammer cache */
169 	--volume->cache.refs;
170 }
171 
172 /*
173  * Acquire the specified buffer.
174  */
175 struct buffer_info *
176 get_buffer(hammer_off_t buf_offset, int isnew)
177 {
178 	void *ondisk;
179 	struct buffer_info *buf;
180 	struct volume_info *volume;
181 	int n;
182 	int vol_no;
183 
184 	assert((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
185 
186 	vol_no = HAMMER_VOL_DECODE(buf_offset);
187 	volume = get_volume(vol_no);
188 	buf_offset &= ~HAMMER_BUFMASK64;
189 
190 	TAILQ_FOREACH(buf, &volume->buffer_list, entry) {
191 		if (buf->buf_offset == buf_offset)
192 			break;
193 	}
194 	if (buf == NULL) {
195 		buf = malloc(sizeof(*buf));
196 		bzero(buf, sizeof(*buf));
197 		buf->buf_offset = buf_offset;
198 		buf->buf_disk_offset = volume->ondisk->vol_buf_beg +
199 					(buf_offset & HAMMER_OFF_SHORT_MASK);
200 		buf->volume = volume;
201 		TAILQ_INSERT_TAIL(&volume->buffer_list, buf, entry);
202 		++volume->cache.refs;
203 		buf->cache.u.buffer = buf;
204 		hammer_cache_add(&buf->cache, ISBUFFER);
205 	}
206 	++buf->cache.refs;
207 	hammer_cache_flush();
208 	if ((ondisk = buf->ondisk) == NULL) {
209 		buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE);
210 		if (isnew == 0) {
211 			n = pread(volume->fd, ondisk, HAMMER_BUFSIZE,
212 				  buf->buf_disk_offset);
213 			if (n != HAMMER_BUFSIZE) {
214 				err(1, "get_buffer: %s:%016llx Read failed at "
215 				       "offset %lld",
216 				    volume->name, buf->buf_offset,
217 				    buf->buf_disk_offset);
218 			}
219 		}
220 	}
221 	if (isnew) {
222 		bzero(ondisk, HAMMER_BUFSIZE);
223 		buf->cache.modified = 1;
224 	}
225 	return(buf);
226 }
227 
228 void
229 rel_buffer(struct buffer_info *buffer)
230 {
231 	struct volume_info *volume;
232 
233 	assert(buffer->cache.refs > 0);
234 	if (--buffer->cache.refs == 0) {
235 		if (buffer->cache.delete) {
236 			volume = buffer->volume;
237 			if (buffer->cache.modified)
238 				flush_buffer(buffer);
239 			TAILQ_REMOVE(&volume->buffer_list, buffer, entry);
240 			hammer_cache_del(&buffer->cache);
241 			free(buffer->ondisk);
242 			free(buffer);
243 			rel_volume(volume);
244 		}
245 	}
246 }
247 
248 void *
249 get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp,
250 		int isnew)
251 {
252 	struct buffer_info *buffer;
253 
254 	if (*bufferp) {
255 		rel_buffer(*bufferp);
256 	}
257 	buffer = *bufferp = get_buffer(buf_offset, isnew);
258 	return((char *)buffer->ondisk + ((int32_t)buf_offset & HAMMER_BUFMASK));
259 }
260 
261 /*
262  * Retrieve a pointer to a B-Tree node given a cluster offset.  The underlying
263  * bufp is freed if non-NULL and a referenced buffer is loaded into it.
264  */
265 hammer_node_ondisk_t
266 get_node(hammer_off_t node_offset, struct buffer_info **bufp)
267 {
268 	struct buffer_info *buf;
269 
270 	if (*bufp)
271 		rel_buffer(*bufp);
272 	*bufp = buf = get_buffer(node_offset, 0);
273 	return((void *)((char *)buf->ondisk +
274 			(int32_t)(node_offset & HAMMER_BUFMASK)));
275 }
276 
277 /*
278  * Allocate HAMMER elements - btree nodes, data storage, and record elements
279  *
280  * NOTE: hammer_alloc_fifo() initializes the fifo header for the returned
281  * item and zero's out the remainder, so don't bzero() it.
282  */
283 void *
284 alloc_btree_element(hammer_off_t *offp)
285 {
286 	struct buffer_info *buffer = NULL;
287 	hammer_node_ondisk_t node;
288 
289 	node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node),
290 			      offp, &buffer);
291 	bzero(node, sizeof(*node));
292 	/* XXX buffer not released, pointer remains valid */
293 	return(node);
294 }
295 
296 hammer_record_ondisk_t
297 alloc_record_element(hammer_off_t *offp, int32_t data_len, void **datap)
298 {
299 	struct buffer_info *record_buffer = NULL;
300 	struct buffer_info *data_buffer = NULL;
301 	hammer_record_ondisk_t rec;
302 
303 	rec = alloc_blockmap(HAMMER_ZONE_RECORD_INDEX, sizeof(*rec),
304 			     offp, &record_buffer);
305 	bzero(rec, sizeof(*rec));
306 
307 	if (data_len >= HAMMER_BUFSIZE) {
308 		assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */
309 		*datap = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len,
310 					&rec->base.data_off, &data_buffer);
311 		rec->base.data_len = data_len;
312 		bzero(*datap, data_len);
313 	} else if (data_len) {
314 		*datap = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len,
315 					&rec->base.data_off, &data_buffer);
316 		rec->base.data_len = data_len;
317 		bzero(*datap, data_len);
318 	} else {
319 		*datap = NULL;
320 	}
321 	/* XXX buf not released, ptr remains valid */
322 	return(rec);
323 }
324 
325 /*
326  * Format a new blockmap
327  */
328 void
329 format_blockmap(hammer_blockmap_entry_t blockmap, hammer_off_t zone_off)
330 {
331 	blockmap->phys_offset = alloc_bigblock();
332 	blockmap->alloc_offset = zone_off;
333 }
334 
335 static
336 void *
337 alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp,
338 	       struct buffer_info **bufferp)
339 {
340 	struct buffer_info *buffer;
341 	struct volume_info *volume;
342 	hammer_blockmap_entry_t rootmap;
343 	hammer_blockmap_entry_t blockmap;
344 	void *ptr;
345 	int i;
346 
347 	volume = get_volume(RootVolNo);
348 
349 	rootmap = &volume->ondisk->vol0_blockmap[zone];
350 
351 	/*
352 	 * Alignment and buffer-boundary issues
353 	 */
354 	bytes = (bytes + 7) & ~7;
355 	if ((rootmap->phys_offset ^ (rootmap->phys_offset + bytes - 1)) &
356 	    ~HAMMER_BUFMASK64) {
357 		volume->cache.modified = 1;
358 		rootmap->phys_offset = (rootmap->phys_offset + bytes) &
359 				       ~HAMMER_BUFMASK64;
360 	}
361 
362 	/*
363 	 * Dive layer 2
364 	 */
365 	i = (rootmap->alloc_offset >> (HAMMER_LARGEBLOCK_BITS +
366 	     HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK;
367 
368 	blockmap = get_buffer_data(rootmap->phys_offset + i * sizeof(*blockmap),
369 				   bufferp, 0);
370 	buffer = *bufferp;
371 	if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_LAYER1_MASK) == 0) {
372 		buffer->cache.modified = 1;
373 		bzero(blockmap, sizeof(*blockmap));
374 		blockmap->phys_offset = alloc_bigblock();
375 	}
376 
377 	/*
378 	 * Dive layer 1
379 	 */
380 	i = (rootmap->alloc_offset >> HAMMER_LARGEBLOCK_BITS) &
381 	    HAMMER_BLOCKMAP_RADIX_MASK;
382 
383 	blockmap = get_buffer_data(
384 		blockmap->phys_offset + i * sizeof(*blockmap), bufferp, 0);
385 	buffer = *bufferp;
386 
387 	if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_MASK64) == 0) {
388 		buffer->cache.modified = 1;
389 		bzero(blockmap, sizeof(*blockmap));
390 		blockmap->phys_offset = alloc_bigblock();
391 		blockmap->bytes_free = HAMMER_LARGEBLOCK_SIZE;
392 	}
393 
394 	buffer->cache.modified = 1;
395 	volume->cache.modified = 1;
396 	blockmap->bytes_free -= bytes;
397 	*result_offp = rootmap->alloc_offset;
398 	rootmap->alloc_offset += bytes;
399 
400 	i = (rootmap->phys_offset >> HAMMER_BUFFER_BITS) &
401 	    HAMMER_BUFFERS_PER_LARGEBLOCK_MASK;
402 	ptr = get_buffer_data(
403 		blockmap->phys_offset + i * HAMMER_BUFSIZE +
404 		 ((int32_t)*result_offp & HAMMER_BUFMASK), bufferp, 0);
405 	buffer->cache.modified = 1;
406 
407 	rel_volume(volume);
408 	return(ptr);
409 }
410 
411 static
412 hammer_off_t
413 alloc_bigblock(void)
414 {
415 	struct volume_info *volume;
416 	hammer_off_t result_offset;
417 
418 	volume = get_volume(RootVolNo);
419 	result_offset = volume->ondisk->vol0_free_off;
420 	volume->ondisk->vol0_free_off += HAMMER_LARGEBLOCK_SIZE;
421 	if ((volume->ondisk->vol0_free_off & HAMMER_OFF_SHORT_MASK) >
422 	    (hammer_off_t)(volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)) {
423 		panic("alloc_bigblock: Ran out of room, filesystem too small");
424 	}
425 	rel_volume(volume);
426 	return(result_offset);
427 }
428 
#if 0
/*
 * Reserve space at the end of the root volume's FIFO.  (Compiled out.)
 *
 * The reservation covers base_bytes + ext_bytes plus the on-disk tail,
 * rounded up to the head alignment, and must not cross a buffer boundary
 * (asserted).  The first base_bytes are zeroed and the fifo head and tail
 * are initialized; the rest of the data area is left untouched.
 *
 * *bufp receives the referenced buffer holding the reservation and the
 * starting FIFO offset is returned.
 */
static
hammer_off_t
hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes,
		  struct buffer_info **bufp, u_int16_t hdr_type)
{
	struct volume_info *root;
	struct buffer_info *fifo_buf;
	hammer_fifo_head_t head;
	hammer_fifo_tail_t tail;
	hammer_off_t start;
	int32_t total;

	total = (base_bytes + ext_bytes + HAMMER_TAIL_ONDISK_SIZE +
		 HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK;

	root = get_volume(RootVolNo);
	start = root->ondisk->vol0_fifo_end;

	/*
	 * For now don't deal with transitions across buffer boundaries,
	 * only newfs_hammer uses this function.
	 */
	assert((start & ~HAMMER_BUFMASK64) ==
		((start + total) & ~HAMMER_BUFMASK));

	fifo_buf = get_buffer(start, 0);
	*bufp = fifo_buf;

	fifo_buf->cache.modified = 1;
	root->cache.modified = 1;

	/* head sits at the reservation's start, tail at its very end */
	head = (void *)((char *)fifo_buf->ondisk +
			((int32_t)start & HAMMER_BUFMASK));
	bzero(head, base_bytes);

	head->hdr_signature = HAMMER_HEAD_SIGNATURE;
	head->hdr_type = hdr_type;
	head->hdr_size = total;
	head->hdr_seq = root->ondisk->vol0_next_seq++;

	tail = (void *)((char *)head + total - HAMMER_TAIL_ONDISK_SIZE);
	tail->tail_signature = HAMMER_TAIL_SIGNATURE;
	tail->tail_type = hdr_type;
	tail->tail_size = total;

	root->ondisk->vol0_fifo_end += total;
	root->cache.modified = 1;

	rel_volume(root);

	return(start);
}

#endif
489 
490 /*
491  * Flush various tracking structures to disk
492  */
493 
494 /*
495  * Flush various tracking structures to disk
496  */
497 void
498 flush_all_volumes(void)
499 {
500 	struct volume_info *vol;
501 
502 	TAILQ_FOREACH(vol, &VolList, entry)
503 		flush_volume(vol);
504 }
505 
506 void
507 flush_volume(struct volume_info *volume)
508 {
509 	struct buffer_info *buffer;
510 
511 	TAILQ_FOREACH(buffer, &volume->buffer_list, entry)
512 		flush_buffer(buffer);
513 	writehammerbuf(volume, volume->ondisk, 0);
514 	volume->cache.modified = 0;
515 }
516 
517 void
518 flush_buffer(struct buffer_info *buffer)
519 {
520 	writehammerbuf(buffer->volume, buffer->ondisk, buffer->buf_disk_offset);
521 	buffer->cache.modified = 0;
522 }
523 
#if 0
/*
 * Stamp a fifo head with the head signature and the given type; size,
 * crc and sequence number are reset to zero.  (Compiled out.)
 */
static void
init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type)
{
	/* identity */
	head->hdr_signature = HAMMER_HEAD_SIGNATURE;
	head->hdr_type = hdr_type;

	/* nothing recorded yet */
	head->hdr_size = 0;
	head->hdr_crc = 0;
	head->hdr_seq = 0;
}

#endif
539 
#if 0
/*
 * Core I/O operations
 */

/*
 * Read exactly HAMMER_BUFSIZE bytes at 'offset' of the volume's file into
 * 'data'; anything else terminates the program.  (Compiled out.)
 */
static void
readhammerbuf(struct volume_info *vol, void *data, int64_t offset)
{
	ssize_t got = pread(vol->fd, data, HAMMER_BUFSIZE, offset);

	if (got == HAMMER_BUFSIZE)
		return;
	err(1, "Read volume %d (%s)", vol->vol_no, vol->name);
}

#endif
555 
556 static void
557 writehammerbuf(struct volume_info *vol, const void *data, int64_t offset)
558 {
559 	ssize_t n;
560 
561 	n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset);
562 	if (n != HAMMER_BUFSIZE)
563 		err(1, "Write volume %d (%s)", vol->vol_no, vol->name);
564 }
565 
/*
 * Print a printf-style message and a trailing newline on stderr, then
 * exit with status 1.  Never returns.
 */
void
panic(const char *ctl, ...)
{
	va_list ap;

	va_start(ap, ctl);
	(void)vfprintf(stderr, ctl, ap);
	va_end(ap);
	(void)fputc('\n', stderr);
	exit(1);
}
577 
578