1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * $DragonFly: src/sbin/hammer/ondisk.c,v 1.11 2008/02/10 09:50:55 dillon Exp $ 35 */ 36 37 #include <sys/types.h> 38 #include <assert.h> 39 #include <stdio.h> 40 #include <stdlib.h> 41 #include <stdarg.h> 42 #include <string.h> 43 #include <unistd.h> 44 #include <err.h> 45 #include <fcntl.h> 46 #include "hammer_util.h" 47 48 static void *alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp, 49 struct buffer_info **bufferp); 50 static hammer_off_t alloc_bigblock(void); 51 #if 0 52 static void init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type); 53 static hammer_off_t hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes, 54 struct buffer_info **bufp, u_int16_t hdr_type); 55 static void readhammerbuf(struct volume_info *vol, void *data, 56 int64_t offset); 57 #endif 58 static void writehammerbuf(struct volume_info *vol, const void *data, 59 int64_t offset); 60 61 62 uuid_t Hammer_FSType; 63 uuid_t Hammer_FSId; 64 int64_t BootAreaSize; 65 int64_t MemAreaSize; 66 int UsingSuperClusters; 67 int NumVolumes; 68 int RootVolNo = -1; 69 struct volume_list VolList = TAILQ_HEAD_INITIALIZER(VolList); 70 71 /* 72 * Lookup the requested information structure and related on-disk buffer. 73 * Missing structures are created. 74 */ 75 struct volume_info * 76 setup_volume(int32_t vol_no, const char *filename, int isnew, int oflags) 77 { 78 struct volume_info *vol; 79 struct volume_info *scan; 80 struct hammer_volume_ondisk *ondisk; 81 int n; 82 83 /* 84 * Allocate the volume structure 85 */ 86 vol = malloc(sizeof(*vol)); 87 bzero(vol, sizeof(*vol)); 88 TAILQ_INIT(&vol->buffer_list); 89 vol->name = strdup(filename); 90 vol->fd = open(filename, oflags); 91 if (vol->fd < 0) { 92 free(vol->name); 93 free(vol); 94 err(1, "setup_volume: %s: Open failed", filename); 95 } 96 97 /* 98 * Read or initialize the volume header 99 */ 100 vol->ondisk = ondisk = malloc(HAMMER_BUFSIZE); 101 if (isnew) { 102 bzero(ondisk, HAMMER_BUFSIZE); 103 } else { 104 n = pread(vol->fd, ondisk, HAMMER_BUFSIZE, 0); 105 if (n != HAMMER_BUFSIZE) { 106 err(1, "setup_volume: %s: Read failed at offset 0", 107 filename); 108 } 109 vol_no = ondisk->vol_no; 110 if (RootVolNo < 0) { 111 RootVolNo = ondisk->vol_rootvol; 112 } else if (RootVolNo != (int)ondisk->vol_rootvol) { 113 errx(1, "setup_volume: %s: root volume disagreement: " 114 "%d vs %d", 115 vol->name, RootVolNo, ondisk->vol_rootvol); 116 } 117 118 if (bcmp(&Hammer_FSType, &ondisk->vol_fstype, sizeof(Hammer_FSType)) != 0) { 119 errx(1, "setup_volume: %s: Header does not indicate " 120 "that this is a hammer volume", vol->name); 121 } 122 if (TAILQ_EMPTY(&VolList)) { 123 Hammer_FSId = vol->ondisk->vol_fsid; 124 } else if (bcmp(&Hammer_FSId, &ondisk->vol_fsid, sizeof(Hammer_FSId)) != 0) { 125 errx(1, "setup_volume: %s: FSId does match other " 126 "volumes!", vol->name); 127 } 128 } 129 vol->vol_no = vol_no; 130 131 if (isnew) { 132 /*init_fifo_head(&ondisk->head, HAMMER_HEAD_TYPE_VOL);*/ 133 vol->cache.modified = 1; 134 } 135 136 /* 137 * Link the volume structure in 138 */ 139 TAILQ_FOREACH(scan, &VolList, entry) { 140 if (scan->vol_no == vol_no) { 141 errx(1, "setup_volume %s: Duplicate volume number %d " 142 "against %s", filename, vol_no, scan->name); 143 } 144 } 145 TAILQ_INSERT_TAIL(&VolList, vol, entry); 146 return(vol); 147 } 148 149 struct volume_info * 150 get_volume(int32_t vol_no) 151 { 152 struct volume_info *vol; 153 154 TAILQ_FOREACH(vol, &VolList, entry) { 155 if (vol->vol_no == vol_no) 156 break; 157 } 158 if (vol == NULL) 159 errx(1, "get_volume: Volume %d does not exist!", vol_no); 160 ++vol->cache.refs; 161 /* not added to or removed from hammer cache */ 162 return(vol); 163 } 164 165 void 166 rel_volume(struct volume_info *volume) 167 { 168 /* not added to or removed from hammer cache */ 169 --volume->cache.refs; 170 } 171 172 /* 173 * Acquire the specified buffer. 174 */ 175 struct buffer_info * 176 get_buffer(hammer_off_t buf_offset, int isnew) 177 { 178 void *ondisk; 179 struct buffer_info *buf; 180 struct volume_info *volume; 181 int n; 182 int vol_no; 183 184 assert((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER); 185 186 vol_no = HAMMER_VOL_DECODE(buf_offset); 187 volume = get_volume(vol_no); 188 buf_offset &= ~HAMMER_BUFMASK64; 189 190 TAILQ_FOREACH(buf, &volume->buffer_list, entry) { 191 if (buf->buf_offset == buf_offset) 192 break; 193 } 194 if (buf == NULL) { 195 buf = malloc(sizeof(*buf)); 196 bzero(buf, sizeof(*buf)); 197 buf->buf_offset = buf_offset; 198 buf->buf_disk_offset = volume->ondisk->vol_buf_beg + 199 (buf_offset & HAMMER_OFF_SHORT_MASK); 200 buf->volume = volume; 201 TAILQ_INSERT_TAIL(&volume->buffer_list, buf, entry); 202 ++volume->cache.refs; 203 buf->cache.u.buffer = buf; 204 hammer_cache_add(&buf->cache, ISBUFFER); 205 } 206 ++buf->cache.refs; 207 hammer_cache_flush(); 208 if ((ondisk = buf->ondisk) == NULL) { 209 buf->ondisk = ondisk = malloc(HAMMER_BUFSIZE); 210 if (isnew == 0) { 211 n = pread(volume->fd, ondisk, HAMMER_BUFSIZE, 212 buf->buf_disk_offset); 213 if (n != HAMMER_BUFSIZE) { 214 err(1, "get_buffer: %s:%016llx Read failed at " 215 "offset %lld", 216 volume->name, buf->buf_offset, 217 buf->buf_disk_offset); 218 } 219 } 220 } 221 if (isnew) { 222 bzero(ondisk, HAMMER_BUFSIZE); 223 buf->cache.modified = 1; 224 } 225 return(buf); 226 } 227 228 void 229 rel_buffer(struct buffer_info *buffer) 230 { 231 struct volume_info *volume; 232 233 assert(buffer->cache.refs > 0); 234 if (--buffer->cache.refs == 0) { 235 if (buffer->cache.delete) { 236 volume = buffer->volume; 237 if (buffer->cache.modified) 238 flush_buffer(buffer); 239 TAILQ_REMOVE(&volume->buffer_list, buffer, entry); 240 hammer_cache_del(&buffer->cache); 241 free(buffer->ondisk); 242 free(buffer); 243 rel_volume(volume); 244 } 245 } 246 } 247 248 void * 249 get_buffer_data(hammer_off_t buf_offset, struct buffer_info **bufferp, 250 int isnew) 251 { 252 struct buffer_info *buffer; 253 254 if (*bufferp) { 255 rel_buffer(*bufferp); 256 } 257 buffer = *bufferp = get_buffer(buf_offset, isnew); 258 return((char *)buffer->ondisk + ((int32_t)buf_offset & HAMMER_BUFMASK)); 259 } 260 261 /* 262 * Retrieve a pointer to a B-Tree node given a cluster offset. The underlying 263 * bufp is freed if non-NULL and a referenced buffer is loaded into it. 264 */ 265 hammer_node_ondisk_t 266 get_node(hammer_off_t node_offset, struct buffer_info **bufp) 267 { 268 struct buffer_info *buf; 269 270 if (*bufp) 271 rel_buffer(*bufp); 272 *bufp = buf = get_buffer(node_offset, 0); 273 return((void *)((char *)buf->ondisk + 274 (int32_t)(node_offset & HAMMER_BUFMASK))); 275 } 276 277 /* 278 * Allocate HAMMER elements - btree nodes, data storage, and record elements 279 * 280 * NOTE: hammer_alloc_fifo() initializes the fifo header for the returned 281 * item and zero's out the remainder, so don't bzero() it. 282 */ 283 void * 284 alloc_btree_element(hammer_off_t *offp) 285 { 286 struct buffer_info *buffer = NULL; 287 hammer_node_ondisk_t node; 288 289 node = alloc_blockmap(HAMMER_ZONE_BTREE_INDEX, sizeof(*node), 290 offp, &buffer); 291 bzero(node, sizeof(*node)); 292 /* XXX buffer not released, pointer remains valid */ 293 return(node); 294 } 295 296 hammer_record_ondisk_t 297 alloc_record_element(hammer_off_t *offp, int32_t data_len, void **datap) 298 { 299 struct buffer_info *record_buffer = NULL; 300 struct buffer_info *data_buffer = NULL; 301 hammer_record_ondisk_t rec; 302 303 rec = alloc_blockmap(HAMMER_ZONE_RECORD_INDEX, sizeof(*rec), 304 offp, &record_buffer); 305 bzero(rec, sizeof(*rec)); 306 307 if (data_len >= HAMMER_BUFSIZE) { 308 assert(data_len <= HAMMER_BUFSIZE); /* just one buffer */ 309 *datap = alloc_blockmap(HAMMER_ZONE_LARGE_DATA_INDEX, data_len, 310 &rec->base.data_off, &data_buffer); 311 rec->base.data_len = data_len; 312 bzero(*datap, data_len); 313 } else if (data_len) { 314 *datap = alloc_blockmap(HAMMER_ZONE_SMALL_DATA_INDEX, data_len, 315 &rec->base.data_off, &data_buffer); 316 rec->base.data_len = data_len; 317 bzero(*datap, data_len); 318 } else { 319 *datap = NULL; 320 } 321 /* XXX buf not released, ptr remains valid */ 322 return(rec); 323 } 324 325 /* 326 * Format a new blockmap 327 */ 328 void 329 format_blockmap(hammer_blockmap_entry_t blockmap, hammer_off_t zone_off) 330 { 331 blockmap->phys_offset = alloc_bigblock(); 332 blockmap->alloc_offset = zone_off; 333 } 334 335 static 336 void * 337 alloc_blockmap(int zone, int bytes, hammer_off_t *result_offp, 338 struct buffer_info **bufferp) 339 { 340 struct buffer_info *buffer; 341 struct volume_info *volume; 342 hammer_blockmap_entry_t rootmap; 343 hammer_blockmap_entry_t blockmap; 344 void *ptr; 345 int i; 346 347 volume = get_volume(RootVolNo); 348 349 rootmap = &volume->ondisk->vol0_blockmap[zone]; 350 351 /* 352 * Alignment and buffer-boundary issues 353 */ 354 bytes = (bytes + 7) & ~7; 355 if ((rootmap->phys_offset ^ (rootmap->phys_offset + bytes - 1)) & 356 ~HAMMER_BUFMASK64) { 357 volume->cache.modified = 1; 358 rootmap->phys_offset = (rootmap->phys_offset + bytes) & 359 ~HAMMER_BUFMASK64; 360 } 361 362 /* 363 * Dive layer 2 364 */ 365 i = (rootmap->alloc_offset >> (HAMMER_LARGEBLOCK_BITS + 366 HAMMER_BLOCKMAP_BITS)) & HAMMER_BLOCKMAP_RADIX_MASK; 367 368 blockmap = get_buffer_data(rootmap->phys_offset + i * sizeof(*blockmap), 369 bufferp, 0); 370 buffer = *bufferp; 371 if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_LAYER1_MASK) == 0) { 372 buffer->cache.modified = 1; 373 bzero(blockmap, sizeof(*blockmap)); 374 blockmap->phys_offset = alloc_bigblock(); 375 } 376 377 /* 378 * Dive layer 1 379 */ 380 i = (rootmap->alloc_offset >> HAMMER_LARGEBLOCK_BITS) & 381 HAMMER_BLOCKMAP_RADIX_MASK; 382 383 blockmap = get_buffer_data( 384 blockmap->phys_offset + i * sizeof(*blockmap), bufferp, 0); 385 buffer = *bufferp; 386 387 if ((rootmap->alloc_offset & HAMMER_LARGEBLOCK_MASK64) == 0) { 388 buffer->cache.modified = 1; 389 bzero(blockmap, sizeof(*blockmap)); 390 blockmap->phys_offset = alloc_bigblock(); 391 blockmap->bytes_free = HAMMER_LARGEBLOCK_SIZE; 392 } 393 394 buffer->cache.modified = 1; 395 volume->cache.modified = 1; 396 blockmap->bytes_free -= bytes; 397 *result_offp = rootmap->alloc_offset; 398 rootmap->alloc_offset += bytes; 399 400 i = (rootmap->phys_offset >> HAMMER_BUFFER_BITS) & 401 HAMMER_BUFFERS_PER_LARGEBLOCK_MASK; 402 ptr = get_buffer_data( 403 blockmap->phys_offset + i * HAMMER_BUFSIZE + 404 ((int32_t)*result_offp & HAMMER_BUFMASK), bufferp, 0); 405 buffer->cache.modified = 1; 406 407 rel_volume(volume); 408 return(ptr); 409 } 410 411 static 412 hammer_off_t 413 alloc_bigblock(void) 414 { 415 struct volume_info *volume; 416 hammer_off_t result_offset; 417 418 volume = get_volume(RootVolNo); 419 result_offset = volume->ondisk->vol0_free_off; 420 volume->ondisk->vol0_free_off += HAMMER_LARGEBLOCK_SIZE; 421 if ((volume->ondisk->vol0_free_off & HAMMER_OFF_SHORT_MASK) > 422 (hammer_off_t)(volume->ondisk->vol_buf_end - volume->ondisk->vol_buf_beg)) { 423 panic("alloc_bigblock: Ran out of room, filesystem too small"); 424 } 425 rel_volume(volume); 426 return(result_offset); 427 } 428 429 #if 0 430 /* 431 * Reserve space from the FIFO. Make sure that bytes does not cross a 432 * record boundary. 433 * 434 * Zero out base_bytes and initialize the fifo head and tail. The 435 * data area is not zerod. 436 */ 437 static 438 hammer_off_t 439 hammer_alloc_fifo(int32_t base_bytes, int32_t ext_bytes, 440 struct buffer_info **bufp, u_int16_t hdr_type) 441 { 442 struct buffer_info *buf; 443 struct volume_info *volume; 444 hammer_fifo_head_t head; 445 hammer_fifo_tail_t tail; 446 hammer_off_t off; 447 int32_t aligned_bytes; 448 449 aligned_bytes = (base_bytes + ext_bytes + HAMMER_TAIL_ONDISK_SIZE + 450 HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK; 451 452 volume = get_volume(RootVolNo); 453 off = volume->ondisk->vol0_fifo_end; 454 455 /* 456 * For now don't deal with transitions across buffer boundaries, 457 * only newfs_hammer uses this function. 458 */ 459 assert((off & ~HAMMER_BUFMASK64) == 460 ((off + aligned_bytes) & ~HAMMER_BUFMASK)); 461 462 *bufp = buf = get_buffer(off, 0); 463 464 buf->cache.modified = 1; 465 volume->cache.modified = 1; 466 467 head = (void *)((char *)buf->ondisk + ((int32_t)off & HAMMER_BUFMASK)); 468 bzero(head, base_bytes); 469 470 head->hdr_signature = HAMMER_HEAD_SIGNATURE; 471 head->hdr_type = hdr_type; 472 head->hdr_size = aligned_bytes; 473 head->hdr_seq = volume->ondisk->vol0_next_seq++; 474 475 tail = (void*)((char *)head + aligned_bytes - HAMMER_TAIL_ONDISK_SIZE); 476 tail->tail_signature = HAMMER_TAIL_SIGNATURE; 477 tail->tail_type = hdr_type; 478 tail->tail_size = aligned_bytes; 479 480 volume->ondisk->vol0_fifo_end += aligned_bytes; 481 volume->cache.modified = 1; 482 483 rel_volume(volume); 484 485 return(off); 486 } 487 488 #endif 489 490 /* 491 * Flush various tracking structures to disk 492 */ 493 494 /* 495 * Flush various tracking structures to disk 496 */ 497 void 498 flush_all_volumes(void) 499 { 500 struct volume_info *vol; 501 502 TAILQ_FOREACH(vol, &VolList, entry) 503 flush_volume(vol); 504 } 505 506 void 507 flush_volume(struct volume_info *volume) 508 { 509 struct buffer_info *buffer; 510 511 TAILQ_FOREACH(buffer, &volume->buffer_list, entry) 512 flush_buffer(buffer); 513 writehammerbuf(volume, volume->ondisk, 0); 514 volume->cache.modified = 0; 515 } 516 517 void 518 flush_buffer(struct buffer_info *buffer) 519 { 520 writehammerbuf(buffer->volume, buffer->ondisk, buffer->buf_disk_offset); 521 buffer->cache.modified = 0; 522 } 523 524 #if 0 525 /* 526 * Generic buffer initialization 527 */ 528 static void 529 init_fifo_head(hammer_fifo_head_t head, u_int16_t hdr_type) 530 { 531 head->hdr_signature = HAMMER_HEAD_SIGNATURE; 532 head->hdr_type = hdr_type; 533 head->hdr_size = 0; 534 head->hdr_crc = 0; 535 head->hdr_seq = 0; 536 } 537 538 #endif 539 540 #if 0 541 /* 542 * Core I/O operations 543 */ 544 static void 545 readhammerbuf(struct volume_info *vol, void *data, int64_t offset) 546 { 547 ssize_t n; 548 549 n = pread(vol->fd, data, HAMMER_BUFSIZE, offset); 550 if (n != HAMMER_BUFSIZE) 551 err(1, "Read volume %d (%s)", vol->vol_no, vol->name); 552 } 553 554 #endif 555 556 static void 557 writehammerbuf(struct volume_info *vol, const void *data, int64_t offset) 558 { 559 ssize_t n; 560 561 n = pwrite(vol->fd, data, HAMMER_BUFSIZE, offset); 562 if (n != HAMMER_BUFSIZE) 563 err(1, "Write volume %d (%s)", vol->vol_no, vol->name); 564 } 565 566 void 567 panic(const char *ctl, ...) 568 { 569 va_list va; 570 571 va_start(va, ctl); 572 vfprintf(stderr, ctl, va); 573 va_end(va); 574 fprintf(stderr, "\n"); 575 exit(1); 576 } 577 578