/*
 * Copyright (c) 2007 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_ondisk.c,v 1.28 2008/02/08 08:30:59 dillon Exp $
 */
/*
 * Manage HAMMER's on-disk structures.  These routines are primarily
 * responsible for interfacing with the kernel's I/O subsystem and for
 * managing in-memory structures.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_free_volume(hammer_volume_t volume);
static int hammer_load_volume(hammer_volume_t volume);
static int hammer_load_buffer(hammer_buffer_t buffer, int isnew);
static int hammer_load_node(hammer_node_t node);
static hammer_off_t hammer_advance_fifo(hammer_volume_t volume,
			hammer_off_t off, int32_t bytes);

static hammer_off_t hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len,
			int32_t data_len, struct hammer_buffer **rec_bufferp,
			u_int16_t hdr_type, int can_cross,
			struct hammer_buffer **data2_bufferp, int *errorp);

/*
 * Red-Black tree support for various structures
 */
static int
hammer_ino_rb_compare(hammer_inode_t ip1, hammer_inode_t ip2)
{
	if (ip1->obj_id < ip2->obj_id)
		return(-1);
	if (ip1->obj_id > ip2->obj_id)
		return(1);
	if (ip1->obj_asof < ip2->obj_asof)
		return(-1);
	if (ip1->obj_asof > ip2->obj_asof)
		return(1);
	return(0);
}

static int
hammer_inode_info_cmp(hammer_inode_info_t info, hammer_inode_t ip)
{
	if (info->obj_id < ip->obj_id)
		return(-1);
	if (info->obj_id > ip->obj_id)
		return(1);
	if (info->obj_asof < ip->obj_asof)
		return(-1);
	if (info->obj_asof > ip->obj_asof)
		return(1);
	return(0);
}

static int
hammer_vol_rb_compare(hammer_volume_t vol1, hammer_volume_t vol2)
{
	if (vol1->vol_no < vol2->vol_no)
		return(-1);
	if (vol1->vol_no > vol2->vol_no)
		return(1);
	return(0);
}

static int
hammer_buf_rb_compare(hammer_buffer_t buf1, hammer_buffer_t buf2)
{
	if (buf1->buf_offset < buf2->buf_offset)
		return(-1);
	if (buf1->buf_offset > buf2->buf_offset)
		return(1);
	return(0);
}

static int
hammer_nod_rb_compare(hammer_node_t node1, hammer_node_t node2)
{
	if (node1->node_offset < node2->node_offset)
		return(-1);
	if (node1->node_offset > node2->node_offset)
		return(1);
	return(0);
}

/*
 * Note: The lookup function for hammer_ino_rb_tree winds up being named
 * hammer_ino_rb_tree_RB_LOOKUP_INFO(root, info).  The other lookup
 * functions are normal, e.g. hammer_buf_rb_tree_RB_LOOKUP(root, buf_offset).
 */
RB_GENERATE(hammer_ino_rb_tree, hammer_inode, rb_node, hammer_ino_rb_compare);
RB_GENERATE_XLOOKUP(hammer_ino_rb_tree, INFO, hammer_inode, rb_node,
		hammer_inode_info_cmp, hammer_inode_info_t);
RB_GENERATE2(hammer_vol_rb_tree, hammer_volume, rb_node,
	     hammer_vol_rb_compare, int32_t, vol_no);
RB_GENERATE2(hammer_buf_rb_tree, hammer_buffer, rb_node,
	     hammer_buf_rb_compare, hammer_off_t, buf_offset);
RB_GENERATE2(hammer_nod_rb_tree, hammer_node, rb_node,
	     hammer_nod_rb_compare, hammer_off_t, node_offset);
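
/*
 * Illustrative only (editor's note, not in the original source): given
 * the trees generated above, a volume lookup by number and an inode
 * lookup by (obj_id, obj_asof) key would look like:
 *
 *	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
 *	ip = hammer_ino_rb_tree_RB_LOOKUP_INFO(&hmp->rb_inos_root, &info);
 *
 * rb_inos_root is assumed to be the inode tree root in hammer_mount.
 */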
143 * 144 * Calls made to hammer_load_volume() or single-threaded 145 */ 146 int 147 hammer_install_volume(struct hammer_mount *hmp, const char *volname) 148 { 149 struct mount *mp; 150 hammer_volume_t volume; 151 struct hammer_volume_ondisk *ondisk; 152 struct nlookupdata nd; 153 struct buf *bp = NULL; 154 int error; 155 int ronly; 156 157 mp = hmp->mp; 158 ronly = ((mp->mnt_flag & MNT_RDONLY) ? 1 : 0); 159 160 /* 161 * Allocate a volume structure 162 */ 163 ++hammer_count_volumes; 164 volume = kmalloc(sizeof(*volume), M_HAMMER, M_WAITOK|M_ZERO); 165 volume->vol_name = kstrdup(volname, M_HAMMER); 166 volume->hmp = hmp; 167 hammer_io_init(&volume->io, HAMMER_STRUCTURE_VOLUME); 168 volume->io.offset = 0LL; 169 170 /* 171 * Get the device vnode 172 */ 173 error = nlookup_init(&nd, volume->vol_name, UIO_SYSSPACE, NLC_FOLLOW); 174 if (error == 0) 175 error = nlookup(&nd); 176 if (error == 0) 177 error = cache_vref(&nd.nl_nch, nd.nl_cred, &volume->devvp); 178 nlookup_done(&nd); 179 if (error == 0) { 180 if (vn_isdisk(volume->devvp, &error)) { 181 error = vfs_mountedon(volume->devvp); 182 } 183 } 184 if (error == 0 && 185 count_udev(volume->devvp->v_umajor, volume->devvp->v_uminor) > 0) { 186 error = EBUSY; 187 } 188 if (error == 0) { 189 vn_lock(volume->devvp, LK_EXCLUSIVE | LK_RETRY); 190 error = vinvalbuf(volume->devvp, V_SAVE, 0, 0); 191 if (error == 0) { 192 error = VOP_OPEN(volume->devvp, 193 (ronly ? FREAD : FREAD|FWRITE), 194 FSCRED, NULL); 195 } 196 vn_unlock(volume->devvp); 197 } 198 if (error) { 199 hammer_free_volume(volume); 200 return(error); 201 } 202 volume->devvp->v_rdev->si_mountpoint = mp; 203 204 /* 205 * Extract the volume number from the volume header and do various 206 * sanity checks. 207 */ 208 error = bread(volume->devvp, 0LL, HAMMER_BUFSIZE, &bp); 209 if (error) 210 goto late_failure; 211 ondisk = (void *)bp->b_data; 212 if (ondisk->vol_signature != HAMMER_FSBUF_VOLUME) { 213 kprintf("hammer_mount: volume %s has an invalid header\n", 214 volume->vol_name); 215 error = EFTYPE; 216 goto late_failure; 217 } 218 volume->vol_no = ondisk->vol_no; 219 volume->buffer_base = ondisk->vol_buf_beg; 220 volume->vol_flags = ondisk->vol_flags; 221 volume->nblocks = ondisk->vol_nblocks; 222 volume->maxbuf_off = HAMMER_ENCODE_RAW_BUFFER(volume->vol_no, 223 ondisk->vol_buf_end - ondisk->vol_buf_beg); 224 RB_INIT(&volume->rb_bufs_root); 225 226 hmp->mp->mnt_stat.f_blocks += volume->nblocks; 227 228 if (RB_EMPTY(&hmp->rb_vols_root)) { 229 hmp->fsid = ondisk->vol_fsid; 230 } else if (bcmp(&hmp->fsid, &ondisk->vol_fsid, sizeof(uuid_t))) { 231 kprintf("hammer_mount: volume %s's fsid does not match " 232 "other volumes\n", volume->vol_name); 233 error = EFTYPE; 234 goto late_failure; 235 } 236 237 /* 238 * Insert the volume structure into the red-black tree. 239 */ 240 if (RB_INSERT(hammer_vol_rb_tree, &hmp->rb_vols_root, volume)) { 241 kprintf("hammer_mount: volume %s has a duplicate vol_no %d\n", 242 volume->vol_name, volume->vol_no); 243 error = EEXIST; 244 } 245 246 /* 247 * Set the root volume . HAMMER special cases rootvol the structure. 248 * We do not hold a ref because this would prevent related I/O 249 * from being flushed. 250 */ 251 if (error == 0 && ondisk->vol_rootvol == ondisk->vol_no) { 252 hmp->rootvol = volume; 253 if (bp) { 254 brelse(bp); 255 bp = NULL; 256 } 257 hmp->fsid_udev = dev2udev(vn_todev(volume->devvp)); 258 } 259 late_failure: 260 if (bp) 261 brelse(bp); 262 if (error) { 263 /*vinvalbuf(volume->devvp, V_SAVE, 0, 0);*/ 264 VOP_CLOSE(volume->devvp, ronly ? 

/*
 * Unload and free a HAMMER volume.  Called via RB_SCAN; must return >= 0
 * for the scan to continue, so -1 is returned on failure.
 */
int
hammer_unload_volume(hammer_volume_t volume, void *data __unused)
{
	struct hammer_mount *hmp = volume->hmp;
	int ronly = ((hmp->mp->mnt_flag & MNT_RDONLY) ? 1 : 0);

	/*
	 * Sync clusters, sync volume
	 */

	hmp->mp->mnt_stat.f_blocks -= volume->nblocks;

	/*
	 * Clean up the root volume pointer, which is held unlocked in hmp.
	 */
	if (hmp->rootvol == volume)
		hmp->rootvol = NULL;

	/*
	 * Unload clusters and super-clusters.  Unloading a super-cluster
	 * also unloads related clusters, but the filesystem may not be
	 * using super-clusters so unload clusters anyway.
	 */
	RB_SCAN(hammer_buf_rb_tree, &volume->rb_bufs_root, NULL,
			hammer_unload_buffer, NULL);
	hammer_io_waitdep(&volume->io);

	/*
	 * Release our buffer and flush anything left in the buffer cache.
	 */
	hammer_io_release(&volume->io, 2);

	/*
	 * There should be no references on the volume, no clusters, and
	 * no super-clusters.
	 */
	KKASSERT(volume->io.lock.refs == 0);
	KKASSERT(RB_EMPTY(&volume->rb_bufs_root));

	volume->ondisk = NULL;
	if (volume->devvp) {
		if (ronly) {
			vinvalbuf(volume->devvp, 0, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD);
		} else {
			vinvalbuf(volume->devvp, V_SAVE, 0, 0);
			VOP_CLOSE(volume->devvp, FREAD|FWRITE);
		}
	}

	/*
	 * Destroy the structure
	 */
	RB_REMOVE(hammer_vol_rb_tree, &hmp->rb_vols_root, volume);
	hammer_free_volume(volume);
	return(0);
}

static
void
hammer_free_volume(hammer_volume_t volume)
{
	if (volume->vol_name) {
		kfree(volume->vol_name, M_HAMMER);
		volume->vol_name = NULL;
	}
	if (volume->devvp) {
		if (vn_isdisk(volume->devvp, NULL) &&
		    volume->devvp->v_rdev &&
		    volume->devvp->v_rdev->si_mountpoint == volume->hmp->mp
		) {
			volume->devvp->v_rdev->si_mountpoint = NULL;
		}
		vrele(volume->devvp);
		volume->devvp = NULL;
	}
	--hammer_count_volumes;
	kfree(volume, M_HAMMER);
}
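
/*
 * Illustrative usage of the get/rel pairing used throughout this file:
 * every successful hammer_get_volume() must be balanced by a
 * hammer_rel_volume(), e.g.
 *
 *	volume = hammer_get_volume(hmp, vol_no, &error);
 *	if (volume) {
 *		...
 *		hammer_rel_volume(volume, 0);
 *	}
 */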

/*
 * Get a HAMMER volume.  The volume must already exist.
 */
hammer_volume_t
hammer_get_volume(struct hammer_mount *hmp, int32_t vol_no, int *errorp)
{
	struct hammer_volume *volume;

	/*
	 * Locate the volume structure
	 */
	volume = RB_LOOKUP(hammer_vol_rb_tree, &hmp->rb_vols_root, vol_no);
	if (volume == NULL) {
		*errorp = ENOENT;
		return(NULL);
	}
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return(volume);
}

int
hammer_ref_volume(hammer_volume_t volume)
{
	int error;

	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		error = hammer_load_volume(volume);
		if (error)
			hammer_rel_volume(volume, 1);
	} else {
		error = 0;
	}
	return (error);
}

hammer_volume_t
hammer_get_root_volume(struct hammer_mount *hmp, int *errorp)
{
	hammer_volume_t volume;

	volume = hmp->rootvol;
	KKASSERT(volume != NULL);
	hammer_ref(&volume->io.lock);

	/*
	 * Deal with on-disk info
	 */
	if (volume->ondisk == NULL || volume->io.loading) {
		*errorp = hammer_load_volume(volume);
		if (*errorp) {
			hammer_rel_volume(volume, 1);
			volume = NULL;
		}
	} else {
		*errorp = 0;
	}
	return (volume);
}

/*
 * Load a volume's on-disk information.  The volume must be referenced and
 * not locked.  We temporarily acquire an exclusive lock to interlock
 * against releases or multiple gets.
 */
static int
hammer_load_volume(hammer_volume_t volume)
{
	struct hammer_volume_ondisk *ondisk;
	int error;

	hammer_lock_ex(&volume->io.lock);
	KKASSERT(volume->io.loading == 0);
	volume->io.loading = 1;

	if (volume->ondisk == NULL) {
		error = hammer_io_read(volume->devvp, &volume->io);
		if (error) {
			volume->io.loading = 0;
			hammer_unlock(&volume->io.lock);
			return (error);
		}
		volume->ondisk = ondisk = (void *)volume->io.bp->b_data;
	} else {
		error = 0;
	}
	volume->io.loading = 0;
	hammer_unlock(&volume->io.lock);
	return(0);
}

/*
 * Release a volume.  Call hammer_io_release on the last reference.  We have
 * to acquire an exclusive lock to interlock against volume->ondisk tests
 * in hammer_load_volume(), and hammer_io_release() also expects an exclusive
 * lock to be held.
 *
 * Volumes are not unloaded from memory during normal operation.
 */
void
hammer_rel_volume(hammer_volume_t volume, int flush)
{
	if (volume->io.lock.refs == 1) {
		hammer_lock_ex(&volume->io.lock);
		if (volume->io.lock.refs == 1) {
			volume->ondisk = NULL;
			hammer_io_release(&volume->io, flush);
		} else if (flush) {
			hammer_io_flush(&volume->io);
		}
		hammer_unlock(&volume->io.lock);
	}
	hammer_unref(&volume->io.lock);
}
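
/*
 * Note on the test-lock-retest pattern above (added commentary):
 * io.lock.refs is sampled unlocked, then re-tested after acquiring the
 * exclusive lock, because another thread may gain a reference while we
 * block on the lock.  Only a still-final reference unloads the ondisk
 * pointer.
 */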

/************************************************************************
 *				BUFFERS					*
 ************************************************************************
 *
 * Manage buffers.  Note that a buffer holds a reference to its associated
 * cluster, and its cluster will hold a reference to the cluster's volume.
 */
hammer_buffer_t
hammer_get_buffer(hammer_mount_t hmp, hammer_off_t buf_offset,
		  int isnew, int *errorp)
{
	hammer_buffer_t buffer;
	hammer_volume_t volume;
	int vol_no;

	buf_offset &= ~HAMMER_BUFMASK64;
	KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
	vol_no = HAMMER_VOL_DECODE(buf_offset);
	volume = hammer_get_volume(hmp, vol_no, errorp);
	if (volume == NULL)
		return(NULL);
	/*
	 * NOTE: buf_offset and maxbuf_off are both full offset
	 * specifications.
	 */
	KKASSERT(buf_offset < volume->maxbuf_off);

	/*
	 * Locate and lock the buffer structure, creating one if necessary.
	 */
again:
	buffer = RB_LOOKUP(hammer_buf_rb_tree, &volume->rb_bufs_root,
			   buf_offset);
	if (buffer == NULL) {
		++hammer_count_buffers;
		buffer = kmalloc(sizeof(*buffer), M_HAMMER, M_WAITOK|M_ZERO);
		buffer->buf_offset = buf_offset;
		buffer->volume = volume;
		hammer_io_init(&buffer->io, HAMMER_STRUCTURE_BUFFER);
		buffer->io.offset = volume->ondisk->vol_buf_beg +
				    (buf_offset & HAMMER_OFF_SHORT_MASK);
		TAILQ_INIT(&buffer->clist);
		hammer_ref(&buffer->io.lock);

		/*
		 * Insert the buffer into the RB tree and handle late
		 * collisions.
		 */
		if (RB_INSERT(hammer_buf_rb_tree, &volume->rb_bufs_root, buffer)) {
			hammer_unref(&buffer->io.lock);
			--hammer_count_buffers;
			kfree(buffer, M_HAMMER);
			goto again;
		}
		hammer_ref(&volume->io.lock);
	} else {
		hammer_ref(&buffer->io.lock);
	}

	/*
	 * Deal with on-disk info
	 */
	if (buffer->ondisk == NULL || buffer->io.loading) {
		*errorp = hammer_load_buffer(buffer, isnew);
		if (*errorp) {
			hammer_rel_buffer(buffer, 1);
			buffer = NULL;
		}
	} else {
		*errorp = 0;
	}
	hammer_rel_volume(volume, 0);
	return(buffer);
}

static int
hammer_load_buffer(hammer_buffer_t buffer, int isnew)
{
	hammer_volume_t volume;
	void *ondisk;
	int error;

	/*
	 * Load the buffer's on-disk info
	 */
	volume = buffer->volume;
	hammer_lock_ex(&buffer->io.lock);
	KKASSERT(buffer->io.loading == 0);
	buffer->io.loading = 1;

	if (buffer->ondisk == NULL) {
		if (isnew) {
			error = hammer_io_new(volume->devvp, &buffer->io);
		} else {
			error = hammer_io_read(volume->devvp, &buffer->io);
		}
		if (error) {
			buffer->io.loading = 0;
			hammer_unlock(&buffer->io.lock);
			return (error);
		}
		buffer->ondisk = ondisk = (void *)buffer->io.bp->b_data;
	} else if (isnew) {
		error = hammer_io_new(volume->devvp, &buffer->io);
	} else {
		error = 0;
	}
	if (error == 0 && isnew) {
		hammer_modify_buffer(buffer, NULL, 0);
		/* additional initialization goes here */
	}
	buffer->io.loading = 0;
	hammer_unlock(&buffer->io.lock);
	return (error);
}

/*
 * NOTE: Called from RB_SCAN, must return >= 0 for scan to continue.
 */
int
hammer_unload_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_flush_buffer_nodes(buffer);
	KKASSERT(buffer->io.lock.refs == 1);
	hammer_rel_buffer(buffer, 2);
	return(0);
}
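
/*
 * Added note: callers that intend to initialize or overwrite an entire
 * buffer pass isnew != 0 so hammer_load_buffer() instantiates the
 * buffer via hammer_io_new() without a disk read; see hammer_bnew()
 * below for the wrapper that relies on this.
 */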

/*
 * Reference a buffer that is either already referenced or via a specially
 * handled pointer (aka cursor->buffer).
 */
int
hammer_ref_buffer(hammer_buffer_t buffer)
{
	int error;

	hammer_ref(&buffer->io.lock);
	if (buffer->ondisk == NULL || buffer->io.loading) {
		error = hammer_load_buffer(buffer, 0);
		if (error) {
			hammer_rel_buffer(buffer, 1);
			/*
			 * NOTE: buffer pointer can become stale after
			 * the above release.
			 */
		}
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Release a buffer.  We have to deal with several places where
 * another thread can ref the buffer.
 *
 * Only destroy the structure itself if the related buffer cache buffer
 * was disassociated from it.  This ties the management of the structure
 * to the buffer cache subsystem.  buffer->ondisk determines whether the
 * embedded io is referenced or not.
 */
void
hammer_rel_buffer(hammer_buffer_t buffer, int flush)
{
	hammer_volume_t volume;

	if (buffer->io.lock.refs == 1) {
		hammer_lock_ex(&buffer->io.lock);
		if (buffer->io.lock.refs == 1) {
			hammer_io_release(&buffer->io, flush);

			if (buffer->io.bp == NULL &&
			    buffer->io.lock.refs == 1) {
				hammer_flush_buffer_nodes(buffer);
				KKASSERT(TAILQ_EMPTY(&buffer->clist));
				volume = buffer->volume;
				RB_REMOVE(hammer_buf_rb_tree,
					  &volume->rb_bufs_root, buffer);
				buffer->volume = NULL; /* sanity */
				--hammer_count_buffers;
				kfree(buffer, M_HAMMER);
				hammer_rel_volume(volume, 0);
				return;
			}
		} else if (flush) {
			hammer_io_flush(&buffer->io);
		}
		hammer_unlock(&buffer->io.lock);
	}
	hammer_unref(&buffer->io.lock);
}

/*
 * Access the filesystem buffer containing the specified hammer offset.
 * buf_offset is a conglomeration of the volume number and vol_buf_beg
 * relative buffer offset.  It must also have bit 55 set to be valid.
 * (see hammer_off_t in hammer_disk.h).
 *
 * Any prior buffer in *bufferp will be released and replaced by the
 * requested buffer.
 */
void *
hammer_bread(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
	     struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t xoff = (int32_t)buf_offset & HAMMER_BUFMASK;

	buf_offset &= ~HAMMER_BUFMASK64;

	buffer = *bufferp;
	if (buffer == NULL || buffer->buf_offset != buf_offset) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(hmp, buf_offset, 0, errorp);
		*bufferp = buffer;
	} else {
		*errorp = 0;
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	if (buffer == NULL)
		return(NULL);
	else
		return((char *)buffer->ondisk + xoff);
}
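
/*
 * Illustrative usage (hammer_free_fifo() below follows this pattern):
 *
 *	hammer_buffer_t buffer = NULL;
 *	hammer_fifo_head_t head;
 *	int error;
 *
 *	head = hammer_bread(hmp, fifo_offset, &error, &buffer);
 *	if (head) {
 *		...
 *	}
 *	if (buffer)
 *		hammer_rel_buffer(buffer, 0);
 */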

/*
 * Access the filesystem buffer containing the specified hammer offset.
 * No disk read operation occurs.  The result buffer may contain garbage.
 *
 * Any prior buffer in *bufferp will be released and replaced by the
 * requested buffer.
 */
void *
hammer_bnew(hammer_mount_t hmp, hammer_off_t buf_offset, int *errorp,
	    struct hammer_buffer **bufferp)
{
	hammer_buffer_t buffer;
	int32_t xoff = (int32_t)buf_offset & HAMMER_BUFMASK;

	buf_offset &= ~HAMMER_BUFMASK64;

	buffer = *bufferp;
	if (buffer == NULL || buffer->buf_offset != buf_offset) {
		if (buffer)
			hammer_rel_buffer(buffer, 0);
		buffer = hammer_get_buffer(hmp, buf_offset, 1, errorp);
		*bufferp = buffer;
	} else {
		*errorp = 0;
	}

	/*
	 * Return a pointer to the buffer data.
	 */
	if (buffer == NULL)
		return(NULL);
	else
		return((char *)buffer->ondisk + xoff);
}

/************************************************************************
 *				NODES					*
 ************************************************************************
 *
 * Manage B-Tree nodes.  B-Tree nodes represent the primary indexing
 * method used by the HAMMER filesystem.
 *
 * Unlike other HAMMER structures, a hammer_node can be PASSIVELY
 * associated with its buffer, and will only reference the buffer while
 * the node itself is referenced.
 *
 * A hammer_node can also be passively associated with other HAMMER
 * structures, such as inodes, while retaining 0 references.  These
 * associations can be cleared backwards using a pointer-to-pointer in
 * the hammer_node.
 *
 * This allows the HAMMER implementation to cache hammer_nodes long-term
 * and short-cut a great deal of the infrastructure's complexity.  In
 * most cases a cached node can be reacquired without having to dip into
 * either the buffer or cluster management code.
 *
 * The caller must pass a referenced cluster on call and will retain
 * ownership of the reference on return.  The node will acquire its own
 * additional references, if necessary.
 */
hammer_node_t
hammer_get_node(hammer_mount_t hmp, hammer_off_t node_offset, int *errorp)
{
	hammer_volume_t volume;
	hammer_node_t node;
	int32_t vol_no;

	KKASSERT((node_offset & HAMMER_OFF_ZONE_MASK) ==
		 HAMMER_ZONE_RAW_BUFFER);
	vol_no = HAMMER_VOL_DECODE(node_offset);
	volume = hammer_get_volume(hmp, vol_no, errorp);
	if (volume == NULL)
		return(NULL);

	/*
	 * Locate the structure, allocating one if necessary.
	 */
again:
	node = RB_LOOKUP(hammer_nod_rb_tree, &volume->rb_nods_root,
			 node_offset);
	if (node == NULL) {
		++hammer_count_nodes;
		node = kmalloc(sizeof(*node), M_HAMMER, M_WAITOK|M_ZERO);
		node->node_offset = node_offset;
		node->volume = volume;	/* not directly referenced */
		if (RB_INSERT(hammer_nod_rb_tree, &volume->rb_nods_root,
			      node)) {
			--hammer_count_nodes;
			kfree(node, M_HAMMER);
			goto again;
		}
	}
	hammer_ref(&node->lock);
	*errorp = hammer_load_node(node);
	if (*errorp) {
		hammer_rel_node(node);
		node = NULL;
	}
	hammer_rel_volume(volume, 0);
	return(node);
}

/*
 * Reference an already-referenced node.
 */
int
hammer_ref_node(hammer_node_t node)
{
	int error;

	KKASSERT(node->lock.refs > 0);
	hammer_ref(&node->lock);
	if ((error = hammer_load_node(node)) != 0)
		hammer_rel_node(node);
	return(error);
}
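
/*
 * Added note: hammer_ref_node() asserts that a reference already
 * exists; the first reference to a node must be acquired with
 * hammer_get_node() above.
 */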

/*
 * Load a node's on-disk data reference.
 */
static int
hammer_load_node(hammer_node_t node)
{
	hammer_buffer_t buffer;
	int error;

	if (node->ondisk)
		return(0);
	error = 0;
	hammer_lock_ex(&node->lock);
	if (node->ondisk == NULL) {
		/*
		 * This is a little confusing but the gist is that
		 * node->buffer determines whether the node is on
		 * the buffer's clist and node->ondisk determines
		 * whether the buffer is referenced.
		 */
		if ((buffer = node->buffer) != NULL) {
			error = hammer_ref_buffer(buffer);
		} else {
			buffer = hammer_get_buffer(node->volume->hmp,
						   node->node_offset, 0,
						   &error);
			if (buffer) {
				KKASSERT(error == 0);
				TAILQ_INSERT_TAIL(&buffer->clist,
						  node, entry);
				node->buffer = buffer;
			}
		}
		if (error == 0) {
			node->ondisk = (void *)((char *)buffer->ondisk +
				(node->node_offset & HAMMER_BUFMASK));
		}
	}
	hammer_unlock(&node->lock);
	return (error);
}

/*
 * Safely reference a node, interlock against flushes via the IO subsystem.
 */
hammer_node_t
hammer_ref_node_safe(struct hammer_mount *hmp, struct hammer_node **cache,
		     int *errorp)
{
	hammer_node_t node;

	if ((node = *cache) != NULL)
		hammer_ref(&node->lock);
	if (node) {
		*errorp = hammer_load_node(node);
		if (*errorp) {
			hammer_rel_node(node);
			node = NULL;
		}
	} else {
		*errorp = ENOENT;
	}
	return(node);
}

/*
 * Release a hammer_node.  On the last release the node dereferences
 * its underlying buffer and may or may not be destroyed.
 */
void
hammer_rel_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	/*
	 * If this isn't the last ref just decrement the ref count and
	 * return.
	 */
	if (node->lock.refs > 1) {
		hammer_unref(&node->lock);
		return;
	}

	/*
	 * If there is no ondisk info or no buffer the node failed to load,
	 * remove the last reference and destroy the node.
	 */
	if (node->ondisk == NULL) {
		hammer_unref(&node->lock);
		hammer_flush_node(node);
		/* node is stale now */
		return;
	}

	/*
	 * Do final cleanups and then either destroy the node or leave it
	 * passively cached.  The buffer reference is removed regardless.
	 */
	buffer = node->buffer;
	node->ondisk = NULL;

	if ((node->flags & (HAMMER_NODE_DELETED|HAMMER_NODE_FLUSH)) == 0) {
		hammer_unref(&node->lock);
		hammer_rel_buffer(buffer, 0);
		return;
	}

	/*
	 * Destroy the node if it has been marked for deletion.  We mark
	 * it as being free.  Note that the disk space is physically
	 * freed when the fifo cycles back through the node.
	 */
	if (node->flags & HAMMER_NODE_DELETED)
		hammer_free_fifo(node->volume->hmp, node->node_offset);

	/*
	 * Destroy the node.  Record pertinent data because the node
	 * becomes stale the instant we flush it.
	 */
	hammer_unref(&node->lock);
	hammer_flush_node(node);
	/* node is stale */
	hammer_rel_buffer(buffer, 0);
}
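
/*
 * Illustrative sketch (assumed from the API below): a long-lived
 * structure such as an inode keeps a struct hammer_node *cache field.
 * It is filled via hammer_cache_node(node, &ip->cache) and later
 * revalidated with hammer_ref_node_safe(hmp, &ip->cache, &error);
 * ip->cache is a hypothetical field name used only for illustration.
 */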

/*
 * Passively cache a referenced hammer_node in *cache.  The caller may
 * release the node on return.
 */
void
hammer_cache_node(hammer_node_t node, struct hammer_node **cache)
{
	hammer_node_t old;

	/*
	 * If the node is being deleted, don't cache it!
	 */
	if (node->flags & HAMMER_NODE_DELETED)
		return;

	/*
	 * Cache the node.  If we previously cached a different node we
	 * have to give HAMMER a chance to destroy it.
	 */
again:
	if (node->cache1 != cache) {
		if (node->cache2 != cache) {
			if ((old = *cache) != NULL) {
				KKASSERT(node->lock.refs != 0);
				hammer_uncache_node(cache);
				goto again;
			}
			if (node->cache2)
				*node->cache2 = NULL;
			node->cache2 = node->cache1;
			node->cache1 = cache;
			*cache = node;
		} else {
			struct hammer_node **tmp;
			tmp = node->cache1;
			node->cache1 = node->cache2;
			node->cache2 = tmp;
		}
	}
}

void
hammer_uncache_node(struct hammer_node **cache)
{
	hammer_node_t node;

	if ((node = *cache) != NULL) {
		*cache = NULL;
		if (node->cache1 == cache) {
			node->cache1 = node->cache2;
			node->cache2 = NULL;
		} else if (node->cache2 == cache) {
			node->cache2 = NULL;
		} else {
			panic("hammer_uncache_node: missing cache linkage");
		}
		if (node->cache1 == NULL && node->cache2 == NULL)
			hammer_flush_node(node);
	}
}

/*
 * Remove a node's cache references and destroy the node if it has no
 * other references or backing store.
 */
void
hammer_flush_node(hammer_node_t node)
{
	hammer_buffer_t buffer;

	if (node->cache1)
		*node->cache1 = NULL;
	if (node->cache2)
		*node->cache2 = NULL;
	if (node->lock.refs == 0 && node->ondisk == NULL) {
		RB_REMOVE(hammer_nod_rb_tree, &node->volume->rb_nods_root,
			  node);
		if ((buffer = node->buffer) != NULL) {
			node->buffer = NULL;
			TAILQ_REMOVE(&buffer->clist, node, entry);
			/* buffer is unreferenced because ondisk is NULL */
		}
		--hammer_count_nodes;
		kfree(node, M_HAMMER);
	}
}

/*
 * Flush passively cached B-Tree nodes associated with this buffer.
 * This is only called when the buffer is about to be destroyed, so
 * none of the nodes should have any references.
 */
void
hammer_flush_buffer_nodes(hammer_buffer_t buffer)
{
	hammer_node_t node;

	while ((node = TAILQ_FIRST(&buffer->clist)) != NULL) {
		KKASSERT(node->lock.refs == 0 && node->ondisk == NULL);
		hammer_ref(&node->lock);
		node->flags |= HAMMER_NODE_FLUSH;
		hammer_rel_node(node);
	}
}


/************************************************************************
 *				ALLOCATORS				*
 ************************************************************************/

/*
 * Allocate a B-Tree node.
 */
hammer_node_t
hammer_alloc_btree(hammer_mount_t hmp, int *errorp)
{
	hammer_buffer_t buffer = NULL;
	hammer_node_t node = NULL;
	hammer_off_t node_offset;

	node_offset = hammer_alloc_fifo(hmp, sizeof(struct hammer_node_ondisk),
					0, &buffer, HAMMER_HEAD_TYPE_BTREE,
					0, NULL,
					errorp);
	if (*errorp == 0)
		node = hammer_get_node(hmp, node_offset, errorp);
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(node);
}
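
/*
 * Added commentary for hammer_alloc_record() below: when a DATA
 * record's payload crosses a buffer boundary the caller receives two
 * data pointers.  The first *data2_index payload bytes live at *data1p
 * in *rec_bufferp, and the remaining data_len - *data2_index bytes
 * start at *data2p in *data2_bufferp; otherwise *data2_index equals
 * data_len and *data2p is NULL.
 */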

/*
 * The returned buffers are already appropriately marked as being modified.
 * If the caller marks them again unnecessary undo records may be generated.
 *
 * The core record (rec_len) cannot cross a buffer boundary.  The record +
 * data is only allowed to cross a buffer boundary for HAMMER_RECTYPE_DATA.
 */
void *
hammer_alloc_record(hammer_mount_t hmp,
		    hammer_off_t *rec_offp, u_int8_t rec_type,
		    int32_t rec_len, struct hammer_buffer **rec_bufferp,
		    hammer_off_t *data_offp, int32_t data_len,
		    void **data1p, void **data2p, int32_t *data2_index,
		    struct hammer_buffer **data2_bufferp,
		    int *errorp)
{
	int32_t aligned_rec_len, n;
	hammer_off_t rec_offset;
	hammer_record_ondisk_t rec;
	int can_cross;

	aligned_rec_len = (rec_len + HAMMER_HEAD_ALIGN_MASK) &
			  ~HAMMER_HEAD_ALIGN_MASK;
	can_cross = (rec_type == HAMMER_RECTYPE_DATA);

	rec_offset = hammer_alloc_fifo(hmp, aligned_rec_len, data_len,
				       rec_bufferp, HAMMER_HEAD_TYPE_RECORD,
				       can_cross, data2_bufferp, errorp);
	if (*errorp)
		return(NULL);

	/*
	 * Basic return values.
	 */
	*rec_offp = rec_offset;
	if (data_offp)
		*data_offp = rec_offset + aligned_rec_len;
	rec = (void *)((char *)(*rec_bufferp)->ondisk +
		       ((int32_t)rec_offset & HAMMER_BUFMASK));
	if (data_len)
		rec->base.data_off = rec_offset + aligned_rec_len;
	rec->base.data_len = data_len;
	if (data1p)
		*data1p = (void *)((char *)rec + aligned_rec_len);
	if (data2_index) {
		n = ((int32_t)rec_offset & HAMMER_BUFMASK) +
		    aligned_rec_len + data_len;
		if (n > HAMMER_BUFSIZE) {
			*data2_index = data_len - (n - HAMMER_BUFSIZE);
			KKASSERT(can_cross != 0);
			*data2p = (*data2_bufferp)->ondisk;
		} else {
			*data2_index = data_len;
			*data2p = NULL;
		}
	} else {
		KKASSERT(data2p == NULL);
	}
	return(rec);
}

/*
 * Generate an undo fifo entry and return the buffer to the caller (XXX).
 * The caller must create a dependency to ensure that the undo record is
 * flushed before the modified buffer is flushed.
 */
int
hammer_generate_undo(hammer_mount_t hmp, hammer_off_t off, void *base, int len)
{
	hammer_off_t rec_offset;
	hammer_fifo_undo_t undo;
	hammer_buffer_t buffer = NULL;
	int error;

	rec_offset = hammer_alloc_fifo(hmp, sizeof(*undo), len,
				       &buffer, HAMMER_HEAD_TYPE_UNDO,
				       0, NULL, &error);
	if (error == 0) {
		undo = (void *)((char *)buffer->ondisk +
				((int32_t)rec_offset & HAMMER_BUFMASK));
		undo->undo_offset = off;
		bcopy(base, undo + 1, len);
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
	return(error);
}
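
/*
 * Alignment sketch (added): assuming HAMMER_HEAD_ALIGN_MASK is
 * (alignment - 1) for a power-of-two alignment, the rounding used by
 * hammer_alloc_record() and hammer_alloc_fifo() is
 *
 *	aligned = (len + HAMMER_HEAD_ALIGN_MASK) & ~HAMMER_HEAD_ALIGN_MASK;
 *
 * e.g. for a 64-byte alignment a 100 byte length rounds up to 128.
 */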

/*
 * Allocate space from the FIFO.  The first rec_len bytes will be zeroed.
 * The entire space is marked modified (the caller should not remark it,
 * as that will cause unnecessary undo records to be added).
 */
static
hammer_off_t
hammer_alloc_fifo(hammer_mount_t hmp, int32_t rec_len, int32_t data_len,
		  struct hammer_buffer **rec_bufferp, u_int16_t hdr_type,
		  int can_cross,
		  struct hammer_buffer **data2_bufferp, int *errorp)
{
	hammer_volume_t root_volume;
	hammer_volume_t end_volume;
	hammer_volume_ondisk_t ondisk;
	hammer_fifo_head_t head;
	hammer_off_t end_off = 0;
	hammer_off_t tmp_off = 0;
	int32_t end_vol_no;
	int32_t tmp_vol_no;
	int32_t xoff;
	int32_t aligned_bytes;
	int must_pad;

	aligned_bytes = (rec_len + data_len + HAMMER_HEAD_ALIGN_MASK) &
			~HAMMER_HEAD_ALIGN_MASK;

	root_volume = hammer_get_root_volume(hmp, errorp);
	while (root_volume) {
		hammer_modify_volume(root_volume, NULL, 0);
		ondisk = root_volume->ondisk;

		end_off = ondisk->vol0_fifo_end;
		end_vol_no = HAMMER_VOL_DECODE(end_off);

		end_volume = hammer_get_volume(hmp, end_vol_no, errorp);
		if (*errorp)
			goto done;

		/*
		 * Check to see if we ran out of space.  Include some extra
		 * room.
		 *
		 * vol0_fifo_end cannot be advanced into the same buffer
		 * that vol0_fifo_beg resides in.  This allows us to
		 * instantiate a new buffer without reading it in.
		 *
		 * XXX messy.
		 */
		tmp_off = ondisk->vol0_fifo_beg & ~HAMMER_BUFMASK64;
		tmp_vol_no = HAMMER_VOL_DECODE(tmp_off);
		if ((tmp_off & HAMMER_OFF_SHORT_MASK) == 0) {
			if (end_vol_no + 1 == tmp_vol_no) {
				tmp_vol_no = end_vol_no;
				tmp_off = end_volume->maxbuf_off;
			} else if (end_vol_no + 1 == hmp->nvolumes &&
				   tmp_vol_no == 0) {
				tmp_vol_no = end_vol_no;
				tmp_off = end_volume->maxbuf_off;
			}
		}
		hammer_rel_volume(end_volume, 0);

		/*
		 * XXX dummy head at end of fifo
		 */
		if (end_vol_no == tmp_vol_no &&
		    end_off < tmp_off &&
		    end_off + aligned_bytes + sizeof(*head) >= tmp_off) {
			*errorp = ENOSPC;
			goto done;
		}

		if ((int32_t)end_off & HAMMER_BUFMASK)
			head = hammer_bread(hmp, end_off, errorp, rec_bufferp);
		else
			head = hammer_bnew(hmp, end_off, errorp, rec_bufferp);
		if (*errorp)
			goto done;

		/*
		 * Load the buffer, retry if someone else squeaked in
		 * while we were blocked.
		 */
		if (ondisk->vol0_fifo_end != end_off)
			continue;

		/*
		 * Ok, we're gonna do something.  Modify the buffer
		 */
		hammer_modify_buffer(*rec_bufferp, NULL, 0);
		if (ondisk->vol0_fifo_end != end_off)
			continue;
		xoff = (int32_t)end_off & HAMMER_BUFMASK;

		/*
		 * The non-data portion of the fifo record cannot cross
		 * a buffer boundary.
		 *
		 * The entire record cannot cross a buffer boundary if
		 * can_cross is 0.
		 *
		 * It is illegal for a record to cross a volume boundary.
		 *
		 * It is illegal for a record to cross a recovery boundary
		 * (this is so recovery code is guaranteed a record rather
		 * than data at certain points).
		 *
		 * Add a pad record and loop if it does.
		 */
		must_pad = 0;
		if (xoff + rec_len > HAMMER_BUFSIZE)
			must_pad = 1;
		if (can_cross == 0) {
			if (xoff + aligned_bytes > HAMMER_BUFSIZE)
				must_pad = 1;
		} else {
			if (xoff + aligned_bytes > HAMMER_BUFSIZE &&
			    (end_off + aligned_bytes) >=
			    (*rec_bufferp)->volume->maxbuf_off) {
				must_pad = 1;
			}
			if ((end_off ^ (end_off + aligned_bytes)) &
			    HAMMER_OFF_SHORT_REC_MASK) {
				must_pad = 1;
			}
		}
		if (must_pad) {
			must_pad = HAMMER_BUFSIZE - xoff;
			head->hdr_signature = HAMMER_HEAD_SIGNATURE;
			head->hdr_type = HAMMER_HEAD_TYPE_PAD;
			head->hdr_fwd_link = must_pad;
			head->hdr_seq = 0;	/* XXX seq */
			KKASSERT((must_pad & 7) == 0);
			ondisk->vol0_fifo_end =
				hammer_advance_fifo((*rec_bufferp)->volume,
						    end_off, must_pad);
			/* XXX rev_link */
			continue;
		}

		if (xoff + aligned_bytes > HAMMER_BUFSIZE) {
			KKASSERT(xoff + aligned_bytes <= HAMMER_BUFSIZE * 2);
			hammer_bnew(hmp, end_off + (HAMMER_BUFSIZE - xoff),
				    errorp, data2_bufferp);
			hammer_modify_buffer(*data2_bufferp, NULL, 0);
			if (*errorp)
				goto done;
		}

		head->hdr_signature = HAMMER_HEAD_SIGNATURE;
		head->hdr_type = hdr_type;
		head->hdr_fwd_link = aligned_bytes / 64;
		head->hdr_rev_link = -1;	/* XXX */
		head->hdr_crc = 0;
		head->hdr_seq = 0;		/* XXX */
		ondisk->vol0_fifo_end =
			hammer_advance_fifo((*rec_bufferp)->volume,
					    end_off, aligned_bytes);
done:
		hammer_rel_volume(root_volume, 0);
		break;
	}
	if (*errorp)
		end_off = 0;
	return(end_off);
}
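
/*
 * Added note on the allocation loop above: each "continue" re-samples
 * vol0_fifo_end because hammer_bread()/hammer_bnew()/hammer_modify_*()
 * may block, allowing another thread to advance the fifo; the
 * allocation only commits when end_off is still current.
 */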

/*
 * Mark a fifo record as having been freed.  XXX needs undo.
 */
void
hammer_free_fifo(hammer_mount_t hmp, hammer_off_t fifo_offset)
{
	hammer_buffer_t buffer = NULL;
	hammer_fifo_head_t head;
	int error;

	head = hammer_bread(hmp, fifo_offset, &error, &buffer);
	if (head) {
		hammer_modify_buffer(buffer, &head->hdr_type,
				     sizeof(head->hdr_type));
		head->hdr_type |= HAMMER_HEAD_TYPEF_FREED;
	}
	if (buffer)
		hammer_rel_buffer(buffer, 0);
}

/*
 * Attempt to rewind the FIFO.
 *
 * This routine is allowed to do nothing.
 */
void
hammer_unwind_fifo(hammer_mount_t hmp, hammer_off_t rec_offset)
{
}

/*
 * Advance the FIFO a certain number of bytes.
 */
static
hammer_off_t
hammer_advance_fifo(hammer_volume_t volume, hammer_off_t off, int32_t bytes)
{
	int32_t vol_no;

	off += bytes;
	KKASSERT(off <= volume->maxbuf_off);
	KKASSERT((off & HAMMER_OFF_ZONE_MASK) == HAMMER_ZONE_RAW_BUFFER);
	if (off == volume->maxbuf_off) {
		vol_no = volume->vol_no + 1;
		if (vol_no == volume->hmp->nvolumes)
			vol_no = 0;
		off = HAMMER_ENCODE_RAW_BUFFER(vol_no, 0);
	}
	return(off);
}

/*
 * Sync dirty buffers to the media
 */

static int hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data);
static int hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data);

int
hammer_sync_hmp(hammer_mount_t hmp, int waitfor)
{
	struct hammer_sync_info info;

	info.error = 0;
	info.waitfor = waitfor;

	vmntvnodescan(hmp->mp, VMSC_GETVP|VMSC_NOWAIT,
		      hammer_sync_scan1, hammer_sync_scan2, &info);

	RB_SCAN(hammer_vol_rb_tree, &hmp->rb_vols_root, NULL,
		hammer_sync_volume, &info);
	return(info.error);
}

static int
hammer_sync_scan1(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_inode *ip;

	ip = VTOI(vp);
	if (vp->v_type == VNON || ip == NULL ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(-1);
	}
	return(0);
}

static int
hammer_sync_scan2(struct mount *mp, struct vnode *vp, void *data)
{
	struct hammer_sync_info *info = data;
	struct hammer_inode *ip;
	int error;

	ip = VTOI(vp);
	if (vp->v_type == VNON || vp->v_type == VBAD ||
	    ((ip->flags & HAMMER_INODE_MODMASK) == 0 &&
	     RB_EMPTY(&vp->v_rbdirty_tree))) {
		return(0);
	}
	error = VOP_FSYNC(vp, info->waitfor);
	if (error)
		info->error = error;
	return(0);
}

int
hammer_sync_volume(hammer_volume_t volume, void *data)
{
	struct hammer_sync_info *info = data;

	hammer_ref(&volume->io.lock);
	RB_SCAN(hammer_buf_rb_tree, &volume->rb_bufs_root, NULL,
		hammer_sync_buffer, info);
	hammer_rel_volume(volume, 1);
	return(0);
}

int
hammer_sync_buffer(hammer_buffer_t buffer, void *data __unused)
{
	hammer_ref(&buffer->io.lock);
	hammer_rel_buffer(buffer, 1);
	return(0);
}

/*
 * Generic buffer initialization.  Initialize the A-list into an all-allocated
 * state with the free block limit properly set.
 *
 * Note that alloc_new_buffer() will free the appropriate block range via
 * the appropriate cluster alist, so the free count is properly propagated.
 */
void
hammer_init_fifo(hammer_fifo_head_t head, u_int16_t type)
{
	head->hdr_signature = HAMMER_HEAD_SIGNATURE;
	head->hdr_type = type;
	head->hdr_rev_link = 0;
	head->hdr_fwd_link = 0;
	head->hdr_crc = 0;
	head->hdr_seq = 0;
}