/*
 * Copyright (c) 2007-2008 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.55 2008/09/15 17:02:49 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to destroy a passively associated buf which we cannot
 * yet let go we set B_LOCKED in the buffer and then actively release it
 * later when we can.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

static void hammer_io_modify(hammer_io_t io, int count);
static void hammer_io_deallocate(struct buf *bp);
#if 0
static void hammer_io_direct_read_complete(struct bio *nbio);
#endif
static void hammer_io_direct_write_complete(struct bio *nbio);
static int hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data);
static void hammer_io_set_modlist(struct hammer_io *io);
static void hammer_io_flush_mark(hammer_volume_t volume);
static void hammer_io_flush_sync_done(struct bio *bio);


/*
 * Initialize a new, already-zero'd hammer_io structure, or reinitialize
 * an existing hammer_io structure which may have switched to another type.
 */
void
hammer_io_init(hammer_io_t io, hammer_volume_t volume, enum hammer_io_type type)
{
	io->volume = volume;
	io->hmp = volume->io.hmp;
	io->type = type;
}

/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.  The buffer is unlocked and marked appropriately for reclamation.
 *
 * The io may have 0 or 1 references depending on who called us.  The
 * caller is responsible for dealing with the refs.
 *
 * This call can only be made when no action is required on the buffer.
 *
 * The caller must own the buffer and the IO must indicate that the
 * structure no longer owns it (io.released != 0).
 */
static void
hammer_io_disassociate(hammer_io_structure_t iou)
{
	struct buf *bp = iou->io.bp;

	KKASSERT(iou->io.released);
	KKASSERT(iou->io.modified == 0);
	KKASSERT(LIST_FIRST(&bp->b_dep) == (void *)iou);
	buf_dep_init(bp);
	iou->io.bp = NULL;

	/*
	 * If the buffer was locked someone wanted to get rid of it.
	 */
	if (bp->b_flags & B_LOCKED) {
		--hammer_count_io_locked;
		bp->b_flags &= ~B_LOCKED;
	}
	if (iou->io.reclaim) {
		bp->b_flags |= B_NOCACHE|B_RELBUF;
		iou->io.reclaim = 0;
	}

	switch(iou->io.type) {
	case HAMMER_STRUCTURE_VOLUME:
		iou->volume.ondisk = NULL;
		break;
	case HAMMER_STRUCTURE_DATA_BUFFER:
	case HAMMER_STRUCTURE_META_BUFFER:
	case HAMMER_STRUCTURE_UNDO_BUFFER:
		iou->buffer.ondisk = NULL;
		break;
	}
}

/*
 * Wait for any physical IO to complete
 */
void
hammer_io_wait(hammer_io_t io)
{
	if (io->running) {
		crit_enter();
		tsleep_interlock(io);
		io->waiting = 1;
		for (;;) {
			tsleep(io, 0, "hmrflw", 0);
			if (io->running == 0)
				break;
			tsleep_interlock(io);
			io->waiting = 1;
			if (io->running == 0)
				break;
		}
		crit_exit();
	}
}

/*
 * Wait for all hammer_io-initiated write I/O's to complete.  This is not
 * supposed to count direct I/O's but some can leak through (for
 * non-full-sized direct I/Os).
 */
void
hammer_io_wait_all(hammer_mount_t hmp, const char *ident)
{
	hammer_io_flush_sync(hmp);
	crit_enter();
	while (hmp->io_running_space)
		tsleep(&hmp->io_running_space, 0, ident, 0);
	crit_exit();
}

#define HAMMER_MAXRA	4

/*
 * Load bp for a HAMMER structure.  The io must be exclusively locked by
 * the caller.
 *
 * This routine is mostly used on meta-data and small-data blocks.  Generally
 * speaking HAMMER assumes some locality of reference and will cluster
 * a 64K read.
 *
 * Note that clustering occurs at the device layer, not the logical layer.
 * If the buffers do not apply to the current operation they may apply to
 * some other operation.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io, hammer_off_t limit)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL) {
		hammer_count_io_running_read += io->bytes;
		if (hammer_cluster_enable) {
			error = cluster_read(devvp, limit,
					     io->offset, io->bytes,
					     HAMMER_CLUSTER_SIZE,
					     HAMMER_CLUSTER_BUFS, &io->bp);
		} else {
			error = bread(devvp, io->offset, io->bytes, &io->bp);
		}
		hammer_stats_disk_read += io->bytes;
		hammer_count_io_running_read -= io->bytes;

		/*
		 * The code generally assumes b_ops/b_dep has been set-up,
		 * even if we error out here.
		 */
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		KKASSERT(LIST_FIRST(&bp->b_dep) == NULL);
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		BUF_KERNPROC(bp);
		KKASSERT(io->modified == 0);
		KKASSERT(io->running == 0);
		KKASSERT(io->waiting == 0);
		io->released = 0;	/* we hold an active lock on bp */
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * Must be called with the IO exclusively locked.
 *
 * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background
 * I/O by forcing the buffer to not be in a released state before calling
 * it.
 *
 * This function will also mark the IO as modified but it will not
 * increment the modify_refs count.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL) {
		io->bp = getblk(devvp, io->offset, io->bytes, 0, 0);
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		KKASSERT(LIST_FIRST(&bp->b_dep) == NULL);
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		io->released = 0;
		KKASSERT(io->running == 0);
		io->waiting = 0;
		BUF_KERNPROC(bp);
	} else {
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
			io->released = 0;
		}
	}
	hammer_io_modify(io, 0);
	vfs_bio_clrbuf(bp);
	return(0);
}

/*
 * Remove potential device level aliases against buffers managed by high level
 * vnodes.  Aliases can also be created due to mixed buffer sizes.
 *
 * This is nasty because the buffers are also VMIO-backed.  Even if a buffer
 * does not exist its backing VM pages might, and we have to invalidate
 * those as well or a getblk() will reinstate them.
 */
void
hammer_io_inval(hammer_volume_t volume, hammer_off_t zone2_offset)
{
	hammer_io_structure_t iou;
	hammer_off_t phys_offset;
	struct buf *bp;

	phys_offset = volume->ondisk->vol_buf_beg +
		      (zone2_offset & HAMMER_OFF_SHORT_MASK);
	crit_enter();
	if ((bp = findblk(volume->devvp, phys_offset)) != NULL)
		bp = getblk(volume->devvp, phys_offset, bp->b_bufsize, 0, 0);
	else
		bp = getblk(volume->devvp, phys_offset, HAMMER_BUFSIZE, 0, 0);
	if ((iou = (void *)LIST_FIRST(&bp->b_dep)) != NULL) {
		hammer_ref(&iou->io.lock);
		hammer_io_clear_modify(&iou->io, 1);
		bundirty(bp);
		iou->io.released = 0;
		BUF_KERNPROC(bp);
		iou->io.reclaim = 1;
		iou->io.waitdep = 1;
		KKASSERT(iou->io.lock.refs == 1);
		hammer_rel_buffer(&iou->buffer, 0);
		/*hammer_io_deallocate(bp);*/
	} else {
		KKASSERT((bp->b_flags & B_LOCKED) == 0);
		bundirty(bp);
		bp->b_flags |= B_NOCACHE|B_RELBUF;
		brelse(bp);
	}
	crit_exit();
}

/*
 * This routine is called on the last reference to a hammer structure.
 * The io is usually interlocked with io.loading and io.refs must be 1.
 *
 * This routine may return a non-NULL bp to the caller for disposal.  Disposal
 * simply means the caller finishes decrementing the ref-count on the
 * IO structure then brelse()'s the bp.  The bp may or may not still be
 * passively associated with the IO.
 *
 * The only requirement here is that modified meta-data and volume-header
 * buffers may NOT be disassociated from the IO structure, and consequently
 * we also leave such buffers actively associated with the IO if they already
 * are (since the kernel can't do anything with them anyway).  Only the
 * flusher is allowed to write such buffers out.  Modified pure-data and
 * undo buffers are returned to the kernel but left passively associated
 * so we can track when the kernel writes the bp out.
 */
struct buf *
hammer_io_release(struct hammer_io *io, int flush)
{
	union hammer_io_structure *iou = (void *)io;
	struct buf *bp;

	if ((bp = io->bp) == NULL)
		return(NULL);

	/*
	 * Try to flush a dirty IO to disk if asked to by the
	 * caller or if the kernel tried to flush the buffer in the past.
	 *
	 * Kernel-initiated flushes are only allowed for pure-data buffers.
	 * Meta-data and volume buffers can only be flushed explicitly
	 * by HAMMER.
	 */
	if (io->modified) {
		if (flush) {
			hammer_io_flush(io);
		} else if (bp->b_flags & B_LOCKED) {
			switch(io->type) {
			case HAMMER_STRUCTURE_DATA_BUFFER:
			case HAMMER_STRUCTURE_UNDO_BUFFER:
				hammer_io_flush(io);
				break;
			default:
				break;
			}
		} /* else no explicit request to flush the buffer */
	}

	/*
	 * Wait for the IO to complete if asked to.  This occurs when
	 * the buffer must be disposed of definitively during an umount
	 * or buffer invalidation.
	 */
	if (io->waitdep && io->running) {
		hammer_io_wait(io);
	}

	/*
	 * Return control of the buffer to the kernel (with the proviso
	 * that our bioops can override kernel decisions with regards to
	 * the buffer).
	 */
	if ((flush || io->reclaim) && io->modified == 0 && io->running == 0) {
		/*
		 * Always disassociate the bp if an explicit flush
		 * was requested and the IO completed with no error
		 * (so unmount can really clean up the structure).
		 */
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
		} else {
			io->released = 1;
		}
		hammer_io_disassociate((hammer_io_structure_t)io);
		/* return the bp */
	} else if (io->modified) {
		/*
		 * Only certain IO types can be released to the kernel if
		 * the buffer has been modified.
		 *
		 * volume and meta-data IO types may only be explicitly
		 * flushed by HAMMER.
		 */
		switch(io->type) {
		case HAMMER_STRUCTURE_DATA_BUFFER:
		case HAMMER_STRUCTURE_UNDO_BUFFER:
			if (io->released == 0) {
				io->released = 1;
				bdwrite(bp);
			}
			break;
		default:
			break;
		}
		bp = NULL;	/* bp left associated */
	} else if (io->released == 0) {
		/*
		 * Clean buffers can be generally released to the kernel.
		 * We leave the bp passively associated with the HAMMER
		 * structure and use bioops to disconnect it later on
		 * if the kernel wants to discard the buffer.
		 *
		 * We can steal the structure's ownership of the bp.
		 */
		io->released = 1;
		if (bp->b_flags & B_LOCKED) {
			hammer_io_disassociate(iou);
			/* return the bp */
		} else {
			if (io->reclaim) {
				hammer_io_disassociate(iou);
				/* return the bp */
			} else {
				/* return the bp (bp passively associated) */
			}
		}
	} else {
		/*
		 * A released buffer is passively associated with our
		 * hammer_io structure.  The kernel cannot destroy it
		 * without making a bioops call.
		 * If the kernel (B_LOCKED) or we (reclaim) requested that
		 * the buffer be destroyed we destroy it, otherwise we do
		 * a quick get/release to reset its position in the kernel's
		 * LRU list.
		 *
		 * Leaving the buffer passively associated allows us to
		 * use the kernel's LRU buffer flushing mechanisms rather
		 * than rolling our own.
		 *
		 * XXX there are two ways of doing this.  We can re-acquire
		 * and passively release to reset the LRU, or not.
		 */
		if (io->running == 0) {
			regetblk(bp);
			if ((bp->b_flags & B_LOCKED) || io->reclaim) {
				hammer_io_disassociate(iou);
				/* return the bp */
			} else {
				/* return the bp (bp passively associated) */
			}
		} else {
			/*
			 * bp is left passively associated but we do not
			 * try to reacquire it.  Interactions with the io
			 * structure will occur on completion of the bp's
			 * I/O.
			 */
			bp = NULL;
		}
	}
	return(bp);
}

/*
 * This routine is called with a locked IO when a flush is desired and
 * no other references to the structure exist other than ours.  This
 * routine is ONLY called when HAMMER believes it is safe to flush a
 * potentially modified buffer out.
 */
void
hammer_io_flush(struct hammer_io *io)
{
	struct buf *bp;

	/*
	 * Degenerate case - nothing to flush if nothing is dirty.
	 */
	if (io->modified == 0) {
		return;
	}

	KKASSERT(io->bp);
	KKASSERT(io->modify_refs <= 0);

	/*
	 * Acquire ownership of the bp, particularly before we clear our
	 * modified flag.
	 *
	 * We are going to bawrite() this bp.  Don't leave a window where
	 * io->released is set, we actually own the bp rather than our
	 * buffer.
	 */
	bp = io->bp;
	if (io->released) {
		regetblk(bp);
		/* BUF_KERNPROC(io->bp); */
		/* io->released = 0; */
		KKASSERT(io->released);
		KKASSERT(io->bp == bp);
	}
	io->released = 1;

	/*
	 * Acquire exclusive access to the bp and then clear the modified
	 * state of the buffer prior to issuing I/O to interlock any
	 * modifications made while the I/O is in progress.  This shouldn't
	 * happen anyway but losing data would be worse.  The modified bit
	 * will be rechecked after the IO completes.
	 *
	 * NOTE: This call also finalizes the buffer's content (inval == 0).
	 *
	 * This is only legal when lock.refs == 1 (otherwise we might clear
	 * the modified bit while there are still users of the cluster
	 * modifying the data).
	 *
	 * Do this before potentially blocking so any attempt to modify the
	 * ondisk while we are blocked blocks waiting for us.
	 */
	hammer_ref(&io->lock);
	hammer_io_clear_modify(io, 0);
	hammer_unref(&io->lock);

	/*
	 * Transfer ownership to the kernel and initiate I/O.
	 */
	io->running = 1;
	io->hmp->io_running_space += io->bytes;
	hammer_count_io_running_write += io->bytes;
	bawrite(bp);
	hammer_io_flush_mark(io->volume);
}

/************************************************************************
 *				BUFFER DIRTYING				*
 ************************************************************************
 *
 * These routines deal with dependencies created when IO buffers get
 * modified.  The caller must call hammer_modify_*() on a referenced
 * HAMMER structure prior to modifying its on-disk data.
 *
 * Any intent to modify an IO buffer acquires the related bp and imposes
 * various write ordering dependencies.
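 *
 * A typical call sequence (a sketch only; the ondisk field names used
 * here are illustrative, not taken from this file) looks like:
 *
 *	hammer_modify_buffer(trans, buffer, &ondisk->some_field,
 *			     sizeof(ondisk->some_field));
 *	ondisk->some_field = new_value;
 *	hammer_modify_buffer_done(buffer);
 *
 * hammer_modify_buffer() dirties the IO, generates any required UNDO and
 * bumps modify_refs; the matching _done() call drops modify_refs and wakes
 * up anyone blocked in hammer_io_write_interlock().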
 */

/*
 * Mark a HAMMER structure as undergoing modification.  Meta-data buffers
 * are locked until the flusher can deal with them, pure data buffers
 * can be written out.
 */
static
void
hammer_io_modify(hammer_io_t io, int count)
{
	/*
	 * io->modify_refs must be >= 0
	 */
	while (io->modify_refs < 0) {
		io->waitmod = 1;
		tsleep(io, 0, "hmrmod", 0);
	}

	/*
	 * Shortcut if nothing to do.
	 */
	KKASSERT(io->lock.refs != 0 && io->bp != NULL);
	io->modify_refs += count;
	if (io->modified && io->released == 0)
		return;

	hammer_lock_ex(&io->lock);
	if (io->modified == 0) {
		hammer_io_set_modlist(io);
		io->modified = 1;
	}
	if (io->released) {
		regetblk(io->bp);
		BUF_KERNPROC(io->bp);
		io->released = 0;
		KKASSERT(io->modified != 0);
	}
	hammer_unlock(&io->lock);
}

static __inline
void
hammer_io_modify_done(hammer_io_t io)
{
	KKASSERT(io->modify_refs > 0);
	--io->modify_refs;
	if (io->modify_refs == 0 && io->waitmod) {
		io->waitmod = 0;
		wakeup(io);
	}
}

void
hammer_io_write_interlock(hammer_io_t io)
{
	while (io->modify_refs != 0) {
		io->waitmod = 1;
		tsleep(io, 0, "hmrmod", 0);
	}
	io->modify_refs = -1;
}

void
hammer_io_done_interlock(hammer_io_t io)
{
	KKASSERT(io->modify_refs == -1);
	io->modify_refs = 0;
	if (io->waitmod) {
		io->waitmod = 0;
		wakeup(io);
	}
}

/*
 * Caller intends to modify a volume's ondisk structure.
 *
 * This is only allowed if we are the flusher or we have a ref on the
 * sync_lock.
 */
void
hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
		     void *base, int len)
{
	KKASSERT (trans == NULL || trans->sync_lock_refs > 0);

	hammer_io_modify(&volume->io, 1);
	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)volume->ondisk;
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans, &volume->io,
			HAMMER_ENCODE_RAW_VOLUME(volume->vol_no, rel_offset),
			base, len);
	}
}

/*
 * Caller intends to modify a buffer's ondisk structure.
 *
 * This is only allowed if we are the flusher or we have a ref on the
 * sync_lock.
 */
void
hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
		     void *base, int len)
{
	KKASSERT (trans == NULL || trans->sync_lock_refs > 0);

	hammer_io_modify(&buffer->io, 1);
	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)buffer->ondisk;
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans, &buffer->io,
				     buffer->zone2_offset + rel_offset,
				     base, len);
	}
}

void
hammer_modify_volume_done(hammer_volume_t volume)
{
	hammer_io_modify_done(&volume->io);
}

void
hammer_modify_buffer_done(hammer_buffer_t buffer)
{
	hammer_io_modify_done(&buffer->io);
}

/*
 * Mark an entity as not being dirty any more and finalize any
 * delayed adjustments to the buffer.
 *
 * Delayed adjustments are an important performance enhancement, allowing
 * us to avoid recalculating B-Tree node CRCs over and over again when
 * making bulk-modifications to the B-Tree.
 *
 * If inval is non-zero delayed adjustments are ignored.
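 *
 * (The delayed adjustment handled here is the deferred CRC generation for
 * B-Tree nodes flagged HAMMER_NODE_NEEDSCRC; see the META_BUFFER case
 * below.)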
 *
 * This routine may dereference related btree nodes and cause the
 * buffer to be dereferenced.  The caller must own a reference on io.
 */
void
hammer_io_clear_modify(struct hammer_io *io, int inval)
{
	if (io->modified == 0)
		return;

	/*
	 * Take us off the mod-list and clear the modified bit.
	 */
	KKASSERT(io->mod_list != NULL);
	if (io->mod_list == &io->hmp->volu_list ||
	    io->mod_list == &io->hmp->meta_list) {
		io->hmp->locked_dirty_space -= io->bytes;
		hammer_count_dirtybufspace -= io->bytes;
	}
	TAILQ_REMOVE(io->mod_list, io, mod_entry);
	io->mod_list = NULL;
	io->modified = 0;

	/*
	 * If this bit is not set there are no delayed adjustments.
	 */
	if (io->gencrc == 0)
		return;
	io->gencrc = 0;

	/*
	 * Finalize requested CRCs.  The NEEDSCRC flag also holds a reference
	 * on the node (& underlying buffer).  Release the node after clearing
	 * the flag.
	 */
	if (io->type == HAMMER_STRUCTURE_META_BUFFER) {
		hammer_buffer_t buffer = (void *)io;
		hammer_node_t node;

restart:
		TAILQ_FOREACH(node, &buffer->clist, entry) {
			if ((node->flags & HAMMER_NODE_NEEDSCRC) == 0)
				continue;
			node->flags &= ~HAMMER_NODE_NEEDSCRC;
			KKASSERT(node->ondisk);
			if (inval == 0)
				node->ondisk->crc = crc32(&node->ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
			hammer_rel_node(node);
			goto restart;
		}
	}
	/* caller must still have ref on io */
	KKASSERT(io->lock.refs > 0);
}

/*
 * Clear the IO's modify list.  Even though the IO is no longer modified
 * it may still be on the lose_list.  This routine is called just before
 * the governing hammer_buffer is destroyed.
 */
void
hammer_io_clear_modlist(struct hammer_io *io)
{
	KKASSERT(io->modified == 0);
	if (io->mod_list) {
		crit_enter();	/* biodone race against list */
		KKASSERT(io->mod_list == &io->hmp->lose_list);
		TAILQ_REMOVE(io->mod_list, io, mod_entry);
		io->mod_list = NULL;
		crit_exit();
	}
}

static void
hammer_io_set_modlist(struct hammer_io *io)
{
	struct hammer_mount *hmp = io->hmp;

	KKASSERT(io->mod_list == NULL);

	switch(io->type) {
	case HAMMER_STRUCTURE_VOLUME:
		io->mod_list = &hmp->volu_list;
		hmp->locked_dirty_space += io->bytes;
		hammer_count_dirtybufspace += io->bytes;
		break;
	case HAMMER_STRUCTURE_META_BUFFER:
		io->mod_list = &hmp->meta_list;
		hmp->locked_dirty_space += io->bytes;
		hammer_count_dirtybufspace += io->bytes;
		break;
	case HAMMER_STRUCTURE_UNDO_BUFFER:
		io->mod_list = &hmp->undo_list;
		break;
	case HAMMER_STRUCTURE_DATA_BUFFER:
		io->mod_list = &hmp->data_list;
		break;
	}
	TAILQ_INSERT_TAIL(io->mod_list, io, mod_entry);
}

/************************************************************************
 *				HAMMER_BIOOPS				*
 ************************************************************************
 *
 */

/*
 * Pre-IO initiation kernel callback - cluster build only
 */
static void
hammer_io_start(struct buf *bp)
{
}

/*
 * Post-IO completion kernel callback - MAY BE CALLED FROM INTERRUPT!
 *
 * NOTE: HAMMER may modify a buffer after initiating I/O.  The modified bit
 * may also be set if we were marking a cluster header open.  Only remove
 * our dependency if the modified bit is clear.
 */
static void
hammer_io_complete(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT(iou->io.released == 1);

	/*
	 * Deal with people waiting for I/O to drain
	 */
	if (iou->io.running) {
		/*
		 * Deal with critical write errors.  Once a critical error
		 * has been flagged in hmp the UNDO FIFO will not be updated.
		 * That way crash recovery will give us a consistent
		 * filesystem.
		 *
		 * Because of this we can throw away failed UNDO buffers.  If
		 * we throw away META or DATA buffers we risk corrupting
		 * the now read-only version of the filesystem visible to
		 * the user.  Clear B_ERROR so the buffer is not re-dirtied
		 * by the kernel and ref the io so it doesn't get thrown
		 * away.
		 */
		if (bp->b_flags & B_ERROR) {
			hammer_critical_error(iou->io.hmp, NULL, bp->b_error,
					      "while flushing meta-data");
			switch(iou->io.type) {
			case HAMMER_STRUCTURE_UNDO_BUFFER:
				break;
			default:
				if (iou->io.ioerror == 0) {
					iou->io.ioerror = 1;
					if (iou->io.lock.refs == 0)
						++hammer_count_refedbufs;
					hammer_ref(&iou->io.lock);
				}
				break;
			}
			bp->b_flags &= ~B_ERROR;
			bundirty(bp);
#if 0
			hammer_io_set_modlist(&iou->io);
			iou->io.modified = 1;
#endif
		}
		hammer_stats_disk_write += iou->io.bytes;
		hammer_count_io_running_write -= iou->io.bytes;
		iou->io.hmp->io_running_space -= iou->io.bytes;
		if (iou->io.hmp->io_running_space == 0)
			wakeup(&iou->io.hmp->io_running_space);
		KKASSERT(iou->io.hmp->io_running_space >= 0);
		iou->io.running = 0;
	} else {
		hammer_stats_disk_read += iou->io.bytes;
	}

	if (iou->io.waiting) {
		iou->io.waiting = 0;
		wakeup(iou);
	}

	/*
	 * If B_LOCKED is set someone wanted to deallocate the bp at some
	 * point, do it now if refs has become zero.
	 */
	if ((bp->b_flags & B_LOCKED) && iou->io.lock.refs == 0) {
		KKASSERT(iou->io.modified == 0);
		--hammer_count_io_locked;
		bp->b_flags &= ~B_LOCKED;
		hammer_io_deallocate(bp);
		/* structure may be dead now */
	}
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This mostly occurs with clean buffers
 * but it may be possible for a holding structure to be marked dirty
 * while its buffer is passively associated.  The caller owns the bp.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 *
 * WARNING: Because this can be called directly by getnewbuf we cannot
 * recurse into the tree.  If a bp cannot be immediately disassociated
 * our only recourse is to set B_LOCKED.
 *
 * WARNING: This may be called from an interrupt via hammer_io_complete()
 */
static void
hammer_io_deallocate(struct buf *bp)
{
	hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT((bp->b_flags & B_LOCKED) == 0 && iou->io.running == 0);
	if (iou->io.lock.refs > 0 || iou->io.modified) {
		/*
		 * It is not legal to disassociate a modified buffer.  This
		 * case really shouldn't ever occur.
		 */
		bp->b_flags |= B_LOCKED;
		++hammer_count_io_locked;
	} else {
		/*
		 * Disassociate the BP.  If the io has no refs left we
		 * have to add it to the loose list.
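		 * The lose_list ("loose" list) holds such orphaned ios so
		 * they can be cleaned up later; the sweep that reclaims them
		 * lives outside this file (the flusher handles it).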
		 */
		hammer_io_disassociate(iou);
		if (iou->io.type != HAMMER_STRUCTURE_VOLUME) {
			KKASSERT(iou->io.bp == NULL);
			KKASSERT(iou->io.mod_list == NULL);
			crit_enter();	/* biodone race against list */
			iou->io.mod_list = &iou->io.hmp->lose_list;
			TAILQ_INSERT_TAIL(iou->io.mod_list, &iou->io, mod_entry);
			crit_exit();
		}
	}
}

static int
hammer_io_fsync(struct vnode *vp)
{
	return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
	return(0);
}

static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.  HAMMER only uses this for
 * B_CACHE buffers so checkread just shouldn't happen, but if it does,
 * allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependency.  In addition, kernel-demanded writes can only proceed for
 * certain types of buffers (i.e. UNDO and DATA types).  Other dirty
 * buffer types can only be explicitly written by the flusher.
 *
 * checkwrite will only be called for bdwrite()n buffers.  If we return
 * success the kernel is guaranteed to initiate the buffer write.
 */
static int
hammer_io_checkread(struct buf *bp)
{
	return(0);
}

static int
hammer_io_checkwrite(struct buf *bp)
{
	hammer_io_t io = (void *)LIST_FIRST(&bp->b_dep);

	/*
	 * This shouldn't happen under normal operation.
	 */
	if (io->type == HAMMER_STRUCTURE_VOLUME ||
	    io->type == HAMMER_STRUCTURE_META_BUFFER) {
		if (!panicstr)
			panic("hammer_io_checkwrite: illegal buffer");
		if ((bp->b_flags & B_LOCKED) == 0) {
			bp->b_flags |= B_LOCKED;
			++hammer_count_io_locked;
		}
		return(1);
	}

	/*
	 * We can only clear the modified bit if the IO is not currently
	 * undergoing modification.  Otherwise we may miss changes.
	 *
	 * Only data and undo buffers can reach here.  These buffers do
	 * not have terminal crc functions but we temporarily reference
	 * the IO anyway, just in case.
	 */
	if (io->modify_refs == 0 && io->modified) {
		hammer_ref(&io->lock);
		hammer_io_clear_modify(io, 0);
		hammer_unref(&io->lock);
	} else if (io->modified) {
		KKASSERT(io->type == HAMMER_STRUCTURE_DATA_BUFFER);
	}

	/*
	 * The kernel is going to start the IO, set io->running.
	 */
	KKASSERT(io->running == 0);
	io->running = 1;
	io->hmp->io_running_space += io->bytes;
	hammer_count_io_running_write += io->bytes;
	return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.
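 *
 * HAMMER does not currently make use of this hook and always returns 0
 * (no additional dependencies, do not delay the flush).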
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
	return(0);
}

struct bio_ops hammer_bioops = {
	.io_start	= hammer_io_start,
	.io_complete	= hammer_io_complete,
	.io_deallocate	= hammer_io_deallocate,
	.io_fsync	= hammer_io_fsync,
	.io_sync	= hammer_io_sync,
	.io_movedeps	= hammer_io_movedeps,
	.io_countdeps	= hammer_io_countdeps,
	.io_checkread	= hammer_io_checkread,
	.io_checkwrite	= hammer_io_checkwrite,
};

/************************************************************************
 *				DIRECT IO OPS				*
 ************************************************************************
 *
 * These functions operate directly on the buffer cache buffer associated
 * with a front-end vnode rather than a back-end device vnode.
 */

/*
 * Read a buffer associated with a front-end vnode directly from the
 * disk media.  The bio may be issued asynchronously.  If leaf is non-NULL
 * we validate the CRC.
 *
 * We must check for the presence of a HAMMER buffer to handle the case
 * where the reblocker has rewritten the data (which it does via the HAMMER
 * buffer system, not via the high-level vnode buffer cache), but not yet
 * committed the buffer to the media.
 */
int
hammer_io_direct_read(hammer_mount_t hmp, struct bio *bio,
		      hammer_btree_leaf_elm_t leaf)
{
	hammer_off_t buf_offset;
	hammer_off_t zone2_offset;
	hammer_volume_t volume;
	struct buf *bp;
	struct bio *nbio;
	int vol_no;
	int error;

	buf_offset = bio->bio_offset;
	KKASSERT((buf_offset & HAMMER_OFF_ZONE_MASK) ==
		 HAMMER_ZONE_LARGE_DATA);

	/*
	 * The buffer cache may have an aliased buffer (the reblocker can
	 * write them).  If it does we have to sync any dirty data before
	 * we can build our direct-read.  This is a non-critical code path.
	 */
	bp = bio->bio_buf;
	hammer_sync_buffers(hmp, buf_offset, bp->b_bufsize);

	/*
	 * Resolve to a zone-2 offset.  The conversion just requires
	 * munging the top 4 bits but we want to abstract it anyway
	 * so the blockmap code can verify the zone assignment.
	 */
	zone2_offset = hammer_blockmap_lookup(hmp, buf_offset, &error);
	if (error)
		goto done;
	KKASSERT((zone2_offset & HAMMER_OFF_ZONE_MASK) ==
		 HAMMER_ZONE_RAW_BUFFER);

	/*
	 * Resolve volume and raw-offset for 3rd level bio.  The
	 * offset will be specific to the volume.
	 */
	vol_no = HAMMER_VOL_DECODE(zone2_offset);
	volume = hammer_get_volume(hmp, vol_no, &error);
	if (error == 0 && zone2_offset >= volume->maxbuf_off)
		error = EIO;

	if (error == 0) {
		/*
		 * 3rd level bio
		 */
		nbio = push_bio(bio);
		nbio->bio_offset = volume->ondisk->vol_buf_beg +
				   (zone2_offset & HAMMER_OFF_SHORT_MASK);
#if 0
		/*
		 * XXX disabled - our CRC check doesn't work if the OS
		 * does bogus_page replacement on the direct-read.
		 */
		if (leaf && hammer_verify_data) {
			nbio->bio_done = hammer_io_direct_read_complete;
			nbio->bio_caller_info1.uvalue32 = leaf->data_crc;
		}
#endif
		hammer_stats_disk_read += bp->b_bufsize;
		vn_strategy(volume->devvp, nbio);
	}
	hammer_rel_volume(volume, 0);
done:
	if (error) {
		kprintf("hammer_direct_read: failed @ %016llx\n",
			zone2_offset);
		bp->b_error = error;
		bp->b_flags |= B_ERROR;
		biodone(bio);
	}
	return(error);
}

#if 0
/*
 * On completion of the BIO this callback must check the data CRC
 * and chain to the previous bio.
 */
static
void
hammer_io_direct_read_complete(struct bio *nbio)
{
	struct bio *obio;
	struct buf *bp;
	u_int32_t rec_crc = nbio->bio_caller_info1.uvalue32;

	bp = nbio->bio_buf;
	if (crc32(bp->b_data, bp->b_bufsize) != rec_crc) {
		kprintf("HAMMER: data_crc error @%016llx/%d\n",
			nbio->bio_offset, bp->b_bufsize);
		if (hammer_debug_debug)
			Debugger("");
		bp->b_flags |= B_ERROR;
		bp->b_error = EIO;
	}
	obio = pop_bio(nbio);
	biodone(obio);
}
#endif

/*
 * Write a buffer associated with a front-end vnode directly to the
 * disk media.  The bio may be issued asynchronously.
 *
 * The BIO is associated with the specified record and RECF_DIRECT_IO
 * is set.  The record is added to its object.
 */
int
hammer_io_direct_write(hammer_mount_t hmp, hammer_record_t record,
		       struct bio *bio)
{
	hammer_btree_leaf_elm_t leaf = &record->leaf;
	hammer_off_t buf_offset;
	hammer_off_t zone2_offset;
	hammer_volume_t volume;
	hammer_buffer_t buffer;
	struct buf *bp;
	struct bio *nbio;
	char *ptr;
	int vol_no;
	int error;

	buf_offset = leaf->data_offset;

	KKASSERT(buf_offset > HAMMER_ZONE_BTREE);
	KKASSERT(bio->bio_buf->b_cmd == BUF_CMD_WRITE);

	if ((buf_offset & HAMMER_BUFMASK) == 0 &&
	    leaf->data_len >= HAMMER_BUFSIZE) {
		/*
		 * We are using the vnode's bio to write directly to the
		 * media, any hammer_buffer at the same zone-X offset will
		 * now have stale data.
		 */
		zone2_offset = hammer_blockmap_lookup(hmp, buf_offset, &error);
		vol_no = HAMMER_VOL_DECODE(zone2_offset);
		volume = hammer_get_volume(hmp, vol_no, &error);

		if (error == 0 && zone2_offset >= volume->maxbuf_off)
			error = EIO;
		if (error == 0) {
			bp = bio->bio_buf;
			KKASSERT((bp->b_bufsize & HAMMER_BUFMASK) == 0);
			/*
			hammer_del_buffers(hmp, buf_offset,
					   zone2_offset, bp->b_bufsize);
			*/

			/*
			 * Second level bio - cached zone2 offset.
			 *
			 * (We can put our bio_done function in either the
			 * 2nd or 3rd level).
			 */
			nbio = push_bio(bio);
			nbio->bio_offset = zone2_offset;
			nbio->bio_done = hammer_io_direct_write_complete;
			nbio->bio_caller_info1.ptr = record;
			record->zone2_offset = zone2_offset;
			record->flags |= HAMMER_RECF_DIRECT_IO |
					 HAMMER_RECF_DIRECT_INVAL;

			/*
			 * Third level bio - raw offset specific to the
			 * correct volume.
			 */
			zone2_offset &= HAMMER_OFF_SHORT_MASK;
			nbio = push_bio(nbio);
			nbio->bio_offset = volume->ondisk->vol_buf_beg +
					   zone2_offset;
			hammer_stats_disk_write += bp->b_bufsize;
			vn_strategy(volume->devvp, nbio);
			hammer_io_flush_mark(volume);
		}
		hammer_rel_volume(volume, 0);
	} else {
		/*
		 * Must fit in a standard HAMMER buffer.  In this case all
		 * consumers use the HAMMER buffer system and RECF_DIRECT_IO
		 * does not need to be set-up.
		 */
		KKASSERT(((buf_offset ^ (buf_offset + leaf->data_len - 1)) & ~HAMMER_BUFMASK64) == 0);
		buffer = NULL;
		ptr = hammer_bread(hmp, buf_offset, &error, &buffer);
		if (error == 0) {
			bp = bio->bio_buf;
			bp->b_flags |= B_AGE;
			hammer_io_modify(&buffer->io, 1);
			bcopy(bp->b_data, ptr, leaf->data_len);
			hammer_io_modify_done(&buffer->io);
			hammer_rel_buffer(buffer, 0);
			bp->b_resid = 0;
			biodone(bio);
		}
	}
	if (error == 0) {
		/*
		 * The record is all setup now, add it.  Potential conflicts
		 * have already been dealt with.
		 */
		error = hammer_mem_add(record);
		KKASSERT(error == 0);
	} else {
		/*
		 * Major suckage occurred.
		 */
		kprintf("hammer_direct_write: failed @ %016llx\n",
			leaf->data_offset);
		bp = bio->bio_buf;
		bp->b_resid = 0;
		bp->b_error = EIO;
		bp->b_flags |= B_ERROR;
		biodone(bio);
		record->flags |= HAMMER_RECF_DELETED_FE;
		hammer_rel_mem_record(record);
	}
	return(error);
}

/*
 * On completion of the BIO this callback must disconnect
 * it from the hammer_record and chain to the previous bio.
 *
 * An I/O error forces the mount to read-only.  Data buffers
 * are not B_LOCKED like meta-data buffers are, so we have to
 * throw the buffer away to prevent the kernel from retrying.
 */
static
void
hammer_io_direct_write_complete(struct bio *nbio)
{
	struct bio *obio;
	struct buf *bp;
	hammer_record_t record = nbio->bio_caller_info1.ptr;

	bp = nbio->bio_buf;
	obio = pop_bio(nbio);
	if (bp->b_flags & B_ERROR) {
		hammer_critical_error(record->ip->hmp, record->ip,
				      bp->b_error,
				      "while writing bulk data");
		bp->b_flags |= B_INVAL;
	}
	biodone(obio);

	KKASSERT(record != NULL);
	KKASSERT(record->flags & HAMMER_RECF_DIRECT_IO);
	record->flags &= ~HAMMER_RECF_DIRECT_IO;
	if (record->flags & HAMMER_RECF_DIRECT_WAIT) {
		record->flags &= ~HAMMER_RECF_DIRECT_WAIT;
		wakeup(&record->flags);
	}
}


/*
 * This is called before a record is either committed to the B-Tree
 * or destroyed, to resolve any associated direct-IO.
 *
 * (1) We must wait for any direct-IO related to the record to complete.
 *
 * (2) We must remove any buffer cache aliases for data accessed via
 *     leaf->data_offset or zone2_offset so non-direct-IO consumers
 *     (the mirroring and reblocking code) do not see stale data.
 */
void
hammer_io_direct_wait(hammer_record_t record)
{
	/*
	 * Wait for I/O to complete
	 */
	if (record->flags & HAMMER_RECF_DIRECT_IO) {
		crit_enter();
		while (record->flags & HAMMER_RECF_DIRECT_IO) {
			record->flags |= HAMMER_RECF_DIRECT_WAIT;
			tsleep(&record->flags, 0, "hmdiow", 0);
		}
		crit_exit();
	}

	/*
	 * Invalidate any related buffer cache aliases.
	 */
	if (record->flags & HAMMER_RECF_DIRECT_INVAL) {
		KKASSERT(record->leaf.data_offset);
		hammer_del_buffers(record->ip->hmp,
				   record->leaf.data_offset,
				   record->zone2_offset,
				   record->leaf.data_len);
		record->flags &= ~HAMMER_RECF_DIRECT_INVAL;
	}
}

/*
 * This is called to remove the second-level cached zone-2 offset from
 * frontend buffer cache buffers, now stale due to a data relocation.
 * These offsets are generated by cluster_read() via VOP_BMAP, or directly
 * by hammer_vop_strategy_read().
 *
 * This is rather nasty because here we have something like the reblocker
 * scanning the raw B-Tree with no held references on anything, really,
 * other than a shared lock on the B-Tree node, and we have to access the
 * frontend's buffer cache to check for and clean out the association.
 * Specifically, if the reblocker is moving data on the disk, these cached
 * offsets will become invalid.
 *
 * Only data record types associated with the large-data zone are subject
 * to direct-io and need to be checked.
 */
void
hammer_io_direct_uncache(hammer_mount_t hmp, hammer_btree_leaf_elm_t leaf)
{
	struct hammer_inode_info iinfo;
	int zone;

	if (leaf->base.rec_type != HAMMER_RECTYPE_DATA)
		return;
	zone = HAMMER_ZONE_DECODE(leaf->data_offset);
	if (zone != HAMMER_ZONE_LARGE_DATA_INDEX)
		return;
	iinfo.obj_id = leaf->base.obj_id;
	iinfo.obj_asof = 0;	/* unused */
	iinfo.obj_localization = leaf->base.localization &
				 HAMMER_LOCALIZE_PSEUDOFS_MASK;
	iinfo.u.leaf = leaf;
	hammer_scan_inode_snapshots(hmp, &iinfo,
				    hammer_io_direct_uncache_callback,
				    leaf);
}

static int
hammer_io_direct_uncache_callback(hammer_inode_t ip, void *data)
{
	hammer_inode_info_t iinfo = data;
	hammer_off_t data_offset;
	hammer_off_t file_offset;
	struct vnode *vp;
	struct buf *bp;
	int blksize;

	if (ip->vp == NULL)
		return(0);
	data_offset = iinfo->u.leaf->data_offset;
	file_offset = iinfo->u.leaf->base.key - iinfo->u.leaf->data_len;
	blksize = iinfo->u.leaf->data_len;
	KKASSERT((blksize & HAMMER_BUFMASK) == 0);

	hammer_ref(&ip->lock);
	if (hammer_get_vnode(ip, &vp) == 0) {
		if ((bp = findblk(ip->vp, file_offset)) != NULL &&
		    bp->b_bio2.bio_offset != NOOFFSET) {
			bp = getblk(ip->vp, file_offset, blksize, 0, 0);
			bp->b_bio2.bio_offset = NOOFFSET;
			brelse(bp);
		}
		vput(vp);
	}
	hammer_rel_inode(ip, 0);
	return(0);
}


/*
 * This function is called when writes may have occurred on the volume,
 * indicating that the device may be holding cached writes.
 */
static void
hammer_io_flush_mark(hammer_volume_t volume)
{
	volume->vol_flags |= HAMMER_VOLF_NEEDFLUSH;
}

/*
 * This function ensures that the device has flushed any cached writes out.
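 *
 * Each volume flagged HAMMER_VOLF_NEEDFLUSH gets an asynchronous
 * BUF_CMD_FLUSH pseudo-buffer issued against its device vnode; the
 * pseudo-buffers are chained via bio_caller_info1.cluster_head and we
 * then wait for each one to complete before releasing it.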
 */
void
hammer_io_flush_sync(hammer_mount_t hmp)
{
	hammer_volume_t volume;
	struct buf *bp_base = NULL;
	struct buf *bp;

	RB_FOREACH(volume, hammer_vol_rb_tree, &hmp->rb_vols_root) {
		if (volume->vol_flags & HAMMER_VOLF_NEEDFLUSH) {
			volume->vol_flags &= ~HAMMER_VOLF_NEEDFLUSH;
			bp = getpbuf(NULL);
			bp->b_bio1.bio_offset = 0;
			bp->b_bufsize = 0;
			bp->b_bcount = 0;
			bp->b_cmd = BUF_CMD_FLUSH;
			bp->b_bio1.bio_caller_info1.cluster_head = bp_base;
			bp->b_bio1.bio_done = hammer_io_flush_sync_done;
			bp->b_flags |= B_ASYNC;
			bp_base = bp;
			vn_strategy(volume->devvp, &bp->b_bio1);
		}
	}
	while ((bp = bp_base) != NULL) {
		bp_base = bp->b_bio1.bio_caller_info1.cluster_head;
		while (bp->b_cmd != BUF_CMD_DONE) {
			crit_enter();
			tsleep_interlock(&bp->b_cmd);
			if (bp->b_cmd != BUF_CMD_DONE)
				tsleep(&bp->b_cmd, 0, "hmrFLS", 0);
			crit_exit();
		}
		bp->b_flags &= ~B_ASYNC;
		relpbuf(bp, NULL);
	}
}

/*
 * Callback to deal with completed flush commands to the device.
 */
static void
hammer_io_flush_sync_done(struct bio *bio)
{
	struct buf *bp;

	bp = bio->bio_buf;
	bp->b_cmd = BUF_CMD_DONE;
	wakeup(&bp->b_cmd);
}