1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.23 2008/03/24 23:50:23 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to release a passively associated buf which we cannot
 * yet let go we set B_LOCKED in the buffer and then actively release it
 * later when we can.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

/*
 * Forward declarations for bioops callbacks that are also invoked
 * directly from within this file.
 */
static void hammer_io_deallocate(struct buf *bp);
static int hammer_io_checkwrite(struct buf *bp);

/*
 * Initialize an already-zero'd hammer_io structure.
 *
 * Only the structure type is recorded here; every other field (bp and
 * the modified/released/running/waiting flags) is assumed to have been
 * zeroed by the caller's allocator.
 */
void
hammer_io_init(hammer_io_t io, enum hammer_io_type type)
{
	io->type = type;
}

/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.  Called with the io structure exclusively locked.
 *
 * The io may have 0 or 1 references depending on who called us.  The
 * caller is responsible for dealing with the refs.
 *
 * This call can only be made when no action is required on the buffer.
 * NOTE(review): the elseit argument selects whether the buffer is still
 * actively held (elseit != 0, released == 0) or was already passively
 * released (elseit == 0, released != 0) — see the assertions below.
 */
static void
hammer_io_disassociate(hammer_io_structure_t iou, int elseit)
{
	struct buf *bp = iou->io.bp;

	/* A modified buffer must be flushed before it can be detached */
	KKASSERT(iou->io.modified == 0);
	buf_dep_init(bp);		/* sever the b_dep linkage to us */
	iou->io.bp = NULL;
	bp->b_flags &= ~B_LOCKED;	/* no longer pinned by HAMMER */
	if (elseit) {
		/* We still actively own the buffer: release it now */
		KKASSERT(iou->io.released == 0);
		iou->io.released = 1;
		bqrelse(bp);
	} else {
		/* Buffer was already passively released earlier */
		KKASSERT(iou->io.released);
	}

	/* Invalidate the cached ondisk pointer of the owning structure */
	switch(iou->io.type) {
	case HAMMER_STRUCTURE_VOLUME:
		iou->volume.ondisk = NULL;
		break;
	case HAMMER_STRUCTURE_BUFFER:
		iou->buffer.ondisk = NULL;
		break;
	}
}

/*
 * Wait for any physical IO to complete.
 *
 * io->waiting tells the completion side to wakeup(io).  The
 * interlock/flag sequence is re-armed on every loop iteration because
 * the completion path clears io->waiting before waking us.
 */
static void
hammer_io_wait(hammer_io_t io)
{
	if (io->running) {
		crit_enter();
		tsleep_interlock(io);
		io->waiting = 1;
		for (;;) {
			tsleep(io, 0, "hmrflw", 0);
			if (io->running == 0)
				break;
			/* re-arm the interlock before re-testing */
			tsleep_interlock(io);
			io->waiting = 1;
			if (io->running == 0)
				break;
		}
		crit_exit();
	}
}

/*
 * Load bp for a HAMMER structure.  The io is exclusively locked by the
 * caller.
 *
 * On first use this reads HAMMER_BUFSIZE bytes from the device at
 * io->offset and attaches our worklist to the bp; if a bp is already
 * associated the call is a no-op returning 0.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL) {
		error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
		if (error == 0) {
			bp = io->bp;
			bp->b_ops = &hammer_bioops;
			/* hook our worklist into b_dep so the bioops fire */
			LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
			BUF_KERNPROC(bp);
		}
		io->modified = 0;	/* no new modifications yet */
		io->released = 0;	/* we hold an active lock on bp */
		io->running = 0;
		io->waiting = 0;
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background
 * I/O so we can call it.
 *
 * The caller is responsible for calling hammer_modify_*() on the appropriate
 * HAMMER structure.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL) {
		/* No bp yet: acquire one without reading from disk */
		io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		io->modified = 0;
		io->released = 0;
		io->running = 0;
		io->waiting = 0;
		BUF_KERNPROC(bp);
	} else {
		/* Reacquire a passively released bp so we own it again */
		if (io->released) {
			regetblk(bp);
			BUF_KERNPROC(bp);
			io->released = 0;
		}
	}
	vfs_bio_clrbuf(bp);
	return(0);
}

/*
 * This routine is called on the last reference to a hammer structure.
 * The io is usually locked exclusively (but may not be during unmount).
 *
 * If io->flush is set, or B_LOCKED was set indicating that the kernel
 * wanted to recycle the buffer, an asynchronous write is issued for a
 * modified buffer and a clean, idle bp is fully disassociated.
 *
 * If io->waitdep is set this function waits until any in-flight write
 * has completed before releasing the buffer.
 * (NOTE(review): older comments referred to a "flush" argument with
 * values 1/2; the current code reads the io->flush and io->waitdep
 * fields instead.)
 */
void
hammer_io_release(struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL)
		return;

#if 0
	/*
	 * If flush is 2 wait for dependancies
	 */
	while (io->waitdep && TAILQ_FIRST(&io->deplist)) {
		hammer_io_wait(TAILQ_FIRST(&io->deplist));
	}
#endif

	/*
	 * Try to flush a dirty IO to disk if asked to by the caller
	 * or if the kernel tried to flush the buffer in the past.
	 */
	if (io->modified && (io->flush || (bp->b_flags & B_LOCKED)))
		hammer_io_flush(io);

	/*
	 * Wait for the write issued above (or any earlier one) to
	 * finish when synchronous semantics were requested.
	 */
	if (io->waitdep && io->running) {
		hammer_io_wait(io);
	}

	/*
	 * Actively or passively release the buffer.  A clean, idle bp
	 * with a flush request pending is fully disassociated; a
	 * still-modified bp is pushed out via a delayed write.
	 */
	if (io->flush && io->modified == 0 && io->running == 0) {
		if (io->released) {
			regetblk(bp);	/* reacquire before disassociating */
			BUF_KERNPROC(bp);
			io->released = 0;
		}
		hammer_io_disassociate((hammer_io_structure_t)io, 1);
	} else if (io->modified) {
		if (io->released == 0) {
			io->released = 1;
			bdwrite(bp);	/* delayed write; kernel owns bp now */
		}
	} else if (io->released == 0) {
		io->released = 1;
		bqrelse(bp);		/* clean: just requeue it */
	}
}

/*
 * This routine is called with a locked IO when a flush is desired and
 * no other references to the structure exists other then ours.  This
 * routine is ONLY called when HAMMER believes it is safe to flush a
 * potentially modified buffer out.
 */
void
hammer_io_flush(struct hammer_io *io)
{
	struct buf *bp;

	/*
	 * Can't flush if the IO isn't modified.  Consume the flush
	 * request either way.
	 */
	if (io->modified == 0) {
		io->flush = 0;
		return;
	}

	KKASSERT(io->bp);

	/*
	 * XXX - umount syncs buffers without referencing them, check for 0
	 * also.
	 */
	KKASSERT(io->lock.refs == 0 || io->lock.refs == 1);

	/*
	 * Reset modified to 0 here and re-check it after the IO completes.
	 * This is only legal when lock.refs == 1 (otherwise we might clear
	 * the modified bit while there are still users of the cluster
	 * modifying the data).
	 *
	 * Do this before potentially blocking so any attempt to modify the
	 * ondisk while we are blocked blocks waiting for us.
	 */
	io->modified = 0;	/* force interlock */
	io->flush = 0;
	bp = io->bp;

	if (io->released) {
		regetblk(bp);
		/* BUF_KERNPROC(io->bp); */
		io->released = 0;
	}
	io->released = 1;	/* bawrite() hands the bp to the kernel */
	io->running = 1;	/* cleared by hammer_io_complete() */
	bawrite(bp);
}

/************************************************************************
 *				BUFFER DIRTYING				*
 ************************************************************************
 *
 * These routines deal with dependancies created when IO buffers get
 * modified.  The caller must call hammer_modify_*() on a referenced
 * HAMMER structure prior to modifying its on-disk data.
 *
 * Any intent to modify an IO buffer acquires the related bp and imposes
 * various write ordering dependancies.
 */

/*
 * Mark a HAMMER structure as undergoing modification, reacquiring its
 * bp if it had been passively released.
 *
 * NOTE(review): an older comment described a return value, but the
 * function is void; updating the modified/released state is its only
 * effect.
 */
static __inline
void
hammer_io_modify(hammer_io_t io)
{
	/*
	 * Shortcut if nothing to do: already dirty and actively held.
	 */
	KKASSERT(io->lock.refs != 0 && io->bp != NULL);
	if (io->modified && io->released == 0)
		return;

	hammer_lock_ex(&io->lock);
	io->modified = 1;
	if (io->released) {
		regetblk(io->bp);
		BUF_KERNPROC(io->bp);
		io->released = 0;
		KKASSERT(io->modified != 0);
	}
	hammer_unlock(&io->lock);
}

/*
 * Caller intends to modify len bytes at base within the volume's
 * ondisk structure.  Marks the volume io modified and generates an
 * undo record covering the range when len is non-zero.
 */
void
hammer_modify_volume(hammer_transaction_t trans, hammer_volume_t volume,
		     void *base, int len)
{
	hammer_io_modify(&volume->io);

	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)volume->ondisk;
		/* the modified range must fall within a single buffer */
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans,
			HAMMER_ENCODE_RAW_VOLUME(volume->vol_no, rel_offset),
			base, len);
	}
}

/*
 * Caller intends to modify a buffer's ondisk structure.
 * The related
 * cluster must be marked open prior to being able to flush the modified
 * buffer so get that I/O going now.
 *
 * Marks the buffer io modified and generates an undo record for the
 * len bytes at base when len is non-zero.
 */
void
hammer_modify_buffer(hammer_transaction_t trans, hammer_buffer_t buffer,
		     void *base, int len)
{
	hammer_io_modify(&buffer->io);
	if (len) {
		intptr_t rel_offset = (intptr_t)base - (intptr_t)buffer->ondisk;
		/* the modified range must not cross a buffer boundary */
		KKASSERT((rel_offset & ~(intptr_t)HAMMER_BUFMASK) == 0);
		hammer_generate_undo(trans,
				     buffer->zone2_offset + rel_offset,
				     base, len);
	}
}

/*
 * Mark an entity as not being dirty any more -- this usually occurs when
 * the governing a-list has freed the entire entity.
 *
 * XXX currently a no-op; the previous implementation is preserved
 * below under #if 0.
 */
void
hammer_io_clear_modify(struct hammer_io *io)
{
#if 0
	struct buf *bp;

	io->modified = 0;
	if ((bp = io->bp) != NULL) {
		if (io->released) {
			regetblk(bp);
			/* BUF_KERNPROC(io->bp); */
		} else {
			io->released = 1;
		}
		if (io->modified == 0) {
			kprintf("hammer_io_clear_modify: cleared %p\n", io);
			bundirty(bp);
			bqrelse(bp);
		} else {
			bdwrite(bp);
		}
	}
#endif
}

/************************************************************************
 *				HAMMER_BIOOPS				*
 ************************************************************************
 *
 */

/*
 * Pre-IO initiation kernel callback - cluster build only.  Nothing for
 * HAMMER to do here.
 */
static void
hammer_io_start(struct buf *bp)
{
}

/*
 * Post-IO completion kernel callback
 *
 * NOTE: HAMMER may modify a buffer after initiating I/O.  The modified bit
 * may also be set if we were marking a cluster header open.  Only remove
 * our dependancy if the modified bit is clear.
 */
static void
hammer_io_complete(struct buf *bp)
{
	/* our hammer_io worklist is the first dependancy on the bp */
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	/* the write paths mark the bp released before starting the IO */
	KKASSERT(iou->io.released == 1);

	/* XXX DEP REMOVE */

	/*
	 * Clear the running state and wake anyone blocked in
	 * hammer_io_wait().
	 */
	iou->io.running = 0;
	if (iou->io.waiting) {
		iou->io.waiting = 0;
		wakeup(iou);
	}

	/*
	 * If the kernel wanted the buffer back (B_LOCKED) and nobody
	 * holds a reference, try to clean out the buffer now.
	 */
	if ((bp->b_flags & B_LOCKED) && iou->io.lock.refs == 0) {
		KKASSERT(iou->io.modified == 0);
		bp->b_flags &= ~B_LOCKED;
		hammer_io_deallocate(bp);
		/* structure may be dead now */
	}
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This case can only occur with read-only
 * bp's.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 *
 * WARNING: Because this can be called directly by getnewbuf we cannot
 * recurse into the tree.  If a bp cannot be immediately disassociated
 * our only recourse is to set B_LOCKED.
 *
 * WARNING: If the HAMMER structure is passively cached we have to
 * scrap it here.
 */
static void
hammer_io_deallocate(struct buf *bp)
{
	hammer_io_structure_t iou = (void *)LIST_FIRST(&bp->b_dep);

	KKASSERT((bp->b_flags & B_LOCKED) == 0 && iou->io.running == 0);
	if (iou->io.lock.refs > 0 || iou->io.modified) {
		/* Still referenced or dirty: pin the bp for later */
		bp->b_flags |= B_LOCKED;
	} else {
		/* XXX interlock against ref or another disassociate */
		/* XXX this can leave HAMMER structures lying around */
		hammer_io_disassociate(iou, 0);
#if 0
		switch(iou->io.type) {
		case HAMMER_STRUCTURE_VOLUME:
			hammer_rel_volume(&iou->volume, 1);
			break;
		case HAMMER_STRUCTURE_BUFFER:
			hammer_rel_buffer(&iou->buffer, 1);
			break;
		}
#endif
	}
}

/*
 * bioops fsync callback - nothing for HAMMER to do here.
 */
static int
hammer_io_fsync(struct vnode *vp)
{
	return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
	return(0);
}

/*
 * Dependancy-move callback.  HAMMER keeps its dependancy on the
 * original bp, so there is nothing to move.
 */
static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.  HAMMER only uses this for
 * B_CACHE buffers so checkread just shouldn't happen, but if it does
 * allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependancy.
 *
 * This code enforces the following write ordering: buffers, then cluster
 * headers, then volume headers.
 */
static int
hammer_io_checkread(struct buf *bp)
{
	return(0);
}

static int
hammer_io_checkwrite(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	/*
	 * We are called from the kernel on delayed-write buffers, and
	 * called from hammer_io_flush() on flush requests.  There should
	 * be no dependancies in either case.
	 *
	 * In the case of delayed-writes, the introduction of a dependancy
	 * will block until the bp can be reacquired, and the bp is then
	 * simply not released until the dependancy can be satisfied.
	 *
	 * We can only clear the modified bit when entered from the kernel
	 * if io.lock.refs == 0.
	 */
	if (iou->io.lock.refs == 0) {
		iou->io.modified = 0;
	}
	return(0);
}

/*
 * Return non-zero if the caller should flush the structure associated
 * with this io sub-structure (no bp attached, or the kernel has asked
 * for the bp back via B_LOCKED).
 */
int
hammer_io_checkflush(struct hammer_io *io)
{
	if (io->bp == NULL || (io->bp->b_flags & B_LOCKED)) {
		return(1);
	}
	return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.  HAMMER never asks for a delay here.
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
	return(0);
}

/*
 * Buffer cache callback vector installed on every HAMMER-owned bp
 * via bp->b_ops.
 */
struct bio_ops hammer_bioops = {
	.io_start	= hammer_io_start,
	.io_complete	= hammer_io_complete,
	.io_deallocate	= hammer_io_deallocate,
	.io_fsync	= hammer_io_fsync,
	.io_sync	= hammer_io_sync,
	.io_movedeps	= hammer_io_movedeps,
	.io_countdeps	= hammer_io_countdeps,
	.io_checkread	= hammer_io_checkread,
	.io_checkwrite	= hammer_io_checkwrite,
};