1 /* 2 * Copyright (c) 2007 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@backplane.com> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 *
 * $DragonFly: src/sys/vfs/hammer/hammer_io.c,v 1.6 2007/12/14 08:05:39 dillon Exp $
 */
/*
 * IO Primitives and buffer cache management
 *
 * All major data-tracking structures in HAMMER contain a struct hammer_io
 * which is used to manage their backing store.  We use filesystem buffers
 * for backing store and we leave them passively associated with their
 * HAMMER structures.
 *
 * If the kernel tries to release a passively associated buf which we cannot
 * yet let go we set B_LOCKED in the buffer and then actively release it
 * later when we can.
 */

#include "hammer.h"
#include <sys/fcntl.h>
#include <sys/nlookup.h>
#include <sys/buf.h>
#include <sys/buf2.h>

/*
 * Helper routine to disassociate a buffer cache buffer from an I/O
 * structure.
 *
 * The bp's dependency list and b_ops are cleared so the kernel stops
 * calling back into HAMMER for this buffer, then the per-type ondisk
 * and a-list meta pointers are NULLed out because they point into the
 * buffer's mapped data, which we are giving up.
 *
 * On return the io is marked clean (modified == 0) and released.
 * The caller is responsible for actually releasing or writing the bp.
 */
static void
hammer_io_disassociate(union hammer_io_structure *io)
{
	struct buf *bp = io->io.bp;

	LIST_INIT(&bp->b_dep);		/* clear the association */
	bp->b_ops = NULL;
	io->io.bp = NULL;

	switch(io->io.type) {
	case HAMMER_STRUCTURE_VOLUME:
		io->volume.ondisk = NULL;
		io->volume.alist.meta = NULL;
		break;
	case HAMMER_STRUCTURE_SUPERCL:
		io->supercl.ondisk = NULL;
		io->supercl.alist.meta = NULL;
		break;
	case HAMMER_STRUCTURE_CLUSTER:
		io->cluster.ondisk = NULL;
		io->cluster.alist_master.meta = NULL;
		io->cluster.alist_btree.meta = NULL;
		io->cluster.alist_record.meta = NULL;
		io->cluster.alist_mdata.meta = NULL;
		break;
	case HAMMER_STRUCTURE_BUFFER:
		io->buffer.ondisk = NULL;
		io->buffer.alist.meta = NULL;
		break;
	}
	io->io.modified = 0;
	io->io.released = 1;
}

/*
 * Mark a cluster as being closed.  This is done as late as possible,
 * only when we are asked to flush the cluster.
 *
 * Any in-flight async write of the cluster header must complete first,
 * so we sleep until the cluster leaves the ASYNC state.  If the header
 * is still marked open we clear HAMMER_CLUF_OPEN and re-dirty the
 * cluster so the updated header gets written out.
 */
static void
hammer_close_cluster(hammer_cluster_t cluster)
{
	while (cluster->state == HAMMER_CLUSTER_ASYNC)
		tsleep(cluster, 0, "hmrdep", 0);
	if (cluster->state == HAMMER_CLUSTER_OPEN) {
		cluster->state = HAMMER_CLUSTER_IDLE;
		cluster->ondisk->clu_flags &= ~HAMMER_CLUF_OPEN;
		kprintf("CLOSE CLUSTER\n");
		hammer_modify_cluster(cluster);
	}
}


/*
 * Load bp for a HAMMER structure.
 *
 * If no buffer is attached yet, bread() the device at io->offset and
 * hook the bp to the io via b_dep/b_ops so the hammer_bioops callbacks
 * fire for it.  On return (success or an already-attached bp) the io
 * holds the bp actively locked (released == 0) with no new
 * modifications pending.
 *
 * Returns 0 on success or the bread() error code.
 */
int
hammer_io_read(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL) {
		error = bread(devvp, io->offset, HAMMER_BUFSIZE, &io->bp);
		if (error == 0) {
			bp = io->bp;
			bp->b_ops = &hammer_bioops;
			LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
			BUF_KERNPROC(bp);
		}
		io->modified = 0;	/* no new modifications yet */
		io->released = 0;	/* we hold an active lock on bp */
	} else {
		error = 0;
	}
	return(error);
}

/*
 * Similar to hammer_io_read() but returns a zero'd out buffer instead.
 * vfs_bio_clrbuf() is kinda nasty, enforce serialization against background
 * I/O so we can call it.
 *
 * getblk() is used instead of bread() since the contents will be
 * overwritten anyway.  On return the io holds the bp actively locked
 * (released == 0) and is marked modified.  Always returns 0.
 */
int
hammer_io_new(struct vnode *devvp, struct hammer_io *io)
{
	struct buf *bp;

	if ((bp = io->bp) == NULL) {
		io->bp = getblk(devvp, io->offset, HAMMER_BUFSIZE, 0, 0);
		bp = io->bp;
		bp->b_ops = &hammer_bioops;
		LIST_INSERT_HEAD(&bp->b_dep, &io->worklist, node);
		io->released = 0;	/* we hold an active lock on bp */
		BUF_KERNPROC(bp);
	} else {
		if (io->released) {
			/* reacquire the bp before touching its contents */
			regetblk(bp);
			io->released = 0;
			BUF_KERNPROC(bp);
		}
	}
	io->modified = 1;
	vfs_bio_clrbuf(bp);
	return(0);
}

/*
 * This routine is called when a buffer within a cluster is modified.  We
 * mark the cluster open and immediately initiate asynchronous I/O.
Any
 * related hammer_buffer write I/O blocks until our async write completes.
 * This guarantees (inasmuch as the OS can) that the cluster recovery code
 * will see a cluster marked open if a crash occurred while the filesystem
 * still had dirty buffers associated with that cluster.
 *
 * The state is double-checked under the io lock so only one thread
 * initiates the header write; hammer_io_complete() advances the cluster
 * from ASYNC to OPEN when the bawrite() finishes.
 */
void
hammer_io_notify_cluster(hammer_cluster_t cluster)
{
	struct hammer_io *io = &cluster->io;

	if (cluster->state == HAMMER_CLUSTER_IDLE) {
		hammer_lock_ex(&cluster->io.lock);
		if (cluster->state == HAMMER_CLUSTER_IDLE) {
			if (io->released)
				regetblk(io->bp);
			kprintf("MARK CLUSTER OPEN\n");
			cluster->ondisk->clu_flags |= HAMMER_CLUF_OPEN;
			cluster->state = HAMMER_CLUSTER_ASYNC;
			hammer_modify_cluster(cluster);
			bawrite(io->bp);
			io->released = 1;
			/* leave cluster marked as modified */
		}
		hammer_unlock(&cluster->io.lock);
	}
}

/*
 * This routine is called on the last reference to a hammer structure.  If
 * flush is non-zero we have to completely disassociate the bp from the
 * structure (which may involve blocking).  Otherwise we can leave the bp
 * passively associated with the structure.
 *
 * The caller is holding io->lock exclusively.
 */
void
hammer_io_release(struct hammer_io *io, int flush)
{
	union hammer_io_structure *iou = (void *)io;
	hammer_cluster_t cluster;
	struct buf *bp;

	if ((bp = io->bp) != NULL) {
		/*
		 * If neither we nor the kernel want to flush the bp, we can
		 * stop here.  Make sure the bp is passively released
		 * before returning.  Even though we are still holding it,
		 * we want to be notified when the kernel wishes to flush
		 * it out so make sure B_DELWRI is properly set if we had
		 * made modifications.
		 */
		if (flush == 0 && (bp->b_flags & B_LOCKED) == 0) {
			if ((bp->b_flags & B_DELWRI) == 0 && io->modified) {
				if (io->released)
					regetblk(bp);
				bdwrite(bp);	/* queue as a delayed write */
				io->released = 1;
			} else if (io->released == 0) {
				bqrelse(bp);	/* passive release, no write */
				io->released = 1;
			}
			return;
		}

		/*
		 * We've been asked to flush the buffer.
		 *
		 * If this is a hammer_buffer we may have to wait for the
		 * cluster header write to complete, so the recovery code
		 * never sees dirty buffers for a cluster not marked open.
		 */
		if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
		    (io->modified || (bp->b_flags & B_DELWRI))) {
			cluster = iou->buffer.cluster;
			while (cluster->state == HAMMER_CLUSTER_ASYNC)
				tsleep(iou->buffer.cluster, 0, "hmrdep", 0);
		}

		/*
		 * If we have an open cluster header, close it
		 */
		if (iou->io.type == HAMMER_STRUCTURE_CLUSTER) {
			hammer_close_cluster(&iou->cluster);
		}


		/*
		 * Ok the dependencies are all gone.  Check for the simple
		 * disassociation case: the bp is already passively released
		 * and is either clean or already queued as a delayed write.
		 */
		if (io->released && (bp->b_flags & B_LOCKED) == 0 &&
		    (io->modified == 0 || (bp->b_flags & B_DELWRI))) {
			hammer_io_disassociate(iou);
			return;
		}

		/*
		 * Handle the more complex disassociation case.  Acquire the
		 * buffer, clean up B_LOCKED, and deal with the modified
		 * flag.
		 */
		if (io->released)
			regetblk(bp);
		bp->b_flags &= ~B_LOCKED;
		if (io->modified || (bp->b_flags & B_DELWRI))
			bawrite(bp);	/* async write of dirty data */
		else
			bqrelse(bp);	/* clean, just release */
		io->released = 1;
		hammer_io_disassociate(iou);
	}
}

/*
 * Flush dirty data, if any.
 */
/*
 * Called from the filesystem sync path with sync policy in *info.
 * MNT_WAIT requests a synchronous bwrite(); otherwise the bp is
 * written asynchronously, but only if we hold the sole reference -
 * an in-use structure is requeued instead so we do not race its user.
 */
void
hammer_io_flush(struct hammer_io *io, struct hammer_sync_info *info)
{
	struct buf *bp;
	int error;

	if ((bp = io->bp) == NULL)
		return;
	if (bp->b_flags & B_DELWRI)
		io->modified = 1;
	if (io->modified == 0)
		return;
	kprintf("IO FLUSH BP %p TYPE %d REFS %d\n", bp, io->type, io->lock.refs);
	hammer_lock_ex(&io->lock);

	/* re-check under the lock, the bp may have been flushed meanwhile */
	if ((bp = io->bp) != NULL && io->modified) {
		if (io->released)
			regetblk(bp);
		io->released = 1;

		/*
		 * We own the bp now
		 */
		if (info->waitfor & MNT_WAIT) {
			io->modified = 0;
			error = bwrite(bp);
			if (error)
				info->error = error;
		} else if (io->lock.refs == 1) {
			io->modified = 0;
			bawrite(bp);
		} else {
			kprintf("can't flush, %d refs\n", io->lock.refs);
			/* structure is in-use, don't race the write */
			bqrelse(bp);
		}
	}
	hammer_unlock(&io->lock);
}


/*
 * HAMMER_BIOOPS
 */

/*
 * Pre and post I/O callbacks.
 */
static void hammer_io_deallocate(struct buf *bp);

/*
 * Called by the kernel just before I/O is initiated on the bp.
 * Currently a no-op; the disabled code waited for in-progress
 * cluster I/O before letting buffer I/O proceed.
 */
static void
hammer_io_start(struct buf *bp)
{
#if 0
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	if (io->io.type == HAMMER_STRUCTURE_BUFFER) {
		while (io->buffer.cluster->io_in_progress) {
			kprintf("hammer_io_start: wait for cluster\n");
			tsleep(io->buffer.cluster, 0, "hmrdep", 0);
			kprintf("hammer_io_start: wait for cluster done\n");
		}
	}
#endif
}

/*
 * Called by the kernel when I/O on the bp completes.  When the async
 * cluster-header write finishes we advance the cluster from ASYNC to
 * OPEN and wake up anyone sleeping on it (hammer_close_cluster(),
 * hammer_io_release()).
 */
static void
hammer_io_complete(struct buf *bp)
{
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	if (io->io.type == HAMMER_STRUCTURE_CLUSTER) {
		if (io->cluster.state == HAMMER_CLUSTER_ASYNC) {
			kprintf("cluster write complete flags %08x\n",
				io->cluster.ondisk->clu_flags);
			io->cluster.state = HAMMER_CLUSTER_OPEN;
			wakeup(&io->cluster);
		}
	}
}

/*
 * Callback from kernel when it wishes to deallocate a passively
 * associated structure.  This can only occur if the buffer is
 * passively associated with the structure.  The kernel has locked
 * the buffer.
 *
 * If we cannot disassociate we set B_LOCKED to prevent the buffer
 * from getting reused.
 */
static void
hammer_io_deallocate(struct buf *bp)
{
	union hammer_io_structure *io = (void *)LIST_FIRST(&bp->b_dep);

	/* XXX memory interlock, spinlock to sync cpus */

	/*
	 * Since the kernel is passing us a locked buffer, the HAMMER
	 * structure had better not believe it has a lock on the buffer.
	 */
	KKASSERT(io->io.released);
	crit_enter();

	/*
	 * First, ref the structure to prevent either the buffer or the
	 * structure from going away or being unexpectedly flushed.
	 */
	hammer_ref(&io->io.lock);

	/*
	 * Buffers can have active references from cached hammer_node's,
	 * even if those nodes are themselves passively cached.  Attempt
	 * to clean them out.  This may not succeed.
	 */
	if (io->io.type == HAMMER_STRUCTURE_BUFFER &&
	    hammer_lock_ex_try(&io->io.lock) == 0) {
		hammer_flush_buffer_nodes(&io->buffer);
		hammer_unlock(&io->io.lock);
	}

	if (hammer_islastref(&io->io.lock)) {
		/*
		 * If we are the only ref left we can disassociate the I/O.
		 * It had better still be in a released state because the
		 * kernel is holding a lock on the buffer.  Any passive
		 * modifications should have already been synchronized with
		 * the buffer.
		 */
		KKASSERT(io->io.released);
		hammer_io_disassociate(io);
		bp->b_flags &= ~B_LOCKED;
		KKASSERT(io->io.modified == 0 || (bp->b_flags & B_DELWRI));

		/*
		 * Perform final rites on the structure.  This can cause
		 * a chain reaction - e.g. last buffer -> last cluster ->
		 * last supercluster -> last volume.
		 */
		switch(io->io.type) {
		case HAMMER_STRUCTURE_VOLUME:
			hammer_rel_volume(&io->volume, 1);
			break;
		case HAMMER_STRUCTURE_SUPERCL:
			hammer_rel_supercl(&io->supercl, 1);
			break;
		case HAMMER_STRUCTURE_CLUSTER:
			hammer_rel_cluster(&io->cluster, 1);
			break;
		case HAMMER_STRUCTURE_BUFFER:
			hammer_rel_buffer(&io->buffer, 1);
			break;
		}
	} else {
		/*
		 * Otherwise tell the kernel not to destroy the buffer.
		 *
		 * We have to unref the structure without performing any
		 * final rites on it to avoid a deadlock.
		 */
		bp->b_flags |= B_LOCKED;
		hammer_unref(&io->io.lock);
	}

	crit_exit();
}

/*
 * bioops fsync callback - nothing for HAMMER to do here.
 */
static int
hammer_io_fsync(struct vnode *vp)
{
	return(0);
}

/*
 * NOTE: will not be called unless we tell the kernel about the
 * bioops.  Unused... we use the mount's VFS_SYNC instead.
 */
static int
hammer_io_sync(struct mount *mp)
{
	return(0);
}

/*
 * bioops callback to move dependencies between bps - nothing to do,
 * HAMMER's worklist association is managed explicitly.
 */
static void
hammer_io_movedeps(struct buf *bp1, struct buf *bp2)
{
}

/*
 * I/O pre-check for reading and writing.
HAMMER only uses this for
 * B_CACHE buffers so checkread just shouldn't happen, but if it does
 * allow it.
 *
 * Writing is a different case.  We don't want the kernel to try to write
 * out a buffer that HAMMER may be modifying passively or which has a
 * dependency.
 *
 * This code enforces the following write ordering: buffers, then cluster
 * headers, then volume headers.
 */
static int
hammer_io_checkread(struct buf *bp)
{
	return(0);
}

/*
 * Returns 0 to let the kernel write the bp, or -1 (with B_LOCKED set)
 * to delay the write.  A buffer is delayed while its cluster header
 * write is still in flight or while the associated HAMMER structure
 * still has active references.  Otherwise the bp is disassociated
 * from its structure before the kernel takes it.
 */
static int
hammer_io_checkwrite(struct buf *bp)
{
	union hammer_io_structure *iou = (void *)LIST_FIRST(&bp->b_dep);

	if (iou->io.type == HAMMER_STRUCTURE_BUFFER &&
	    iou->buffer.cluster->state == HAMMER_CLUSTER_ASYNC) {
		/*
		 * Cannot write out a cluster buffer if the cluster header
		 * I/O opening the cluster has not completed.
		 */
		kprintf("hammer_io_checkwrite: w/ depend - delayed\n");
		bp->b_flags |= B_LOCKED;
		return(-1);
	} else if (iou->io.lock.refs) {
		/*
		 * Cannot write out a bp if its associated buffer has active
		 * references.
		 */
		kprintf("hammer_io_checkwrite: w/ refs - delayed\n");
		bp->b_flags |= B_LOCKED;
		return(-1);
	} else {
		/*
		 * We're good, but before we can let the kernel proceed we
		 * may have to make some adjustments.  Closing an open
		 * cluster header re-dirties it ahead of the write.
		 */
		if (iou->io.type == HAMMER_STRUCTURE_CLUSTER)
			hammer_close_cluster(&iou->cluster);
		kprintf("hammer_io_checkwrite: ok\n");
		KKASSERT(iou->io.released);
		hammer_io_disassociate(iou);
		return(0);
	}
}

/*
 * Return non-zero if the caller should flush the structure associated
 * with this io sub-structure.  True when no bp is attached or when the
 * kernel has asked for the bp back via B_LOCKED.
 */
int
hammer_io_checkflush(struct hammer_io *io)
{
	if (io->bp == NULL || (io->bp->b_flags & B_LOCKED))
		return(1);
	return(0);
}

/*
 * Return non-zero if we wish to delay the kernel's attempt to flush
 * this buffer to disk.  HAMMER never delays via countdeps.
 */
static int
hammer_io_countdeps(struct buf *bp, int n)
{
	return(0);
}

/*
 * Dispatch table handed to the kernel buffer cache; these callbacks
 * keep passively associated bps in sync with their HAMMER structures.
 */
struct bio_ops hammer_bioops = {
	.io_start	= hammer_io_start,
	.io_complete	= hammer_io_complete,
	.io_deallocate	= hammer_io_deallocate,
	.io_fsync	= hammer_io_fsync,
	.io_sync	= hammer_io_sync,
	.io_movedeps	= hammer_io_movedeps,
	.io_countdeps	= hammer_io_countdeps,
	.io_checkread	= hammer_io_checkread,
	.io_checkwrite	= hammer_io_checkwrite,
};