/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 */
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io1->pbase < io2->pbase)
                return(-1);
        if (io1->pbase > io2->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
                off_t, pbase);

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};

#define HAMMER2_GETBLK_GOOD     0
#define HAMMER2_GETBLK_QUEUED   1
#define HAMMER2_GETBLK_OWNED    2

/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
                  hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        off_t pbase;
        off_t pmask;
        /*
         * XXX After a free, the buffer-reuse case with a different size
         * can clash with the dio cache.  Let's avoid it for now.
         * Ultimately we need to invalidate the dio cache when freeing
         * blocks to allow a mix of 16KB and 64KB block sizes.
         */
        /*int psize = hammer2_devblksize(lsize);*/
        int psize = HAMMER2_PBUFSIZE;
        int refs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
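        /*
         * Worked example of the masking above (illustrative values only,
         * assuming the usual HAMMER2 encoding where the low bits of lbase
         * carry the size radix): with lsize = 16384 (radix 14) and
         * lbase = 0x123400E, stripping HAMMER2_OFF_MASK_RADIX yields
         * lbase = 0x1234000, and with psize = 65536 the containing
         * physical buffer is pbase = 0x1230000.  Every logical request
         * falling inside that 64KB window maps to the same dio.
         */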
        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_int(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                hammer2_spin_unsh(&hmp->io_spin);
        } else {
                hammer2_spin_unsh(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->refs = 1;
                hammer2_spin_init(&dio->spin, "h2dio");
                TAILQ_INIT(&dio->iocbq);
                hammer2_spin_ex(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        hammer2_spin_unex(&hmp->io_spin);
                } else {
                        if ((atomic_fetchadd_int(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        hammer2_spin_unex(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        }

        /*
         * Obtain/Validate the buffer.
         */
        iocb->dio = dio;

        if (dio->act < 5)       /* SMP race ok */
                ++dio->act;

        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set, GOOD cannot be cleared until refs drops to 0.
                 *
                 * lfence required because dio is not interlocked for
                 * the DIO_GOOD test.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
                        cpu_lfence();
                        iocb->callback(iocb);
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        /*
                         * If DIO_INPROG is already set then set WAITING and
                         * queue the iocb.
                         */
                        hammer2_spin_ex(&dio->spin);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_WAITING)) {
                                iocb->flags |= HAMMER2_IOCB_ONQ |
                                               HAMMER2_IOCB_INPROG;
                                TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
                                hammer2_spin_unex(&dio->spin);
                                break;
                        }
                        hammer2_spin_unex(&dio->spin);
                        /* retry */
                } else {
                        /*
                         * If DIO_INPROG is not set then set it and issue the
                         * callback immediately to start I/O.
                         */
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_INPROG)) {
                                iocb->flags |= HAMMER2_IOCB_INPROG;
                                iocb->callback(iocb);
                                break;
                        }
                        /* retry */
                }
                /* retry */
        }
}
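/*
 * Typical calling pattern (an illustrative sketch only; this is what
 * _hammer2_io_new() and hammer2_io_bread() below actually do).  The
 * caller fills in an iocb with a callback, then waits if the callback
 * did not complete synchronously:
 *
 *      hammer2_iocb_t iocb;
 *
 *      iocb.callback = my_callback;    (hypothetical callback)
 *      iocb.lbase = lbase;
 *      iocb.lsize = lsize;
 *      iocb.flags = 0;
 *      iocb.error = 0;
 *      hammer2_io_getblk(hmp, lbase, lsize, &iocb);
 *      if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
 *              hammer2_iocb_wait(&iocb);
 */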
/*
 * Quickly obtain a good DIO buffer, return NULL if the system no longer
 * caches the data.
 */
hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;
        struct buf *bp;
        off_t pbase;
        off_t pmask;
        int psize = HAMMER2_PBUFSIZE;
        int orefs;
        int nrefs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio == NULL) {
                hammer2_spin_unsh(&hmp->io_spin);
                return NULL;
        }

        if ((atomic_fetchadd_int(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
                atomic_add_int(&dio->hmp->iofree_count, -1);
        hammer2_spin_unsh(&hmp->io_spin);

        if (dio->act < 5)       /* SMP race ok */
                ++dio->act;

        /*
         * Obtain/validate the buffer.  Do NOT issue I/O.  Discard if
         * the system does not have the data already cached.
         */
        nrefs = -1;
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();

                /*
                 * Return the dio immediately if the buffer is already good.
                 * Once set, GOOD cannot be cleared until refs drops to 0.
                 *
                 * lfence required because dio is not interlocked for
                 * the DIO_GOOD test.
                 */
                if (orefs & HAMMER2_DIO_GOOD) {
                        cpu_lfence();
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.  INPROG might already be set, in which case
                 * there is no way we can do this non-blocking so we punt.
                 */
                if ((orefs & HAMMER2_DIO_INPROG))
                        break;
                nrefs = orefs | HAMMER2_DIO_INPROG;
                if (atomic_cmpset_int(&dio->refs, orefs, nrefs) == 0)
                        continue;

                /*
                 * We own DIO_INPROG, try to set DIO_GOOD.
                 *
                 * For now do not use GETBLK_NOWAIT because
                 */
                bp = dio->bp;
                dio->bp = NULL;
                if (bp == NULL) {
#if 0
                        bp = getblk(hmp->devvp, dio->pbase, dio->psize, 0, 0);
#endif
                        bread(hmp->devvp, dio->pbase, dio->psize, &bp);
                }
                if (bp) {
                        if ((bp->b_flags & B_ERROR) == 0 &&
                            (bp->b_flags & B_CACHE)) {
                                dio->bp = bp;   /* assign BEFORE setting flag */
                                atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
                        } else {
                                bqrelse(bp);
                                bp = NULL;
                        }
                }

                /*
                 * Clear DIO_INPROG.
                 *
                 * This is actually a bit complicated, see
                 * hammer2_io_complete() for more information.
                 */
                iocb.dio = dio;
                iocb.flags = HAMMER2_IOCB_INPROG;
                hammer2_io_complete(&iocb);
                break;
        }

        /*
         * Only return the dio if its buffer is good.
         */
        if ((dio->refs & HAMMER2_DIO_GOOD) == 0) {
                hammer2_io_putblk(&dio);
        }
        return dio;
}
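/*
 * NOTE: dio->refs packs an actual reference count in the low bits
 *       (HAMMER2_DIO_MASK) together with the DIO_GOOD, DIO_INPROG,
 *       DIO_WAITING, and DIO_DIRTY state flags in the high bits, which
 *       is why the state transitions in this file are performed with
 *       atomic_cmpset_int() retry loops rather than a simple lock.
 */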
/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        hammer2_iocb_t *cbtmp;
        uint32_t orefs;
        uint32_t nrefs;
        uint32_t oflags;
        uint32_t nflags;

        /*
         * If IOCB_INPROG was not set, completion is synchronous due to the
         * buffer already being good.  We can simply set IOCB_DONE and
         * return.  In this situation DIO_INPROG is not set and we have no
         * visibility on dio->bp.
         */
        if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
                atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
                return;
        }

        /*
         * The iocb was queued, obtained DIO_INPROG, and its callback was
         * made.  The callback is now complete.  We still own DIO_INPROG.
         *
         * We can set DIO_GOOD if no error occurred, which gives certain
         * stability guarantees to dio->bp and allows other accessors to
         * short-cut access.  DIO_GOOD cannot be cleared until the last
         * ref is dropped.
         */
        KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
        if (dio->bp) {
                BUF_KERNPROC(dio->bp);
                if ((dio->bp->b_flags & B_ERROR) == 0) {
                        KKASSERT(dio->bp->b_flags & B_CACHE);
                        atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
                }
        }

        /*
         * Clean up the dio before marking the iocb as being done.  If
         * another iocb is pending we chain to it while leaving DIO_INPROG
         * set (it will call io completion and presumably clear DIO_INPROG).
         *
         * Otherwise if no other iocbs are pending we clear DIO_INPROG before
         * finishing up the cbio.  This means that DIO_INPROG is cleared at
         * the end of the chain before ANY of the cbios are marked done.
         *
         * NOTE: The TAILQ is not stable until the spin-lock is held.
         */
        for (;;) {
                orefs = dio->refs;
                nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

                if (orefs & HAMMER2_DIO_WAITING) {
                        hammer2_spin_ex(&dio->spin);
                        cbtmp = TAILQ_FIRST(&dio->iocbq);
                        if (cbtmp) {
                                /*
                                 * NOTE: flags not adjusted in this case.
                                 *       Flags will be adjusted by the last
                                 *       iocb.
                                 */
                                TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
                                hammer2_spin_unex(&dio->spin);
                                cbtmp->callback(cbtmp); /* chained */
                                break;
                        } else if (atomic_cmpset_int(&dio->refs,
                                                     orefs, nrefs)) {
                                hammer2_spin_unex(&dio->spin);
                                break;
                        }
                        hammer2_spin_unex(&dio->spin);
                        /* retry */
                } else if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
                        break;
                } /* else retry */
                /* retry */
        }

        /*
         * Mark the iocb as done and wakeup any waiters.  This is done after
         * all iocb chains have been called back and after DIO_INPROG has
         * been cleared.  This avoids races against ref count drops by the
         * waiting threads (a hard but not impossible SMP race) which might
         * result in a 1->0 transition of the refs while DIO_INPROG is still
         * set.
         */
        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags;
                nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
                nflags |= HAMMER2_IOCB_DONE;

                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        if (oflags & HAMMER2_IOCB_WAKEUP)
                                wakeup(iocb);
                        /* SMP: iocb is now stale */
                        break;
                }
                /* retry */
        }
        iocb = NULL;
}

/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags | HAMMER2_IOCB_WAKEUP;
                if (oflags & HAMMER2_IOCB_DONE)
                        break;
                tsleep_interlock(iocb, 0);
                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
                }
        }
}
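/*
 * NOTE: The tsleep_interlock()/atomic_cmpset_int()/tsleep(PINTERLOCKED)
 *       sequence above closes the lost-wakeup window: if the completing
 *       thread sets IOCB_DONE and issues the wakeup() between our DONE
 *       test and the tsleep(), the interlock catches it and the tsleep()
 *       returns immediately instead of stalling for the full timeout.
 */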
/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        hammer2_iocb_t iocb;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        int orefs;
        int nrefs;

        dio = *diop;
        *diop = NULL;
        hmp = dio->hmp;

        /*
         * Drop refs.
         *
         * On the 1->0 transition clear flags and set INPROG.
         *
         * On the 1->0 transition if INPROG is already set, another thread
         * is in lastdrop and we can just return after the transition.
         *
         * On any other transition we can generally just return.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
                nrefs = orefs - 1;

                if ((orefs & HAMMER2_DIO_MASK) == 1 &&
                    (orefs & HAMMER2_DIO_INPROG) == 0) {
                        /*
                         * Lastdrop case, INPROG can be set.
                         */
                        nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
                        nrefs |= HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_int(&dio->refs, orefs, nrefs))
                                break;
                } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
                        /*
                         * Lastdrop case, INPROG already set.
                         */
                        if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
                                atomic_add_int(&hmp->iofree_count, 1);
                                return;
                        }
                } else {
                        /*
                         * Normal drop case.
                         */
                        if (atomic_cmpset_int(&dio->refs, orefs, nrefs))
                                return;
                }
                cpu_pause();
                /* retry */
        }

        /*
         * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
         * have been cleared.
         *
         * We can now dispose of the buffer, and should do it before calling
         * io_complete() in case there's a race against a new reference
         * which causes io_complete() to chain and instantiate the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

        if (orefs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
                if (orefs & HAMMER2_DIO_DIRTY) {
                        if (hammer2_cluster_enable) {
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                cluster_write(bp, peof, psize, 4);
                        } else {
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
                if (orefs & HAMMER2_DIO_DIRTY) {
                        bdwrite(bp);
                } else {
                        brelse(bp);
                }
        }

        /*
         * The instant we call io_complete() the dio is a free agent again
         * and can be ripped out from under us.
         *
         * We can clean up our final DIO_INPROG by simulating an iocb
         * completion.
         */
        hmp = dio->hmp;                         /* extract fields */
        atomic_add_int(&hmp->iofree_count, 1);
        cpu_ccfence();

        iocb.dio = dio;
        iocb.flags = HAMMER2_IOCB_INPROG;
        hammer2_io_complete(&iocb);
        dio = NULL;                             /* dio stale */

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        if (hmp->iofree_count > 65536) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                hammer2_spin_ex(&hmp->io_spin);
                if (hmp->iofree_count > 65536) {
                        info.count = hmp->iofree_count / 4;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                hammer2_spin_unex(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}
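/*
 * NOTE: The reclaim pass above is two-phase by design: candidate dios
 *       are unlinked into info.tmptree while the io_spin lock is held,
 *       and the kfree()s happen afterwards in hammer2_io_cleanup()
 *       outside the spin-lock, keeping the lock hold time short.
 */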
/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                if (dio->act > 0) {
                        --dio->act;
                        return 0;
                }
                KKASSERT(dio->bp == NULL);
                RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                KKASSERT(xio == NULL);
                if (--info->count <= 0) /* limit scan */
                        return(-1);
        }
        return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}
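/*
 * Example of the hammer2_io_data() offset math (illustrative values
 * only): for a dio whose underlying buffer spans 64KB starting at
 * b_loffset = 0x1230000, a request for lbase = 0x1234000 resolves to
 * off = 0x4000, i.e. b_data + 16KB within the larger buffer.
 */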
/*
 * Helpers for hammer2_io_new*() functions.
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

        /*
         * If IOCB_INPROG is not set the dio already has a good buffer and
         * we can't mess with it other than to zero the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
         * do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
                        if (iocb->lsize == dio->psize) {
                                /*
                                 * Fully covered buffer, try to optimize to
                                 * avoid any I/O.  We might already have the
                                 * buffer due to iocb chaining.
                                 */
                                if (dio->bp == NULL) {
                                        dio->bp = getblk(dio->hmp->devvp,
                                                         dio->pbase,
                                                         dio->psize,
                                                         gbctl, 0);
                                }
                                if (dio->bp) {
                                        vfs_bio_clrbuf(dio->bp);
                                        dio->bp->b_flags |= B_CACHE;
                                }
                        } else if (iocb->flags & HAMMER2_IOCB_QUICK) {
                                /*
                                 * Partial buffer, quick mode.  Do nothing.
                                 * Do not instantiate the buffer or try to
                                 * mark it B_CACHE because other portions of
                                 * the buffer might have to be read by other
                                 * accessors.
                                 */
                        } else if (dio->bp == NULL ||
                                   (dio->bp->b_flags & B_CACHE) == 0) {
                                /*
                                 * Partial buffer, normal mode, requires
                                 * read-before-write.  Chain the read.
                                 *
                                 * We might already have the buffer due to
                                 * iocb chaining.  XXX unclear if we really
                                 * need to write/release it and reacquire
                                 * in that case.
                                 *
                                 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
                                 */
                                if (dio->bp) {
                                        if (dio->refs & HAMMER2_DIO_DIRTY)
                                                bdwrite(dio->bp);
                                        else
                                                bqrelse(dio->bp);
                                        dio->bp = NULL;
                                }
                                atomic_set_int(&iocb->flags,
                                               HAMMER2_IOCB_READ);
                                breadcb(dio->hmp->devvp,
                                        dio->pbase, dio->psize,
                                        hammer2_io_callback, iocb);
                                return;
                        } /* else buffer is good */
                } /* else callback from breadcb is complete */
        }
        if (dio->bp) {
                if (iocb->flags & HAMMER2_IOCB_ZERO)
                        bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
                atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
        }
        hammer2_io_complete(iocb);
}

static
int
_hammer2_io_new(hammer2_dev_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop, int flags)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_new_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = flags;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

int
hammer2_io_new(hammer2_dev_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0));
}

int
hammer2_io_newq(hammer2_dev_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, HAMMER2_IOCB_QUICK));
}
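/*
 * Summary of the three variants above: hammer2_io_new() zeroes the
 * requested range (HAMMER2_IOCB_ZERO), hammer2_io_newnz() returns the
 * buffer without zeroing, and hammer2_io_newq() passes
 * HAMMER2_IOCB_QUICK to avoid blocking on or instantiating partially
 * covered buffers.
 */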
static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        off_t peof;
        int error;

        /*
         * If IOCB_INPROG is not set the dio already has a good buffer and
         * we can't mess with it.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
         * do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
                        /*
                         * Already good, likely due to being chained from
                         * another iocb.
                         */
                        error = 0;
                } else if (hammer2_cluster_enable) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(dio->hmp->devvp, peof,
                                             dio->pbase, dio->psize,
                                             dio->psize,
                                             HAMMER2_PBUFSIZE*4,
                                             &dio->bp);
                } else {
                        /*
                         * Synchronous I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        error = bread(dio->hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
        }
        hammer2_io_complete(iocb);
}

int
hammer2_io_bread(hammer2_dev_t *hmp, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_bread_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = 0;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
        hammer2_io_t *dio;

        dio = iocb->dio;
        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = bio->bio_buf;
        iocb->callback(iocb);
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
        return (0);     /* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
        if ((u_int)dio->psize == bytes)
                dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}
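/*
 * Typical read-path round trip using the above API (an illustrative
 * sketch only, not taken verbatim from a caller).  hammer2_io_bread()
 * always returns a referenced dio via *diop, so the release is
 * unconditional:
 *
 *      hammer2_io_t *dio;
 *      char *data;
 *      int error;
 *
 *      error = hammer2_io_bread(hmp, lbase, lsize, &dio);
 *      if (error == 0) {
 *              data = hammer2_io_data(dio, lbase);
 *              (consume lsize bytes at data)
 *      }
 *      hammer2_io_bqrelse(&dio);
 */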