/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used for OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 */
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io1->pbase < io2->pbase)
                return(-1);
        if (io1->pbase > io2->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
             off_t, pbase);

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};

static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
        uint64_t mask;
        int i;

        if (bytes < 1024)       /* smaller chunks not supported */
                return 0;

        /*
         * Calculate crc check mask for larger chunks
         */
        i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
             HAMMER2_PBUFMASK) >> 10;
        if (i == 0 && bytes == HAMMER2_PBUFSIZE)
                return((uint64_t)-1);
        mask = ((uint64_t)1U << (bytes >> 10)) - 1;
        mask <<= i;

        return mask;
}

#define HAMMER2_GETBLK_GOOD     0
#define HAMMER2_GETBLK_QUEUED   1
#define HAMMER2_GETBLK_OWNED    2

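/*
 * Illustrative sketch (not compiled into the driver): how
 * hammer2_io_mask() maps a sub-range of a physical buffer onto bits of
 * the 64-bit CRC mask, one bit per 1KB chunk.  Assumes the usual 64KB
 * HAMMER2_PBUFSIZE, so a full-buffer request yields an all-ones mask.
 */
#if 0
static void
hammer2_io_mask_example(hammer2_io_t *dio)
{
        uint64_t mask;

        /* 16KB range starting 32KB into the buffer: bits 32..47 set */
        mask = hammer2_io_mask(dio, dio->pbase + 32768, 16384);
        KKASSERT(mask == (((uint64_t)1 << 16) - 1) << 32);

        /* the full 64KB buffer: all 64 bits set */
        mask = hammer2_io_mask(dio, dio->pbase, HAMMER2_PBUFSIZE);
        KKASSERT(mask == (uint64_t)-1);
}
#endif
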
/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
                  hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        off_t pbase;
        off_t pmask;
        /*
         * XXX after free, buffer reuse case w/ different size can clash
         * with dio cache.  Let's avoid it for now.  Ultimately we need to
         * invalidate the dio cache when freeing blocks to allow a mix
         * of 16KB and 64KB block sizes.
         */
        /*int psize = hammer2_devblksize(lsize);*/
        int psize = HAMMER2_PBUFSIZE;
        int refs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_int(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                hammer2_spin_unsh(&hmp->io_spin);
        } else {
                hammer2_spin_unsh(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->btype = iocb->btype;
                dio->refs = 1;
                hammer2_spin_init(&dio->spin, "h2dio");
                TAILQ_INIT(&dio->iocbq);
                hammer2_spin_ex(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        hammer2_spin_unex(&hmp->io_spin);
                } else {
                        if ((atomic_fetchadd_int(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        hammer2_spin_unex(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        }

        /*
         * Obtain/Validate the buffer.
         */
        iocb->dio = dio;

        if (dio->act < 5)       /* SMP race ok */
                ++dio->act;

        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set, GOOD cannot be cleared until refs drops to 0.
                 *
                 * lfence required because dio's are not interlocked for
                 * the DIO_GOOD test.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
                        cpu_lfence();
                        iocb->callback(iocb);
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        /*
                         * If DIO_INPROG is already set then set WAITING and
                         * queue the iocb.
                         */
                        hammer2_spin_ex(&dio->spin);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_WAITING)) {
                                iocb->flags |= HAMMER2_IOCB_ONQ |
                                               HAMMER2_IOCB_INPROG;
                                TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
                                hammer2_spin_unex(&dio->spin);
                                break;
                        }
                        hammer2_spin_unex(&dio->spin);
                        /* retry */
                } else {
                        /*
                         * If DIO_INPROG is not set then set it and issue the
                         * callback immediately to start I/O.
                         */
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_INPROG)) {
                                iocb->flags |= HAMMER2_IOCB_INPROG;
                                iocb->callback(iocb);
                                break;
                        }
                        /* retry */
                }
                /* retry */
        }
}

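/*
 * Illustrative sketch (not compiled): the canonical calling pattern for
 * hammer2_io_getblk().  The caller fills out an iocb with a callback,
 * issues getblk, and waits for HAMMER2_IOCB_DONE if the callback could
 * not be run synchronously.  This mirrors what _hammer2_io_new() and
 * hammer2_io_bread() below actually do; my_callback is a hypothetical
 * callback obeying the hammer2_io_complete() rules.
 */
#if 0
static int
hammer2_io_getblk_example(hammer2_dev_t *hmp, int btype, off_t lbase,
                          int lsize, hammer2_io_t **diop)
{
        hammer2_iocb_t iocb;

        bzero(&iocb, sizeof(iocb));
        iocb.callback = my_callback;    /* hypothetical */
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.btype = btype;

        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        *diop = iocb.dio;               /* holds a ref; release w/putblk */

        return (iocb.error);
}
#endif
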
/*
 * Quickly obtain a good DIO buffer, return NULL if the system no longer
 * caches the data.
 */
hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;
        struct buf *bp;
        off_t pbase;
        off_t pmask;
        int psize = HAMMER2_PBUFSIZE;
        int orefs;
        int nrefs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access the DIO if it exists, bump dio->refs to prevent
         * destruction.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio == NULL) {
                hammer2_spin_unsh(&hmp->io_spin);
                return NULL;
        }

        if ((atomic_fetchadd_int(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
                atomic_add_int(&dio->hmp->iofree_count, -1);
        hammer2_spin_unsh(&hmp->io_spin);

        if (dio->act < 5)       /* SMP race ok */
                ++dio->act;

        /*
         * Obtain/validate the buffer.  Do NOT issue I/O.  Discard if
         * the system does not have the data already cached.
         */
        nrefs = -1;
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();

                /*
                 * Use the buffer immediately if it is already good.
                 * Once set, GOOD cannot be cleared until refs drops to 0.
                 *
                 * lfence required because the dio is not interlocked for
                 * the DIO_GOOD test.
                 */
                if (orefs & HAMMER2_DIO_GOOD) {
                        cpu_lfence();
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.  INPROG might already be set, in which case
                 * there is no way we can do this non-blocking so we punt.
                 */
                if ((orefs & HAMMER2_DIO_INPROG))
                        break;
                nrefs = orefs | HAMMER2_DIO_INPROG;
                if (atomic_cmpset_int(&dio->refs, orefs, nrefs) == 0)
                        continue;

                /*
                 * We own DIO_INPROG, try to set DIO_GOOD.
                 *
                 * For now do not use GETBLK_NOWAIT because
                 */
                bp = dio->bp;
                dio->bp = NULL;
                if (bp == NULL) {
#if 0
                        bp = getblk(hmp->devvp, dio->pbase, dio->psize, 0, 0);
#endif
                        bread(hmp->devvp, dio->pbase, dio->psize, &bp);
                }
                if (bp) {
                        if ((bp->b_flags & B_ERROR) == 0 &&
                            (bp->b_flags & B_CACHE)) {
                                dio->bp = bp;   /* assign BEFORE setting flag */
                                atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
                        } else {
                                bqrelse(bp);
                                bp = NULL;
                        }
                }

                /*
                 * Clear DIO_INPROG.
                 *
                 * This is actually a bit complicated, see
                 * hammer2_io_complete() for more information.
                 */
                iocb.dio = dio;
                iocb.flags = HAMMER2_IOCB_INPROG;
                hammer2_io_complete(&iocb);
                break;
        }

        /*
         * Only return the dio if its buffer is good.
         */
        if ((dio->refs & HAMMER2_DIO_GOOD) == 0) {
                hammer2_io_putblk(&dio);
        }
        return dio;
}

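/*
 * Illustrative sketch (not compiled, hypothetical helper):
 * hammer2_io_getquick() is the opportunistic fast path.  A caller can
 * probe for already-cached data first and fall back to the blocking
 * hammer2_io_bread() path (defined below) on a miss.
 */
#if 0
static int
hammer2_io_bread_cached_example(hammer2_dev_t *hmp, int btype, off_t lbase,
                                int lsize, hammer2_io_t **diop)
{
        *diop = hammer2_io_getquick(hmp, lbase, lsize);
        if (*diop != NULL)
                return (0);     /* cache hit, no I/O issued */
        return (hammer2_io_bread(hmp, btype, lbase, lsize, diop));
}
#endif
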
/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        hammer2_iocb_t *cbtmp;
        uint32_t orefs;
        uint32_t nrefs;
        uint32_t oflags;
        uint32_t nflags;

        /*
         * If IOCB_INPROG was not set completion is synchronous due to the
         * buffer already being good.  We can simply set IOCB_DONE and
         * return.  In this situation DIO_INPROG is not set and we have no
         * visibility on dio->bp.
         */
        if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
                atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
                return;
        }

        /*
         * The iocb was queued, obtained DIO_INPROG, and its callback was
         * made.  The callback is now complete.  We still own DIO_INPROG.
         *
         * We can set DIO_GOOD if no error occurred, which gives certain
         * stability guarantees to dio->bp and allows other accessors to
         * short-cut access.  DIO_GOOD cannot be cleared until the last
         * ref is dropped.
         */
        KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
        if (dio->bp) {
                BUF_KERNPROC(dio->bp);
                if ((dio->bp->b_flags & B_ERROR) == 0) {
                        KKASSERT(dio->bp->b_flags & B_CACHE);
                        atomic_set_int(&dio->refs, HAMMER2_DIO_GOOD);
                }
        }

        /*
         * Clean up the dio before marking the iocb as being done.  If
         * another iocb is pending we chain to it while leaving DIO_INPROG
         * set (it will call io completion and presumably clear DIO_INPROG).
         *
         * Otherwise if no other iocbs are pending we clear DIO_INPROG
         * before finishing up the cbio.  This means that DIO_INPROG is
         * cleared at the end of the chain before ANY of the cbios are
         * marked done.
         *
         * NOTE: The TAILQ is not stable until the spin-lock is held.
         */
        for (;;) {
                orefs = dio->refs;
                nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

                if (orefs & HAMMER2_DIO_WAITING) {
                        hammer2_spin_ex(&dio->spin);
                        cbtmp = TAILQ_FIRST(&dio->iocbq);
                        if (cbtmp) {
                                /*
                                 * NOTE: flags not adjusted in this case.
                                 *       Flags will be adjusted by the last
                                 *       iocb.
                                 */
                                TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
                                hammer2_spin_unex(&dio->spin);
                                cbtmp->callback(cbtmp); /* chained */
                                break;
                        } else if (atomic_cmpset_int(&dio->refs,
                                                     orefs, nrefs)) {
                                hammer2_spin_unex(&dio->spin);
                                break;
                        }
                        hammer2_spin_unex(&dio->spin);
                        /* retry */
                } else if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
                        break;
                } /* else retry */
                /* retry */
        }

        /*
         * Mark the iocb as done and wakeup any waiters.  This is done
         * after all iocb chains have been called back and after DIO_INPROG
         * has been cleared.  This avoids races against ref count drops by
         * the waiting threads (a hard but not impossible SMP race) which
         * might result in a 1->0 transition of the refs while DIO_INPROG
         * is still set.
         */
        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags;
                nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
                nflags |= HAMMER2_IOCB_DONE;

                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        if (oflags & HAMMER2_IOCB_WAKEUP)
                                wakeup(iocb);
                        /* SMP: iocb is now stale */
                        break;
                }
                /* retry */
        }
        iocb = NULL;
}

/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags | HAMMER2_IOCB_WAKEUP;
                if (oflags & HAMMER2_IOCB_DONE)
                        break;
                tsleep_interlock(iocb, 0);
                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
                }
        }
}

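/*
 * Illustrative sketch (not compiled, hypothetical callback): every
 * callback issued via hammer2_io_getblk() must eventually call
 * hammer2_io_complete() exactly once, so DIO_INPROG is released and any
 * iocbs queued on dio->iocbq get their turn in the chain.
 */
#if 0
static void
hammer2_iocb_example_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;

        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                /*
                 * We own DIO_INPROG and may instantiate or read
                 * dio->bp here (see hammer2_iocb_bread_callback()).
                 */
        } else {
                /* buffer was already DIO_GOOD; dio->bp is stable */
        }
        hammer2_io_complete(iocb);      /* releases INPROG, chains */
}
#endif
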
/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        hammer2_iocb_t iocb;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        int orefs;
        int nrefs;

        dio = *diop;
        *diop = NULL;
        hmp = dio->hmp;

        /*
         * Drop refs.
         *
         * On the 1->0 transition clear flags and set INPROG.
         *
         * On the 1->0 transition if INPROG is already set, another thread
         * is in lastdrop and we can just return after the transition.
         *
         * On any other transition we can generally just return.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
                nrefs = orefs - 1;

                if ((orefs & HAMMER2_DIO_MASK) == 1 &&
                    (orefs & HAMMER2_DIO_INPROG) == 0) {
                        /*
                         * Lastdrop case, INPROG can be set.
                         */
                        nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
                        nrefs |= HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_int(&dio->refs, orefs, nrefs))
                                break;
                } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
                        /*
                         * Lastdrop case, INPROG already set.
                         */
                        if (atomic_cmpset_int(&dio->refs, orefs, nrefs)) {
                                atomic_add_int(&hmp->iofree_count, 1);
                                return;
                        }
                } else {
                        /*
                         * Normal drop case.
                         */
                        if (atomic_cmpset_int(&dio->refs, orefs, nrefs))
                                return;
                }
                cpu_pause();
                /* retry */
        }

        /*
         * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
         * have been cleared.
         *
         * We can now dispose of the buffer, and should do it before
         * calling io_complete() in case there's a race against a new
         * reference which causes io_complete() to chain and instantiate
         * the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

        if (orefs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
                if (orefs & HAMMER2_DIO_DIRTY) {
                        int hce;

                        dio_write_stats_update(dio);
                        if ((hce = hammer2_cluster_enable) > 0) {
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                cluster_write(bp, peof, psize, hce);
                        } else {
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
                if (orefs & HAMMER2_DIO_DIRTY) {
                        dio_write_stats_update(dio);
                        bdwrite(bp);
                } else {
                        brelse(bp);
                }
        }

        /*
         * The instant we call io_complete the dio is a free agent again
         * and can be ripped out from under us.
         *
         * We can clean up our final DIO_INPROG by simulating an iocb
         * completion.
         */
        hmp = dio->hmp;                         /* extract fields */
        atomic_add_int(&hmp->iofree_count, 1);
        cpu_ccfence();

        iocb.dio = dio;
        iocb.flags = HAMMER2_IOCB_INPROG;
        hammer2_io_complete(&iocb);
        dio = NULL;                             /* dio stale */

        /*
         * We cache free buffers so re-use cases can use a shared lock,
         * but if too many build up we have to clean them out.
         */
        if (hmp->iofree_count > 65536) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                hammer2_spin_ex(&hmp->io_spin);
                if (hmp->iofree_count > 65536) {
                        info.count = hmp->iofree_count / 4;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                hammer2_spin_unex(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}

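/*
 * Illustrative sketch (not compiled, hypothetical helper):
 * hammer2_io_putblk() takes a pointer-to-pointer and NULLs out the
 * caller's reference because, on the lastdrop path above, the dio may
 * be reused or freed the moment io_complete() runs; a stale caller
 * pointer would invite a use-after-free.
 */
#if 0
static void
hammer2_io_putblk_example(hammer2_dev_t *hmp, int btype,
                          off_t lbase, int lsize)
{
        hammer2_io_t *dio;

        if (hammer2_io_bread(hmp, btype, lbase, lsize, &dio) == 0) {
                /* ... read via hammer2_io_data(dio, lbase) ... */
        }
        hammer2_io_putblk(&dio);
        KKASSERT(dio == NULL);  /* our pointer was cleared for us */
}
#endif
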
/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                if (dio->act > 0) {
                        --dio->act;
                        return 0;
                }
                KKASSERT(dio->bp == NULL);
                RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                KKASSERT(xio == NULL);
                if (--info->count <= 0) /* limit scan */
                        return(-1);
        }
        return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK |
                                  HAMMER2_DIO_INPROG)) == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}

/*
 * Keep track of good CRCs in dio->good_crc_mask.  XXX this belongs in
 * the chain structure, but the chain structure would need to persist
 * at refs=0 and it doesn't.
 */
int
hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp)
{
        hammer2_io_t *dio;
        uint64_t mask;

        if ((dio = chain->dio) != NULL && chain->bytes >= 1024) {
                mask = hammer2_io_mask(dio, chain->bref.data_off,
                                       chain->bytes);
                *maskp = mask;
                if ((dio->crc_good_mask & mask) == mask)
                        return 1;
                return 0;
        }
        *maskp = 0;

        return 0;
}

void
hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask)
{
        if (dio) {
                if (sizeof(long) == 8) {
                        atomic_set_long(&dio->crc_good_mask, mask);
                } else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        atomic_set_int(&((int *)&dio->crc_good_mask)[0],
                                       (uint32_t)mask);
                        atomic_set_int(&((int *)&dio->crc_good_mask)[1],
                                       (uint32_t)(mask >> 32));
#else
                        atomic_set_int(&((int *)&dio->crc_good_mask)[0],
                                       (uint32_t)(mask >> 32));
                        atomic_set_int(&((int *)&dio->crc_good_mask)[1],
                                       (uint32_t)mask);
#endif
                }
        }
}

void
hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask)
{
        if (dio) {
                if (sizeof(long) == 8) {
                        atomic_clear_long(&dio->crc_good_mask, mask);
                } else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
                                         (uint32_t)mask);
                        atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
                                         (uint32_t)(mask >> 32));
#else
                        atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
                                         (uint32_t)(mask >> 32));
                        atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
                                         (uint32_t)mask);
#endif
                }
        }
}

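/*
 * Illustrative sketch (not compiled, hypothetical caller): the intended
 * usage of the CRC-mask helpers.  Data is validated at most once per
 * 1KB chunk; later lookups hit the cached mask and skip the check.
 * hammer2_chain_testcheck() is an assumed stand-in for the caller's
 * actual validation routine, not an API defined in this file.
 */
#if 0
static int
hammer2_io_crc_check_example(hammer2_chain_t *chain)
{
        uint64_t mask;

        if (hammer2_io_crc_good(chain, &mask) == 0) {
                if (hammer2_chain_testcheck(chain))     /* assumed */
                        hammer2_io_crc_setmask(chain->dio, mask);
                else
                        return (EIO);   /* CRC failure */
        }
        return (0);
}
#endif
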
/*
 * Helpers for hammer2_io_new*() functions
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

        /*
         * If IOCB_INPROG is not set the dio already has a good buffer and
         * we can't mess with it other than zeroing the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and
         * can do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
                        if (iocb->lsize == dio->psize) {
                                /*
                                 * Fully covered buffer, try to optimize to
                                 * avoid any I/O.  We might already have the
                                 * buffer due to iocb chaining.
                                 */
                                if (dio->bp == NULL) {
                                        dio->bp = getblk(dio->hmp->devvp,
                                                         dio->pbase,
                                                         dio->psize,
                                                         gbctl, 0);
                                }
                                if (dio->bp) {
                                        vfs_bio_clrbuf(dio->bp);
                                        dio->bp->b_flags |= B_CACHE;
                                }
                        } else if (iocb->flags & HAMMER2_IOCB_QUICK) {
                                /*
                                 * Partial buffer, quick mode.  Do nothing.
                                 * Do not instantiate the buffer or try to
                                 * mark it B_CACHE because other portions of
                                 * the buffer might have to be read by other
                                 * accessors.
                                 */
                        } else if (dio->bp == NULL ||
                                   (dio->bp->b_flags & B_CACHE) == 0) {
                                /*
                                 * Partial buffer, normal mode, requires
                                 * read-before-write.  Chain the read.
                                 *
                                 * We might already have the buffer due to
                                 * iocb chaining.  XXX unclear if we really
                                 * need to write/release it and reacquire
                                 * in that case.
                                 *
                                 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
                                 */
                                if (dio->bp) {
                                        if (dio->refs & HAMMER2_DIO_DIRTY) {
                                                dio_write_stats_update(dio);
                                                bdwrite(dio->bp);
                                        } else {
                                                bqrelse(dio->bp);
                                        }
                                        dio->bp = NULL;
                                }
                                atomic_set_int(&iocb->flags,
                                               HAMMER2_IOCB_READ);
                                breadcb(dio->hmp->devvp,
                                        dio->pbase, dio->psize,
                                        hammer2_io_callback, iocb);
                                return;
                        } /* else buffer is good */
                } /* else callback from breadcb is complete */
        }
        if (dio->bp) {
                if (iocb->flags & HAMMER2_IOCB_ZERO)
                        bzero(hammer2_io_data(dio, iocb->lbase),
                              iocb->lsize);
                atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
        }
        hammer2_io_complete(iocb);
}

static
int
_hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                hammer2_io_t **diop, int flags)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_new_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = flags;
        iocb.btype = btype;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, btype, lbase, lsize,
                               diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, btype, lbase, lsize, diop, 0));
}

int
hammer2_io_newq(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, btype, lbase, lsize,
                               diop, HAMMER2_IOCB_QUICK));
}

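/*
 * Illustrative sketch (not compiled, hypothetical caller): choosing
 * among the new-buffer variants above.  hammer2_io_new() zeroes the
 * requested range, hammer2_io_newnz() does not (the caller overwrites
 * the whole range), and hammer2_io_newq() is the quick variant which
 * avoids read-before-write on partial buffers.  src is a hypothetical
 * data source.
 */
#if 0
static int
hammer2_io_new_example(hammer2_dev_t *hmp, off_t lbase, int lsize,
                       const char *src)
{
        hammer2_io_t *dio;
        int error;

        error = hammer2_io_new(hmp, HAMMER2_BREF_TYPE_DATA,
                               lbase, lsize, &dio);     /* zeroed range */
        if (error == 0) {
                bcopy(src, hammer2_io_data(dio, lbase), lsize);
                hammer2_io_bdwrite(&dio);       /* set DIRTY + putblk */
        } else if (dio) {
                hammer2_io_bqrelse(&dio);       /* drop ref, not dirty */
        }
        return (error);
}
#endif
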
static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        off_t peof;
        int error;

        /*
         * If IOCB_INPROG is not set the dio already has a good buffer and
         * we can't mess with it other than zeroing the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and
         * can do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                int hce;

                if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
                        /*
                         * Already good, likely due to being chained from
                         * another iocb.
                         */
                        error = 0;
                } else if ((hce = hammer2_cluster_enable) > 0) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(dio->hmp->devvp, peof,
                                             dio->pbase, dio->psize,
                                             dio->psize,
                                             HAMMER2_PBUFSIZE*hce,
                                             &dio->bp);
                } else {
                        /*
                         * Synchronous I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        error = bread(dio->hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
        }
        hammer2_io_complete(iocb);
}

int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;

        iocb.callback = hammer2_iocb_bread_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.btype = btype;
        iocb.flags = 0;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        dio = *diop = iocb.dio;

        return (iocb.error);
}

/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
        hammer2_io_t *dio;

        dio = iocb->dio;
        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = bio->bio_buf;
        iocb->callback(iocb);
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
        return (0);     /* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

void
hammer2_io_setinval(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
        uint64_t mask = hammer2_io_mask(dio, off, bytes);

        hammer2_io_crc_clrmask(dio, mask);
        if ((u_int)dio->psize == bytes)
                dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}

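/*
 * Illustrative sketch (not compiled, hypothetical helper): a full
 * read-modify-write cycle through the dio layer.  The DIRTY flag set by
 * hammer2_io_setdirty() (or by the b*write wrappers above) is what
 * causes hammer2_io_putblk() to flush the underlying buffer on the
 * last ref drop.
 */
#if 0
static int
hammer2_io_rmw_example(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
        hammer2_io_t *dio;
        char *data;
        int error;

        error = hammer2_io_bread(hmp, HAMMER2_BREF_TYPE_DATA,
                                 lbase, lsize, &dio);
        if (error == 0) {
                data = hammer2_io_data(dio, lbase);
                /* ... modify data[0..lsize-1] ... */
                hammer2_io_setdirty(dio);
        }
        hammer2_io_putblk(&dio);        /* flushes on lastdrop if dirty */
        return (error);
}
#endif
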
static
void
dio_write_stats_update(hammer2_io_t *dio)
{
        long *counterp;

        switch(dio->btype) {
        case 0:
                return;
        case HAMMER2_BREF_TYPE_DATA:
                counterp = &hammer2_iod_file_write;
                break;
        case HAMMER2_BREF_TYPE_INODE:
                counterp = &hammer2_iod_meta_write;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
                counterp = &hammer2_iod_indr_write;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
        case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
                counterp = &hammer2_iod_fmap_write;
                break;
        default:
                counterp = &hammer2_iod_volu_write;
                break;
        }
        *counterp += dio->psize;
}