/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS-abstraction layer, but the
 * main purpose is to allow larger buffers to be used against
 * hammer2_chain's using smaller allocations, without causing deadlocks.
 */
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io1->pbase < io2->pbase)
                return(-1);
        if (io1->pbase > io2->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
                off_t, pbase);

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};

static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
        uint64_t mask;
        int i;

        if (bytes < 1024)       /* smaller chunks not supported */
                return 0;

        /*
         * Calculate crc check mask for larger chunks
         */
        i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
             HAMMER2_PBUFMASK) >> 10;
        if (i == 0 && bytes == HAMMER2_PBUFSIZE)
                return((uint64_t)-1);
        mask = ((uint64_t)1U << (bytes >> 10)) - 1;
        mask <<= i;

        return mask;
}
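
/*
 * Worked example for hammer2_io_mask() (illustrative only, assuming
 * HAMMER2_PBUFSIZE is 64KB): the mask holds one bit per 1KB sub-chunk
 * of the physical buffer.  A 16KB chunk starting 32KB into the buffer
 * gives i = 32768 >> 10 = 32 and
 * mask = ((1 << (16384 >> 10)) - 1) << 32 = 0x0000ffff00000000.
 * A chunk covering the entire buffer returns all-ones.
 */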

#define HAMMER2_GETBLK_GOOD     0
#define HAMMER2_GETBLK_QUEUED   1
#define HAMMER2_GETBLK_OWNED    2

/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
                  hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        off_t pbase;
        off_t pmask;
        /*
         * XXX after free, buffer reuse case w/ different size can clash
         * with dio cache.  Let's avoid it for now.  Ultimately we need to
         * invalidate the dio cache when freeing blocks to allow a mix
         * of 16KB and 64KB block sizes.
         */
        /*int psize = hammer2_devblksize(lsize);*/
        int psize = HAMMER2_PBUFSIZE;
        uint64_t refs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
                kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
                        pbase, lbase, lsize, pmask);
        }
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_64(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                hammer2_spin_unsh(&hmp->io_spin);
        } else {
                hammer2_spin_unsh(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->btype = iocb->btype;
                dio->refs = 1;
                hammer2_spin_init(&dio->spin, "h2dio");
                TAILQ_INIT(&dio->iocbq);
                hammer2_spin_ex(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        hammer2_spin_unex(&hmp->io_spin);
                } else {
                        if ((atomic_fetchadd_64(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        hammer2_spin_unex(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        }

        /*
         * Obtain/Validate the buffer.
         */
        iocb->dio = dio;

        if (dio->act < 5)       /* SMP race ok */
                ++dio->act;

        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Issue the iocb immediately if the buffer is already good.
                 * Once set, GOOD cannot be cleared until refs drops to 0.
                 *
                 * lfence required because dio's are not interlocked for
                 * the DIO_GOOD test.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
                        cpu_lfence();
                        iocb->callback(iocb);
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        /*
                         * If DIO_INPROG is already set then set WAITING and
                         * queue the iocb.
                         */
                        hammer2_spin_ex(&dio->spin);
                        if (atomic_cmpset_64(&dio->refs, refs,
                                             refs | HAMMER2_DIO_WAITING)) {
                                iocb->flags |= HAMMER2_IOCB_ONQ |
                                               HAMMER2_IOCB_INPROG;
                                TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
                                hammer2_spin_unex(&dio->spin);
                                break;
                        }
                        hammer2_spin_unex(&dio->spin);
                        /* retry */
                } else {
                        /*
                         * If DIO_INPROG is not set then set it and issue the
                         * callback immediately to start I/O.
                         */
                        if (atomic_cmpset_64(&dio->refs, refs,
                                             refs | HAMMER2_DIO_INPROG)) {
                                iocb->flags |= HAMMER2_IOCB_INPROG;
                                iocb->callback(iocb);
                                break;
                        }
                        /* retry */
                }
                /* retry */
        }
}
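
/*
 * Illustrative sketch (not part of the driver): callers normally drive
 * hammer2_io_getblk() with a stack-allocated iocb and then wait for
 * completion, exactly as the _hammer2_io_new() and hammer2_io_bread()
 * wrappers below do (field setup abbreviated here):
 *
 *      hammer2_iocb_t iocb;
 *
 *      iocb.callback = my_callback;    // hypothetical completion callback
 *      iocb.lbase = lbase;
 *      iocb.lsize = lsize;
 *      iocb.btype = btype;
 *      iocb.flags = 0;
 *      iocb.error = 0;
 *      hammer2_io_getblk(hmp, lbase, lsize, &iocb);
 *      if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
 *              hammer2_iocb_wait(&iocb);
 */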

/*
 * Quickly obtain a good DIO buffer, return NULL if the system no longer
 * caches the data.
 */
hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
        hammer2_iocb_t iocb;
        hammer2_io_t *dio;
        struct buf *bp;
        off_t pbase;
        off_t pmask;
        int psize = HAMMER2_PBUFSIZE;
        uint64_t orefs;
        uint64_t nrefs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
                kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
                        pbase, lbase, lsize, pmask);
        }
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio == NULL) {
                hammer2_spin_unsh(&hmp->io_spin);
                return NULL;
        }

        if ((atomic_fetchadd_64(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
                atomic_add_int(&dio->hmp->iofree_count, -1);
        hammer2_spin_unsh(&hmp->io_spin);

        if (dio->act < 5)       /* SMP race ok */
                ++dio->act;

        /*
         * Obtain/validate the buffer.  Do NOT issue I/O.  Discard if
         * the system does not have the data already cached.
         */
        nrefs = (uint64_t)-1;
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();

                /*
                 * Use the buffer immediately if it is already good.
                 * Once set, GOOD cannot be cleared until refs drops to 0.
                 *
                 * lfence required because dio is not interlocked for
                 * the DIO_GOOD test.
                 */
                if (orefs & HAMMER2_DIO_GOOD) {
                        cpu_lfence();
                        break;
                }

                /*
                 * Try to own the DIO by setting INPROG so we can issue
                 * I/O on it.  INPROG might already be set, in which case
                 * there is no way we can do this non-blocking so we punt.
                 */
                if ((orefs & HAMMER2_DIO_INPROG))
                        break;
                nrefs = orefs | HAMMER2_DIO_INPROG;
                if (atomic_cmpset_64(&dio->refs, orefs, nrefs) == 0)
                        continue;

                /*
                 * We own DIO_INPROG, try to set DIO_GOOD.
                 *
                 * For now do not use GETBLK_NOWAIT because
                 */
                bp = dio->bp;
                dio->bp = NULL;
                if (bp == NULL) {
#if 0
                        bp = getblk(hmp->devvp, dio->pbase, dio->psize, 0, 0);
#endif
                        bread(hmp->devvp, dio->pbase, dio->psize, &bp);
                }

                /*
                 * System buffer must also have remained cached.
                 */
                if (bp) {
                        if ((bp->b_flags & B_ERROR) == 0 &&
                            (bp->b_flags & B_CACHE)) {
                                dio->bp = bp;   /* assign BEFORE setting flag */
                                atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
                        } else {
                                bqrelse(bp);
                                bp = NULL;
                        }
                }

                /*
                 * Clear DIO_INPROG.
                 *
                 * This is actually a bit complicated, see
                 * hammer2_io_complete() for more information.
                 */
                iocb.dio = dio;
                iocb.flags = HAMMER2_IOCB_INPROG;
                hammer2_io_complete(&iocb);
                break;
        }

        /*
         * Only return the dio if its buffer is good.  If the buffer is not
         * good be sure to clear INVALOK, meaning that invalidation is no
         * longer acceptable.
         */
        if ((dio->refs & HAMMER2_DIO_GOOD) == 0) {
                hammer2_io_putblk(&dio);
        }
        return dio;
}
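
/*
 * Illustrative sketch (not part of the driver): a caller that only
 * wants the data if it is still cached, e.g. a dedup-style lookup,
 * can use hammer2_io_getquick() like this:
 *
 *      dio = hammer2_io_getquick(hmp, lbase, lsize);
 *      if (dio) {
 *              data = hammer2_io_data(dio, lbase);
 *              ...use data...
 *              hammer2_io_putblk(&dio);
 *      }
 *      (a NULL return means the data is no longer cached)
 */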

/*
 * Make sure that INVALOK is cleared on the dio associated with the specified
 * data offset.  Called from bulkfree when a block becomes reusable.
 */
void
hammer2_io_resetinval(hammer2_dev_t *hmp, off_t data_off)
{
        hammer2_io_t *dio;

        data_off &= ~HAMMER2_PBUFMASK64;
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, data_off);
        if (dio)
                atomic_clear_64(&dio->refs, HAMMER2_DIO_INVALOK);
        hammer2_spin_unsh(&hmp->io_spin);
}

/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        hammer2_iocb_t *cbtmp;
        uint64_t orefs;
        uint64_t nrefs;
        uint32_t oflags;
        uint32_t nflags;

        /*
         * If IOCB_INPROG was not set, completion is synchronous due to the
         * buffer already being good.  We can simply set IOCB_DONE and return.
         *
         * In this situation DIO_INPROG is not set and we have no visibility
         * on dio->bp.  We should not try to mess with dio->bp because another
         * thread may be finishing up its processing.  dio->bp should already
         * be set to BUF_KERNPROC()!
         */
        if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
                atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
                return;
        }

        /*
         * The iocb was queued, obtained DIO_INPROG, and its callback was
         * made.  The callback is now complete.  We still own DIO_INPROG.
         *
         * We can set DIO_GOOD if no error occurred, which gives certain
         * stability guarantees to dio->bp and allows other accessors to
         * short-cut access.  DIO_GOOD cannot be cleared until the last
         * ref is dropped.
         */
        KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
        if (dio->bp) {
                BUF_KERNPROC(dio->bp);
                if ((dio->bp->b_flags & B_ERROR) == 0) {
                        KKASSERT(dio->bp->b_flags & B_CACHE);
                        atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
                }
        }

        /*
         * Clean up the dio before marking the iocb as being done.  If another
         * iocb is pending we chain to it while leaving DIO_INPROG set (it
         * will call io completion and presumably clear DIO_INPROG).
         *
         * Otherwise if no other iocbs are pending we clear DIO_INPROG before
         * finishing up the cbio.  This means that DIO_INPROG is cleared at
         * the end of the chain before ANY of the cbios are marked done.
         *
         * NOTE: The TAILQ is not stable until the spin-lock is held.
         */
        for (;;) {
                orefs = dio->refs;
                nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

                if (orefs & HAMMER2_DIO_WAITING) {
                        hammer2_spin_ex(&dio->spin);
                        cbtmp = TAILQ_FIRST(&dio->iocbq);
                        if (cbtmp) {
                                /*
                                 * NOTE: flags not adjusted in this case.
                                 *       Flags will be adjusted by the last
                                 *       iocb.
                                 */
                                TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
                                hammer2_spin_unex(&dio->spin);
                                cbtmp->callback(cbtmp); /* chained */
                                break;
                        } else if (atomic_cmpset_64(&dio->refs, orefs,
                                                    nrefs)) {
                                hammer2_spin_unex(&dio->spin);
                                break;
                        }
                        hammer2_spin_unex(&dio->spin);
                        /* retry */
                } else if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                        break;
                } /* else retry */
                /* retry */
        }

        /*
         * Mark the iocb as done and wakeup any waiters.  This is done after
         * all iocb chains have been called back and after DIO_INPROG has been
         * cleared.  This avoids races against ref count drops by the waiting
         * threads (a hard but not impossible SMP race) which might result in
         * a 1->0 transition of the refs while DIO_INPROG is still set.
         */
        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags;
                nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
                nflags |= HAMMER2_IOCB_DONE;

                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        if (oflags & HAMMER2_IOCB_WAKEUP)
                                wakeup(iocb);
                        /* SMP: iocb is now stale */
                        break;
                }
                /* retry */
        }
        iocb = NULL;
}

/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
        uint32_t oflags;
        uint32_t nflags;

        for (;;) {
                oflags = iocb->flags;
                cpu_ccfence();
                nflags = oflags | HAMMER2_IOCB_WAKEUP;
                if (oflags & HAMMER2_IOCB_DONE)
                        break;
                tsleep_interlock(iocb, 0);
                if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
                        tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
                }
        }
}
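
/*
 * Illustrative timeline (not part of the driver) of the DONE/WAKEUP
 * handshake between hammer2_iocb_wait() and hammer2_io_complete():
 *
 *      waiter                          completer
 *      ------                          ---------
 *      oflags = iocb->flags
 *      tsleep_interlock(iocb, 0)
 *      cmpset(flags, oflags|WAKEUP)
 *                                      cmpset(flags, DONE set,
 *                                             WAKEUP|INPROG cleared)
 *                                      wakeup(iocb) if WAKEUP was set
 *      tsleep(..., PINTERLOCKED, ...)  returns immediately if already
 *                                      woken via the interlock
 */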

/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        hammer2_iocb_t iocb;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        uint64_t orefs;
        uint64_t nrefs;

        dio = *diop;
        *diop = NULL;
        hmp = dio->hmp;

        /*
         * Drop refs.
         *
         * On the 1->0 transition clear flags and set INPROG.
         *
         * On the 1->0 transition if INPROG is already set, another thread
         * is in lastdrop and we can just return after the transition.
         *
         * On any other transition we can generally just return.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
                nrefs = orefs - 1;

                if ((orefs & HAMMER2_DIO_MASK) == 1 &&
                    (orefs & HAMMER2_DIO_INPROG) == 0) {
                        /*
                         * Lastdrop case, INPROG can be set.
                         */
                        nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
                        nrefs &= ~(HAMMER2_DIO_INVAL);
                        nrefs |= HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                break;
                } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
                        /*
                         * Lastdrop case, INPROG already set.
                         */
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                atomic_add_int(&hmp->iofree_count, 1);
                                return;
                        }
                } else {
                        /*
                         * Normal drop case.
                         */
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                return;
                }
                cpu_pause();
                /* retry */
        }

        /*
         * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
         * have been cleared.
         *
         * We can now dispose of the buffer, and should do it before calling
         * io_complete() in case there's a race against a new reference
         * which causes io_complete() to chain and instantiate the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

        if (orefs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
#if 1
                if (hammer2_inval_enable &&
                    (orefs & HAMMER2_DIO_INVALBITS) ==
                    HAMMER2_DIO_INVALBITS) {
                        ++hammer2_iod_invals;
                        bp->b_flags |= B_INVAL | B_RELBUF;
                        brelse(bp);
                } else
#endif
                if (orefs & HAMMER2_DIO_DIRTY) {
                        int hce;

                        dio_write_stats_update(dio);
                        if ((hce = hammer2_cluster_write) > 0) {
                                /*
                                 * Allows write-behind to keep the buffer
                                 * cache sane.
                                 */
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                bp->b_flags |= B_CLUSTEROK;
                                cluster_write(bp, peof, psize, hce);
                        } else {
                                /*
                                 * Allows dirty buffers to accumulate and
                                 * possibly be canceled (e.g. by a 'rm'),
                                 * will burst-write later.
                                 */
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
#if 1
                if (hammer2_inval_enable &&
                    (orefs & HAMMER2_DIO_INVALBITS) ==
                    HAMMER2_DIO_INVALBITS) {
                        ++hammer2_iod_invals;
                        bp->b_flags |= B_INVAL | B_RELBUF;
                        brelse(bp);
                } else
#endif
                if (orefs & HAMMER2_DIO_DIRTY) {
                        dio_write_stats_update(dio);
                        bdwrite(bp);
                } else {
                        brelse(bp);
                }
        }

        /*
         * The instant we call io_complete() the dio is a free agent again
         * and can be ripped out from under us.
         *
         * We can clean up our final DIO_INPROG by simulating an iocb
         * completion.
         */
        hmp = dio->hmp;                         /* extract fields */
        atomic_add_int(&hmp->iofree_count, 1);
        cpu_ccfence();

        iocb.dio = dio;
        iocb.flags = HAMMER2_IOCB_INPROG;
        hammer2_io_complete(&iocb);
        dio = NULL;                             /* dio stale */

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        if (hmp->iofree_count > 65536) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                hammer2_spin_ex(&hmp->io_spin);
                if (hmp->iofree_count > 65536) {
                        info.count = hmp->iofree_count / 4;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                hammer2_spin_unex(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}
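
/*
 * Disposal summary for the lastdrop path above (illustrative only,
 * assuming HAMMER2_DIO_INVALBITS is DIO_INVAL|DIO_INVALOK and
 * hammer2_inval_enable is set for the invalidation rows):
 *
 *      GOOD  DIRTY  INVALBITS  disposition
 *      ----  -----  ---------  -----------
 *      yes   -      yes        invalidate (B_INVAL|B_RELBUF, brelse())
 *      yes   yes    no         cluster_write() or bdwrite()
 *      yes   no     no         bqrelse() (brelse() on B_ERROR etc.)
 *      no    yes    no         bdwrite()
 *      no    no     no         brelse()
 */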

/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                if (dio->act > 0) {
                        --dio->act;
                        return 0;
                }
                KKASSERT(dio->bp == NULL);
                RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                KKASSERT(xio == NULL);
                if (--info->count <= 0) /* limit scan */
                        return(-1);
        }
        return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG))
                    == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}
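
/*
 * Worked example for hammer2_io_data() (illustrative): the low radix
 * bits of lbase encode the block size and are masked off.  With the
 * underlying buffer loaded at b_loffset = 0x10000, a logical base
 * pointing 0x4000 into the buffer yields
 * off = 0x14000 - 0x10000 = 0x4000, and the returned pointer is
 * bp->b_data + 0x4000.
 */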

#if 0
/*
 * Keep track of good CRCs in dio->good_crc_mask.  XXX needs to be done
 * in the chain structure, but the chain structure needs to be persistent
 * as well on refs=0 and it isn't.
 */
int
hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp)
{
        hammer2_io_t *dio;
        uint64_t mask;

        if ((dio = chain->dio) != NULL && chain->bytes >= 1024) {
                mask = hammer2_io_mask(dio, chain->bref.data_off,
                                       chain->bytes);
                *maskp = mask;
                if ((dio->crc_good_mask & mask) == mask)
                        return 1;
                return 0;
        }
        *maskp = 0;

        return 0;
}

void
hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask)
{
        if (dio) {
                if (sizeof(long) == 8) {
                        atomic_set_long(&dio->crc_good_mask, mask);
                } else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        atomic_set_int(&((int *)&dio->crc_good_mask)[0],
                                       (uint32_t)mask);
                        atomic_set_int(&((int *)&dio->crc_good_mask)[1],
                                       (uint32_t)(mask >> 32));
#else
                        atomic_set_int(&((int *)&dio->crc_good_mask)[0],
                                       (uint32_t)(mask >> 32));
                        atomic_set_int(&((int *)&dio->crc_good_mask)[1],
                                       (uint32_t)mask);
#endif
                }
        }
}

void
hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask)
{
        if (dio) {
                if (sizeof(long) == 8) {
                        atomic_clear_long(&dio->crc_good_mask, mask);
                } else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
                        atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
                                         (uint32_t)mask);
                        atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
                                         (uint32_t)(mask >> 32));
#else
                        atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
                                         (uint32_t)(mask >> 32));
                        atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
                                         (uint32_t)mask);
#endif
                }
        }
}
#endif

/*
 * Helpers for hammer2_io_new*() functions
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

        /*
         * If IOCB_INPROG is not set the dio already has a good buffer and
         * we can't mess with it other than zero the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
         * do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
                        if (iocb->lsize == dio->psize) {
                                /*
                                 * Fully covered buffer, try to optimize to
                                 * avoid any I/O.  We might already have the
                                 * buffer due to iocb chaining.
                                 */
                                if (dio->bp == NULL) {
                                        dio->bp = getblk(dio->hmp->devvp,
                                                         dio->pbase,
                                                         dio->psize,
                                                         gbctl, 0);
                                }
                                if (dio->bp) {
                                        vfs_bio_clrbuf(dio->bp);
                                        dio->bp->b_flags |= B_CACHE;
                                }

                                /*
                                 * Invalidation is ok on newly allocated
                                 * buffers which cover the entire buffer.
                                 * Flag will be cleared on use by the de-dup
                                 * code.
                                 *
                                 * hammer2_chain_modify() also checks this
                                 * flag.
                                 *
                                 * QUICK mode is used by the freemap code to
                                 * pre-validate a junk buffer to prevent an
                                 * unnecessary read I/O.  We do NOT want
                                 * to set INVALOK in that situation as the
                                 * underlying allocations may be smaller.
                                 */
                                if ((iocb->flags & HAMMER2_IOCB_QUICK) == 0) {
                                        atomic_set_64(&dio->refs,
                                                      HAMMER2_DIO_INVALOK);
                                }
                        } else if (iocb->flags & HAMMER2_IOCB_QUICK) {
                                /*
                                 * Partial buffer, quick mode.  Do nothing.
                                 * Do not instantiate the buffer or try to
                                 * mark it B_CACHE because other portions of
                                 * the buffer might have to be read by other
                                 * accessors.
                                 */
                        } else if (dio->bp == NULL ||
                                   (dio->bp->b_flags & B_CACHE) == 0) {
                                /*
                                 * Partial buffer, normal mode, requires
                                 * read-before-write.  Chain the read.
                                 *
                                 * We might already have the buffer due to
                                 * iocb chaining.  XXX unclear if we really
                                 * need to write/release it and reacquire
                                 * in that case.
                                 *
                                 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
                                 */
                                if (dio->bp) {
                                        if (dio->refs & HAMMER2_DIO_DIRTY) {
                                                dio_write_stats_update(dio);
                                                bdwrite(dio->bp);
                                        } else {
                                                bqrelse(dio->bp);
                                        }
                                        dio->bp = NULL;
                                }
                                atomic_set_int(&iocb->flags,
                                               HAMMER2_IOCB_READ);
                                breadcb(dio->hmp->devvp,
                                        dio->pbase, dio->psize,
                                        hammer2_io_callback, iocb);
                                return;
                        } /* else buffer is good */
                } /* else callback from breadcb is complete */
        }
        if (dio->bp) {
                if (iocb->flags & HAMMER2_IOCB_ZERO)
                        bzero(hammer2_io_data(dio, iocb->lbase),
                              iocb->lsize);
                atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
        }
        hammer2_io_complete(iocb);
}
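
/*
 * Decision summary for the callback above (illustrative, assuming a
 * 64KB HAMMER2_PBUFSIZE): a new 64KB block fully covers the device
 * buffer, so getblk() + vfs_bio_clrbuf() avoids all read I/O; a new
 * 16KB block in QUICK mode touches nothing; a new 16KB block in normal
 * mode must read the surrounding 64KB via breadcb() first unless the
 * buffer is already B_CACHE.
 */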

static
int
_hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                hammer2_io_t **diop, int flags)
{
        hammer2_iocb_t iocb;

        iocb.callback = hammer2_iocb_new_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.flags = flags;
        iocb.btype = btype;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        *diop = iocb.dio;

        return (iocb.error);
}

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, btype, lbase, lsize,
                               diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, btype, lbase, lsize, diop, 0));
}

/*
 * This is called from the freemap to pre-validate a full-sized buffer
 * whose contents we don't care about, in order to prevent an unnecessary
 * read-before-write.
 */
void
hammer2_io_newq(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize)
{
        hammer2_io_t *dio = NULL;

        _hammer2_io_new(hmp, btype, lbase, lsize, &dio, HAMMER2_IOCB_QUICK);
        hammer2_io_bqrelse(&dio);
}
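
/*
 * Illustrative usage (not part of the driver): hammer2_io_new() returns
 * a zeroed buffer for a freshly allocated block, hammer2_io_newnz()
 * skips the bzero when the caller will overwrite the whole range, and
 * hammer2_io_newq() merely pre-validates the device buffer:
 *
 *      hammer2_io_t *dio;
 *      int error;
 *
 *      error = hammer2_io_new(hmp, HAMMER2_BREF_TYPE_DATA,
 *                             lbase, lsize, &dio);
 *      if (error == 0) {
 *              char *data = hammer2_io_data(dio, lbase);
 *              ...fill data...
 *              hammer2_io_bdwrite(&dio);  // marks DIRTY, drops our ref
 *      }
 */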

static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
        hammer2_io_t *dio = iocb->dio;
        off_t peof;
        int error;

        /*
         * If IOCB_INPROG is not set the dio already has a good buffer and
         * we can't mess with it other than zero the requested range.
         *
         * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
         * do what needs to be done with dio->bp.
         */
        if (iocb->flags & HAMMER2_IOCB_INPROG) {
                int hce;

                if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
                        /*
                         * Already good, likely due to being chained from
                         * another iocb.
                         */
                        error = 0;
                } else if ((hce = hammer2_cluster_read) > 0) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(dio->hmp->devvp, peof,
                                             dio->pbase, dio->psize,
                                             dio->psize,
                                             HAMMER2_PBUFSIZE * hce,
                                             &dio->bp);
                } else {
                        /*
                         * Synchronous I/O for now.
                         */
                        if (dio->bp) {
                                bqrelse(dio->bp);
                                dio->bp = NULL;
                        }
                        error = bread(dio->hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
        }
        hammer2_io_complete(iocb);
}

int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        hammer2_iocb_t iocb;

        iocb.callback = hammer2_iocb_bread_callback;
        iocb.cluster = NULL;
        iocb.chain = NULL;
        iocb.ptr = NULL;
        iocb.lbase = lbase;
        iocb.lsize = lsize;
        iocb.btype = btype;
        iocb.flags = 0;
        iocb.error = 0;
        hammer2_io_getblk(hmp, lbase, lsize, &iocb);
        if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
                hammer2_iocb_wait(&iocb);
        *diop = iocb.dio;

        return (iocb.error);
}

/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
        hammer2_io_t *dio;

        dio = iocb->dio;
        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = bio->bio_buf;
        iocb->callback(iocb);
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
        return (0);     /* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * Request an invalidation.  The hammer2_io code will oblige only if
 * DIO_INVALOK is also set.  INVALOK is cleared if the dio is used
 * in a dedup lookup and prevents invalidation of the dirty buffer.
 */
void
hammer2_io_setinval(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
        if ((u_int)dio->psize == bytes)
                atomic_set_64(&dio->refs, HAMMER2_DIO_INVAL);
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}

static
void
dio_write_stats_update(hammer2_io_t *dio)
{
        long *counterp;

        switch(dio->btype) {
        case 0:
                return;
        case HAMMER2_BREF_TYPE_DATA:
                counterp = &hammer2_iod_file_write;
                break;
        case HAMMER2_BREF_TYPE_INODE:
                counterp = &hammer2_iod_meta_write;
                break;
        case HAMMER2_BREF_TYPE_INDIRECT:
                counterp = &hammer2_iod_indr_write;
                break;
        case HAMMER2_BREF_TYPE_FREEMAP_NODE:
        case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
                counterp = &hammer2_iod_fmap_write;
                break;
        default:
                counterp = &hammer2_iod_volu_write;
                break;
        }
        *counterp += dio->psize;
}