/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS-abstraction layer, but the
 * main purpose is to allow larger buffers to be used against
 * hammer2_chain's using smaller allocations, without causing deadlocks.
 */
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
	     off_t, pbase);

struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;
};

/*
 * Returns the CRC check mask for the chunk at (off, bytes) within the
 * dio's larger device buffer, one bit per 1KB sub-block.
 */
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
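/*
 * Worked example (illustrative): for a 16KB chunk starting 32KB into
 * its 64KB device buffer, i = 32 and bytes >> 10 = 16, so the returned
 * mask is 0x0000ffff00000000 -- one bit per 1KB sub-block.  A full
 * 64KB chunk at offset 0 short-cuts to (uint64_t)-1.
 */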
#define HAMMER2_GETBLK_GOOD	0
#define HAMMER2_GETBLK_QUEUED	1
#define HAMMER2_GETBLK_OWNED	2

/*
 * Allocate/Locate the requested dio, reference it, issue or queue iocb.
 */
void
hammer2_io_getblk(hammer2_dev_t *hmp, off_t lbase, int lsize,
		  hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	off_t pbase;
	off_t pmask;
	/*
	 * XXX after free, buffer reuse case w/ different size can clash
	 * with dio cache.  Let's avoid it for now.  Ultimately we need to
	 * invalidate the dio cache when freeing blocks to allow a mix
	 * of 16KB and 64KB block sizes.
	 */
	/*int psize = hammer2_devblksize(lsize);*/
	int psize = HAMMER2_PBUFSIZE;
	uint64_t refs;

	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		if ((atomic_fetchadd_64(&dio->refs, 1) &
		     HAMMER2_DIO_MASK) == 0) {
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		hammer2_spin_unsh(&hmp->io_spin);
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = iocb->btype;
		dio->refs = 1;
		hammer2_spin_init(&dio->spin, "h2dio");
		TAILQ_INIT(&dio->iocbq);
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			if ((atomic_fetchadd_64(&xio->refs, 1) &
			     HAMMER2_DIO_MASK) == 0) {
				atomic_add_int(&xio->hmp->iofree_count, -1);
			}
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	}

	/*
	 * Obtain/Validate the buffer.
	 */
	iocb->dio = dio;

	if (dio->act < 5)	/* SMP race ok */
		++dio->act;

	for (;;) {
		refs = dio->refs;
		cpu_ccfence();

		/*
		 * Issue the iocb immediately if the buffer is already good.
		 * Once set, GOOD cannot be cleared until refs drops to 0.
		 *
		 * lfence required because dio's are not interlocked for
		 * the DIO_GOOD test.
		 */
		if (refs & HAMMER2_DIO_GOOD) {
			cpu_lfence();
			iocb->callback(iocb);
			break;
		}

		/*
		 * Try to own the DIO by setting INPROG so we can issue
		 * I/O on it.
		 */
		if (refs & HAMMER2_DIO_INPROG) {
			/*
			 * If DIO_INPROG is already set then set WAITING and
			 * queue the iocb.
			 */
			hammer2_spin_ex(&dio->spin);
			if (atomic_cmpset_64(&dio->refs, refs,
					     refs | HAMMER2_DIO_WAITING)) {
				iocb->flags |= HAMMER2_IOCB_ONQ |
					       HAMMER2_IOCB_INPROG;
				TAILQ_INSERT_TAIL(&dio->iocbq, iocb, entry);
				hammer2_spin_unex(&dio->spin);
				break;
			}
			hammer2_spin_unex(&dio->spin);
			/* retry */
		} else {
			/*
			 * If DIO_INPROG is not set then set it and issue the
			 * callback immediately to start I/O.
			 */
			if (atomic_cmpset_64(&dio->refs, refs,
					     refs | HAMMER2_DIO_INPROG)) {
				iocb->flags |= HAMMER2_IOCB_INPROG;
				iocb->callback(iocb);
				break;
			}
			/* retry */
		}
		/* retry */
	}
}
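#if 0
/*
 * Illustrative sketch only (not compiled): how a caller typically drives
 * hammer2_io_getblk() synchronously.  This mirrors the pattern used by
 * _hammer2_io_new() and hammer2_io_bread() below; the callback and btype
 * shown here are placeholders.
 */
static int
example_getblk_sync(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_iocb_t iocb;

	iocb.callback = hammer2_iocb_bread_callback;	/* any iocb callback */
	iocb.cluster = NULL;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.flags = 0;
	iocb.btype = 0;
	iocb.error = 0;

	/* Queue or issue the iocb; the callback runs once the dio is usable */
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);

	/* Block until the iocb chain marks us done */
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);

	/* iocb.dio holds our reference; release via a putblk wrapper */
	if (iocb.dio)
		hammer2_io_bqrelse(&iocb.dio);
	return (iocb.error);
}
#endif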
/*
 * Quickly obtain a good DIO buffer, return NULL if the system no longer
 * caches the data.
 */
hammer2_io_t *
hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_iocb_t iocb;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	off_t pmask;
	int psize = HAMMER2_PBUFSIZE;
	uint64_t orefs;
	uint64_t nrefs;

	pmask = ~(hammer2_off_t)(psize - 1);

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
	lbase &= ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio == NULL) {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}

	if ((atomic_fetchadd_64(&dio->refs, 1) & HAMMER2_DIO_MASK) == 0)
		atomic_add_int(&dio->hmp->iofree_count, -1);
	hammer2_spin_unsh(&hmp->io_spin);

	if (dio->act < 5)	/* SMP race ok */
		++dio->act;

	/*
	 * Obtain/validate the buffer.  Do NOT issue I/O.  Discard if
	 * the system does not have the data already cached.
	 */
	nrefs = (uint64_t)-1;
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Use the buffer immediately if it is already good.
		 * Once set, GOOD cannot be cleared until refs drops to 0.
		 *
		 * lfence required because dio is not interlocked for
		 * the DIO_GOOD test.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			cpu_lfence();
			break;
		}

		/*
		 * Try to own the DIO by setting INPROG so we can issue
		 * I/O on it.  INPROG might already be set, in which case
		 * there is no way we can do this non-blocking so we punt.
		 */
		if ((orefs & HAMMER2_DIO_INPROG))
			break;
		nrefs = orefs | HAMMER2_DIO_INPROG;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs) == 0)
			continue;

		/*
		 * We own DIO_INPROG, try to set DIO_GOOD.
		 *
		 * For now do not use GETBLK_NOWAIT because
		 */
		bp = dio->bp;
		dio->bp = NULL;
		if (bp == NULL) {
#if 0
			bp = getblk(hmp->devvp, dio->pbase, dio->psize, 0, 0);
#endif
			bread(hmp->devvp, dio->pbase, dio->psize, &bp);
		}

		/*
		 * System buffer must also have remained cached.
		 */
		if (bp) {
			if ((bp->b_flags & B_ERROR) == 0 &&
			    (bp->b_flags & B_CACHE)) {
				dio->bp = bp;	/* assign BEFORE setting flag */
				atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
			} else {
				bqrelse(bp);
				bp = NULL;
			}
		}

		/*
		 * Clear DIO_INPROG.
		 *
		 * This is actually a bit complicated, see
		 * hammer2_io_complete() for more information.
		 */
		iocb.dio = dio;
		iocb.flags = HAMMER2_IOCB_INPROG;
		hammer2_io_complete(&iocb);
		break;
	}

	/*
	 * Only return the dio if its buffer is good.  If the buffer is not
	 * good be sure to clear INVALOK, meaning that invalidation is no
	 * longer acceptable.
	 */
	if ((dio->refs & HAMMER2_DIO_GOOD) == 0) {
		hammer2_io_putblk(&dio);
	}
	return dio;
}
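#if 0
/*
 * Illustrative sketch only (not compiled): a non-blocking cache probe
 * using hammer2_io_getquick().  No I/O is issued; NULL means the data
 * is no longer cached.  The caller must hammer2_io_putblk(*diop) when
 * finished with the returned pointer.
 */
static char *
example_peek_cached(hammer2_dev_t *hmp, off_t lbase, int lsize,
		    hammer2_io_t **diop)
{
	hammer2_io_t *dio;

	dio = hammer2_io_getquick(hmp, lbase, lsize);
	if (dio == NULL)
		return NULL;		/* not cached, no I/O issued */
	*diop = dio;			/* caller owns this ref */
	return hammer2_io_data(dio, lbase);
}
#endif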
/*
 * Make sure that INVALOK is cleared on the dio associated with the specified
 * data offset.  Called from bulkfree when a block becomes reusable.
 */
void
hammer2_io_resetinval(hammer2_dev_t *hmp, off_t data_off)
{
	hammer2_io_t *dio;

	data_off &= ~HAMMER2_PBUFMASK64;
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, data_off);
	if (dio)
		atomic_clear_64(&dio->refs, HAMMER2_DIO_INVALOK);
	hammer2_spin_unsh(&hmp->io_spin);
}

/*
 * The originator of the iocb is finished with it.
 */
void
hammer2_io_complete(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	hammer2_iocb_t *cbtmp;
	uint64_t orefs;
	uint64_t nrefs;
	uint32_t oflags;
	uint32_t nflags;

	/*
	 * If IOCB_INPROG was not set, completion is synchronous due to the
	 * buffer already being good.  We can simply set IOCB_DONE and return.
	 *
	 * In this situation DIO_INPROG is not set and we have no visibility
	 * on dio->bp.  We should not try to mess with dio->bp because another
	 * thread may be finishing up its processing.  dio->bp should already
	 * be set to BUF_KERNPROC()!
	 */
	if ((iocb->flags & HAMMER2_IOCB_INPROG) == 0) {
		atomic_set_int(&iocb->flags, HAMMER2_IOCB_DONE);
		return;
	}

	/*
	 * The iocb was queued, obtained DIO_INPROG, and its callback was
	 * made.  The callback is now complete.  We still own DIO_INPROG.
	 *
	 * We can set DIO_GOOD if no error occurred, which gives certain
	 * stability guarantees to dio->bp and allows other accessors to
	 * short-cut access.  DIO_GOOD cannot be cleared until the last
	 * ref is dropped.
	 */
	KKASSERT(dio->refs & HAMMER2_DIO_INPROG);
	if (dio->bp) {
		BUF_KERNPROC(dio->bp);
		if ((dio->bp->b_flags & B_ERROR) == 0) {
			KKASSERT(dio->bp->b_flags & B_CACHE);
			atomic_set_64(&dio->refs, HAMMER2_DIO_GOOD);
		}
	}

	/*
	 * Clean up the dio before marking the iocb as being done.  If another
	 * iocb is pending we chain to it while leaving DIO_INPROG set (it
	 * will call io completion and presumably clear DIO_INPROG).
	 *
	 * Otherwise if no other iocbs are pending we clear DIO_INPROG before
	 * finishing up the iocb.  This means that DIO_INPROG is cleared at
	 * the end of the chain before ANY of the chained iocbs are marked
	 * done.
	 *
	 * NOTE: The TAILQ is not stable until the spin-lock is held.
	 */
	for (;;) {
		orefs = dio->refs;
		nrefs = orefs & ~(HAMMER2_DIO_WAITING | HAMMER2_DIO_INPROG);

		if (orefs & HAMMER2_DIO_WAITING) {
			hammer2_spin_ex(&dio->spin);
			cbtmp = TAILQ_FIRST(&dio->iocbq);
			if (cbtmp) {
				/*
				 * NOTE: flags not adjusted in this case.
				 *	 Flags will be adjusted by the last
				 *	 iocb.
				 */
				TAILQ_REMOVE(&dio->iocbq, cbtmp, entry);
				hammer2_spin_unex(&dio->spin);
				cbtmp->callback(cbtmp);	/* chained */
				break;
			} else if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				hammer2_spin_unex(&dio->spin);
				break;
			}
			hammer2_spin_unex(&dio->spin);
			/* retry */
		} else if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			break;
		} /* else retry */
		/* retry */
	}

	/*
	 * Mark the iocb as done and wakeup any waiters.  This is done after
	 * all iocb chains have been called back and after DIO_INPROG has been
	 * cleared.  This avoids races against ref count drops by the waiting
	 * threads (a hard but not impossible SMP race) which might result in
	 * a 1->0 transition of the refs while DIO_INPROG is still set.
	 */
	for (;;) {
		oflags = iocb->flags;
		cpu_ccfence();
		nflags = oflags;
		nflags &= ~(HAMMER2_IOCB_WAKEUP | HAMMER2_IOCB_INPROG);
		nflags |= HAMMER2_IOCB_DONE;

		if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
			if (oflags & HAMMER2_IOCB_WAKEUP)
				wakeup(iocb);
			/* SMP: iocb is now stale */
			break;
		}
		/* retry */
	}
	iocb = NULL;
}
/*
 * Wait for an iocb's I/O to finish.
 */
void
hammer2_iocb_wait(hammer2_iocb_t *iocb)
{
	uint32_t oflags;
	uint32_t nflags;

	for (;;) {
		oflags = iocb->flags;
		cpu_ccfence();
		nflags = oflags | HAMMER2_IOCB_WAKEUP;
		if (oflags & HAMMER2_IOCB_DONE)
			break;
		tsleep_interlock(iocb, 0);
		if (atomic_cmpset_int(&iocb->flags, oflags, nflags)) {
			tsleep(iocb, PINTERLOCKED, "h2iocb", hz);
		}
	}
}

/*
 * Release our ref on *diop.
 *
 * On the last ref we must atomically clear DIO_GOOD and set DIO_INPROG,
 * then dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	hammer2_iocb_t iocb;
	struct buf *bp;
	off_t peof;
	off_t pbase;
	int psize;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;

	while (dio->unused01) {
		tsleep(&dio->unused01, 0, "h2DEBUG", hz);
	}

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear flags and set INPROG.
	 *
	 * On the 1->0 transition if INPROG is already set, another thread
	 * is in lastdrop and we can just return after the transition.
	 *
	 * On any other transition we can generally just return.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs - 1;

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.
			 */
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs &= ~(HAMMER2_DIO_INVAL);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.
			 */
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				atomic_add_int(&hmp->iofree_count, 1);
				return;
			}
		} else {
			/*
			 * Normal drop case.
			 */
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if (orefs & HAMMER2_DIO_GOOD) {
		KKASSERT(bp != NULL);
#if 1
		if (hammer2_inval_enable &&
		    (orefs & HAMMER2_DIO_INVALBITS) == HAMMER2_DIO_INVALBITS) {
			++hammer2_iod_invals;
			bp->b_flags |= B_INVAL | B_RELBUF;
			brelse(bp);
		} else
#endif
		if (orefs & HAMMER2_DIO_DIRTY) {
			int hce;

			dio_write_stats_update(dio);
			if ((hce = hammer2_cluster_write) > 0) {
				/*
				 * Allows write-behind to keep the buffer
				 * cache sane.
				 */
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, peof, psize, hce);
			} else {
				/*
				 * Allows dirty buffers to accumulate and
				 * possibly be canceled (e.g. by a 'rm'),
				 * will burst-write later.
				 */
				bp->b_flags |= B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
#if 1
		if (hammer2_inval_enable &&
		    (orefs & HAMMER2_DIO_INVALBITS) == HAMMER2_DIO_INVALBITS) {
			++hammer2_iod_invals;
			bp->b_flags |= B_INVAL | B_RELBUF;
			brelse(bp);
		} else
#endif
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio);
			bdwrite(bp);
		} else {
			brelse(bp);
		}
	}

	/*
	 * The instant we call io_complete() the dio is a free agent again
	 * and can be ripped out from under us.  We can clean up our final
	 * DIO_INPROG by simulating an iocb completion.
	 */
	hmp = dio->hmp;		/* extract fields */
	atomic_add_int(&hmp->iofree_count, 1);
	cpu_ccfence();

	iocb.dio = dio;
	iocb.flags = HAMMER2_IOCB_INPROG;
	hammer2_io_complete(&iocb);
	dio = NULL;		/* dio stale */

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	if (hmp->iofree_count > 65536) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > 65536) {
			info.count = hmp->iofree_count / 4;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}
/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			--dio->act;
			return 0;
		}
		KKASSERT(dio->bp == NULL);
		RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
		xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
		KKASSERT(xio == NULL);
		if (--info->count <= 0)	/* limit scan */
			return(-1);
	}
	return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}
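/*
 * Worked example (illustrative): with the 64KB device buffer loaded at
 * b_loffset == dio->pbase, a logical base 16KB into the buffer resolves
 * to bp->b_data + 16384 once the size-radix bits in the low 6 bits of
 * lbase are masked off.
 */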
#if 0
/*
 * Keep track of good CRCs in dio->good_crc_mask.  XXX needs to be done
 * in the chain structure, but chain structure needs to be persistent as
 * well on refs=0 and it isn't.
 */
int
hammer2_io_crc_good(hammer2_chain_t *chain, uint64_t *maskp)
{
	hammer2_io_t *dio;
	uint64_t mask;

	if ((dio = chain->dio) != NULL && chain->bytes >= 1024) {
		mask = hammer2_io_mask(dio, chain->bref.data_off, chain->bytes);
		*maskp = mask;
		if ((dio->crc_good_mask & mask) == mask)
			return 1;
		return 0;
	}
	*maskp = 0;

	return 0;
}

void
hammer2_io_crc_setmask(hammer2_io_t *dio, uint64_t mask)
{
	if (dio) {
		if (sizeof(long) == 8) {
			atomic_set_long(&dio->crc_good_mask, mask);
		} else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
			atomic_set_int(&((int *)&dio->crc_good_mask)[0],
				       (uint32_t)mask);
			atomic_set_int(&((int *)&dio->crc_good_mask)[1],
				       (uint32_t)(mask >> 32));
#else
			atomic_set_int(&((int *)&dio->crc_good_mask)[0],
				       (uint32_t)(mask >> 32));
			atomic_set_int(&((int *)&dio->crc_good_mask)[1],
				       (uint32_t)mask);
#endif
		}
	}
}

void
hammer2_io_crc_clrmask(hammer2_io_t *dio, uint64_t mask)
{
	if (dio) {
		if (sizeof(long) == 8) {
			atomic_clear_long(&dio->crc_good_mask, mask);
		} else {
#if _BYTE_ORDER == _LITTLE_ENDIAN
			atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
					 (uint32_t)mask);
			atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
					 (uint32_t)(mask >> 32));
#else
			atomic_clear_int(&((int *)&dio->crc_good_mask)[0],
					 (uint32_t)(mask >> 32));
			atomic_clear_int(&((int *)&dio->crc_good_mask)[1],
					 (uint32_t)mask);
#endif
		}
	}
}
#endif

/*
 * Helpers for hammer2_io_new*() functions
 */
static
void
hammer2_iocb_new_callback(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	int gbctl = (iocb->flags & HAMMER2_IOCB_QUICK) ? GETBLK_NOWAIT : 0;

	/*
	 * If IOCB_INPROG is not set the dio already has a good buffer and we
	 * can't mess with it other than zero the requested range.
	 *
	 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
	 * do what needs to be done with dio->bp.
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		if ((iocb->flags & HAMMER2_IOCB_READ) == 0) {
			if (iocb->lsize == dio->psize) {
				/*
				 * Fully covered buffer, try to optimize to
				 * avoid any I/O.  We might already have the
				 * buffer due to iocb chaining.
				 */
				if (dio->bp == NULL) {
					dio->bp = getblk(dio->hmp->devvp,
							 dio->pbase,
							 dio->psize,
							 gbctl, 0);
				}
				if (dio->bp) {
					vfs_bio_clrbuf(dio->bp);
					dio->bp->b_flags |= B_CACHE;
				}

				/*
				 * Invalidation is ok on newly allocated
				 * buffers which cover the entire buffer.
				 * Flag will be cleared on use by the de-dup
				 * code.
				 *
				 * hammer2_chain_modify() also checks this flag.
				 *
				 * QUICK mode is used by the freemap code to
				 * pre-validate a junk buffer to prevent an
				 * unnecessary read I/O.  We do NOT want
				 * to set INVALOK in that situation as the
				 * underlying allocations may be smaller.
				 */
				if ((iocb->flags & HAMMER2_IOCB_QUICK) == 0) {
					atomic_set_64(&dio->refs,
						      HAMMER2_DIO_INVALOK);
				}
			} else if (iocb->flags & HAMMER2_IOCB_QUICK) {
				/*
				 * Partial buffer, quick mode.  Do nothing.
				 * Do not instantiate the buffer or try to
				 * mark it B_CACHE because other portions of
				 * the buffer might have to be read by other
				 * accessors.
				 */
			} else if (dio->bp == NULL ||
				   (dio->bp->b_flags & B_CACHE) == 0) {
				/*
				 * Partial buffer, normal mode, requires
				 * read-before-write.  Chain the read.
				 *
				 * We might already have the buffer due to
				 * iocb chaining.  XXX unclear if we really
				 * need to write/release it and reacquire
				 * in that case.
				 *
				 * QUEUE ASYNC I/O, IOCB IS NOT YET COMPLETE.
				 */
				if (dio->bp) {
					if (dio->refs & HAMMER2_DIO_DIRTY) {
						dio_write_stats_update(dio);
						bdwrite(dio->bp);
					} else {
						bqrelse(dio->bp);
					}
					dio->bp = NULL;
				}
				atomic_set_int(&iocb->flags,
					       HAMMER2_IOCB_READ);
				breadcb(dio->hmp->devvp,
					dio->pbase, dio->psize,
					hammer2_io_callback, iocb);
				return;
			} /* else buffer is good */
		} /* else callback from breadcb is complete */
	}
	if (dio->bp) {
		if (iocb->flags & HAMMER2_IOCB_ZERO)
			bzero(hammer2_io_data(dio, iocb->lbase), iocb->lsize);
		atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
	}
	hammer2_io_complete(iocb);
}
static
int
_hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		hammer2_io_t **diop, int flags)
{
	hammer2_iocb_t iocb;

	iocb.callback = hammer2_iocb_new_callback;
	iocb.cluster = NULL;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.flags = flags;
	iocb.btype = btype;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	*diop = iocb.dio;

	return (iocb.error);
}

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, btype, lbase, lsize,
			       diop, HAMMER2_IOCB_ZERO));
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	return(_hammer2_io_new(hmp, btype, lbase, lsize, diop, 0));
}

/*
 * This is called from the freemap to pre-validate a full-sized buffer
 * whose contents we don't care about, in order to prevent an unnecessary
 * read-before-write.
 */
void
hammer2_io_newq(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize)
{
	hammer2_io_t *dio = NULL;

	_hammer2_io_new(hmp, btype, lbase, lsize, &dio, HAMMER2_IOCB_QUICK);
	hammer2_io_bqrelse(&dio);
}
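#if 0
/*
 * Illustrative sketch only (not compiled): allocating a new, zeroed
 * logical block and dirtying it.  The btype and offsets are placeholder
 * values; real callers derive them from a blockref.
 */
static int
example_new_block(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
	hammer2_io_t *dio;
	char *data;
	int error;

	/* Zero-fills the requested range and sets DIO_DIRTY */
	error = hammer2_io_new(hmp, HAMMER2_BREF_TYPE_DATA,
			       lbase, lsize, &dio);
	if (error)
		return error;
	data = hammer2_io_data(dio, lbase);
	/* ... fill in data ... */
	hammer2_io_bdwrite(&dio);	/* delayed write on last putblk */
	return 0;
}
#endif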
static
void
hammer2_iocb_bread_callback(hammer2_iocb_t *iocb)
{
	hammer2_io_t *dio = iocb->dio;
	off_t peof;
	int error;

	/*
	 * If IOCB_INPROG is not set the dio already has a good buffer and we
	 * can't mess with it other than zero the requested range.
	 *
	 * If IOCB_INPROG is set we also own DIO_INPROG at this time and can
	 * do what needs to be done with dio->bp.
	 */
	if (iocb->flags & HAMMER2_IOCB_INPROG) {
		int hce;

		if (dio->bp && (dio->bp->b_flags & B_CACHE)) {
			/*
			 * Already good, likely due to being chained from
			 * another iocb.
			 */
			error = 0;
		} else if ((hce = hammer2_cluster_read) > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			if (dio->bp) {
				bqrelse(dio->bp);
				dio->bp = NULL;
			}
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_read(dio->hmp->devvp, peof, dio->pbase,
					     dio->psize,
					     dio->psize, HAMMER2_PBUFSIZE*hce,
					     &dio->bp);
		} else {
			/*
			 * Synchronous I/O for now.
			 */
			if (dio->bp) {
				bqrelse(dio->bp);
				dio->bp = NULL;
			}
			error = bread(dio->hmp->devvp, dio->pbase,
				      dio->psize, &dio->bp);
		}
		if (error) {
			brelse(dio->bp);
			dio->bp = NULL;
		}
	}
	hammer2_io_complete(iocb);
}

int
hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	hammer2_iocb_t iocb;

	iocb.callback = hammer2_iocb_bread_callback;
	iocb.cluster = NULL;
	iocb.chain = NULL;
	iocb.ptr = NULL;
	iocb.lbase = lbase;
	iocb.lsize = lsize;
	iocb.btype = btype;
	iocb.flags = 0;
	iocb.error = 0;
	hammer2_io_getblk(hmp, lbase, lsize, &iocb);
	if ((iocb.flags & HAMMER2_IOCB_DONE) == 0)
		hammer2_iocb_wait(&iocb);
	*diop = iocb.dio;

	return (iocb.error);
}
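#if 0
/*
 * Illustrative sketch only (not compiled): synchronous read of a logical
 * block through the dio layer.  buf is a caller-supplied scratch area
 * (placeholder); the copy-out and release pattern is the point.
 */
static int
example_read_block(hammer2_dev_t *hmp, off_t lbase, int lsize, void *buf)
{
	hammer2_io_t *dio;
	int error;

	error = hammer2_io_bread(hmp, HAMMER2_BREF_TYPE_DATA,
				 lbase, lsize, &dio);
	if (error == 0)
		bcopy(hammer2_io_data(dio, lbase), buf, lsize);
	if (dio)
		hammer2_io_bqrelse(&dio);	/* drops our ref */
	return error;
}
#endif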
/*
 * System buf/bio async callback extracts the iocb and chains
 * to the iocb callback.
 */
void
hammer2_io_callback(struct bio *bio)
{
	struct buf *dbp = bio->bio_buf;
	hammer2_iocb_t *iocb = bio->bio_caller_info1.ptr;
	hammer2_io_t *dio;

	dio = iocb->dio;
	if ((bio->bio_flags & BIO_DONE) == 0)
		bpdone(dbp, 0);
	bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
	dio->bp = bio->bio_buf;
	iocb->callback(iocb);
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	hammer2_io_putblk(diop);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * Request an invalidation.  The hammer2_io code will oblige only if
 * DIO_INVALOK is also set.  INVALOK is cleared if the dio is used
 * in a dedup lookup and prevents invalidation of the dirty buffer.
 */
void
hammer2_io_setinval(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	if ((u_int)dio->psize == bytes)
		atomic_set_64(&dio->refs, HAMMER2_DIO_INVAL);
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
	hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
	return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}

static
void
dio_write_stats_update(hammer2_io_t *dio)
{
	long *counterp;

	switch(dio->btype) {
	case 0:
		return;
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_write;
		break;
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_write;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_write;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_write;
		break;
	default:
		counterp = &hammer2_iod_volu_write;
		break;
	}
	*counterp += dio->psize;
}