/*
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
 * Copyright (c) 2011-2022 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

#define HAMMER2_DOP_READ        1
#define HAMMER2_DOP_NEW         2
#define HAMMER2_DOP_NEWNZ       3
#define HAMMER2_DOP_READQ       4

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS abstraction, but the main
 * purpose is to allow larger buffers to be used against hammer2_chains
 * using smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
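
/*
 * Illustrative usage sketch (not compiled; kept in the file's usual
 * #if 0 style).  It shows the intended acquire/release pairing this
 * layer implements, using the non-debug wrapper macros from hammer2.h.
 * The caller, its (lbase, lsize) values, and the consumption step are
 * hypothetical.
 */
#if 0
static int
hammer2_io_example_read(hammer2_dev_t *hmp, off_t lbase, int lsize)
{
        hammer2_io_t *dio;
        char *data;
        int error;

        /* Acquire the DIO, instantiating its buffer if necessary */
        error = hammer2_io_bread(hmp, HAMMER2_BREF_TYPE_DATA,
                                 lbase, lsize, &dio);
        if (error == 0) {
                /* Map our smaller block within the larger buffer */
                data = hammer2_io_data(dio, lbase);
                /* ... consume (data, lsize) here (hypothetical) ... */
        }
        if (dio)
                hammer2_io_bqrelse(&dio);       /* drop our ref */
        return error;
}
#endif
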
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io1->pbase < io2->pbase)
                return(-1);
        if (io1->pbase > io2->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
                off_t, pbase);

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int count;
};

#if 0
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
        uint64_t mask;
        int i;

        if (bytes < 1024)       /* smaller chunks not supported */
                return 0;

        /*
         * Calculate crc check mask for larger chunks
         */
        i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
             HAMMER2_PBUFMASK) >> 10;
        if (i == 0 && bytes == HAMMER2_PBUFSIZE)
                return((uint64_t)-1);
        mask = ((uint64_t)1U << (bytes >> 10)) - 1;
        mask <<= i;

        return mask;
}
#endif

#ifdef HAMMER2_IO_DEBUG

static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
        int i;

        i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

        dio->debug_file[i] = file;
        dio->debug_line[i] = line;
        dio->debug_refs[i] = dio->refs;
        dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif

/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
static
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
                 int createit, int *isgoodp)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        hammer2_key_t lbase;
        hammer2_key_t pbase;
        hammer2_key_t pmask;
        hammer2_vfsvolume_t *vol;
        uint64_t refs;
        int lsize;
        int psize;

        psize = HAMMER2_PBUFSIZE;
        pmask = ~(hammer2_off_t)(psize - 1);
        if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
                lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
        else
                lsize = 0;
        lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;

        if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
                kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
                        pbase, lbase, lsize, pmask);
        }
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
        *isgoodp = 0;

        /*
         * Access/Allocate the DIO, bump dio->refs to prevent destruction.
         *
         * If DIO_GOOD is set the ref should prevent it from being cleared
         * out from under us, we can set *isgoodp, and the caller can operate
         * on the buffer without any further interaction.
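         *
         * The HAMMER2_DIO_MASK portion of dio->refs is the reference count
         * proper; the remaining bits carry state (DIO_GOOD, DIO_INPROG,
         * DIO_WAITING, DIO_DIRTY, DIO_FLUSH).  For example, a cached DIO
         * with one ref and a valid buffer has refs == (HAMMER2_DIO_GOOD | 1),
         * so the fetchadd below gains the ref and samples the flags in one
         * atomic operation.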
         */
        hammer2_spin_sh(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                refs = atomic_fetchadd_64(&dio->refs, 1);
                if ((refs & HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                if (refs & HAMMER2_DIO_GOOD)
                        *isgoodp = 1;
                hammer2_spin_unsh(&hmp->io_spin);
        } else if (createit) {
                refs = 0;
                hammer2_spin_unsh(&hmp->io_spin);
                vol = hammer2_get_volume(hmp, pbase);
                dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->devvp = vol->dev->devvp;
                dio->dbase = vol->offset;
                KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
                dio->pbase = pbase;
                dio->psize = psize;
                dio->btype = btype;
                dio->refs = refs + 1;
                dio->act = 5;
                hammer2_spin_ex(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        atomic_add_int(&hammer2_dio_count, 1);
                        hammer2_spin_unex(&hmp->io_spin);
                } else {
                        refs = atomic_fetchadd_64(&xio->refs, 1);
                        if ((refs & HAMMER2_DIO_MASK) == 0)
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        if (refs & HAMMER2_DIO_GOOD)
                                *isgoodp = 1;
                        hammer2_spin_unex(&hmp->io_spin);
                        kfree_obj(dio, hmp->mio);
                        dio = xio;
                }
        } else {
                hammer2_spin_unsh(&hmp->io_spin);
                return NULL;
        }
        dio->ticks = ticks;
        if (dio->act < 10)
                ++dio->act;

        return dio;
}

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set, the buffer already exists and is good to go.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
                   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
        hammer2_io_t *dio;
        hammer2_off_t dev_pbase;
        //off_t peof;
        uint64_t orefs;
        uint64_t nrefs;
        int isgood;
        int error;
        int hce;
        //int bflags;

        //bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
        //bflags |= B_KVABIO;

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

        if (op == HAMMER2_DOP_READQ) {
                dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
                if (dio == NULL)
                        return NULL;
                op = HAMMER2_DOP_READ;
        } else {
                dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
        }

        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();

                /*
                 * Buffer is already good, handle the op and return.
                 */
                if (orefs & HAMMER2_DIO_GOOD) {
                        if (isgood == 0)
                                cpu_mfence();
                        bkvasync(dio->bp);

                        switch(op) {
                        case HAMMER2_DOP_NEW:
                                bzero(hammer2_io_data(dio, lbase), lsize);
                                /* fall through */
                        case HAMMER2_DOP_NEWNZ:
                                atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
                                break;
                        case HAMMER2_DOP_READ:
                        default:
                                /* nothing to do */
                                break;
                        }
                        DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
                        return (dio);
                }

                /*
                 * Try to own the DIO
                 */
                if (orefs & HAMMER2_DIO_INPROG) {
                        nrefs = orefs | HAMMER2_DIO_WAITING;
                        tsleep_interlock(dio, 0);
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                tsleep(dio, PINTERLOCKED, "h2dio", hz);
                        }
                        /* retry */
                } else {
                        nrefs = orefs | HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                break;
                        }
                }
        }

        /*
         * We break to here if GOOD is not set and we acquired INPROG for
         * the I/O.
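         *
         * INPROG grants us exclusive ownership of the missing dio->bp.
         * Any concurrent getblk or putblk caller that observes INPROG
         * sets DIO_WAITING and sleeps on the dio until we clear INPROG
         * below and issue the wakeup.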
         */
        KKASSERT(dio->bp == NULL);
        if (btype == HAMMER2_BREF_TYPE_DATA)
                hce = hammer2_cluster_data_read;
        else
                hce = hammer2_cluster_meta_read;

        error = 0;
        dev_pbase = dio->pbase - dio->dbase;
        if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
            dio->psize == lsize) {
                switch(op) {
                case HAMMER2_DOP_NEW:
                case HAMMER2_DOP_NEWNZ:
                        dio->bp = getblkx(dio->devvp,
                                          dev_pbase, dio->psize,
                                          GETBLK_KVABIO, 0);
                        if (op == HAMMER2_DOP_NEW) {
                                bkvasync(dio->bp);
                                bzero(dio->bp->b_data, dio->psize);
                        }
                        atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
                        break;
                case HAMMER2_DOP_READ:
                default:
                        KKASSERT(dio->bp == NULL);
#if 0
                        if (hce > 0) {
                                /*
                                 * Synchronous cluster I/O for now.
                                 */
                                peof = (dio->pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                peof -= dio->dbase;
                                error = cluster_readx(dio->devvp,
                                                      peof, dev_pbase,
                                                      dio->psize, bflags,
                                                      dio->psize,
                                                      HAMMER2_PBUFSIZE*hce,
                                                      &dio->bp);
                        } else {
                                error = breadnx(dio->devvp, dev_pbase,
                                                dio->psize, bflags,
                                                NULL, NULL, 0, &dio->bp);
                        }
#else
                        error = breadx(dio->devvp, dev_pbase, dio->psize,
                                       &dio->bp);
#endif
                        break;
                }
        } else {
#if 0
                if (hce > 0) {
                        /*
                         * Synchronous cluster I/O for now.
                         */
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        peof -= dio->dbase;
                        error = cluster_readx(dio->devvp,
                                              peof, dev_pbase, dio->psize,
                                              bflags,
                                              dio->psize,
                                              HAMMER2_PBUFSIZE*hce,
                                              &dio->bp);
                } else {
                        error = breadnx(dio->devvp, dev_pbase,
                                        dio->psize, bflags,
                                        NULL, NULL, 0, &dio->bp);
                }
#else
                error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
#endif
                if (dio->bp) {
                        /*
                         * Handle NEW flags
                         */
                        switch(op) {
                        case HAMMER2_DOP_NEW:
                                bkvasync(dio->bp);
                                bzero(hammer2_io_data(dio, lbase), lsize);
                                /* fall through */
                        case HAMMER2_DOP_NEWNZ:
                                atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
                                break;
                        case HAMMER2_DOP_READ:
                        default:
                                break;
                        }

                        /*
                         * Tell the kernel that the buffer cache is not
                         * meta-data based on the btype.  This allows
                         * swapcache to distinguish between data and
                         * meta-data.
                         */
                        switch(btype) {
                        case HAMMER2_BREF_TYPE_DATA:
                                //dio->bp->b_flags |= B_NOTMETA;
                                break;
                        default:
                                break;
                        }
                }
        }

        if (dio->bp) {
                bkvasync(dio->bp);
                BUF_KERNPROC(dio->bp);
                //dio->bp->b_flags &= ~B_AGE;
                /* dio->bp->b_debug_info2 = dio; */
        }
        dio->error = error;

        /*
         * Clear INPROG and WAITING, set GOOD, and wake up anyone waiting.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
                nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
                if (error == 0)
                        nrefs |= HAMMER2_DIO_GOOD;
                if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                        if (orefs & HAMMER2_DIO_WAITING)
                                wakeup(dio);
                        break;
                }
                cpu_pause();
        }

        /* XXX error handling */
        DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

        return dio;
}

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
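 *
 * INPROG interlocks the teardown against concurrent accessors: a racing
 * getblk that picks up a new ref while we are disposing of the buffer
 * sees INPROG, sets DIO_WAITING, and sleeps until we finish, after which
 * it re-instantiates dio->bp if it still needs it.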
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
        hammer2_dev_t *hmp;
        hammer2_io_t *dio;
        struct buf *bp;
        off_t pbase;
        int psize;
        int dio_limit;
        uint64_t orefs;
        uint64_t nrefs;

        dio = *diop;
        *diop = NULL;
        hmp = dio->hmp;
        DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

        KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

        /*
         * Drop refs.
         *
         * On the 1->0 transition clear GOOD and set INPROG, and break.
         * On any other transition we can return early.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();

                if ((orefs & HAMMER2_DIO_MASK) == 1 &&
                    (orefs & HAMMER2_DIO_INPROG) == 0) {
                        /*
                         * Lastdrop case, we can set INPROG.  GOOD must be
                         * cleared to prevent the getblk shortcut.
                         */
                        nrefs = orefs - 1;
                        nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
                        nrefs |= HAMMER2_DIO_INPROG;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                break;
                } else if ((orefs & HAMMER2_DIO_MASK) == 1) {
                        /*
                         * Lastdrop case, INPROG already set.  We must
                         * wait for INPROG to clear.
                         */
                        nrefs = orefs | HAMMER2_DIO_WAITING;
                        tsleep_interlock(dio, 0);
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                                tsleep(dio, PINTERLOCKED, "h2dio", hz);
                        }
                        /* retry */
                } else {
                        /*
                         * Normal drop case.
                         */
                        nrefs = orefs - 1;
                        if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
                                return;
                        /* retry */
                }
                cpu_pause();
                /* retry */
        }

        /*
         * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
         * have been cleared.  iofree_count has not yet been incremented;
         * note that another accessor race will decrement iofree_count, so
         * we have to increment it regardless.
         *
         * We can now dispose of the buffer, and should do it before calling
         * io_complete() in case there's a race against a new reference
         * which causes io_complete() to chain and instantiate the bp again.
         */
        pbase = dio->pbase;
        psize = dio->psize;
        bp = dio->bp;
        dio->bp = NULL;

        if ((orefs & HAMMER2_DIO_GOOD) && bp) {
                /*
                 * Non-errored disposal of bp
                 */
                if (orefs & HAMMER2_DIO_DIRTY) {
                        dio_write_stats_update(dio, bp);

                        /*
                         * Allows dirty buffers to accumulate and
                         * possibly be canceled (e.g. by a 'rm');
                         * by default we will burst-write later.
                         *
                         * We generally do NOT want to issue an actual
                         * b[a]write() or cluster_write() here.  Due to
                         * the way chains are locked, buffers may be cycled
                         * in and out quite often and disposal here can cause
                         * multiple writes or write-read stalls.
                         *
                         * If FLUSH is set we do want to issue the actual
                         * write.  This typically occurs in the write-behind
                         * case when writing to large files.
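                         *
                         * FLUSH is set (together with DIRTY) by the
                         * bawrite/bwrite entry points further below and
                         * is consumed and cleared here on final disposal.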
                         */
                        //off_t peof;
                        //int hce;
                        if (dio->refs & HAMMER2_DIO_FLUSH) {
#if 0
                                if ((hce = hammer2_cluster_write) != 0) {
                                        peof = (pbase + HAMMER2_SEGMASK64) &
                                               ~HAMMER2_SEGMASK64;
                                        peof -= dio->dbase;
                                        bp->b_flags |= B_CLUSTEROK;
                                        cluster_write(bp, peof, psize, hce);
                                } else {
                                        bp->b_flags &= ~B_CLUSTEROK;
                                        bawrite(bp);
                                }
#else
                                bawrite(bp);
#endif
                        } else {
                                //bp->b_flags &= ~B_CLUSTEROK;
                                bdwrite(bp);
                        }
#if 0
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
#endif
                } else {
                        bqrelse(bp);
                }
        } else if (bp) {
                /*
                 * Errored disposal of bp
                 */
                brelse(bp);
        }

        /*
         * Update iofree_count before disposing of the dio
         */
        hmp = dio->hmp;
        atomic_add_int(&hmp->iofree_count, 1);

        /*
         * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
         *
         * Also clear FLUSH as it was handled above.
         */
        for (;;) {
                orefs = dio->refs;
                cpu_ccfence();
                nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
                                  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
                if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
                        if (orefs & HAMMER2_DIO_WAITING)
                                wakeup(dio);
                        break;
                }
                cpu_pause();
        }

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        dio_limit = hammer2_dio_limit;
        if (dio_limit < 256)
                dio_limit = 256;
        if (dio_limit > 1024*1024)
                dio_limit = 1024*1024;
        if (hmp->iofree_count > dio_limit) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                hammer2_spin_ex(&hmp->io_spin);
                if (hmp->iofree_count > dio_limit) {
                        info.count = hmp->iofree_count / 5;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                hammer2_spin_unex(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}

/*
 * Clean up any DIOs with (INPROG | refs) == 0.
 *
 * Called from the putblk path above when too many free DIOs accumulate,
 * and to clean up cached DIOs on umount after all activity has been
 * flushed.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                /*
                if (dio->act > 0) {
                        int act;

                        act = dio->act - (ticks - dio->ticks) / hz - 1;
                        if (act > 0) {
                                dio->act = act;
                                return 0;
                        }
                        dio->act = 0;
                }
                */
                KKASSERT(dio->bp == NULL);
                if (info->count > 0) {
                        RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                        xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                        KKASSERT(xio == NULL);
                        --info->count;
                }
        }
        return 0;
}

void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
                if (dio->refs & HAMMER2_DIO_DIRTY) {
                        kprintf("hammer2_io_cleanup: Dirty buffer "
                                "%016jx/%d (bp=%p)\n",
                                dio->pbase, dio->psize, dio->bp);
                }
                kfree_obj(dio, hmp->mio);
                atomic_add_int(&hammer2_dio_count, -1);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}

/*
 * Returns a pointer to the requested data.
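 *
 * (lbase) is a data_off-style media offset whose low HAMMER2_OFF_MASK_RADIX
 * bits (the size radix) are masked off here; the result is the matching
 * byte offset within the larger physical buffer.  For example, with a 64KB
 * physical buffer, a 4KB block at media offset (pbase + 0x3000) resolves
 * to (bp->b_data + 0x3000).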
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        bkvasync(bp);
        lbase -= dio->dbase;
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
        return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
        return ((*diop)->error);
}

int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
                  hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
        hammer2_io_t *dio;
#endif

        *diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
                                   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
        if ((dio = *diop) != NULL) {
#if 0
                int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
                dio->debug_data[i] = debug_data;
#endif
        }
#endif
        return ((*diop)->error);
}

hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
                     int lsize HAMMER2_IO_DEBUG_ARGS)
{
        hammer2_io_t *dio;

        dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
                                 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
        return dio;
}

void
_hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
                                      HAMMER2_DIO_FLUSH);
        _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

int
_hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
        atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
                                      HAMMER2_DIO_FLUSH);
        _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
        return (0);     /* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
        /* NOP */
}

void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
        _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
        _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrently with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
        hammer2_io_t *dio;
        uint64_t mask;
        int lsize;
        int isgood;

        dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
        if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
                lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
        else
                lsize = 0;
        mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
        atomic_clear_64(&dio->dedup_valid, mask);
        atomic_set_64(&dio->dedup_alloc, mask);
        hammer2_io_putblk(&dio);
}

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
                        hammer2_off_t data_off, u_int bytes)
{
        hammer2_io_t *dio;
        uint64_t mask;
        int isgood;

        if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
                return;
        if (btype != HAMMER2_BREF_TYPE_DATA)
                return;
        dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
        if (dio) {
                if (data_off < dio->pbase ||
                    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
                    dio->pbase + dio->psize) {
                        panic("hammer2_io_dedup_delete: DATAOFF BAD "
                              "%016jx/%d %016jx\n",
                              data_off, bytes, dio->pbase);
                }
                mask = hammer2_dedup_mask(dio, data_off, bytes);
                atomic_clear_64(&dio->dedup_alloc, mask);
                atomic_clear_64(&dio->dedup_valid, mask);
                hammer2_io_putblk(&dio);
        }
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off,
                        u_int bytes)
{
        hammer2_io_t *dio;
        int isgood;

        dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
                               0, &isgood);
        if (dio) {
                KASSERT((dio->dedup_alloc &
                         hammer2_dedup_mask(dio, data_off, bytes)) == 0,
                        ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
                         data_off,
                         bytes,
                         hammer2_dedup_mask(dio, data_off, bytes),
                         dio->dedup_alloc));
                hammer2_io_putblk(&dio);
        }
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
        /*
        if (bp->b_flags & B_DELWRI)
                return;
        */
        hammer2_adjwritecounter(dio->btype, dio->psize);
}

void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
        KKASSERT(dio->bp != NULL);
        bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned.
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
        DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
        atomic_add_64(&dio->refs, 1);
}