1 /* 2 * Copyright (c) 2013-2023 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
33 */ 34 35 #include "hammer2.h" 36 37 #define HAMMER2_DOP_READ 1 38 #define HAMMER2_DOP_NEW 2 39 #define HAMMER2_DOP_NEWNZ 3 40 #define HAMMER2_DOP_READQ 4 41 42 /* 43 * Implements an abstraction layer for synchronous and asynchronous 44 * buffered device I/O. Can be used as an OS-abstraction but the main 45 * purpose is to allow larger buffers to be used against hammer2_chain's 46 * using smaller allocations, without causing deadlocks. 47 * 48 * The DIOs also record temporary state with limited persistence. This 49 * feature is used to keep track of dedupable blocks. 50 */ 51 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp); 52 53 static hammer2_io_t *hammer2_io_hash_lookup(hammer2_dev_t *hmp, 54 hammer2_off_t pbase, uint64_t *refsp); 55 static hammer2_io_t *hammer2_io_hash_enter(hammer2_dev_t *hmp, 56 hammer2_io_t *dio, uint64_t *refsp); 57 static void hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit); 58 59 void 60 hammer2_io_hash_init(hammer2_dev_t *hmp) 61 { 62 hammer2_io_hash_t *hash; 63 int i; 64 65 for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) { 66 hash = &hmp->iohash[i]; 67 hammer2_spin_init(&hash->spin, "h2iohash"); 68 } 69 } 70 71 #ifdef HAMMER2_IO_DEBUG 72 73 static __inline void 74 DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS) 75 { 76 int i; 77 78 i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK; 79 80 dio->debug_file[i] = file; 81 dio->debug_line[i] = line; 82 dio->debug_refs[i] = dio->refs; 83 dio->debug_td[i] = curthread; 84 } 85 86 #else 87 88 #define DIO_RECORD(dio) 89 90 #endif 91 92 /* 93 * Returns the DIO corresponding to the data|radix, creating it if necessary. 94 * 95 * If createit is 0, NULL can be returned indicating that the DIO does not 96 * exist. (btype) is ignored when createit is 0. 
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_off_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_off_t lbase;
	hammer2_off_t pbase;
	hammer2_off_t pmask;
	hammer2_volume_t *vol;
	uint64_t refs;
	int lsize;
	int psize;

	/*
	 * Decode (data_off): the low bits encode the radix of the logical
	 * buffer size, the remaining bits the logical base offset.  The
	 * physical (DIO) buffer is the fixed-size HAMMER2_PBUFSIZE aligned
	 * buffer containing the logical range.
	 */
	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	/*
	 * The logical range must fit entirely within one physical buffer.
	 */
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set the ref should prevent it from being cleared
	 * out from under us, we can set *isgoodp, and the caller can operate
	 * on the buffer without any further interaction.
	 */
	dio = hammer2_io_hash_lookup(hmp, pbase, &refs);
	if (dio) {
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
	} else if (createit) {
		/*
		 * Not found; construct a new DIO and try to insert it.
		 * Another thread may race us, in which case the hash
		 * insert returns the winner (xio) already referenced and
		 * we discard our copy.
		 */
		refs = 0;
		vol = hammer2_get_volume(hmp, pbase);
		dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->devvp = vol->dev->devvp;
		dio->dbase = vol->offset;
		KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		xio = hammer2_io_hash_enter(hmp, dio, &refs);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
		} else {
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			kfree_obj(dio, hmp->mio);
			dio = xio;
		}
	} else {
		return NULL;
	}

	/*
	 * Bump the activity heuristic used by the hash cleanup code to
	 * decide which idle DIOs may be reaped (capped at 10).
	 */
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 *
 * (op) is one of the HAMMER2_DOP_* codes.  DOP_READQ only looks up an
 * existing DIO (may return NULL); the others create the DIO if necessary.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	hammer2_off_t dev_pbase;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	/* data buffers are flagged B_NOTMETA so swapcache can tell them apart */
	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 *
		 * If our hash lookup did not see GOOD (isgood == 0) issue
		 * a memory fence before touching the buffer contents to
		 * ensure we observe the instantiator's writes.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO.  If another thread holds INPROG we
		 * interlock and sleep until it finishes, then retry.
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	dev_pbase = dio->pbase - dio->dbase;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		/*
		 * The logical request exactly covers the physical buffer,
		 * so a NEW op can getblk() without reading from media.
		 */
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->devvp,
					 dev_pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				peof -= dio->dbase;
				error = cluster_readx(dio->devvp,
						     peof, dev_pbase,
						     dio->psize, bflags,
						     dio->psize,
						     HAMMER2_PBUFSIZE*hce,
						     &dio->bp);
			} else {
				error = breadnx(dio->devvp, dev_pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
			break;
		}
	} else {
		/*
		 * The logical request is smaller than the physical buffer,
		 * we must read the underlying media even for NEW ops.
		 */
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			peof -= dio->dbase;
			error = cluster_readx(dio->devvp,
					      peof, dev_pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->devvp, dev_pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented,
	 * note that another accessor race will decrement iofree_count so
	 * we have to increment it regardless.
	 * We can now dispose of the buffer.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			off_t peof;
			int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					peof -= dio->dbase;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
			} else {
				bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit)
		hammer2_io_hash_cleanup(hmp, dio_limit);
}

/*
 * Returns a pointer to the requested data.
553 */ 554 char * 555 hammer2_io_data(hammer2_io_t *dio, off_t lbase) 556 { 557 struct buf *bp; 558 int off; 559 560 bp = dio->bp; 561 KKASSERT(bp != NULL); 562 bkvasync(bp); 563 lbase -= dio->dbase; 564 off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset; 565 KKASSERT(off >= 0 && off < bp->b_bufsize); 566 return(bp->b_data + off); 567 } 568 569 int 570 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, 571 hammer2_io_t **diop) 572 { 573 *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW); 574 return ((*diop)->error); 575 } 576 577 int 578 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, 579 hammer2_io_t **diop) 580 { 581 *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ); 582 return ((*diop)->error); 583 } 584 585 int 586 _hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, 587 hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 588 { 589 #ifdef HAMMER2_IO_DEBUG 590 hammer2_io_t *dio; 591 #endif 592 593 *diop = _hammer2_io_getblk(hmp, btype, lbase, lsize, 594 HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL); 595 #ifdef HAMMER2_IO_DEBUG 596 if ((dio = *diop) != NULL) { 597 #if 0 598 int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK; 599 dio->debug_data[i] = debug_data; 600 #endif 601 } 602 #endif 603 return ((*diop)->error); 604 } 605 606 hammer2_io_t * 607 _hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, 608 int lsize HAMMER2_IO_DEBUG_ARGS) 609 { 610 hammer2_io_t *dio; 611 612 dio = _hammer2_io_getblk(hmp, 0, lbase, lsize, 613 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL); 614 return dio; 615 } 616 617 void 618 _hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 619 { 620 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY | 621 HAMMER2_DIO_FLUSH); 622 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 623 } 624 625 void 626 _hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 627 { 628 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY); 629 
_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 630 } 631 632 int 633 _hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 634 { 635 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY | 636 HAMMER2_DIO_FLUSH); 637 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 638 return (0); /* XXX */ 639 } 640 641 void 642 hammer2_io_setdirty(hammer2_io_t *dio) 643 { 644 atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY); 645 } 646 647 /* 648 * This routine is called when a MODIFIED chain is being DESTROYED, 649 * in an attempt to allow the related buffer cache buffer to be 650 * invalidated and discarded instead of flushing it to disk. 651 * 652 * At the moment this case is only really useful for file meta-data. 653 * File data is already handled via the logical buffer cache associated 654 * with the vnode, and will be discarded if it was never flushed to disk. 655 * File meta-data may include inodes, directory entries, and indirect blocks. 656 * 657 * XXX 658 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being 659 * invalidated might be smaller. Most of the meta-data structures above 660 * are in the 'smaller' category. For now, don't try to invalidate the 661 * data areas. 662 */ 663 void 664 hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes) 665 { 666 /* NOP */ 667 } 668 669 void 670 _hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 671 { 672 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 673 } 674 675 void 676 _hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 677 { 678 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 679 } 680 681 /* 682 * Set dedup validation bits in a DIO. We do not need the buffer cache 683 * buffer for this. This must be done concurrent with setting bits in 684 * the freemap so as to interlock with bulkfree's clearing of those bits. 
685 */ 686 void 687 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref) 688 { 689 hammer2_io_t *dio; 690 uint64_t mask; 691 int lsize; 692 int isgood; 693 694 dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood); 695 if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX)) 696 lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 697 else 698 lsize = 0; 699 mask = hammer2_dedup_mask(dio, bref->data_off, lsize); 700 atomic_clear_64(&dio->dedup_valid, mask); 701 atomic_set_64(&dio->dedup_alloc, mask); 702 hammer2_io_putblk(&dio); 703 } 704 705 /* 706 * Clear dedup validation bits in a DIO. This is typically done when 707 * a modified chain is destroyed or by the bulkfree code. No buffer 708 * is needed for this operation. If the DIO no longer exists it is 709 * equivalent to the bits not being set. 710 */ 711 void 712 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype, 713 hammer2_off_t data_off, u_int bytes) 714 { 715 hammer2_io_t *dio; 716 uint64_t mask; 717 int isgood; 718 719 if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) 720 return; 721 if (btype != HAMMER2_BREF_TYPE_DATA) 722 return; 723 dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood); 724 if (dio) { 725 if (data_off < dio->pbase || 726 (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes > 727 dio->pbase + dio->psize) { 728 panic("hammer2_io_dedup_delete: DATAOFF BAD " 729 "%016jx/%d %016jx\n", 730 data_off, bytes, dio->pbase); 731 } 732 mask = hammer2_dedup_mask(dio, data_off, bytes); 733 atomic_clear_64(&dio->dedup_alloc, mask); 734 atomic_clear_64(&dio->dedup_valid, mask); 735 hammer2_io_putblk(&dio); 736 } 737 } 738 739 /* 740 * Assert that dedup allocation bits in a DIO are not set. This operation 741 * does not require a buffer. The DIO does not need to exist. 
742 */ 743 void 744 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes) 745 { 746 hammer2_io_t *dio; 747 int isgood; 748 749 dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA, 750 0, &isgood); 751 if (dio) { 752 KASSERT((dio->dedup_alloc & 753 hammer2_dedup_mask(dio, data_off, bytes)) == 0, 754 ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx", 755 data_off, 756 bytes, 757 hammer2_dedup_mask(dio, data_off, bytes), 758 dio->dedup_alloc)); 759 hammer2_io_putblk(&dio); 760 } 761 } 762 763 static 764 void 765 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp) 766 { 767 if (bp->b_flags & B_DELWRI) 768 return; 769 hammer2_adjwritecounter(dio->btype, dio->psize); 770 } 771 772 void 773 hammer2_io_bkvasync(hammer2_io_t *dio) 774 { 775 KKASSERT(dio->bp != NULL); 776 bkvasync(dio->bp); 777 } 778 779 /* 780 * Ref a dio that is already owned 781 */ 782 void 783 _hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS) 784 { 785 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL); 786 atomic_add_64(&dio->refs, 1); 787 } 788 789 static __inline hammer2_io_hash_t * 790 hammer2_io_hashv(hammer2_dev_t *hmp, hammer2_off_t pbase) 791 { 792 int hv; 793 794 hv = (int)pbase + (int)(pbase >> 16); 795 return (&hmp->iohash[hv & HAMMER2_IOHASH_MASK]); 796 } 797 798 /* 799 * Lookup and reference the requested dio 800 */ 801 static hammer2_io_t * 802 hammer2_io_hash_lookup(hammer2_dev_t *hmp, hammer2_off_t pbase, uint64_t *refsp) 803 { 804 hammer2_io_hash_t *hash; 805 hammer2_io_t *dio; 806 uint64_t refs; 807 808 *refsp = 0; 809 hash = hammer2_io_hashv(hmp, pbase); 810 hammer2_spin_sh(&hash->spin); 811 for (dio = hash->base; dio; dio = dio->next) { 812 if (dio->pbase == pbase) { 813 refs = atomic_fetchadd_64(&dio->refs, 1); 814 if ((refs & HAMMER2_DIO_MASK) == 0) 815 atomic_add_int(&dio->hmp->iofree_count, -1); 816 *refsp = refs; 817 break; 818 } 819 } 820 hammer2_spin_unsh(&hash->spin); 821 822 return dio; 823 } 824 825 /* 826 * Enter a dio into the hash. 
If the pbase already exists in the hash, 827 * the xio in the hash is referenced and returned. If dio is sucessfully 828 * entered into the hash, NULL is returned. 829 */ 830 static hammer2_io_t * 831 hammer2_io_hash_enter(hammer2_dev_t *hmp, hammer2_io_t *dio, uint64_t *refsp) 832 { 833 hammer2_io_t *xio; 834 hammer2_io_t **xiop; 835 hammer2_io_hash_t *hash; 836 uint64_t refs; 837 838 *refsp = 0; 839 hash = hammer2_io_hashv(hmp, dio->pbase); 840 hammer2_spin_ex(&hash->spin); 841 for (xiop = &hash->base; (xio = *xiop) != NULL; xiop = &xio->next) { 842 if (xio->pbase == dio->pbase) { 843 refs = atomic_fetchadd_64(&xio->refs, 1); 844 if ((refs & HAMMER2_DIO_MASK) == 0) 845 atomic_add_int(&xio->hmp->iofree_count, -1); 846 *refsp = refs; 847 goto done; 848 } 849 } 850 dio->next = NULL; 851 *xiop = dio; 852 done: 853 hammer2_spin_unex(&hash->spin); 854 855 return xio; 856 } 857 858 /* 859 * Clean out a limited number of freeable DIOs 860 */ 861 static void 862 hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit) 863 { 864 hammer2_io_hash_t *hash; 865 hammer2_io_t *dio; 866 hammer2_io_t **diop; 867 hammer2_io_t **cleanapp; 868 hammer2_io_t *cleanbase; 869 int count; 870 int maxscan; 871 int i; 872 873 count = hmp->iofree_count - dio_limit + 32; 874 if (count <= 0) 875 return; 876 cleanbase = NULL; 877 cleanapp = &cleanbase; 878 879 i = hmp->io_iterator++; 880 maxscan = HAMMER2_IOHASH_SIZE; 881 while (count > 0 && maxscan--) { 882 hash = &hmp->iohash[i & HAMMER2_IOHASH_MASK]; 883 hammer2_spin_ex(&hash->spin); 884 diop = &hash->base; 885 while ((dio = *diop) != NULL) { 886 if ((dio->refs & (HAMMER2_DIO_MASK | 887 HAMMER2_DIO_INPROG)) != 0) 888 { 889 diop = &dio->next; 890 continue; 891 } 892 if (dio->act > 0) { 893 int act; 894 895 act = dio->act - (ticks - dio->ticks) / hz - 1; 896 dio->act = (act < 0) ? 
0 : act; 897 } 898 if (dio->act) { 899 diop = &dio->next; 900 continue; 901 } 902 KKASSERT(dio->bp == NULL); 903 *diop = dio->next; 904 dio->next = NULL; 905 *cleanapp = dio; 906 cleanapp = &dio->next; 907 --count; 908 /* diop remains unchanged */ 909 atomic_add_int(&hmp->iofree_count, -1); 910 } 911 hammer2_spin_unex(&hash->spin); 912 i = hmp->io_iterator++; 913 } 914 915 /* 916 * Get rid of dios on clean list without holding any locks 917 */ 918 while ((dio = cleanbase) != NULL) { 919 cleanbase = dio->next; 920 dio->next = NULL; 921 KKASSERT(dio->bp == NULL && 922 (dio->refs & (HAMMER2_DIO_MASK | 923 HAMMER2_DIO_INPROG)) == 0); 924 if (dio->refs & HAMMER2_DIO_DIRTY) { 925 kprintf("hammer2_io_cleanup: Dirty buffer " 926 "%016jx/%d (bp=%p)\n", 927 dio->pbase, dio->psize, dio->bp); 928 } 929 kfree_obj(dio, hmp->mio); 930 atomic_add_int(&hammer2_dio_count, -1); 931 } 932 } 933 934 /* 935 * Destroy all DIOs associated with the media 936 */ 937 void 938 hammer2_io_hash_cleanup_all(hammer2_dev_t *hmp) 939 { 940 hammer2_io_hash_t *hash; 941 hammer2_io_t *dio; 942 int i; 943 944 for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) { 945 hash = &hmp->iohash[i]; 946 947 while ((dio = hash->base) != NULL) { 948 hash->base = dio->next; 949 dio->next = NULL; 950 KKASSERT(dio->bp == NULL && 951 (dio->refs & (HAMMER2_DIO_MASK | 952 HAMMER2_DIO_INPROG)) == 0); 953 if (dio->refs & HAMMER2_DIO_DIRTY) { 954 kprintf("hammer2_io_cleanup: Dirty buffer " 955 "%016jx/%d (bp=%p)\n", 956 dio->pbase, dio->psize, dio->bp); 957 } 958 kfree_obj(dio, hmp->mio); 959 atomic_add_int(&hammer2_dio_count, -1); 960 atomic_add_int(&hmp->iofree_count, -1); 961 } 962 } 963 } 964