/*
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2022 Tomohiro Kusumi <tkusumi@netbsd.org>
 * Copyright (c) 2013-2023 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

#define HAMMER2_DOP_READ	1
#define HAMMER2_DOP_NEW		2
#define HAMMER2_DOP_NEWNZ	3
#define HAMMER2_DOP_READQ	4

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
static void dio_write_stats_update(hammer2_io_t *dio, struct m_buf *bp);

static hammer2_io_t *hammer2_io_hash_lookup(hammer2_dev_t *hmp,
			hammer2_off_t pbase, uint64_t *refsp);
static hammer2_io_t *hammer2_io_hash_enter(hammer2_dev_t *hmp,
			hammer2_io_t *dio, uint64_t *refsp);
static void hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit);

void
hammer2_io_hash_init(hammer2_dev_t *hmp)
{
	hammer2_io_hash_t *hash;
	int i;

	for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
		hash = &hmp->iohash[i];
		hammer2_spin_init(&hash->spin, "h2iohash");
	}
}

#ifdef HAMMER2_IO_DEBUG

static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif
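
/*
 * Informal worked example of the offset math used by hammer2_io_alloc()
 * below (added for clarity, not from the original source): the low bits
 * of a HAMMER2 data offset encode the radix (log2 of the block size) and
 * the remaining bits are the byte offset on the media.  Assuming
 * HAMMER2_PBUFSIZE is the 64KB noted in the hammer2_io_inval() comment,
 * a hypothetical 16KB block (radix 14) at media offset 0x40000 decodes to:
 *
 *	lsize = 1 << 14 = 16384
 *	lbase = 0x40000				(radix bits masked off)
 *	pbase = lbase & ~(HAMMER2_PBUFSIZE - 1) = 0x40000
 *
 * i.e. the logical block must fit entirely inside the 64KB physical
 * buffer starting at pbase, which is exactly what the KKASSERT in
 * hammer2_io_alloc() verifies.
 */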

/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 */
static
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_off_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_off_t lbase;
	hammer2_off_t pbase;
	hammer2_off_t pmask;
	hammer2_vfsvolume_t *vol;
	uint64_t refs;
	int lsize;
	int psize;

	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	if ((int)(data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 *
	 * If DIO_GOOD is set the ref should prevent it from being cleared
	 * out from under us, we can set *isgoodp, and the caller can operate
	 * on the buffer without any further interaction.
	 */
	dio = hammer2_io_hash_lookup(hmp, pbase, &refs);
	if (dio) {
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
	} else if (createit) {
		refs = 0;
		vol = hammer2_get_volume_from_hmp(hmp, pbase);
		dio = kmalloc_obj(sizeof(*dio), hmp->mio, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->devvp = vol->dev->devvp;
		dio->dbase = vol->offset;
		KKASSERT((dio->dbase & HAMMER2_FREEMAP_LEVEL1_MASK) == 0);
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		xio = hammer2_io_hash_enter(hmp, dio, &refs);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
		} else {
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			kfree_obj(dio, hmp->mio);
			dio = xio;
		}
	} else {
		return NULL;
	}
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	hammer2_off_t dev_pbase;
	//off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	//int bflags;

	//bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	//bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}
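
	/*
	 * Informal summary of the dio->refs word manipulated below (see
	 * hammer2.h for the authoritative flag definitions): the low bits
	 * (HAMMER2_DIO_MASK) hold the reference count, and the high bits
	 * carry state flags -- DIO_GOOD (dio->bp is instantiated and valid),
	 * DIO_INPROG (another thread is instantiating or tearing down the
	 * buffer), DIO_WAITING (someone is asleep waiting for INPROG to
	 * clear), DIO_DIRTY (the buffer must eventually be written back),
	 * and DIO_FLUSH (write it out on the final ref drop rather than
	 * leaving a delayed write).
	 */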

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	dev_pbase = dio->pbase - dio->dbase;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblkx(dio->devvp,
					  dev_pbase, dio->psize,
					  GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			KKASSERT(dio->bp == NULL);
#if 0
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				peof -= dio->dbase;
				error = cluster_readx(dio->devvp,
						      peof, dev_pbase,
						      dio->psize, bflags,
						      dio->psize,
						      HAMMER2_PBUFSIZE*hce,
						      &dio->bp);
			} else {
				error = breadnx(dio->devvp, dev_pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
#else
			error = breadx(dio->devvp, dev_pbase, dio->psize,
				       &dio->bp);
#endif
			break;
		}
	} else {
#if 0
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			peof -= dio->dbase;
			error = cluster_readx(dio->devvp,
					      peof, dev_pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->devvp, dev_pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
#else
		error = breadx(dio->devvp, dev_pbase, dio->psize, &dio->bp);
#endif
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				//dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		BUF_KERNPROC(dio->bp);
		//dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD, and wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct m_buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented;
	 * note that another accessor race will decrement iofree_count, so
	 * we have to increment it regardless.
	 *
	 * We can now dispose of the buffer.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			//off_t peof;
			//int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
#if 0
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					peof -= dio->dbase;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
#else
				bawrite(bp);
#endif
			} else {
				//bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
#if 0
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
#endif
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit)
		hammer2_io_hash_cleanup(hmp, dio_limit);
}
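
#if 0
/*
 * Informal usage sketch (added for clarity, not part of the original
 * source): the typical acquire/inspect/release cycle against a DIO.
 * hammer2_io_bread() and hammer2_io_bqrelse() are assumed to be the
 * usual non-debug wrappers around _hammer2_io_bread()/_hammer2_io_putblk()
 * declared in hammer2.h; the function itself is purely illustrative.
 */
static int
hammer2_io_example_read(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	char *data;
	int lsize;
	int error;

	/* the block size is encoded as a radix in the low bits of data_off */
	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);

	/* acquire (and read, if necessary) the backing device buffer */
	error = hammer2_io_bread(hmp, bref->type, bref->data_off, lsize,
				 &dio);
	if (error == 0) {
		/* pointer to our block within the larger device buffer */
		data = hammer2_io_data(dio, bref->data_off);
		/* ... inspect or copy up to lsize bytes at data ... */
	}

	/* drop our ref; the underlying bp may remain cached */
	hammer2_io_bqrelse(&dio);

	return error;
}
#endif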

/*
 * Returns a pointer to the requested data.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct m_buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	lbase -= dio->dbase;
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return (bp->b_data + off);
}

int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		  hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
#if 0
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
#endif
	}
#endif
	return ((*diop)->error);
}

hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
		     int lsize HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;

	dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
				 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
	return dio;
}

void
_hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

int
_hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
	return (0);	/* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}
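
/*
 * Note (summary added for clarity, not from the original source): the
 * write wrappers above differ only in which refs flags they set before
 * dropping the caller's ref.  _hammer2_io_bdwrite() sets only DIO_DIRTY,
 * so the final ref drop in _hammer2_io_putblk() disposes of the buffer
 * with bdwrite() (delayed write).  _hammer2_io_bawrite() and
 * _hammer2_io_bwrite() additionally set DIO_FLUSH, causing putblk to
 * issue bawrite() instead.  hammer2_io_setdirty() only marks the DIO
 * dirty and does not drop a ref.
 */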

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrent with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX))
		lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	else
		lsize = 0;
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_io_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			 hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

static
void
dio_write_stats_update(hammer2_io_t *dio, struct m_buf *bp)
{
	/*
	if (bp->b_flags & B_DELWRI)
		return;
	*/
	hammer2_adjwritecounter(dio->btype, dio->psize);
}

void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	atomic_add_64(&dio->refs, 1);
}

static __inline hammer2_io_hash_t *
hammer2_io_hashv(hammer2_dev_t *hmp, hammer2_off_t pbase)
{
	int hv;

	hv = (int)pbase + (int)(pbase >> 16);
	return (&hmp->iohash[hv & HAMMER2_IOHASH_MASK]);
}

/*
 * Lookup and reference the requested dio
 */
static hammer2_io_t *
hammer2_io_hash_lookup(hammer2_dev_t *hmp, hammer2_off_t pbase, uint64_t *refsp)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	uint64_t refs;

	*refsp = 0;
	hash = hammer2_io_hashv(hmp, pbase);
	hammer2_spin_sh(&hash->spin);
	for (dio = hash->base; dio; dio = dio->next) {
		if (dio->pbase == pbase) {
			refs = atomic_fetchadd_64(&dio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&dio->hmp->iofree_count, -1);
			*refsp = refs;
			break;
		}
	}
	hammer2_spin_unsh(&hash->spin);

	return dio;
}

/*
 * Enter a dio into the hash.  If the pbase already exists in the hash,
 * the xio in the hash is referenced and returned.  If dio is successfully
 * entered into the hash, NULL is returned.
 */
static hammer2_io_t *
hammer2_io_hash_enter(hammer2_dev_t *hmp, hammer2_io_t *dio, uint64_t *refsp)
{
	hammer2_io_t *xio;
	hammer2_io_t **xiop;
	hammer2_io_hash_t *hash;
	uint64_t refs;

	*refsp = 0;
	hash = hammer2_io_hashv(hmp, dio->pbase);
	hammer2_spin_ex(&hash->spin);
	for (xiop = &hash->base; (xio = *xiop) != NULL; xiop = &xio->next) {
		if (xio->pbase == dio->pbase) {
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			*refsp = refs;
			goto done;
		}
	}
	dio->next = NULL;
	*xiop = dio;
done:
	hammer2_spin_unex(&hash->spin);

	return xio;
}

/*
 * Clean out a limited number of freeable DIOs
 */
static void
hammer2_io_hash_cleanup(hammer2_dev_t *hmp, int dio_limit)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	hammer2_io_t **diop;
	hammer2_io_t **cleanapp;
	hammer2_io_t *cleanbase;
	int count;
	int maxscan;
	int i;

	count = hmp->iofree_count - dio_limit + 32;
	if (count <= 0)
		return;
	cleanbase = NULL;
	cleanapp = &cleanbase;

	i = hmp->io_iterator++;
	maxscan = HAMMER2_IOHASH_SIZE;
	while (count > 0 && maxscan--) {
		hash = &hmp->iohash[i & HAMMER2_IOHASH_MASK];
		hammer2_spin_ex(&hash->spin);
		diop = &hash->base;
		while ((dio = *diop) != NULL) {
			if ((dio->refs & (HAMMER2_DIO_MASK |
					  HAMMER2_DIO_INPROG)) != 0)
			{
				diop = &dio->next;
				continue;
			}
			if (dio->act > 0) {
				int act;

				act = dio->act - (ticks - dio->ticks) / hz - 1;
				dio->act = (act < 0) ? 0 : act;
			}
			if (dio->act) {
				diop = &dio->next;
				continue;
			}
			KKASSERT(dio->bp == NULL);
			*diop = dio->next;
			dio->next = NULL;
			*cleanapp = dio;
			cleanapp = &dio->next;
			--count;
			/* diop remains unchanged */
			atomic_add_int(&hmp->iofree_count, -1);
		}
		hammer2_spin_unex(&hash->spin);
		i = hmp->io_iterator++;
	}

	/*
	 * Get rid of dios on clean list without holding any locks
	 */
	while ((dio = cleanbase) != NULL) {
		cleanbase = dio->next;
		dio->next = NULL;
		KKASSERT(dio->bp == NULL &&
			 (dio->refs & (HAMMER2_DIO_MASK |
				       HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree_obj(dio, hmp->mio);
		atomic_add_int(&hammer2_dio_count, -1);
	}
}

/*
 * Destroy all DIOs associated with the media
 */
void
hammer2_io_hash_cleanup_all(hammer2_dev_t *hmp)
{
	hammer2_io_hash_t *hash;
	hammer2_io_t *dio;
	int i;

	for (i = 0; i < HAMMER2_IOHASH_SIZE; ++i) {
		hash = &hmp->iohash[i];

		while ((dio = hash->base) != NULL) {
			hash->base = dio->next;
			dio->next = NULL;
			KKASSERT(dio->bp == NULL &&
				 (dio->refs & (HAMMER2_DIO_MASK |
					       HAMMER2_DIO_INPROG)) == 0);
			if (dio->refs & HAMMER2_DIO_DIRTY) {
				kprintf("hammer2_io_cleanup: Dirty buffer "
					"%016jx/%d (bp=%p)\n",
					dio->pbase, dio->psize, dio->bp);
			}
			kfree_obj(dio, hmp->mio);
			atomic_add_int(&hammer2_dio_count, -1);
			atomic_add_int(&hmp->iofree_count, -1);
		}
	}
}