/*
 * Copyright (c) 2013-2018 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
33 */ 34 35 #include "hammer2.h" 36 37 #define HAMMER2_DOP_READ 1 38 #define HAMMER2_DOP_NEW 2 39 #define HAMMER2_DOP_NEWNZ 3 40 #define HAMMER2_DOP_READQ 4 41 42 /* 43 * Implements an abstraction layer for synchronous and asynchronous 44 * buffered device I/O. Can be used as an OS-abstraction but the main 45 * purpose is to allow larger buffers to be used against hammer2_chain's 46 * using smaller allocations, without causing deadlocks. 47 * 48 * The DIOs also record temporary state with limited persistence. This 49 * feature is used to keep track of dedupable blocks. 50 */ 51 static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg); 52 static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp); 53 54 static int 55 hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2) 56 { 57 if (io1->pbase < io2->pbase) 58 return(-1); 59 if (io1->pbase > io2->pbase) 60 return(1); 61 return(0); 62 } 63 64 RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t); 65 RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, 66 off_t, pbase); 67 68 struct hammer2_cleanupcb_info { 69 struct hammer2_io_tree tmptree; 70 int count; 71 }; 72 73 #if 0 74 static __inline 75 uint64_t 76 hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes) 77 { 78 uint64_t mask; 79 int i; 80 81 if (bytes < 1024) /* smaller chunks not supported */ 82 return 0; 83 84 /* 85 * Calculate crc check mask for larger chunks 86 */ 87 i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) & 88 HAMMER2_PBUFMASK) >> 10; 89 if (i == 0 && bytes == HAMMER2_PBUFSIZE) 90 return((uint64_t)-1); 91 mask = ((uint64_t)1U << (bytes >> 10)) - 1; 92 mask <<= i; 93 94 return mask; 95 } 96 #endif 97 98 #ifdef HAMMER2_IO_DEBUG 99 100 static __inline void 101 DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS) 102 { 103 int i; 104 105 i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK; 106 107 dio->debug_file[i] = file; 108 dio->debug_line[i] = line; 109 
dio->debug_refs[i] = dio->refs; 110 dio->debug_td[i] = curthread; 111 } 112 113 #else 114 115 #define DIO_RECORD(dio) 116 117 #endif 118 119 /* 120 * Returns the DIO corresponding to the data|radix, creating it if necessary. 121 * 122 * If createit is 0, NULL can be returned indicating that the DIO does not 123 * exist. (btype) is ignored when createit is 0. 124 */ 125 static __inline 126 hammer2_io_t * 127 hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype, 128 int createit, int *isgoodp) 129 { 130 hammer2_io_t *dio; 131 hammer2_io_t *xio; 132 hammer2_key_t lbase; 133 hammer2_key_t pbase; 134 hammer2_key_t pmask; 135 uint64_t refs; 136 int lsize; 137 int psize; 138 139 psize = HAMMER2_PBUFSIZE; 140 pmask = ~(hammer2_off_t)(psize - 1); 141 if ((int)(data_off & HAMMER2_OFF_MASK_RADIX)) 142 lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX); 143 else 144 lsize = 0; 145 lbase = data_off & ~HAMMER2_OFF_MASK_RADIX; 146 pbase = lbase & pmask; 147 148 if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) { 149 kprintf("Illegal: %016jx %016jx+%08x / %016jx\n", 150 pbase, lbase, lsize, pmask); 151 } 152 KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase); 153 *isgoodp = 0; 154 155 /* 156 * Access/Allocate the DIO, bump dio->refs to prevent destruction. 157 * 158 * If DIO_GOOD is set the ref should prevent it from being cleared 159 * out from under us, we can set *isgoodp, and the caller can operate 160 * on the buffer without any further interaction. 
161 */ 162 hammer2_spin_sh(&hmp->io_spin); 163 dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase); 164 if (dio) { 165 refs = atomic_fetchadd_64(&dio->refs, 1); 166 if ((refs & HAMMER2_DIO_MASK) == 0) { 167 atomic_add_int(&dio->hmp->iofree_count, -1); 168 } 169 if (refs & HAMMER2_DIO_GOOD) 170 *isgoodp = 1; 171 hammer2_spin_unsh(&hmp->io_spin); 172 } else if (createit) { 173 refs = 0; 174 hammer2_spin_unsh(&hmp->io_spin); 175 dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO); 176 dio->hmp = hmp; 177 dio->pbase = pbase; 178 dio->psize = psize; 179 dio->btype = btype; 180 dio->refs = refs + 1; 181 dio->act = 5; 182 hammer2_spin_ex(&hmp->io_spin); 183 xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio); 184 if (xio == NULL) { 185 atomic_add_int(&hammer2_dio_count, 1); 186 hammer2_spin_unex(&hmp->io_spin); 187 } else { 188 refs = atomic_fetchadd_64(&xio->refs, 1); 189 if ((refs & HAMMER2_DIO_MASK) == 0) 190 atomic_add_int(&xio->hmp->iofree_count, -1); 191 if (refs & HAMMER2_DIO_GOOD) 192 *isgoodp = 1; 193 hammer2_spin_unex(&hmp->io_spin); 194 kfree(dio, M_HAMMER2); 195 dio = xio; 196 } 197 } else { 198 hammer2_spin_unsh(&hmp->io_spin); 199 return NULL; 200 } 201 dio->ticks = ticks; 202 if (dio->act < 10) 203 ++dio->act; 204 205 return dio; 206 } 207 208 /* 209 * Acquire the requested dio. If DIO_GOOD is not set we must instantiate 210 * a buffer. If set the buffer already exists and is good to go. 211 */ 212 hammer2_io_t * 213 _hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase, 214 int lsize, int op HAMMER2_IO_DEBUG_ARGS) 215 { 216 hammer2_io_t *dio; 217 off_t peof; 218 uint64_t orefs; 219 uint64_t nrefs; 220 int isgood; 221 int error; 222 int hce; 223 int bflags; 224 225 bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? 
B_NOTMETA : 0); 226 bflags |= B_KVABIO; 227 228 KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize); 229 230 if (op == HAMMER2_DOP_READQ) { 231 dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood); 232 if (dio == NULL) 233 return NULL; 234 op = HAMMER2_DOP_READ; 235 } else { 236 dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood); 237 } 238 239 for (;;) { 240 orefs = dio->refs; 241 cpu_ccfence(); 242 243 /* 244 * Buffer is already good, handle the op and return. 245 */ 246 if (orefs & HAMMER2_DIO_GOOD) { 247 if (isgood == 0) 248 cpu_mfence(); 249 bkvasync(dio->bp); 250 251 switch(op) { 252 case HAMMER2_DOP_NEW: 253 bzero(hammer2_io_data(dio, lbase), lsize); 254 /* fall through */ 255 case HAMMER2_DOP_NEWNZ: 256 atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY); 257 break; 258 case HAMMER2_DOP_READ: 259 default: 260 /* nothing to do */ 261 break; 262 } 263 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL); 264 return (dio); 265 } 266 267 /* 268 * Try to own the DIO 269 */ 270 if (orefs & HAMMER2_DIO_INPROG) { 271 nrefs = orefs | HAMMER2_DIO_WAITING; 272 tsleep_interlock(dio, 0); 273 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) { 274 tsleep(dio, PINTERLOCKED, "h2dio", hz); 275 } 276 /* retry */ 277 } else { 278 nrefs = orefs | HAMMER2_DIO_INPROG; 279 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) { 280 break; 281 } 282 } 283 } 284 285 /* 286 * We break to here if GOOD is not set and we acquired INPROG for 287 * the I/O. 
288 */ 289 KKASSERT(dio->bp == NULL); 290 if (btype == HAMMER2_BREF_TYPE_DATA) 291 hce = hammer2_cluster_data_read; 292 else 293 hce = hammer2_cluster_meta_read; 294 295 error = 0; 296 if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) && 297 dio->psize == lsize) { 298 switch(op) { 299 case HAMMER2_DOP_NEW: 300 case HAMMER2_DOP_NEWNZ: 301 dio->bp = getblk(dio->hmp->devvp, 302 dio->pbase, dio->psize, 303 GETBLK_KVABIO, 0); 304 if (op == HAMMER2_DOP_NEW) { 305 bkvasync(dio->bp); 306 bzero(dio->bp->b_data, dio->psize); 307 } 308 atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY); 309 break; 310 case HAMMER2_DOP_READ: 311 default: 312 KKASSERT(dio->bp == NULL); 313 if (hce > 0) { 314 /* 315 * Synchronous cluster I/O for now. 316 */ 317 peof = (dio->pbase + HAMMER2_SEGMASK64) & 318 ~HAMMER2_SEGMASK64; 319 error = cluster_readx(dio->hmp->devvp, 320 peof, dio->pbase, 321 dio->psize, bflags, 322 dio->psize, 323 HAMMER2_PBUFSIZE*hce, 324 &dio->bp); 325 } else { 326 error = breadnx(dio->hmp->devvp, dio->pbase, 327 dio->psize, bflags, 328 NULL, NULL, 0, &dio->bp); 329 } 330 } 331 } else { 332 if (hce > 0) { 333 /* 334 * Synchronous cluster I/O for now. 335 */ 336 peof = (dio->pbase + HAMMER2_SEGMASK64) & 337 ~HAMMER2_SEGMASK64; 338 error = cluster_readx(dio->hmp->devvp, 339 peof, dio->pbase, dio->psize, 340 bflags, 341 dio->psize, HAMMER2_PBUFSIZE*hce, 342 &dio->bp); 343 } else { 344 error = breadnx(dio->hmp->devvp, dio->pbase, 345 dio->psize, bflags, 346 NULL, NULL, 0, &dio->bp); 347 } 348 if (dio->bp) { 349 /* 350 * Handle NEW flags 351 */ 352 switch(op) { 353 case HAMMER2_DOP_NEW: 354 bkvasync(dio->bp); 355 bzero(hammer2_io_data(dio, lbase), lsize); 356 /* fall through */ 357 case HAMMER2_DOP_NEWNZ: 358 atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY); 359 break; 360 case HAMMER2_DOP_READ: 361 default: 362 break; 363 } 364 365 /* 366 * Tell the kernel that the buffer cache is not 367 * meta-data based on the btype. 
This allows 368 * swapcache to distinguish between data and 369 * meta-data. 370 */ 371 switch(btype) { 372 case HAMMER2_BREF_TYPE_DATA: 373 dio->bp->b_flags |= B_NOTMETA; 374 break; 375 default: 376 break; 377 } 378 } 379 } 380 381 if (dio->bp) { 382 bkvasync(dio->bp); 383 BUF_KERNPROC(dio->bp); 384 dio->bp->b_flags &= ~B_AGE; 385 /* dio->bp->b_debug_info2 = dio; */ 386 } 387 dio->error = error; 388 389 /* 390 * Clear INPROG and WAITING, set GOOD wake up anyone waiting. 391 */ 392 for (;;) { 393 orefs = dio->refs; 394 cpu_ccfence(); 395 nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING); 396 if (error == 0) 397 nrefs |= HAMMER2_DIO_GOOD; 398 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) { 399 if (orefs & HAMMER2_DIO_WAITING) 400 wakeup(dio); 401 break; 402 } 403 cpu_pause(); 404 } 405 406 /* XXX error handling */ 407 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL); 408 409 return dio; 410 } 411 412 /* 413 * Release our ref on *diop. 414 * 415 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose 416 * of dio->bp. Then we clean up DIO_INPROG and DIO_WAITING. 417 */ 418 void 419 _hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 420 { 421 hammer2_dev_t *hmp; 422 hammer2_io_t *dio; 423 struct buf *bp; 424 off_t pbase; 425 int psize; 426 int dio_limit; 427 uint64_t orefs; 428 uint64_t nrefs; 429 430 dio = *diop; 431 *diop = NULL; 432 hmp = dio->hmp; 433 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL); 434 435 KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0); 436 437 /* 438 * Drop refs. 439 * 440 * On the 1->0 transition clear GOOD and set INPROG, and break. 441 * On any other transition we can return early. 442 */ 443 for (;;) { 444 orefs = dio->refs; 445 cpu_ccfence(); 446 447 if ((orefs & HAMMER2_DIO_MASK) == 1 && 448 (orefs & HAMMER2_DIO_INPROG) == 0) { 449 /* 450 * Lastdrop case, INPROG can be set. GOOD must be 451 * cleared to prevent the getblk shortcut. 
452 */ 453 nrefs = orefs - 1; 454 nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY); 455 nrefs |= HAMMER2_DIO_INPROG; 456 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) 457 break; 458 } else if ((orefs & HAMMER2_DIO_MASK) == 1) { 459 /* 460 * Lastdrop case, INPROG already set. We must 461 * wait for INPROG to clear. 462 */ 463 nrefs = orefs | HAMMER2_DIO_WAITING; 464 tsleep_interlock(dio, 0); 465 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) { 466 tsleep(dio, PINTERLOCKED, "h2dio", hz); 467 } 468 /* retry */ 469 } else { 470 /* 471 * Normal drop case. 472 */ 473 nrefs = orefs - 1; 474 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) 475 return; 476 /* retry */ 477 } 478 cpu_pause(); 479 /* retry */ 480 } 481 482 /* 483 * Lastdrop (1->0 transition). INPROG has been set, GOOD and DIRTY 484 * have been cleared. iofree_count has not yet been incremented, 485 * note that another accessor race will decrement iofree_count so 486 * we have to increment it regardless. 487 * 488 * We can now dispose of the buffer, and should do it before calling 489 * io_complete() in case there's a race against a new reference 490 * which causes io_complete() to chain and instantiate the bp again. 491 */ 492 pbase = dio->pbase; 493 psize = dio->psize; 494 bp = dio->bp; 495 dio->bp = NULL; 496 497 if ((orefs & HAMMER2_DIO_GOOD) && bp) { 498 /* 499 * Non-errored disposal of bp 500 */ 501 if (orefs & HAMMER2_DIO_DIRTY) { 502 dio_write_stats_update(dio, bp); 503 504 /* 505 * Allows dirty buffers to accumulate and 506 * possibly be canceled (e.g. by a 'rm'), 507 * by default we will burst-write later. 508 * 509 * We generally do NOT want to issue an actual 510 * b[a]write() or cluster_write() here. Due to 511 * the way chains are locked, buffers may be cycled 512 * in and out quite often and disposal here can cause 513 * multiple writes or write-read stalls. 514 * 515 * If FLUSH is set we do want to issue the actual 516 * write. 
This typically occurs in the write-behind 517 * case when writing to large files. 518 */ 519 off_t peof; 520 int hce; 521 if (dio->refs & HAMMER2_DIO_FLUSH) { 522 if ((hce = hammer2_cluster_write) != 0) { 523 peof = (pbase + HAMMER2_SEGMASK64) & 524 ~HAMMER2_SEGMASK64; 525 bp->b_flags |= B_CLUSTEROK; 526 cluster_write(bp, peof, psize, hce); 527 } else { 528 bp->b_flags &= ~B_CLUSTEROK; 529 bawrite(bp); 530 } 531 } else { 532 bp->b_flags &= ~B_CLUSTEROK; 533 bdwrite(bp); 534 } 535 } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) { 536 brelse(bp); 537 } else { 538 bqrelse(bp); 539 } 540 } else if (bp) { 541 /* 542 * Errored disposal of bp 543 */ 544 brelse(bp); 545 } 546 547 /* 548 * Update iofree_count before disposing of the dio 549 */ 550 hmp = dio->hmp; 551 atomic_add_int(&hmp->iofree_count, 1); 552 553 /* 554 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear). 555 * 556 * Also clear FLUSH as it was handled above. 557 */ 558 for (;;) { 559 orefs = dio->refs; 560 cpu_ccfence(); 561 nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD | 562 HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH); 563 if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) { 564 if (orefs & HAMMER2_DIO_WAITING) 565 wakeup(dio); 566 break; 567 } 568 cpu_pause(); 569 } 570 571 /* 572 * We cache free buffers so re-use cases can use a shared lock, but 573 * if too many build up we have to clean them out. 
574 */ 575 dio_limit = hammer2_dio_limit; 576 if (dio_limit < 256) 577 dio_limit = 256; 578 if (dio_limit > 1024*1024) 579 dio_limit = 1024*1024; 580 if (hmp->iofree_count > dio_limit) { 581 struct hammer2_cleanupcb_info info; 582 583 RB_INIT(&info.tmptree); 584 hammer2_spin_ex(&hmp->io_spin); 585 if (hmp->iofree_count > dio_limit) { 586 info.count = hmp->iofree_count / 5; 587 RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL, 588 hammer2_io_cleanup_callback, &info); 589 } 590 hammer2_spin_unex(&hmp->io_spin); 591 hammer2_io_cleanup(hmp, &info.tmptree); 592 } 593 } 594 595 /* 596 * Cleanup any dio's with (INPROG | refs) == 0. 597 * 598 * Called to clean up cached DIOs on umount after all activity has been 599 * flushed. 600 */ 601 static 602 int 603 hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg) 604 { 605 struct hammer2_cleanupcb_info *info = arg; 606 hammer2_io_t *xio; 607 608 if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) { 609 if (dio->act > 0) { 610 int act; 611 612 act = dio->act - (ticks - dio->ticks) / hz - 1; 613 if (act > 0) { 614 dio->act = act; 615 return 0; 616 } 617 dio->act = 0; 618 } 619 KKASSERT(dio->bp == NULL); 620 if (info->count > 0) { 621 RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio); 622 xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio); 623 KKASSERT(xio == NULL); 624 --info->count; 625 } 626 } 627 return 0; 628 } 629 630 void 631 hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree) 632 { 633 hammer2_io_t *dio; 634 635 while ((dio = RB_ROOT(tree)) != NULL) { 636 RB_REMOVE(hammer2_io_tree, tree, dio); 637 KKASSERT(dio->bp == NULL && 638 (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0); 639 if (dio->refs & HAMMER2_DIO_DIRTY) { 640 kprintf("hammer2_io_cleanup: Dirty buffer " 641 "%016jx/%d (bp=%p)\n", 642 dio->pbase, dio->psize, dio->bp); 643 } 644 kfree(dio, M_HAMMER2); 645 atomic_add_int(&hammer2_dio_count, -1); 646 atomic_add_int(&hmp->iofree_count, -1); 647 } 648 } 649 650 /* 651 * 
Returns a pointer to the requested data. 652 */ 653 char * 654 hammer2_io_data(hammer2_io_t *dio, off_t lbase) 655 { 656 struct buf *bp; 657 int off; 658 659 bp = dio->bp; 660 KKASSERT(bp != NULL); 661 bkvasync(bp); 662 off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset; 663 KKASSERT(off >= 0 && off < bp->b_bufsize); 664 return(bp->b_data + off); 665 } 666 667 int 668 hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, 669 hammer2_io_t **diop) 670 { 671 *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW); 672 return ((*diop)->error); 673 } 674 675 int 676 hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, 677 hammer2_io_t **diop) 678 { 679 *diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ); 680 return ((*diop)->error); 681 } 682 683 int 684 _hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize, 685 hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 686 { 687 #ifdef HAMMER2_IO_DEBUG 688 hammer2_io_t *dio; 689 #endif 690 691 *diop = _hammer2_io_getblk(hmp, btype, lbase, lsize, 692 HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL); 693 #ifdef HAMMER2_IO_DEBUG 694 if ((dio = *diop) != NULL) { 695 int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK; 696 dio->debug_data[i] = debug_data; 697 } 698 #endif 699 return ((*diop)->error); 700 } 701 702 hammer2_io_t * 703 _hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase, 704 int lsize HAMMER2_IO_DEBUG_ARGS) 705 { 706 hammer2_io_t *dio; 707 708 dio = _hammer2_io_getblk(hmp, 0, lbase, lsize, 709 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL); 710 return dio; 711 } 712 713 void 714 _hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 715 { 716 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY | 717 HAMMER2_DIO_FLUSH); 718 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 719 } 720 721 void 722 _hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 723 { 724 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY); 725 _hammer2_io_putblk(diop 
HAMMER2_IO_DEBUG_CALL); 726 } 727 728 int 729 _hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 730 { 731 atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY | 732 HAMMER2_DIO_FLUSH); 733 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 734 return (0); /* XXX */ 735 } 736 737 void 738 hammer2_io_setdirty(hammer2_io_t *dio) 739 { 740 atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY); 741 } 742 743 /* 744 * This routine is called when a MODIFIED chain is being DESTROYED, 745 * in an attempt to allow the related buffer cache buffer to be 746 * invalidated and discarded instead of flushing it to disk. 747 * 748 * At the moment this case is only really useful for file meta-data. 749 * File data is already handled via the logical buffer cache associated 750 * with the vnode, and will be discarded if it was never flushed to disk. 751 * File meta-data may include inodes, directory entries, and indirect blocks. 752 * 753 * XXX 754 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being 755 * invalidated might be smaller. Most of the meta-data structures above 756 * are in the 'smaller' category. For now, don't try to invalidate the 757 * data areas. 758 */ 759 void 760 hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes) 761 { 762 /* NOP */ 763 } 764 765 void 766 _hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 767 { 768 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 769 } 770 771 void 772 _hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS) 773 { 774 _hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL); 775 } 776 777 /* 778 * Set dedup validation bits in a DIO. We do not need the buffer cache 779 * buffer for this. This must be done concurrent with setting bits in 780 * the freemap so as to interlock with bulkfree's clearing of those bits. 
781 */ 782 void 783 hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref) 784 { 785 hammer2_io_t *dio; 786 uint64_t mask; 787 int lsize; 788 int isgood; 789 790 dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood); 791 if ((int)(bref->data_off & HAMMER2_OFF_MASK_RADIX)) 792 lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX); 793 else 794 lsize = 0; 795 mask = hammer2_dedup_mask(dio, bref->data_off, lsize); 796 atomic_clear_64(&dio->dedup_valid, mask); 797 atomic_set_64(&dio->dedup_alloc, mask); 798 hammer2_io_putblk(&dio); 799 } 800 801 /* 802 * Clear dedup validation bits in a DIO. This is typically done when 803 * a modified chain is destroyed or by the bulkfree code. No buffer 804 * is needed for this operation. If the DIO no longer exists it is 805 * equivalent to the bits not being set. 806 */ 807 void 808 hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype, 809 hammer2_off_t data_off, u_int bytes) 810 { 811 hammer2_io_t *dio; 812 uint64_t mask; 813 int isgood; 814 815 if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0) 816 return; 817 if (btype != HAMMER2_BREF_TYPE_DATA) 818 return; 819 dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood); 820 if (dio) { 821 if (data_off < dio->pbase || 822 (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes > 823 dio->pbase + dio->psize) { 824 panic("hammer2_io_dedup_delete: DATAOFF BAD " 825 "%016jx/%d %016jx\n", 826 data_off, bytes, dio->pbase); 827 } 828 mask = hammer2_dedup_mask(dio, data_off, bytes); 829 atomic_clear_64(&dio->dedup_alloc, mask); 830 atomic_clear_64(&dio->dedup_valid, mask); 831 hammer2_io_putblk(&dio); 832 } 833 } 834 835 /* 836 * Assert that dedup allocation bits in a DIO are not set. This operation 837 * does not require a buffer. The DIO does not need to exist. 
838 */ 839 void 840 hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes) 841 { 842 hammer2_io_t *dio; 843 int isgood; 844 845 dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA, 846 0, &isgood); 847 if (dio) { 848 KASSERT((dio->dedup_alloc & 849 hammer2_dedup_mask(dio, data_off, bytes)) == 0, 850 ("hammer2_dedup_assert: %016jx/%d %016jx/%016jx", 851 data_off, 852 bytes, 853 hammer2_dedup_mask(dio, data_off, bytes), 854 dio->dedup_alloc)); 855 hammer2_io_putblk(&dio); 856 } 857 } 858 859 static 860 void 861 dio_write_stats_update(hammer2_io_t *dio, struct buf *bp) 862 { 863 if (bp->b_flags & B_DELWRI) 864 return; 865 hammer2_adjwritecounter(dio->btype, dio->psize); 866 } 867 868 void 869 hammer2_io_bkvasync(hammer2_io_t *dio) 870 { 871 KKASSERT(dio->bp != NULL); 872 bkvasync(dio->bp); 873 } 874 875 /* 876 * Ref a dio that is already owned 877 */ 878 void 879 _hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS) 880 { 881 DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL); 882 atomic_add_64(&dio->refs, 1); 883 } 884