1 /* 2 * Copyright (c) 2013-2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 */

#include "hammer2.h"

/*
 * Operations accepted by _hammer2_io_getblk():
 *
 * READ   - instantiate (and read) the backing buffer if not already GOOD.
 * NEW    - instantiate the buffer and zero the logical range.
 * NEWNZ  - instantiate the buffer but do not zero it.
 * READQ  - "quick" read: only succeeds if the DIO already exists; never
 *	    creates one (converted to READ internally once found).
 */
#define HAMMER2_DOP_READ	1
#define HAMMER2_DOP_NEW		2
#define HAMMER2_DOP_NEWNZ	3
#define HAMMER2_DOP_READQ	4

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

/*
 * RB-tree comparator.  DIOs are indexed by their physical base offset
 * (pbase) on the device.
 */
static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

/*
 * Scratch state passed to hammer2_io_cleanup_callback() when trimming
 * excess cached (ref-count 0) DIOs.  Victims are moved to tmptree while
 * io_spin is held and destroyed afterwards by hammer2_io_cleanup().
 */
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;	/* collected victims */
	int	count;			/* max number of DIOs to collect */
};

#if 0
/*
 * (dead code) Calculate a 1-bit-per-1KB validity mask covering (off, bytes)
 * within the DIO's 64KB physical buffer.
 */
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif

#ifdef HAMMER2_IO_DEBUG

/*
 * Record (file, line, refs, thread) into the DIO's circular debug history.
 * The index is advanced atomically so concurrent callers get distinct slots.
 */
static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif

/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 *
 * The returned DIO has had its ref count bumped (prevents destruction).
 * *isgoodp is set to 1 if the DIO's buffer was already GOOD at lookup time,
 * otherwise 0 — the caller uses this to decide whether a memory fence is
 * needed before accessing the buffer.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	/*
	 * Decode data_off: the low bits encode the radix (log2 size) of
	 * the logical block, the remaining bits its device offset.  The
	 * physical DIO buffer is always a full PBUFSIZE, so the logical
	 * range must fall entirely within one pbase-aligned buffer.
	 */
	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			/* 0->1 transition, DIO leaves the free cache */
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		/*
		 * Not found; allocate outside the spinlock and re-insert
		 * under the exclusive lock.  Another thread may race the
		 * insert, in which case we ref its DIO (xio) and discard
		 * our own.
		 */
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			/* lost the insertion race, use the existing DIO */
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}

	/*
	 * Refresh activity heuristic used by the cleanup scan to avoid
	 * throwing away recently used DIOs.
	 */
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 *
 * The DIO_INPROG flag serializes buffer instantiation: exactly one thread
 * owns the I/O, other threads interlock on DIO_WAITING and tsleep until
 * INPROG clears.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	/*
	 * Data buffers are marked B_NOTMETA so swapcache can distinguish
	 * them from meta-data.  All buffers use the KVABIO protocol.
	 */
	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		/* quick-read: do not create the DIO if it doesn't exist */
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			if (isgood == 0)
				cpu_mfence();	/* see buffer made by other cpu */
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				/*
				 * NOTE(review): atomic_set_long on a
				 * uint64_t refs field — ok on DragonFly
				 * (long is 64-bit) but inconsistent with
				 * the atomic_set_64 used elsewhere.
				 */
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO.  If another thread already owns the
		 * I/O (INPROG set), interlock-sleep and retry.
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		/*
		 * The request covers the full physical buffer; a NEW op
		 * does not need to read the media at all.
		 */
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->hmp->devvp,
					 dio->pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				dio->bp = NULL;
				error = cluster_readx(dio->hmp->devvp,
						      peof, dio->pbase,
						      dio->psize, bflags,
						      dio->psize,
						      HAMMER2_PBUFSIZE*hce,
						      &dio->bp);
			} else {
				dio->bp = NULL;
				error = breadnx(dio->hmp->devvp, dio->pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
		}
	} else {
		/*
		 * The request is smaller than the physical buffer; must
		 * read the media even for NEW ops so untouched portions
		 * of the buffer remain valid.
		 */
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_readx(dio->hmp->devvp,
					      peof, dio->pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->hmp->devvp, dio->pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		/* disassociate bp from curthread; DIO holds it unlocked */
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}

/*
 * Release our ref on *diop (*diop is zapped so the caller's pointer is
 * dead after this call).
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented,
	 * note that another accessor race will decrement iofree_count so
	 * we have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * by default we will burst-write later.
			 *
			 * We generally do NOT want to issue an actual
			 * b[a]write() or cluster_write() here.  Due to
			 * the way chains are locked, buffers may be cycled
			 * in and out quite often and disposal here can cause
			 * multiple writes or write-read stalls.
			 *
			 * If FLUSH is set we do want to issue the actual
			 * write.  This typically occurs in the write-behind
			 * case when writing to large files.
			 */
			off_t peof;
			int hce;
			if (dio->refs & HAMMER2_DIO_FLUSH) {
				if ((hce = hammer2_cluster_write) != 0) {
					peof = (pbase + HAMMER2_SEGMASK64) &
					       ~HAMMER2_SEGMASK64;
					bp->b_flags |= B_CLUSTEROK;
					cluster_write(bp, peof, psize, hce);
				} else {
					bp->b_flags &= ~B_CLUSTEROK;
					bawrite(bp);
				}
			} else {
				bp->b_flags &= ~B_CLUSTEROK;
				bdwrite(bp);
			}
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 *
	 * Also clear FLUSH as it was handled above.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING | HAMMER2_DIO_FLUSH);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		if (hmp->iofree_count > dio_limit) {
			/* trim roughly 20% of the cached DIOs */
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.  Also used by the RB_SCAN in _hammer2_io_putblk() to trim the
 * free-DIO cache; in that case info->count limits how many are collected
 * and the activity heuristic (act, decayed by elapsed ticks) protects
 * recently used DIOs.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			int act;

			/* decay activity by elapsed time; keep if still hot */
			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			/* move victim to tmptree for later destruction */
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}

/*
 * Destroy all DIOs collected in (tree).  Each must be unreferenced and
 * not INPROG; a dirty DIO at this point indicates lost writes and is
 * reported.
 */
void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 *
 Returns a pointer to the requested data within the DIO's buffer.
 * (lbase)'s low radix bits are masked off; the offset is computed against
 * the buffer's loffset and must fall within the buffer.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

/*
 * Acquire a new (zeroed) buffer for (lbase, lsize).  Returns the DIO via
 * *diop and the DIO's error code.
 */
int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

/*
 * Same as hammer2_io_new() but the buffer is not zeroed.
 */
int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

/*
 * Acquire and read the buffer for (lbase, lsize).  Returns the DIO via
 * *diop and the DIO's error code.  The debug build additionally records
 * (debug_data) in the DIO's most recent debug slot.
 */
int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		  hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
	}
#endif
	return ((*diop)->error);
}

/*
 * Quick-read: returns the DIO only if it already exists (READQ), NULL
 * otherwise.  Does not create a new DIO.
 */
hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
		     int lsize HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;

	dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
				 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
	return dio;
}

/*
 * Release the DIO, marking it dirty and requesting an actual (async)
 * write on the last drop (DIRTY | FLUSH — see _hammer2_io_putblk()).
 */
void
_hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Release the DIO, marking it dirty; the buffer is delayed-written on
 * the last drop (no FLUSH).
 */
void
_hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Like _hammer2_io_bawrite() but with an int return.  Currently always
 * returns 0 since putblk reports no error (hence the XXX).
 */
int
_hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY |
				      HAMMER2_DIO_FLUSH);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
	return (0);	/* XXX */
}

/*
 * Mark the DIO dirty without releasing it.
 */
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

/*
 * Release the DIO without marking it dirty.
 */
void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Release the DIO without marking it dirty (same as brelse here; the
 * actual brelse/bqrelse distinction is made in _hammer2_io_putblk()
 * based on the buffer's flags).
 */
void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrent with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 *
 * The block is marked allocated-for-dedup (dedup_alloc) and its stale
 * validity bit is cleared (dedup_valid).
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	/* only data blocks participate in dedup tracking */
	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_io_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			 hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

/*
 * Account a write's bytes against the global per-btype write counters.
 * Re-dirtying an already-delayed-write buffer (B_DELWRI) is not counted
 * again.
 */
static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	long *counterp;

	if (bp->b_flags & B_DELWRI)
		return;

	switch(dio->btype) {
	case 0:
		return;
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_write;
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_write;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_write;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_write;
		break;
	default:
		counterp = &hammer2_iod_volu_write;
		break;
	}
	/* NOTE(review): non-atomic add; presumably statistical-only */
	*counterp += dio->psize;
}

/*
 * Synchronize the DIO's buffer KVA for access by the current cpu
 * (KVABIO protocol).
 */
void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	atomic_add_64(&dio->refs, 1);
}