1 /* 2 * Copyright (c) 2013-2018 The DragonFly Project. All rights reserved. 3 * 4 * This code is derived from software contributed to The DragonFly Project 5 * by Matthew Dillon <dillon@dragonflybsd.org> 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in 15 * the documentation and/or other materials provided with the 16 * distribution. 17 * 3. Neither the name of The DragonFly Project nor the names of its 18 * contributors may be used to endorse or promote products derived 19 * from this software without specific, prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 23 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 24 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 25 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 26 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 27 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 28 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 29 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 30 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 31 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 
 */

#include "hammer2.h"

/* DIO operation codes passed to _hammer2_io_getblk() */
#define HAMMER2_DOP_READ	1	/* read existing media data */
#define HAMMER2_DOP_NEW		2	/* new block, zero-fill it */
#define HAMMER2_DOP_NEWNZ	3	/* new block, do not zero-fill */
#define HAMMER2_DOP_READQ	4	/* quick read, do not create DIO */

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS-abstraction but the main
 * purpose is to allow larger buffers to be used against hammer2_chain's
 * using smaller allocations, without causing deadlocks.
 *
 * The DIOs also record temporary state with limited persistence.  This
 * feature is used to keep track of dedupable blocks.
 */
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);
static void dio_write_stats_update(hammer2_io_t *dio, struct buf *bp);

/*
 * RB-tree comparator.  DIOs are indexed by their physical base offset
 * (pbase), one DIO per PBUFSIZE-aligned device extent.
 */
static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
	if (io1->pbase < io2->pbase)
		return(-1);
	if (io1->pbase > io2->pbase)
		return(1);
	return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
		off_t, pbase);

/*
 * Scratch state for hammer2_io_cleanup_callback().  Victim DIOs are
 * moved to tmptree under the io_spin lock and freed afterwards.
 */
struct hammer2_cleanupcb_info {
	struct hammer2_io_tree tmptree;
	int	count;			/* max number of DIOs to reap */
};

#if 0
static __inline
uint64_t
hammer2_io_mask(hammer2_io_t *dio, hammer2_off_t off, u_int bytes)
{
	uint64_t mask;
	int i;

	if (bytes < 1024)	/* smaller chunks not supported */
		return 0;

	/*
	 * Calculate crc check mask for larger chunks
	 */
	i = (((off & ~HAMMER2_OFF_MASK_RADIX) - dio->pbase) &
	     HAMMER2_PBUFMASK) >> 10;
	if (i == 0 && bytes == HAMMER2_PBUFSIZE)
		return((uint64_t)-1);
	mask = ((uint64_t)1U << (bytes >> 10)) - 1;
	mask <<= i;

	return mask;
}
#endif

#ifdef HAMMER2_IO_DEBUG

/*
 * Record a (file, line, refs, thread) tuple in the DIO's circular debug
 * history.  debug_index is advanced atomically so concurrent recorders
 * claim distinct slots.
 */
static __inline void
DIO_RECORD(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	int i;

	i = atomic_fetchadd_int(&dio->debug_index, 1) & HAMMER2_IO_DEBUG_MASK;

	dio->debug_file[i] = file;
	dio->debug_line[i] = line;
	dio->debug_refs[i] = dio->refs;
	dio->debug_td[i] = curthread;
}

#else

#define DIO_RECORD(dio)

#endif

/*
 * Returns the DIO corresponding to the data|radix, creating it if necessary.
 *
 * If createit is 0, NULL can be returned indicating that the DIO does not
 * exist.  (btype) is ignored when createit is 0.
 *
 * On return the DIO's ref count has been bumped; *isgoodp is set to 1 if
 * the DIO already had a good (instantiated) buffer at lookup time.
 */
static __inline
hammer2_io_t *
hammer2_io_alloc(hammer2_dev_t *hmp, hammer2_key_t data_off, uint8_t btype,
		 int createit, int *isgoodp)
{
	hammer2_io_t *dio;
	hammer2_io_t *xio;
	hammer2_key_t lbase;
	hammer2_key_t pbase;
	hammer2_key_t pmask;
	uint64_t refs;
	int lsize;
	int psize;

	/*
	 * Decode (offset | radix) into the logical extent and the
	 * enclosing PBUFSIZE-aligned physical extent.
	 */
	psize = HAMMER2_PBUFSIZE;
	pmask = ~(hammer2_off_t)(psize - 1);
	lsize = 1 << (int)(data_off & HAMMER2_OFF_MASK_RADIX);
	lbase = data_off & ~HAMMER2_OFF_MASK_RADIX;
	pbase = lbase & pmask;

	/* the logical extent must fit entirely within one physical extent */
	if (pbase == 0 || ((lbase + lsize - 1) & pmask) != pbase) {
		kprintf("Illegal: %016jx %016jx+%08x / %016jx\n",
			pbase, lbase, lsize, pmask);
	}
	KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);
	*isgoodp = 0;

	/*
	 * Access/Allocate the DIO, bump dio->refs to prevent destruction.
	 */
	hammer2_spin_sh(&hmp->io_spin);
	dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
	if (dio) {
		refs = atomic_fetchadd_64(&dio->refs, 1);
		if ((refs & HAMMER2_DIO_MASK) == 0) {
			/* 0->1 transition, DIO leaves the free cache */
			atomic_add_int(&dio->hmp->iofree_count, -1);
		}
		if (refs & HAMMER2_DIO_GOOD)
			*isgoodp = 1;
		hammer2_spin_unsh(&hmp->io_spin);
	} else if (createit) {
		/*
		 * Allocate outside the spinlock, then re-acquire
		 * exclusively to insert.
		 */
		refs = 0;
		hammer2_spin_unsh(&hmp->io_spin);
		dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
		dio->hmp = hmp;
		dio->pbase = pbase;
		dio->psize = psize;
		dio->btype = btype;
		dio->refs = refs + 1;
		dio->act = 5;
		hammer2_spin_ex(&hmp->io_spin);
		xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
		if (xio == NULL) {
			atomic_add_int(&hammer2_dio_count, 1);
			hammer2_spin_unex(&hmp->io_spin);
		} else {
			/*
			 * Lost the insertion race; ref the winner's DIO
			 * and discard ours.
			 */
			refs = atomic_fetchadd_64(&xio->refs, 1);
			if ((refs & HAMMER2_DIO_MASK) == 0)
				atomic_add_int(&xio->hmp->iofree_count, -1);
			if (refs & HAMMER2_DIO_GOOD)
				*isgoodp = 1;
			hammer2_spin_unex(&hmp->io_spin);
			kfree(dio, M_HAMMER2);
			dio = xio;
		}
	} else {
		hammer2_spin_unsh(&hmp->io_spin);
		return NULL;
	}
	/* bump activity heuristic used by the cleanup callback */
	dio->ticks = ticks;
	if (dio->act < 10)
		++dio->act;

	return dio;
}

/*
 * Acquire the requested dio.  If DIO_GOOD is not set we must instantiate
 * a buffer.  If set the buffer already exists and is good to go.
 *
 * (op) is one of the HAMMER2_DOP_* codes; DOP_READQ degrades to DOP_READ
 * but refuses to create a DIO that does not already exist.
 */
hammer2_io_t *
_hammer2_io_getblk(hammer2_dev_t *hmp, int btype, off_t lbase,
		   int lsize, int op HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;
	off_t peof;
	uint64_t orefs;
	uint64_t nrefs;
	int isgood;
	int error;
	int hce;
	int bflags;

	bflags = ((btype == HAMMER2_BREF_TYPE_DATA) ? B_NOTMETA : 0);
	bflags |= B_KVABIO;

	KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);

	if (op == HAMMER2_DOP_READQ) {
		dio = hammer2_io_alloc(hmp, lbase, btype, 0, &isgood);
		if (dio == NULL)
			return NULL;
		op = HAMMER2_DOP_READ;
	} else {
		dio = hammer2_io_alloc(hmp, lbase, btype, 1, &isgood);
	}

	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		/*
		 * Buffer is already good, handle the op and return.
		 */
		if (orefs & HAMMER2_DIO_GOOD) {
			/*
			 * If GOOD was not set at lookup time, make sure
			 * the instantiator's stores are visible before
			 * we touch the buffer.
			 */
			if (isgood == 0)
				cpu_mfence();
			bkvasync(dio->bp);

			switch(op) {
			case HAMMER2_DOP_NEW:
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				/*
				 * NOTE(review): refs is uint64_t; this
				 * relies on long being 64-bit here.
				 * Other paths use atomic_set_64 —
				 * consider unifying.
				 */
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				/* nothing to do */
				break;
			}
			DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
			return (dio);
		}

		/*
		 * Try to own the DIO
		 */
		if (orefs & HAMMER2_DIO_INPROG) {
			/*
			 * Another thread is instantiating the buffer;
			 * interlock and sleep until it finishes.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			nrefs = orefs | HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				break;
			}
		}
	}

	/*
	 * We break to here if GOOD is not set and we acquired INPROG for
	 * the I/O.
	 */
	KKASSERT(dio->bp == NULL);
	if (btype == HAMMER2_BREF_TYPE_DATA)
		hce = hammer2_cluster_data_read;
	else
		hce = hammer2_cluster_meta_read;

	error = 0;
	if (dio->pbase == (lbase & ~HAMMER2_OFF_MASK_RADIX) &&
	    dio->psize == lsize) {
		/*
		 * Request covers the entire physical buffer.  A NEW op
		 * can simply getblk() without reading from the media.
		 */
		switch(op) {
		case HAMMER2_DOP_NEW:
		case HAMMER2_DOP_NEWNZ:
			dio->bp = getblk(dio->hmp->devvp,
					 dio->pbase, dio->psize,
					 GETBLK_KVABIO, 0);
			if (op == HAMMER2_DOP_NEW) {
				bkvasync(dio->bp);
				bzero(dio->bp->b_data, dio->psize);
			}
			atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
			break;
		case HAMMER2_DOP_READ:
		default:
			if (hce > 0) {
				/*
				 * Synchronous cluster I/O for now.
				 */
				peof = (dio->pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				dio->bp = NULL;
				error = cluster_readx(dio->hmp->devvp,
						      peof, dio->pbase,
						      dio->psize, bflags,
						      dio->psize,
						      HAMMER2_PBUFSIZE*hce,
						      &dio->bp);
			} else {
				dio->bp = NULL;
				error = breadnx(dio->hmp->devvp, dio->pbase,
						dio->psize, bflags,
						NULL, NULL, 0, &dio->bp);
			}
		}
	} else {
		/*
		 * Request is a sub-extent of the physical buffer; the
		 * surrounding data must be read from the media even for
		 * NEW ops.
		 */
		if (hce > 0) {
			/*
			 * Synchronous cluster I/O for now.
			 */
			peof = (dio->pbase + HAMMER2_SEGMASK64) &
			       ~HAMMER2_SEGMASK64;
			error = cluster_readx(dio->hmp->devvp,
					      peof, dio->pbase, dio->psize,
					      bflags,
					      dio->psize, HAMMER2_PBUFSIZE*hce,
					      &dio->bp);
		} else {
			error = breadnx(dio->hmp->devvp, dio->pbase,
					dio->psize, bflags,
					NULL, NULL, 0, &dio->bp);
		}
		if (dio->bp) {
			/*
			 * Handle NEW flags
			 */
			switch(op) {
			case HAMMER2_DOP_NEW:
				bkvasync(dio->bp);
				bzero(hammer2_io_data(dio, lbase), lsize);
				/* fall through */
			case HAMMER2_DOP_NEWNZ:
				atomic_set_long(&dio->refs, HAMMER2_DIO_DIRTY);
				break;
			case HAMMER2_DOP_READ:
			default:
				break;
			}

			/*
			 * Tell the kernel that the buffer cache is not
			 * meta-data based on the btype.  This allows
			 * swapcache to distinguish between data and
			 * meta-data.
			 */
			switch(btype) {
			case HAMMER2_BREF_TYPE_DATA:
				dio->bp->b_flags |= B_NOTMETA;
				break;
			default:
				break;
			}
		}
	}

	if (dio->bp) {
		bkvasync(dio->bp);
		/* disassociate bp from curthread so any thread can putblk */
		BUF_KERNPROC(dio->bp);
		dio->bp->b_flags &= ~B_AGE;
		/* dio->bp->b_debug_info2 = dio; */
	}
	dio->error = error;

	/*
	 * Clear INPROG and WAITING, set GOOD wake up anyone waiting.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_WAITING);
		if (error == 0)
			nrefs |= HAMMER2_DIO_GOOD;
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/* XXX error handling */
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	return dio;
}

/*
 * Release our ref on *diop.
 *
 * On the 1->0 transition we clear DIO_GOOD, set DIO_INPROG, and dispose
 * of dio->bp.  Then we clean up DIO_INPROG and DIO_WAITING.
 */
void
_hammer2_io_putblk(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_dev_t *hmp;
	hammer2_io_t *dio;
	struct buf *bp;
	off_t pbase;
	int psize;
	int dio_limit;
	uint64_t orefs;
	uint64_t nrefs;

	dio = *diop;
	*diop = NULL;		/* caller's pointer is consumed */
	hmp = dio->hmp;
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);

	KKASSERT((dio->refs & HAMMER2_DIO_MASK) != 0);

	/*
	 * Drop refs.
	 *
	 * On the 1->0 transition clear GOOD and set INPROG, and break.
	 * On any other transition we can return early.
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();

		if ((orefs & HAMMER2_DIO_MASK) == 1 &&
		    (orefs & HAMMER2_DIO_INPROG) == 0) {
			/*
			 * Lastdrop case, INPROG can be set.  GOOD must be
			 * cleared to prevent the getblk shortcut.
			 */
			nrefs = orefs - 1;
			nrefs &= ~(HAMMER2_DIO_GOOD | HAMMER2_DIO_DIRTY);
			nrefs |= HAMMER2_DIO_INPROG;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				break;
		} else if ((orefs & HAMMER2_DIO_MASK) == 1) {
			/*
			 * Lastdrop case, INPROG already set.  We must
			 * wait for INPROG to clear.
			 */
			nrefs = orefs | HAMMER2_DIO_WAITING;
			tsleep_interlock(dio, 0);
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
				tsleep(dio, PINTERLOCKED, "h2dio", hz);
			}
			/* retry */
		} else {
			/*
			 * Normal drop case.
			 */
			nrefs = orefs - 1;
			if (atomic_cmpset_64(&dio->refs, orefs, nrefs))
				return;
			/* retry */
		}
		cpu_pause();
		/* retry */
	}

	/*
	 * Lastdrop (1->0 transition).  INPROG has been set, GOOD and DIRTY
	 * have been cleared.  iofree_count has not yet been incremented,
	 * note that another accessor race will decrement iofree_count so
	 * we have to increment it regardless.
	 *
	 * We can now dispose of the buffer, and should do it before calling
	 * io_complete() in case there's a race against a new reference
	 * which causes io_complete() to chain and instantiate the bp again.
	 */
	pbase = dio->pbase;
	psize = dio->psize;
	bp = dio->bp;
	dio->bp = NULL;

	if ((orefs & HAMMER2_DIO_GOOD) && bp) {
		/*
		 * Non-errored disposal of bp
		 */
		if (orefs & HAMMER2_DIO_DIRTY) {
			dio_write_stats_update(dio, bp);

			/*
			 * Allows dirty buffers to accumulate and
			 * possibly be canceled (e.g. by a 'rm'),
			 * will burst-write later.
			 *
			 * We normally do not allow the kernel to
			 * cluster dirty buffers because H2 already
			 * uses a large block size.
			 *
			 * NOTE: Do not use cluster_write() here.  The
			 *	 problem is that due to the way chains
			 *	 are locked, buffers are cycled in and out
			 *	 quite often so the disposal here is not
			 *	 necessarily the final disposal.  Avoid
			 *	 excessive rewriting of the same blocks
			 *	 by using bdwrite().
			 */
#if 0
			off_t peof;
			int hce;

			if ((hce = hammer2_cluster_write) > 0) {
				/*
				 * Allows write-behind to keep the buffer
				 * cache sane.
				 */
				peof = (pbase + HAMMER2_SEGMASK64) &
				       ~HAMMER2_SEGMASK64;
				bp->b_flags |= B_CLUSTEROK;
				cluster_write(bp, peof, psize, hce);
			} else
#endif
			if (hammer2_cluster_write)
				bp->b_flags |= B_CLUSTEROK;
			else
				bp->b_flags &= ~B_CLUSTEROK;
			bdwrite(bp);
		} else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
			brelse(bp);
		} else {
			bqrelse(bp);
		}
	} else if (bp) {
		/*
		 * Errored disposal of bp
		 */
		brelse(bp);
	}

	/*
	 * Update iofree_count before disposing of the dio
	 */
	hmp = dio->hmp;
	atomic_add_int(&hmp->iofree_count, 1);

	/*
	 * Clear INPROG, GOOD, and WAITING (GOOD should already be clear).
	 */
	for (;;) {
		orefs = dio->refs;
		cpu_ccfence();
		nrefs = orefs & ~(HAMMER2_DIO_INPROG | HAMMER2_DIO_GOOD |
				  HAMMER2_DIO_WAITING);
		if (atomic_cmpset_64(&dio->refs, orefs, nrefs)) {
			if (orefs & HAMMER2_DIO_WAITING)
				wakeup(dio);
			break;
		}
		cpu_pause();
	}

	/*
	 * We cache free buffers so re-use cases can use a shared lock, but
	 * if too many build up we have to clean them out.
	 */
	dio_limit = hammer2_dio_limit;
	if (dio_limit < 256)
		dio_limit = 256;
	if (dio_limit > 1024*1024)
		dio_limit = 1024*1024;
	if (hmp->iofree_count > dio_limit) {
		struct hammer2_cleanupcb_info info;

		RB_INIT(&info.tmptree);
		hammer2_spin_ex(&hmp->io_spin);
		/* recheck under the exclusive lock */
		if (hmp->iofree_count > dio_limit) {
			info.count = hmp->iofree_count / 5;
			RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
				hammer2_io_cleanup_callback, &info);
		}
		hammer2_spin_unex(&hmp->io_spin);
		hammer2_io_cleanup(hmp, &info.tmptree);
	}
}

/*
 * Cleanup any dio's with (INPROG | refs) == 0.
 *
 * Called to clean up cached DIOs on umount after all activity has been
 * flushed.
 *
 * RB_SCAN callback; moves eligible (unreferenced, inactive) DIOs from the
 * device tree to info->tmptree, up to info->count of them.  The activity
 * counter (act) is decayed based on elapsed ticks before a DIO is
 * considered reapable.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
	struct hammer2_cleanupcb_info *info = arg;
	hammer2_io_t *xio;

	if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
		if (dio->act > 0) {
			int act;

			/* decay activity by elapsed time; keep if still warm */
			act = dio->act - (ticks - dio->ticks) / hz - 1;
			if (act > 0) {
				dio->act = act;
				return 0;
			}
			dio->act = 0;
		}
		KKASSERT(dio->bp == NULL);
		if (info->count > 0) {
			RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
			xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
			KKASSERT(xio == NULL);
			--info->count;
		}
	}
	return 0;
}

/*
 * Free all DIOs collected in (tree), adjusting the global and per-device
 * counters.  Dirty DIOs at this point indicate a flush bug and are logged.
 */
void
hammer2_io_cleanup(hammer2_dev_t *hmp, struct hammer2_io_tree *tree)
{
	hammer2_io_t *dio;

	while ((dio = RB_ROOT(tree)) != NULL) {
		RB_REMOVE(hammer2_io_tree, tree, dio);
		KKASSERT(dio->bp == NULL &&
		    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
		if (dio->refs & HAMMER2_DIO_DIRTY) {
			kprintf("hammer2_io_cleanup: Dirty buffer "
				"%016jx/%d (bp=%p)\n",
				dio->pbase, dio->psize, dio->bp);
		}
		kfree(dio, M_HAMMER2);
		atomic_add_int(&hammer2_dio_count, -1);
		atomic_add_int(&hmp->iofree_count, -1);
	}
}

/*
 * Returns a pointer to the requested data.
 *
 * The DIO's buffer must already be instantiated; (lbase) addresses a
 * sub-extent of the underlying physical buffer.
 */
char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
	struct buf *bp;
	int off;

	bp = dio->bp;
	KKASSERT(bp != NULL);
	bkvasync(bp);
	/* strip the radix bits, then offset relative to the buffer base */
	off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
	KKASSERT(off >= 0 && off < bp->b_bufsize);
	return(bp->b_data + off);
}

/*
 * Acquire a new or existing buffer, zero-filling it and marking the DIO
 * dirty.  Returns the DIO's error code.
 */
int
hammer2_io_new(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
	       hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEW);
	return ((*diop)->error);
}

/*
 * Same as hammer2_io_new() but without the zero-fill (non-zero new).
 */
int
hammer2_io_newnz(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		 hammer2_io_t **diop)
{
	*diop = hammer2_io_getblk(hmp, btype, lbase, lsize, HAMMER2_DOP_NEWNZ);
	return ((*diop)->error);
}

/*
 * Read the requested extent into a DIO buffer.  Returns the DIO's error
 * code.  When HAMMER2_IO_DEBUG is enabled, associates (debug_data) with
 * the most recent debug-history slot recorded by getblk.
 */
int
_hammer2_io_bread(hammer2_dev_t *hmp, int btype, off_t lbase, int lsize,
		  hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
#ifdef HAMMER2_IO_DEBUG
	hammer2_io_t *dio;
#endif

	*diop = _hammer2_io_getblk(hmp, btype, lbase, lsize,
				   HAMMER2_DOP_READ HAMMER2_IO_DEBUG_CALL);
#ifdef HAMMER2_IO_DEBUG
	if ((dio = *diop) != NULL) {
		int i = (dio->debug_index - 1) & HAMMER2_IO_DEBUG_MASK;
		dio->debug_data[i] = debug_data;
	}
#endif
	return ((*diop)->error);
}

/*
 * Quick-read: returns NULL instead of creating a DIO when one does not
 * already exist for the extent.
 */
hammer2_io_t *
_hammer2_io_getquick(hammer2_dev_t *hmp, off_t lbase,
		     int lsize HAMMER2_IO_DEBUG_ARGS)
{
	hammer2_io_t *dio;

	dio = _hammer2_io_getblk(hmp, 0, lbase, lsize,
				 HAMMER2_DOP_READQ HAMMER2_IO_DEBUG_CALL);
	return dio;
}

/*
 * Mark the DIO dirty and release it.  The actual write is a delayed
 * write issued by putblk's lastdrop path.
 */
void
_hammer2_io_bawrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Mark the DIO dirty and release it (delayed-write semantics).
 */
void
_hammer2_io_bdwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Mark the DIO dirty and release it.  Always returns 0; synchronous
 * write-error reporting is not implemented here.
 */
int
_hammer2_io_bwrite(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	atomic_set_64(&(*diop)->refs, HAMMER2_DIO_DIRTY);
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
	return (0);	/* XXX */
}

/*
 * Flag the DIO dirty without releasing it.
 */
void
hammer2_io_setdirty(hammer2_io_t *dio)
{
	atomic_set_64(&dio->refs, HAMMER2_DIO_DIRTY);
}

/*
 * This routine is called when a MODIFIED chain is being DESTROYED,
 * in an attempt to allow the related buffer cache buffer to be
 * invalidated and discarded instead of flushing it to disk.
 *
 * At the moment this case is only really useful for file meta-data.
 * File data is already handled via the logical buffer cache associated
 * with the vnode, and will be discarded if it was never flushed to disk.
 * File meta-data may include inodes, directory entries, and indirect blocks.
 *
 * XXX
 * However, our DIO buffers are PBUFSIZE'd (64KB), and the area being
 * invalidated might be smaller.  Most of the meta-data structures above
 * are in the 'smaller' category.  For now, don't try to invalidate the
 * data areas.
 */
void
hammer2_io_inval(hammer2_io_t *dio, hammer2_off_t data_off, u_int bytes)
{
	/* NOP */
}

/*
 * Release the DIO without write disposition (read-release).
 */
void
_hammer2_io_brelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Release the DIO, allowing the underlying buffer to be requeued.
 */
void
_hammer2_io_bqrelse(hammer2_io_t **diop HAMMER2_IO_DEBUG_ARGS)
{
	_hammer2_io_putblk(diop HAMMER2_IO_DEBUG_CALL);
}

/*
 * Set dedup validation bits in a DIO.  We do not need the buffer cache
 * buffer for this.  This must be done concurrent with setting bits in
 * the freemap so as to interlock with bulkfree's clearing of those bits.
 */
void
hammer2_io_dedup_set(hammer2_dev_t *hmp, hammer2_blockref_t *bref)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int lsize;
	int isgood;

	/* createit=1: the DIO must exist to carry the dedup bits */
	dio = hammer2_io_alloc(hmp, bref->data_off, bref->type, 1, &isgood);
	lsize = 1 << (int)(bref->data_off & HAMMER2_OFF_MASK_RADIX);
	mask = hammer2_dedup_mask(dio, bref->data_off, lsize);
	/* clear valid before setting alloc for the covered sub-blocks */
	atomic_clear_64(&dio->dedup_valid, mask);
	atomic_set_64(&dio->dedup_alloc, mask);
	hammer2_io_putblk(&dio);
}

/*
 * Clear dedup validation bits in a DIO.  This is typically done when
 * a modified chain is destroyed or by the bulkfree code.  No buffer
 * is needed for this operation.  If the DIO no longer exists it is
 * equivalent to the bits not being set.
 */
void
hammer2_io_dedup_delete(hammer2_dev_t *hmp, uint8_t btype,
			hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	uint64_t mask;
	int isgood;

	if ((data_off & ~HAMMER2_OFF_MASK_RADIX) == 0)
		return;
	if (btype != HAMMER2_BREF_TYPE_DATA)
		return;
	/* createit=0: a missing DIO means the bits were never set */
	dio = hammer2_io_alloc(hmp, data_off, btype, 0, &isgood);
	if (dio) {
		/* the extent must lie entirely within the DIO's buffer */
		if (data_off < dio->pbase ||
		    (data_off & ~HAMMER2_OFF_MASK_RADIX) + bytes >
		    dio->pbase + dio->psize) {
			panic("hammer2_dedup_delete: DATAOFF BAD "
			      "%016jx/%d %016jx\n",
			      data_off, bytes, dio->pbase);
		}
		mask = hammer2_dedup_mask(dio, data_off, bytes);
		atomic_clear_64(&dio->dedup_alloc, mask);
		atomic_clear_64(&dio->dedup_valid, mask);
		hammer2_io_putblk(&dio);
	}
}

/*
 * Assert that dedup allocation bits in a DIO are not set.  This operation
 * does not require a buffer.  The DIO does not need to exist.
 */
void
hammer2_io_dedup_assert(hammer2_dev_t *hmp, hammer2_off_t data_off, u_int bytes)
{
	hammer2_io_t *dio;
	int isgood;

	/* createit=0: nothing to assert if no DIO exists for the extent */
	dio = hammer2_io_alloc(hmp, data_off, HAMMER2_BREF_TYPE_DATA,
			       0, &isgood);
	if (dio) {
		KASSERT((dio->dedup_alloc &
			 hammer2_dedup_mask(dio, data_off, bytes)) == 0,
			("hammer2_dedup_assert: %016jx/%d %016jx/%016jx",
			data_off,
			bytes,
			hammer2_dedup_mask(dio, data_off, bytes),
			dio->dedup_alloc));
		hammer2_io_putblk(&dio);
	}
}

/*
 * Account a dirty-buffer disposal against the appropriate per-type write
 * statistics counter, keyed by the DIO's bref type.  Buffers already
 * marked B_DELWRI are skipped — presumably they were counted when first
 * marked delayed-write (NOTE(review): confirm against caller).
 */
static
void
dio_write_stats_update(hammer2_io_t *dio, struct buf *bp)
{
	long *counterp;

	if (bp->b_flags & B_DELWRI)
		return;

	switch(dio->btype) {
	case 0:
		return;
	case HAMMER2_BREF_TYPE_DATA:
		counterp = &hammer2_iod_file_write;
		break;
	case HAMMER2_BREF_TYPE_DIRENT:
	case HAMMER2_BREF_TYPE_INODE:
		counterp = &hammer2_iod_meta_write;
		break;
	case HAMMER2_BREF_TYPE_INDIRECT:
		counterp = &hammer2_iod_indr_write;
		break;
	case HAMMER2_BREF_TYPE_FREEMAP_NODE:
	case HAMMER2_BREF_TYPE_FREEMAP_LEAF:
		counterp = &hammer2_iod_fmap_write;
		break;
	default:
		counterp = &hammer2_iod_volu_write;
		break;
	}
	*counterp += dio->psize;
}

/*
 * Synchronize the DIO's buffer KVA for access by the current cpu
 * (KVABIO support).  The buffer must be instantiated.
 */
void
hammer2_io_bkvasync(hammer2_io_t *dio)
{
	KKASSERT(dio->bp != NULL);
	bkvasync(dio->bp);
}

/*
 * Ref a dio that is already owned
 */
void
_hammer2_io_ref(hammer2_io_t *dio HAMMER2_IO_DEBUG_ARGS)
{
	DIO_RECORD(dio HAMMER2_IO_DEBUG_CALL);
	atomic_add_64(&dio->refs, 1);
}