/*
 * Copyright (c) 2013-2014 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@dragonflybsd.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include "hammer2.h"

/*
 * Implements an abstraction layer for synchronous and asynchronous
 * buffered device I/O.  Can be used as an OS abstraction, but the main
 * purpose is to allow larger device buffers to be used with hammer2_chains
 * that use smaller allocations, without causing deadlocks.
 */
static void hammer2_io_callback(struct bio *bio);
static int hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg);

static int
hammer2_io_cmp(hammer2_io_t *io1, hammer2_io_t *io2)
{
        if (io2->pbase < io1->pbase)
                return(-1);
        if (io2->pbase > io1->pbase)
                return(1);
        return(0);
}

RB_PROTOTYPE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp, off_t);
RB_GENERATE2(hammer2_io_tree, hammer2_io, rbnode, hammer2_io_cmp,
                off_t, pbase);

struct hammer2_cleanupcb_info {
        struct hammer2_io_tree tmptree;
        int     count;
};

#define HAMMER2_DIO_INPROG      0x80000000
#define HAMMER2_DIO_GOOD        0x40000000
#define HAMMER2_DIO_WAITING     0x20000000
#define HAMMER2_DIO_DIRTY       0x10000000

#define HAMMER2_DIO_MASK        0x0FFFFFFF
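
/*
 * Layout of dio->refs: the low 28 bits (HAMMER2_DIO_MASK) hold the
 * reference count, while the top four bits carry the state flags defined
 * above.  References and flags are manipulated with atomic ops (fetchadd,
 * cmpset, set), so most state transitions do not require holding
 * hmp->io_spin.
 */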

/*
 * Acquire the requested dio, set *ownerp based on state.  If state is good
 * *ownerp is set to 0, otherwise *ownerp is set to DIO_INPROG and the
 * caller must resolve the buffer.
 */
hammer2_io_t *
hammer2_io_getblk(hammer2_mount_t *hmp, off_t lbase, int lsize, int *ownerp)
{
        hammer2_io_t *dio;
        hammer2_io_t *xio;
        off_t pbase;
        off_t pmask;
        int psize = hammer2_devblksize(lsize);
        int refs;

        pmask = ~(hammer2_off_t)(psize - 1);

        KKASSERT((1 << (int)(lbase & HAMMER2_OFF_MASK_RADIX)) == lsize);
        lbase &= ~HAMMER2_OFF_MASK_RADIX;
        pbase = lbase & pmask;
        KKASSERT(pbase != 0 && ((lbase + lsize - 1) & pmask) == pbase);

        /*
         * Access/Allocate the DIO
         */
        spin_lock_shared(&hmp->io_spin);
        dio = RB_LOOKUP(hammer2_io_tree, &hmp->iotree, pbase);
        if (dio) {
                if ((atomic_fetchadd_int(&dio->refs, 1) &
                     HAMMER2_DIO_MASK) == 0) {
                        atomic_add_int(&dio->hmp->iofree_count, -1);
                }
                spin_unlock_shared(&hmp->io_spin);
        } else {
                spin_unlock_shared(&hmp->io_spin);
                dio = kmalloc(sizeof(*dio), M_HAMMER2, M_INTWAIT | M_ZERO);
                dio->hmp = hmp;
                dio->pbase = pbase;
                dio->psize = psize;
                dio->refs = 1;
                spin_lock(&hmp->io_spin);
                xio = RB_INSERT(hammer2_io_tree, &hmp->iotree, dio);
                if (xio == NULL) {
                        spin_unlock(&hmp->io_spin);
                } else {
                        if ((atomic_fetchadd_int(&xio->refs, 1) &
                             HAMMER2_DIO_MASK) == 0) {
                                atomic_add_int(&xio->hmp->iofree_count, -1);
                        }
                        spin_unlock(&hmp->io_spin);
                        kfree(dio, M_HAMMER2);
                        dio = xio;
                }
        }

        /*
         * Obtain/Validate the buffer.
         */
        for (;;) {
                refs = dio->refs;
                cpu_ccfence();

                /*
                 * Stop if the buffer is good.  Once set GOOD the flag cannot
                 * be cleared until refs drops to 0.
                 */
                if (refs & HAMMER2_DIO_GOOD) {
                        *ownerp = 0;
                        goto done;
                }

                /*
                 * We need to acquire the in-progress lock on the buffer
                 */
                if (refs & HAMMER2_DIO_INPROG) {
                        tsleep_interlock(dio, 0);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_WAITING)) {
                                tsleep(dio, PINTERLOCKED, "h2dio", 0);
                        }
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              refs | HAMMER2_DIO_INPROG)) {
                                break;
                        }
                }
                /* retry */
        }

        /*
         * We need to do more work before the buffer is usable
         */
        *ownerp = HAMMER2_DIO_INPROG;
done:
        if (dio->act < 5)
                ++dio->act;
        return(dio);
}
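
/*
 * Ownership protocol: when hammer2_io_getblk() returns with *ownerp set
 * to HAMMER2_DIO_INPROG the caller owns the in-progress lock and must
 * instantiate (or abandon) dio->bp and then call hammer2_io_complete(),
 * which sets DIO_GOOD if a buffer is present and wakes any waiters.
 * See _hammer2_io_new() and hammer2_io_bread() below for the two
 * synchronous instantiation paths.
 */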

/*
 * If part of an asynchronous I/O the asynchronous I/O is biodone()'d.
 *
 * If the caller owned INPROG then the dio will be set GOOD or not
 * depending on whether the caller disposed of dio->bp or not.
 */
static
void
hammer2_io_complete(hammer2_io_t *dio, int owner)
{
        int refs;
        int good;

        while (owner & HAMMER2_DIO_INPROG) {
                refs = dio->refs;
                cpu_ccfence();
                good = dio->bp ? HAMMER2_DIO_GOOD : 0;
                if (atomic_cmpset_int(&dio->refs, refs,
                                      (refs & ~(HAMMER2_DIO_WAITING |
                                                HAMMER2_DIO_INPROG)) |
                                      good)) {
                        if (refs & HAMMER2_DIO_WAITING)
                                wakeup(dio);
                        if (good)
                                BUF_KERNPROC(dio->bp);
                        break;
                }
                /* retry */
        }
}

/*
 * Release our ref on *diop, dispose of the underlying buffer.
 */
void
hammer2_io_putblk(hammer2_io_t **diop)
{
        hammer2_mount_t *hmp;
        hammer2_io_t *dio;
        struct buf *bp;
        off_t peof;
        off_t pbase;
        int psize;
        int refs;

        dio = *diop;
        *diop = NULL;

        for (;;) {
                refs = dio->refs;

                if ((refs & HAMMER2_DIO_MASK) == 1) {
                        KKASSERT((refs & HAMMER2_DIO_INPROG) == 0);
                        if (atomic_cmpset_int(&dio->refs, refs,
                                              ((refs - 1) &
                                               ~(HAMMER2_DIO_GOOD |
                                                 HAMMER2_DIO_DIRTY)) |
                                              HAMMER2_DIO_INPROG)) {
                                break;
                        }
                        /* retry */
                } else {
                        if (atomic_cmpset_int(&dio->refs, refs, refs - 1))
                                return;
                        /* retry */
                }
                /* retry */
        }

        /*
         * Locked INPROG on 1->0 transition and we cleared DIO_GOOD (which is
         * legal only on the last ref).  This allows us to dispose of the
         * buffer.  refs is now 0.
         *
         * The instant we call io_complete dio is a free agent again and
         * can be ripped out from under us.  Acquisition of the dio after
         * this point will require a shared or exclusive spinlock.
         */
        hmp = dio->hmp;
        bp = dio->bp;
        dio->bp = NULL;
        pbase = dio->pbase;
        psize = dio->psize;
        atomic_add_int(&hmp->iofree_count, 1);
        hammer2_io_complete(dio, HAMMER2_DIO_INPROG);   /* clears INPROG */
        dio = NULL;     /* dio stale */

        if (refs & HAMMER2_DIO_GOOD) {
                KKASSERT(bp != NULL);
                if (refs & HAMMER2_DIO_DIRTY) {
                        if (hammer2_cluster_enable) {
                                peof = (pbase + HAMMER2_SEGMASK64) &
                                       ~HAMMER2_SEGMASK64;
                                cluster_write(bp, peof, psize, 4);
                        } else {
                                bp->b_flags |= B_CLUSTEROK;
                                bdwrite(bp);
                        }
                } else if (bp->b_flags & (B_ERROR | B_INVAL | B_RELBUF)) {
                        brelse(bp);
                } else {
                        bqrelse(bp);
                }
        }

        /*
         * We cache free buffers so re-use cases can use a shared lock, but
         * if too many build up we have to clean them out.
         */
        if (hmp->iofree_count > 1000) {
                struct hammer2_cleanupcb_info info;

                RB_INIT(&info.tmptree);
                spin_lock(&hmp->io_spin);
                if (hmp->iofree_count > 1000) {
                        info.count = hmp->iofree_count / 2;
                        RB_SCAN(hammer2_io_tree, &hmp->iotree, NULL,
                                hammer2_io_cleanup_callback, &info);
                }
                spin_unlock(&hmp->io_spin);
                hammer2_io_cleanup(hmp, &info.tmptree);
        }
}
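
/*
 * Note on the free-dio cache: a dio whose ref count drops to zero is left
 * in hmp->iotree (iofree_count is bumped in hammer2_io_putblk() above) so
 * a later hammer2_io_getblk() can re-use it under the shared spinlock.
 * The cleanup scan below ages entries via dio->act, which getblk bumps on
 * each access (capped at 5); only entries whose act has decayed to zero
 * are removed and freed.
 */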

/*
 * Cleanup any dio's with no references which are not in-progress.
 */
static
int
hammer2_io_cleanup_callback(hammer2_io_t *dio, void *arg)
{
        struct hammer2_cleanupcb_info *info = arg;
        hammer2_io_t *xio;

        if ((dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0) {
                if (dio->act > 0) {
                        --dio->act;
                        return 0;
                }
                KKASSERT(dio->bp == NULL);
                RB_REMOVE(hammer2_io_tree, &dio->hmp->iotree, dio);
                xio = RB_INSERT(hammer2_io_tree, &info->tmptree, dio);
                KKASSERT(xio == NULL);
                if (--info->count <= 0) /* limit scan */
                        return(-1);
        }
        return 0;
}

void
hammer2_io_cleanup(hammer2_mount_t *hmp, struct hammer2_io_tree *tree)
{
        hammer2_io_t *dio;

        while ((dio = RB_ROOT(tree)) != NULL) {
                RB_REMOVE(hammer2_io_tree, tree, dio);
                KKASSERT(dio->bp == NULL &&
                    (dio->refs & (HAMMER2_DIO_MASK | HAMMER2_DIO_INPROG)) == 0);
                kfree(dio, M_HAMMER2);
                atomic_add_int(&hmp->iofree_count, -1);
        }
}

char *
hammer2_io_data(hammer2_io_t *dio, off_t lbase)
{
        struct buf *bp;
        int off;

        bp = dio->bp;
        KKASSERT(bp != NULL);
        off = (lbase & ~HAMMER2_OFF_MASK_RADIX) - bp->b_loffset;
        KKASSERT(off >= 0 && off < bp->b_bufsize);
        return(bp->b_data + off);
}

static
int
_hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop, int dozero, int quick)
{
        hammer2_io_t *dio;
        int owner;
        int error;

        dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
        if (owner) {
                if (lsize == dio->psize) {
                        dio->bp = getblk(hmp->devvp,
                                         dio->pbase, dio->psize,
                                         (quick ? GETBLK_NOWAIT : 0),
                                         0);
                        if (dio->bp) {
                                vfs_bio_clrbuf(dio->bp);
                                if (quick) {
                                        dio->bp->b_flags |= B_CACHE;
                                        bqrelse(dio->bp);
                                        dio->bp = NULL;
                                }
                        }
                        error = 0;
                } else if (quick) {
                        /* do nothing */
                        error = 0;
                } else {
                        error = bread(hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
                hammer2_io_complete(dio, owner);
        } else {
                error = 0;
        }
        if (dio->bp) {
                if (dozero)
                        bzero(hammer2_io_data(dio, lbase), lsize);
                atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
        }
        return error;
}

int
hammer2_io_new(hammer2_mount_t *hmp, off_t lbase, int lsize,
               hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 1, 0));
}

int
hammer2_io_newnz(hammer2_mount_t *hmp, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 0));
}

int
hammer2_io_newq(hammer2_mount_t *hmp, off_t lbase, int lsize,
                hammer2_io_t **diop)
{
        return(_hammer2_io_new(hmp, lbase, lsize, diop, 0, 1));
}

int
hammer2_io_bread(hammer2_mount_t *hmp, off_t lbase, int lsize,
                 hammer2_io_t **diop)
{
        hammer2_io_t *dio;
        off_t peof;
        int owner;
        int error;

        dio = *diop = hammer2_io_getblk(hmp, lbase, lsize, &owner);
        if (owner) {
                if (hammer2_cluster_enable) {
                        peof = (dio->pbase + HAMMER2_SEGMASK64) &
                               ~HAMMER2_SEGMASK64;
                        error = cluster_read(hmp->devvp, peof, dio->pbase,
                                             dio->psize,
                                             dio->psize, HAMMER2_PBUFSIZE*4,
                                             &dio->bp);
                } else {
                        error = bread(hmp->devvp, dio->pbase,
                                      dio->psize, &dio->bp);
                }
                if (error) {
                        brelse(dio->bp);
                        dio->bp = NULL;
                }
                hammer2_io_complete(dio, owner);
        } else {
                error = 0;
        }
        return error;
}
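
/*
 * Asynchronous read.  If we win the DIO_INPROG race the device buffer is
 * read via breadcb() and the supplied callback runs from the bio
 * completion (hammer2_io_callback() below).  Otherwise the buffer is
 * already good and the callback is invoked synchronously.  Either way the
 * callback runs with a ref on the dio, released afterwards via
 * hammer2_io_bqrelse().
 */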
void
hammer2_io_breadcb(hammer2_mount_t *hmp, off_t lbase, int lsize,
                   void (*callback)(hammer2_io_t *dio,
                                    hammer2_cluster_t *arg_l,
                                    hammer2_chain_t *arg_c,
                                    void *arg_p, off_t arg_o),
                   hammer2_cluster_t *arg_l, hammer2_chain_t *arg_c,
                   void *arg_p, off_t arg_o)
{
        hammer2_io_t *dio;
        int owner;
        int error;

        dio = hammer2_io_getblk(hmp, lbase, lsize, &owner);
        if (owner) {
                dio->callback = callback;
                dio->arg_l = arg_l;
                dio->arg_c = arg_c;
                dio->arg_p = arg_p;
                dio->arg_o = arg_o;
                breadcb(hmp->devvp, dio->pbase, dio->psize,
                        hammer2_io_callback, dio);
        } else {
                error = 0;
                callback(dio, arg_l, arg_c, arg_p, arg_o);
                hammer2_io_bqrelse(&dio);
        }
}

static void
hammer2_io_callback(struct bio *bio)
{
        struct buf *dbp = bio->bio_buf;
        hammer2_io_t *dio = bio->bio_caller_info1.ptr;

        if ((bio->bio_flags & BIO_DONE) == 0)
                bpdone(dbp, 0);
        bio->bio_flags &= ~(BIO_DONE | BIO_SYNC);
        dio->bp = bio->bio_buf;
        KKASSERT((dio->bp->b_flags & B_ERROR) == 0);    /* XXX */
        hammer2_io_complete(dio, HAMMER2_DIO_INPROG);

        /*
         * We still have the ref and DIO_GOOD is now set so nothing else
         * should mess with the callback fields until we release the dio.
         */
        dio->callback(dio, dio->arg_l, dio->arg_c, dio->arg_p, dio->arg_o);
        hammer2_io_bqrelse(&dio);
        /* TODO: async load meta-data and assign chain->dio */
}

void
hammer2_io_bawrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

void
hammer2_io_bdwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
}

int
hammer2_io_bwrite(hammer2_io_t **diop)
{
        atomic_set_int(&(*diop)->refs, HAMMER2_DIO_DIRTY);
        hammer2_io_putblk(diop);
        return (0);     /* XXX */
}

void
hammer2_io_setdirty(hammer2_io_t *dio)
{
        atomic_set_int(&dio->refs, HAMMER2_DIO_DIRTY);
}

void
hammer2_io_setinval(hammer2_io_t *dio, u_int bytes)
{
        if ((u_int)dio->psize == bytes)
                dio->bp->b_flags |= B_INVAL | B_RELBUF;
}

void
hammer2_io_brelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

void
hammer2_io_bqrelse(hammer2_io_t **diop)
{
        hammer2_io_putblk(diop);
}

int
hammer2_io_isdirty(hammer2_io_t *dio)
{
        return((dio->refs & HAMMER2_DIO_DIRTY) != 0);
}
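
/*
 * Illustrative usage sketch (hypothetical caller; the variable names are
 * invented for illustration).  A typical synchronous read/modify path
 * through this API looks roughly like:
 *
 *      hammer2_io_t *dio;
 *      char *data;
 *      int error;
 *
 *      error = hammer2_io_bread(hmp, lbase, lsize, &dio);
 *      if (error == 0) {
 *              data = hammer2_io_data(dio, lbase);
 *              ... read or modify up to lsize bytes at data, calling
 *                  hammer2_io_setdirty(dio) if modified ...
 *      }
 *      hammer2_io_bqrelse(&dio);
 */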