/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t		noop_prepare;
static dsched_teardown_t	noop_teardown;
static dsched_cancel_t		noop_cancel;
static dsched_queue_t		noop_queue;

static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);

static int	dsched_inited = 0;
static int	default_set = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;

TAILQ_HEAD(, dsched_thread_ctx)	dsched_tdctx_list =
		TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
		TAILQ_HEAD_INITIALIZER(dsched_policy_list);

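/*
 * Fallback policy: passes every bio straight through to the raw device
 * without any reordering.  It is also what a disk is temporarily switched
 * to while another policy is being torn down.
 */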
static struct dsched_policy dsched_noop_policy = {
	.name = "noop",

	.prepare = noop_prepare,
	.teardown = noop_teardown,
	.cancel_all = noop_cancel,
	.bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a SYSCTL and TUNABLE controlled debug output function
 * using kvprintf
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}

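/*
 * Illustrative loader.conf settings consulted by the callback below
 * ("fq" stands for any registered policy name):
 *
 *   dsched.policy.default="fq"	# fallback for all disks
 *   dsched.policy.da="fq"	# all disks of the da driver
 *   dsched.policy.da0="fq"	# a single disk, looked up first
 */
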
/*
 * Called on disk_create().  Tries to read which policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	char *ptr;
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	/* kprintf("dsched_disk_create_callback() for disk %s%d\n", head_name, unit); */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s%d",
	    head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    head_name);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && !default_set && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		if (!default_set) {
			dsched_debug(0, "No policy for %s%d specified, "
			    "or policy not found\n", head_name, unit);
		}
		dsched_set_policy(dp, default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
		ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
	else
		ksnprintf(tunable_key, sizeof(tunable_key), "%s%d", head_name, unit);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo).  This will check
 * if there's any policy associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy().  Shuts down the scheduler core and cancels
 * all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}


void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;

	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, let dsched dispatch */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	TAILQ_FOREACH(tdio, &tdctx->tdio_list, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);
	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}

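/*
 * Policy registration / selection API.  Policies live on
 * dsched_policy_list, protected by dsched_lock, and are reference
 * counted while registered or attached to a disk.
 */
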
/*
 * Called from each policy's module_init or module_attach; registers the
 * policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR, "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}

/*
 * Called from each policy's module_detach; unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}


/*
 * Switches the policy of a disk by first tearing down the old one and
 * then setting up the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);
	return 0;
}


/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if it is locked already; if not, we acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;
	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n", dp->d_cdev->si_name,
	    new_policy->name);

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}

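/*
 * Lookup helpers.  dsched_find_policy() searches the registered policy
 * list by name; dsched_find_disk() walks the system disk list by device
 * name.  Both return NULL if nothing matches.
 */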
struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if it is locked already; if not, we acquire the dsched lock */
	if (lockstatus(&dsched_lock, curthread) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}

struct disk *
dsched_find_disk(char *search)
{
	struct disk *dp_found = NULL;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(dp))) {
		if (!strcmp(dp->d_cdev->si_name, search)) {
			dp_found = dp;
			break;
		}
	}

	return dp_found;
}

struct disk *
dsched_disk_enumerate(struct disk *dp, struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(dp))) {
		if (dp->d_sched_policy == policy)
			return dp;
	}

	return NULL;
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case, we
	 * leave it in to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL, ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}

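/*
 * dsched_strategy_sync() pushes the bio through a private pbuf and
 * biowait()s for it, so the caller sees a fully synchronous I/O;
 * dsched_strategy_async() stacks a new bio with the supplied completion
 * routine and timestamps it before handing it to the raw device.
 */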
void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
#if 0
	/*
	 * Buffers undergoing device I/O do not need a kvabase/size.
	 */
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
#endif
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
#if 0
	nbp->b_kvabase = NULL;
	nbp->b_kvasize = 0;
#endif
	relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done, void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}

void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);

	KKASSERT(refcount >= 0);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);

	KKASSERT(refcount >= 0);
}

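/*
 * The _unref() routines below drop one reference.  Once the count
 * reaches zero the object is marked as being destroyed (by subtracting
 * 0x400 from the refcount), unlinked from the lists it is on and
 * returned to the objcache it was allocated from.
 */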
void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&diskctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
		print_backtrace(4);
#endif
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		TAILQ_FOREACH_MUTABLE(tdio, &diskctx->tdio_list, dlink, tdio2) {
			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;
			dsched_thread_io_unref(tdio);
		}
		lockmgr(&diskctx->lock, LK_RELEASE);
		if (diskctx->dp->d_sched_policy->destroy_diskctx)
			diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
		objcache_put(dsched_diskctx_cache, diskctx);
		atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
	}
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdio->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdio (%p) destruction started, trace:\n", tdio);
		print_backtrace(8);
#endif
		diskctx = tdio->diskctx;
		KKASSERT(diskctx != NULL);
		KKASSERT(tdio->qlength == 0);

		if (tdio->flags & DSCHED_LINKED_DISK_CTX) {
			lockmgr(&diskctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
			tdio->flags &= ~DSCHED_LINKED_DISK_CTX;

			lockmgr(&diskctx->lock, LK_RELEASE);
		}

		if (tdio->flags & DSCHED_LINKED_THREAD_CTX) {
			tdctx = tdio->tdctx;
			KKASSERT(tdctx != NULL);

			lockmgr(&tdctx->lock, LK_EXCLUSIVE);

			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;

			lockmgr(&tdctx->lock, LK_RELEASE);
		}
		if (tdio->diskctx->dp->d_sched_policy->destroy_tdio)
			tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		objcache_put(dsched_tdio_cache, tdio);
		atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
#if 0
		dsched_disk_ctx_unref(diskctx);
#endif
	}
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio, *tdio2;
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, -1);

	KKASSERT(refcount >= 0 || refcount <= -0x400);

	if (refcount == 1) {
		atomic_subtract_int(&tdctx->refcount, 0x400); /* mark as: in destruction */
#if 0
		kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
		print_backtrace(8);
#endif
		DSCHED_GLOBAL_THREAD_CTX_LOCK();

		TAILQ_FOREACH_MUTABLE(tdio, &tdctx->tdio_list, link, tdio2) {
			TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
			tdio->flags &= ~DSCHED_LINKED_THREAD_CTX;
			dsched_thread_io_unref(tdio);
		}
		TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

		DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

		objcache_put(dsched_tdctx_cache, tdctx);
		atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
	}
}


struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	/* XXX: maybe we do need another ref for the disk list for tdio */
	dsched_thread_io_ref(tdio);

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	tdio->flags |= DSCHED_LINKED_DISK_CTX;

	if (tdctx) {
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;

		/* Put the tdio in the tdctx list */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
		tdio->flags |= DSCHED_LINKED_THREAD_CTX;
	}

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
	return tdio;
}


struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}

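/*
 * dsched_thread_ctx_alloc() creates the per-thread/process context and
 * pre-allocates a tdio for every disk currently known to the system,
 * then links the new context onto the global tdctx list.
 */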
struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct disk *dp = NULL;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	/* XXX */
	while ((dp = disk_enumerate(dp))) {
		tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	struct dsched_thread_io *tdio;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link) {
		tdio = dsched_thread_io_alloc(dp, tdctx, pol);
	}
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

}

void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx); /* from prepare */
	dsched_disk_ctx_unref(diskctx); /* from alloc */

	dsched_set_disk_priv(dp, NULL);
}

void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. we don't want this assert because we aren't catching all
	 * threads. mi_startup() is still getting away without a tdctx.
	 */

	/* by now we should have a tdctx. if not, something bad is going on */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}

void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}


void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}

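/*
 * The exit hooks below undo dsched_new_proc()/dsched_new_thread(): the
 * context is marked dead, the priv pointer is cleared and both the
 * allocation reference and the extra reference taken at creation time
 * are dropped.
 */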
void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}


void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, 0);

	dsched_thread_ctx_unref(tdctx); /* one for alloc, */
	dsched_thread_ctx_unref(tdctx); /* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}

struct dsched_thread_io *
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;

	tdctx = dsched_get_thread_priv(curthread);
	KKASSERT(tdctx != NULL);

	tdio = dsched_thread_io_alloc(diskctx->dp, tdctx, pol);
	return tdio;
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{

}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{

}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
	return 0;
}

/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_io_malloc_args );

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_ctx_malloc_args );

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_disk_ctx_malloc_args );

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_noop_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST, dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY, dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats, sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;

	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}

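/*
 * Handler behind the per-disk dsched.policy.<disk> sysctl: reading it
 * reports the disk's active policy, writing the name of a registered
 * policy switches the disk over via dsched_switch().
 */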
static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *pol = NULL;
	int error;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = default_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	default_set = 1;
	default_policy = pol;

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");

static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx, SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}