/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

TAILQ_HEAD(tdio_list_head, dsched_thread_io);

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t		noop_prepare;
static dsched_teardown_t	noop_teardown;
static dsched_cancel_t		noop_cancel;
static dsched_queue_t		noop_queue;

static void dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio);
static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
static void dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx);
static void dsched_thread_io_destroy(struct dsched_thread_io *tdio);
static void dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx);

static int	dsched_inited = 0;
static int	default_set = 0;

struct lock	dsched_lock;
static int	dsched_debug_enable = 0;

struct dsched_stats	dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
	DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
	DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
	DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };

static struct objcache	*dsched_diskctx_cache;
static struct objcache	*dsched_tdctx_cache;
static struct objcache	*dsched_tdio_cache;

TAILQ_HEAD(, dsched_thread_ctx)	dsched_tdctx_list =
		TAILQ_HEAD_INITIALIZER(dsched_tdctx_list);

struct lock	dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
		TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_noop_policy = {
	.name = "noop",

	.prepare = noop_prepare,
	.teardown = noop_teardown,
	.cancel_all = noop_cancel,
	.bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a SYSCTL- and TUNABLE-controlled debug output
 * function using kvprintf.
 */
int
dsched_debug(int level, char *fmt, ...)
{
	__va_list ap;

	__va_start(ap, fmt);
	if (level <= dsched_debug_enable)
		kvprintf(fmt, ap);
	__va_end(ap);

	return 0;
}

/*
 * Called on disk_create(). Tries to read which policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	char *ptr;
	struct dsched_policy *policy = NULL;

	/* Also look for serno stuff? */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key),
	    "dsched.policy.%s%d", head_name, unit);
	if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key),
	    "dsched.policy.%s", head_name);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
	if (!policy && !default_set &&
	    (TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (!policy) {
		if (!default_set && bootverbose) {
			dsched_debug(0,
			    "No policy for %s%d specified, "
			    "or policy not found\n",
			    head_name, unit);
		}
		dsched_set_policy(dp, default_policy);
	} else {
		dsched_set_policy(dp, policy);
	}

	if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
		ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
	else
		ksnprintf(tunable_key, sizeof(tunable_key), "%s%d",
		    head_name, unit);
	for (ptr = tunable_key; *ptr; ptr++) {
		if (*ptr == '/')
			*ptr = '-';
	}
	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    tunable_key);

	lockmgr(&dsched_lock, LK_RELEASE);
}
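
/*
 * For illustration, a sketch of the loader.conf tunables the callback
 * above consumes (the device names here are hypothetical):
 *
 *	dsched.policy.ad0="foo"		# policy for one specific unit
 *	dsched.policy.ad="foo"		# policy for a whole driver head
 *	dsched.policy.default="noop"	# fallback default policy
 *
 * Slashes in head names (e.g. "mapper/...") are rewritten to dashes
 * before the tunable is looked up.
 */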

/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo). This will check
 * if there's any policy associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
	char tunable_key[SPECNAMELEN + 48];
	char sched_policy[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *policy = NULL;

	if (info->d_serialno == NULL)
		return;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
	    info->d_serialno);

	if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
	    sizeof(sched_policy)) != 0)) {
		policy = dsched_find_policy(sched_policy);
	}

	if (policy) {
		dsched_switch(dp, policy);
	}

	dsched_sysctl_add_disk(
	    (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
	    info->d_serialno);

	lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy(). Shuts down the scheduler core and cancels
 * all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
	struct dsched_policy *old_policy;
	struct dsched_disk_ctx *diskctx;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	diskctx = dsched_get_disk_priv(dp);

	old_policy = dp->d_sched_policy;
	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->cancel_all(dsched_get_disk_priv(dp));
	old_policy->teardown(dsched_get_disk_priv(dp));

	if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
		sysctl_ctx_free(&diskctx->sysctl_ctx);

	policy_destroy(dp);
	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	lockmgr(&dsched_lock, LK_RELEASE);
}


void
dsched_queue(struct disk *dp, struct bio *bio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_thread_io *tdio;
	struct dsched_disk_ctx *diskctx;

	int found = 0, error = 0;

	tdctx = dsched_get_buf_priv(bio->bio_buf);
	if (tdctx == NULL) {
		/* We don't handle this case, dispatch the bio directly */
		atomic_add_int(&dsched_stats.no_tdctx, 1);
		dsched_strategy_raw(dp, bio);
		return;
	}

	DSCHED_THREAD_CTX_LOCK(tdctx);

	KKASSERT(!TAILQ_EMPTY(&tdctx->tdio_list));
	/*
	 * XXX:
	 * Iterate in reverse to make sure we find the most up-to-date
	 * tdio for a given disk. After a switch it may take some time
	 * for everything to clean up.
	 */
	TAILQ_FOREACH_REVERSE(tdio, &tdctx->tdio_list, tdio_list_head, link) {
		if (tdio->dp == dp) {
			dsched_thread_io_ref(tdio);
			found = 1;
			break;
		}
	}

	DSCHED_THREAD_CTX_UNLOCK(tdctx);
	dsched_clr_buf_priv(bio->bio_buf);
	dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

	KKASSERT(found == 1);
	diskctx = dsched_get_disk_priv(dp);
	dsched_disk_ctx_ref(diskctx);

	if (dp->d_sched_policy != &dsched_noop_policy)
		KKASSERT(tdio->debug_policy == dp->d_sched_policy);

	KKASSERT(tdio->debug_inited == 0xF00F1234);

	error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

	if (error) {
		dsched_strategy_raw(dp, bio);
	}
	dsched_disk_ctx_unref(diskctx);
	dsched_thread_io_unref(tdio);
}
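
#if 0
/*
 * Illustrative sketch (not compiled): a policy module fills in a struct
 * dsched_policy and hands it to dsched_register() below from its module
 * init; the example_* callbacks are hypothetical.
 */
static struct dsched_policy example_policy = {
	.name = "example",

	.prepare = example_prepare,
	.teardown = example_teardown,
	.cancel_all = example_cancel,
	.bio_queue = example_queue
};

/* from the module's init/attach code: */
error = dsched_register(&example_policy);	/* EEXIST if name is taken */
#endif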

/*
 * Called from each module_init or module_attach of each policy;
 * registers the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;
	int error = 0;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	policy = dsched_find_policy(d_policy->name);

	if (!policy) {
		TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
		atomic_add_int(&d_policy->ref_count, 1);
	} else {
		dsched_debug(LOG_ERR,
		    "Policy with name %s already registered!\n",
		    d_policy->name);
		error = EEXIST;
	}

	lockmgr(&dsched_lock, LK_RELEASE);
	return error;
}

/*
 * Called from each module_detach of each policy;
 * unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
	struct dsched_policy *policy;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	policy = dsched_find_policy(d_policy->name);

	if (policy) {
		if (policy->ref_count > 1) {
			lockmgr(&dsched_lock, LK_RELEASE);
			return EBUSY;
		}
		TAILQ_REMOVE(&dsched_policy_list, policy, link);
		atomic_subtract_int(&policy->ref_count, 1);
		KKASSERT(policy->ref_count == 0);
	}
	lockmgr(&dsched_lock, LK_RELEASE);

	return 0;
}


/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
	struct dsched_policy *old_policy;

	/* If we are asked to set the same policy, do nothing */
	if (dp->d_sched_policy == new_policy)
		return 0;

	/* lock everything down, diskwise */
	lockmgr(&dsched_lock, LK_EXCLUSIVE);
	old_policy = dp->d_sched_policy;

	atomic_subtract_int(&old_policy->ref_count, 1);
	KKASSERT(old_policy->ref_count >= 0);

	dp->d_sched_policy = &dsched_noop_policy;
	old_policy->teardown(dsched_get_disk_priv(dp));
	policy_destroy(dp);

	/* Bring everything back to life */
	dsched_set_policy(dp, new_policy);
	lockmgr(&dsched_lock, LK_RELEASE);

	return 0;
}


/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
	int locked = 0;

	/* Check if it is locked already. If not, we acquire the dsched lock */
	if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	DSCHED_GLOBAL_THREAD_CTX_LOCK();

	policy_new(dp, new_policy);
	new_policy->prepare(dsched_get_disk_priv(dp));
	dp->d_sched_policy = new_policy;

	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&new_policy->ref_count, 1);
	kprintf("disk scheduler: set policy of %s to %s\n",
	    dp->d_cdev->si_name, new_policy->name);

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);
}
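
/*
 * Look up a registered policy by name. Returns NULL if no policy with
 * that name has been registered.
 */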

struct dsched_policy *
dsched_find_policy(char *search)
{
	struct dsched_policy *policy;
	struct dsched_policy *policy_found = NULL;
	int locked = 0;

	/* Check if it is locked already. If not, we acquire the dsched lock */
	if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
		lockmgr(&dsched_lock, LK_EXCLUSIVE);
		locked = 1;
	}

	TAILQ_FOREACH(policy, &dsched_policy_list, link) {
		if (!strcmp(policy->name, search)) {
			policy_found = policy;
			break;
		}
	}

	/* If we acquired the lock, we also get rid of it */
	if (locked)
		lockmgr(&dsched_lock, LK_RELEASE);

	return policy_found;
}

/*
 * Returns ref'd disk
 */
struct disk *
dsched_find_disk(char *search)
{
	struct disk marker;
	struct disk *dp = NULL;

	while ((dp = disk_enumerate(&marker, dp)) != NULL) {
		if (strcmp(dp->d_cdev->si_name, search) == 0) {
			disk_enumerate_stop(&marker, NULL);
			/* leave ref on dp */
			break;
		}
	}
	return dp;
}

struct disk *
dsched_disk_enumerate(struct disk *marker, struct disk *dp,
    struct dsched_policy *policy)
{
	while ((dp = disk_enumerate(marker, dp)) != NULL) {
		if (dp->d_sched_policy == policy)
			break;
	}
	/* NULL once the enumeration is exhausted */
	return dp;
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
	if (!pol)
		return (TAILQ_FIRST(&dsched_policy_list));
	else
		return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
	bp->bio_buf->b_error = ENXIO;
	bp->bio_buf->b_flags |= B_ERROR;
	bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

	biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
	/*
	 * Ideally, this stuff shouldn't be needed... but just in case,
	 * we leave it in to avoid panics.
	 */
	KASSERT(dp->d_rawdev != NULL,
	    ("dsched_strategy_raw sees NULL d_rawdev!!"));
	if (bp->bio_track != NULL) {
		dsched_debug(LOG_INFO,
		    "dsched_strategy_raw sees non-NULL bio_track!! "
		    "bio: %p\n", bp);
		bp->bio_track = NULL;
	}
	dev_dstrategy(dp->d_rawdev, bp);
}
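
/*
 * Synchronously dispatch a bio to the raw device: the original buf is
 * cloned into a temporary pbuf which is issued with a synchronous
 * biodone and waited upon; resid and error are then copied back and
 * the original bio is completed.
 */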

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
	struct buf *bp, *nbp;
	struct bio *nbio;

	bp = bio->bio_buf;

	nbp = getpbuf(NULL);
	nbio = &nbp->b_bio1;

	nbp->b_cmd = bp->b_cmd;
	nbp->b_bufsize = bp->b_bufsize;
	nbp->b_runningbufspace = bp->b_runningbufspace;
	nbp->b_bcount = bp->b_bcount;
	nbp->b_resid = bp->b_resid;
	nbp->b_data = bp->b_data;
#if 0
	/*
	 * Buffers undergoing device I/O do not need a kvabase/size.
	 */
	nbp->b_kvabase = bp->b_kvabase;
	nbp->b_kvasize = bp->b_kvasize;
#endif
	nbp->b_dirtyend = bp->b_dirtyend;

	nbio->bio_done = biodone_sync;
	nbio->bio_flags |= BIO_SYNC;
	nbio->bio_track = NULL;

	nbio->bio_caller_info1.ptr = dp;
	nbio->bio_offset = bio->bio_offset;

	dev_dstrategy(dp->d_rawdev, nbio);
	biowait(nbio, "dschedsync");
	bp->b_resid = nbp->b_resid;
	bp->b_error = nbp->b_error;
	biodone(bio);
#if 0
	nbp->b_kvabase = NULL;
	nbp->b_kvasize = 0;
#endif
	relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done,
    void *priv)
{
	struct bio *nbio;

	nbio = push_bio(bio);
	nbio->bio_done = done;
	nbio->bio_offset = bio->bio_offset;

	dsched_set_bio_dp(nbio, dp);
	dsched_set_bio_priv(nbio, priv);

	getmicrotime(&nbio->bio_caller_info3.tv);
	dev_dstrategy(dp->d_rawdev, nbio);
}

/*
 * A special biodone callback used by policies that implement
 * request polling.
 */
static void
request_polling_biodone(struct bio *bp)
{
	struct dsched_disk_ctx *diskctx = NULL;
	struct disk *dp = NULL;
	struct bio *obio;
	struct dsched_policy *policy;

	dp = dsched_get_bio_dp(bp);
	policy = dp->d_sched_policy;
	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx && policy);
	dsched_disk_ctx_ref(diskctx);

	/*
	 * XXX:
	 * The bio_done function must not block!
	 */
	if (diskctx->dp->d_sched_policy->bio_done)
		diskctx->dp->d_sched_policy->bio_done(bp);

	obio = pop_bio(bp);
	biodone(obio);

	atomic_subtract_int(&diskctx->current_tag_queue_depth, 1);

	/*
	 * Call the polling function.
	 * XXX:
	 * The polling function must not block!
	 */
	if (policy->polling_func)
		policy->polling_func(diskctx);
	else
		dsched_debug(0, "dsched: the policy uses request polling "
		    "without a polling function!\n");
	dsched_disk_ctx_unref(diskctx);
}

/*
 * A special dsched strategy used by policies that implement request
 * polling (i.e. provide a polling function).
 *
 * This strategy works just like dsched_strategy_async(), but the
 * biodone callback is set to a preset one, request_polling_biodone().
 *
 * If the policy needs its own biodone callback, it should register it
 * in the policy structure (bio_done field).
 *
 * The current_tag_queue_depth is maintained by this function and by
 * request_polling_biodone().
 */
void
dsched_strategy_request_polling(struct disk *dp, struct bio *bio,
    struct dsched_disk_ctx *diskctx)
{
	atomic_add_int(&diskctx->current_tag_queue_depth, 1);
	dsched_strategy_async(dp, bio, request_polling_biodone,
	    dsched_get_bio_priv(bio));
}
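
#if 0
/*
 * Illustrative sketch (not compiled): a polling policy's polling
 * function would dispatch queued bios only while the device's tag
 * queue has room; example_dequeue() is a hypothetical helper.
 */
static void
example_polling_func(struct dsched_disk_ctx *diskctx)
{
	struct bio *bio;

	while (diskctx->current_tag_queue_depth <
	       diskctx->max_tag_queue_depth) {
		bio = example_dequeue(diskctx);
		if (bio == NULL)
			break;
		dsched_strategy_request_polling(diskctx->dp, bio, diskctx);
	}
}
#endif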

/*
 * Ref and deref various structures. The 1->0 transition of the reference
 * count actually transitions 1->0x80000000 and causes the object to be
 * destroyed. It is possible for transitory references to occur on the
 * object while it is being destroyed. We use bit 31 to indicate that
 * destruction is in progress and to prevent nested destructions.
 */
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&diskctx->refcount, 1);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdio->refcount, 1);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
	int refcount;

	refcount = atomic_fetchadd_int(&tdctx->refcount, 1);
}

void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for diskctx and nested destruction
	 * recursions. If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = diskctx->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs)) {
			dsched_disk_ctx_destroy(diskctx);
			break;
		}
	}
}

static
void
dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx)
{
	struct dsched_thread_io *tdio;
	int refs;
	int nrefs;

#if 0
	kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
	print_backtrace(4);
#endif
	lockmgr(&diskctx->lock, LK_EXCLUSIVE);
	while ((tdio = TAILQ_FIRST(&diskctx->tdio_list)) != NULL) {
		KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
		TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
		tdio->diskctx = NULL;
		/* XXX tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio);*/
		lockmgr(&diskctx->lock, LK_RELEASE);
		dsched_thread_io_unref_destroy(tdio);
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
	}
	lockmgr(&diskctx->lock, LK_RELEASE);

	/*
	 * Expect diskctx->refcount to be 0x80000000. If it isn't someone
	 * else still has a temporary ref on the diskctx and we have to
	 * transition it back to an undestroyed-state (albeit without any
	 * associations), so the other user destroys it properly when the
	 * ref is released.
	 */
	while ((refs = diskctx->refcount) != 0x80000000) {
		kprintf("dsched_disk_ctx: destroy race diskctx=%p\n", diskctx);
		cpu_ccfence();
		KKASSERT(refs & 0x80000000);
		nrefs = refs & 0x7FFFFFFF;
		if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
			return;
	}

	/*
	 * Really for sure now.
	 */
	if (diskctx->dp->d_sched_policy->destroy_diskctx)
		diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
	objcache_put(dsched_diskctx_cache, diskctx);
	atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
}
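
/*
 * Worked example of the refcount scheme above: an object whose last ref
 * is dropped transitions 1->0x80000000 and the destructor runs. A
 * transitory ref taken during destruction yields 0x80000001, and the
 * matching unref (0x80000001->0x80000000) is handled as a plain
 * decrement, so the destructor is never entered twice.
 */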

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for tdio and nested destruction
	 * recursions. If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = tdio->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
			dsched_thread_io_destroy(tdio);
			break;
		}
	}
}

/*
 * Unref and destroy the tdio even if additional refs are present.
 */
static
void
dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio)
{
	int refs;
	int nrefs;

	/*
	 * If not already transitioned to destroy-in-progress we transition
	 * to destroy-in-progress, cleanup our ref, and destroy the tdio.
	 */
	for (;;) {
		refs = tdio->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs & 0x80000000) {
			if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs |= 0x80000000;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
			dsched_thread_io_destroy(tdio);
			break;
		}
	}
}

static void
dsched_thread_io_destroy(struct dsched_thread_io *tdio)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;
	int refs;
	int nrefs;

#if 0
	kprintf("tdio (%p) destruction started, trace:\n", tdio);
	print_backtrace(8);
#endif
	KKASSERT(tdio->qlength == 0);

	while ((diskctx = tdio->diskctx) != NULL) {
		dsched_disk_ctx_ref(diskctx);
		lockmgr(&diskctx->lock, LK_EXCLUSIVE);
		if (diskctx != tdio->diskctx) {
			/* raced against a disassociation, retry */
			lockmgr(&diskctx->lock, LK_RELEASE);
			dsched_disk_ctx_unref(diskctx);
			continue;
		}
		KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
		if (diskctx->dp->d_sched_policy->destroy_tdio)
			diskctx->dp->d_sched_policy->destroy_tdio(tdio);
		TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
		tdio->diskctx = NULL;
		dsched_thread_io_unref(tdio);
		lockmgr(&diskctx->lock, LK_RELEASE);
		dsched_disk_ctx_unref(diskctx);
	}
	while ((tdctx = tdio->tdctx) != NULL) {
		dsched_thread_ctx_ref(tdctx);
		lockmgr(&tdctx->lock, LK_EXCLUSIVE);
		if (tdctx != tdio->tdctx) {
			/* raced against a disassociation, retry */
			lockmgr(&tdctx->lock, LK_RELEASE);
			dsched_thread_ctx_unref(tdctx);
			continue;
		}
		KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
		TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		tdio->tdctx = NULL;
		dsched_thread_io_unref(tdio);
		lockmgr(&tdctx->lock, LK_RELEASE);
		dsched_thread_ctx_unref(tdctx);
	}

	/*
	 * Expect tdio->refcount to be 0x80000000. If it isn't someone else
	 * still has a temporary ref on the tdio and we have to transition
	 * it back to an undestroyed-state (albeit without any associations)
	 * so the other user destroys it properly when the ref is released.
	 */
	while ((refs = tdio->refcount) != 0x80000000) {
		kprintf("dsched_thread_io: destroy race tdio=%p\n", tdio);
		cpu_ccfence();
		KKASSERT(refs & 0x80000000);
		nrefs = refs & 0x7FFFFFFF;
		if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
			return;
	}

	/*
	 * Really for sure now.
	 */
	objcache_put(dsched_tdio_cache, tdio);
	atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
}

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
	int refs;
	int nrefs;

	/*
	 * Handle 1->0 transitions for tdctx and nested destruction
	 * recursions. If the refs are already in destruction mode (bit 31
	 * set) on the 1->0 transition we don't try to destruct it again.
	 *
	 * 0x80000001->0x80000000 transitions are handled normally and
	 * thus avoid nested destruction.
	 */
	for (;;) {
		refs = tdctx->refcount;
		cpu_ccfence();
		nrefs = refs - 1;

		KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
		if (nrefs) {
			if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs))
				break;
			continue;
		}
		nrefs = 0x80000000;
		if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs)) {
			dsched_thread_ctx_destroy(tdctx);
			break;
		}
	}
}

static void
dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx)
{
	struct dsched_thread_io *tdio;

#if 0
	kprintf("tdctx (%p) destruction started, trace:\n", tdctx);
	print_backtrace(8);
#endif
	DSCHED_GLOBAL_THREAD_CTX_LOCK();

	lockmgr(&tdctx->lock, LK_EXCLUSIVE);

	while ((tdio = TAILQ_FIRST(&tdctx->tdio_list)) != NULL) {
		KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
		TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
		atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		tdio->tdctx = NULL;
		lockmgr(&tdctx->lock, LK_RELEASE);	/* avoid deadlock */
		dsched_thread_io_unref_destroy(tdio);
		lockmgr(&tdctx->lock, LK_EXCLUSIVE);
	}
	KKASSERT(tdctx->refcount == 0x80000000);
	TAILQ_REMOVE(&dsched_tdctx_list, tdctx, link);

	lockmgr(&tdctx->lock, LK_RELEASE);

	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	objcache_put(dsched_tdctx_cache, tdctx);
	atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
}
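
/*
 * Note that a tdio can sit on two lists at once: the diskctx's
 * tdio_list (via dlink) and the tdctx's tdio_list (via link). Each
 * list membership holds one reference on the tdio, which is why the
 * allocation path below takes two refs up front.
 */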

/*
 * Ensures that a tdio is assigned to tdctx and disk.
 */
void
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_io *tdio;
#if 0
	dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
	tdio = objcache_get(dsched_tdio_cache, M_WAITOK);
	bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

	dsched_thread_io_ref(tdio);	/* prevent ripout */
	dsched_thread_io_ref(tdio);	/* for diskctx ref */

	DSCHED_THREAD_IO_LOCKINIT(tdio);
	tdio->dp = dp;

	tdio->diskctx = dsched_get_disk_priv(dp);
	TAILQ_INIT(&tdio->queue);

	if (pol->new_tdio)
		pol->new_tdio(tdio);

	lockmgr(&tdio->diskctx->lock, LK_EXCLUSIVE);
	TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
	atomic_set_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
	lockmgr(&tdio->diskctx->lock, LK_RELEASE);

	if (tdctx) {
		/*
		 * Put the tdio in the tdctx list. Inherit the temporary
		 * ref (one ref for each list).
		 */
		DSCHED_THREAD_CTX_LOCK(tdctx);
		tdio->tdctx = tdctx;
		tdio->p = tdctx->p;
		TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
		atomic_set_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
		DSCHED_THREAD_CTX_UNLOCK(tdctx);
	} else {
		dsched_thread_io_unref(tdio);
	}

	tdio->debug_policy = pol;
	tdio->debug_inited = 0xF00F1234;

	atomic_add_int(&dsched_stats.tdio_allocations, 1);
}


struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
	bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
	dsched_disk_ctx_ref(diskctx);
	diskctx->dp = dp;
	DSCHED_DISK_CTX_LOCKINIT(diskctx);
	TAILQ_INIT(&diskctx->tdio_list);
	/*
	 * XXX: magic number 32: most devices have a tag queue depth of 32.
	 * Better to retrieve a more precise value from the driver.
	 */
	diskctx->max_tag_queue_depth = 32;
	diskctx->current_tag_queue_depth = 0;

	atomic_add_int(&dsched_stats.diskctx_allocations, 1);
	if (pol->new_diskctx)
		pol->new_diskctx(diskctx);
	return diskctx;
}


struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;
	struct disk marker;
	struct disk *dp;

	tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
	bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
	dsched_thread_ctx_ref(tdctx);
#if 0
	kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
	DSCHED_THREAD_CTX_LOCKINIT(tdctx);
	TAILQ_INIT(&tdctx->tdio_list);
	tdctx->p = p;

	DSCHED_GLOBAL_THREAD_CTX_LOCK();
	dp = NULL;
	while ((dp = disk_enumerate(&marker, dp)) != NULL)
		dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy);

	TAILQ_INSERT_TAIL(&dsched_tdctx_list, tdctx, link);
	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

	atomic_add_int(&dsched_stats.tdctx_allocations, 1);
	/* XXX: no callback here */
	return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_disk_ctx_alloc(dp, pol);
	dsched_disk_ctx_ref(diskctx);
	dsched_set_disk_priv(dp, diskctx);

	/*
	 * XXX this is really really expensive!
	 */
	TAILQ_FOREACH(tdctx, &dsched_tdctx_list, link)
		dsched_thread_io_alloc(dp, tdctx, pol);
}

void
policy_destroy(struct disk *dp)
{
	struct dsched_disk_ctx *diskctx;

	diskctx = dsched_get_disk_priv(dp);
	KKASSERT(diskctx != NULL);

	dsched_disk_ctx_unref(diskctx);	/* from prepare */
	dsched_disk_ctx_unref(diskctx);	/* from alloc */

	dsched_set_disk_priv(dp, NULL);
}
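
/*
 * Buf tracking: dsched_new_buf() attaches the issuing thread's tdctx
 * to the buf and takes a ref on it; the ref is dropped again either in
 * dsched_queue() once the matching tdio has been found, or in
 * dsched_exit_buf() if the buf is released without being queued.
 */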

void
dsched_new_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx = NULL;

	if (dsched_inited == 0)
		return;

	if (curproc != NULL) {
		tdctx = dsched_get_proc_priv(curproc);
	} else {
		/* This is a kernel thread, so no proc info is available */
		tdctx = dsched_get_thread_priv(curthread);
	}

#if 0
	/*
	 * XXX: hack. We don't want this assert because we aren't catching
	 * all threads. mi_startup() is still getting away without a tdctx.
	 */

	/* By now we should have a tdctx. If not, something bad is going on. */
	KKASSERT(tdctx != NULL);
#endif

	if (tdctx) {
		dsched_thread_ctx_ref(tdctx);
	}
	dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
	struct dsched_thread_ctx *tdctx;

	tdctx = dsched_get_buf_priv(bp);
	if (tdctx != NULL) {
		dsched_clr_buf_priv(bp);
		dsched_thread_ctx_unref(tdctx);
	}
}

void
dsched_new_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_thread_ctx_alloc(p);
	tdctx->p = p;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_proc_priv(p, tdctx);
	atomic_add_int(&dsched_stats.nprocs, 1);
}


void
dsched_new_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_thread_ctx_alloc(NULL);
	tdctx->td = td;
	dsched_thread_ctx_ref(tdctx);

	dsched_set_thread_priv(td, tdctx);
	atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(p != NULL);

	tdctx = dsched_get_proc_priv(p);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_proc_priv(p, NULL);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nprocs, 1);
}


void
dsched_exit_thread(struct thread *td)
{
	struct dsched_thread_ctx *tdctx;

	if (dsched_inited == 0)
		return;

	KKASSERT(td != NULL);

	tdctx = dsched_get_thread_priv(td);
	KKASSERT(tdctx != NULL);

	tdctx->dead = 0xDEAD;
	dsched_set_thread_priv(td, NULL);

	dsched_thread_ctx_unref(tdctx);	/* one for alloc, */
	dsched_thread_ctx_unref(tdctx);	/* one for ref */
	atomic_subtract_int(&dsched_stats.nthreads, 1);
}

/*
 * Allocates a tdio for the current thread on the given disk; the tdio
 * holds a ref for each list (diskctx and tdctx) it resides on.
 */
void
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
    struct dsched_policy *pol)
{
	struct dsched_thread_ctx *tdctx;

	DSCHED_GLOBAL_THREAD_CTX_LOCK();

	tdctx = dsched_get_thread_priv(curthread);
	KKASSERT(tdctx != NULL);
	dsched_thread_io_alloc(diskctx->dp, tdctx, pol);

	DSCHED_GLOBAL_THREAD_CTX_UNLOCK();
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
	return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{

}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{

}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
	dsched_strategy_raw(diskctx->dp, bio);
#if 0
	dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
	return 0;
}

/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
	dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_io_malloc_args);

	dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_thread_ctx_malloc_args);

	dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
	    NULL, NULL, NULL,
	    objcache_malloc_alloc,
	    objcache_malloc_free,
	    &dsched_disk_ctx_malloc_args);

	bzero(&dsched_stats, sizeof(struct dsched_stats));

	lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
	DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

	dsched_register(&dsched_noop_policy);

	dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST,
    dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY,
    dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
	return (sysctl_handle_opaque(oidp, &dsched_stats,
	    sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
	struct dsched_policy *pol = NULL;
	int error, first = 1;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	while ((pol = dsched_policy_enumerate(pol))) {
		if (!first) {
			error = SYSCTL_OUT(req, " ", 1);
			if (error)
				break;
		} else {
			first = 0;
		}
		error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
		if (error)
			break;
	}

	lockmgr(&dsched_lock, LK_RELEASE);

	error = SYSCTL_OUT(req, "", 1);

	return error;
}
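
/*
 * The handlers below back the dsched sysctl tree. For illustration
 * (the disk name is hypothetical), from userland one might run:
 *
 *	sysctl dsched.policies			# list available policies
 *	sysctl dsched.policy.ad0=noop		# switch one disk's policy
 *	sysctl dsched.policy.default=noop	# change the default policy
 */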

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_disk_ctx *diskctx = arg1;
	struct dsched_policy *pol = NULL;
	int error;

	if (diskctx == NULL) {
		return 0;
	}

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = diskctx->dp->d_sched_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	dsched_switch(diskctx->dp, pol);

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
	char buf[DSCHED_POLICY_NAME_LENGTH];
	struct dsched_policy *pol = NULL;
	int error;

	lockmgr(&dsched_lock, LK_EXCLUSIVE);

	pol = default_policy;
	memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

	error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
	if (error || req->newptr == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return (error);
	}

	pol = dsched_find_policy(buf);
	if (pol == NULL) {
		lockmgr(&dsched_lock, LK_RELEASE);
		return 0;
	}

	default_set = 1;
	default_policy = pol;

	lockmgr(&dsched_lock, LK_RELEASE);

	return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");

static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
	if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
		diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
		sysctl_ctx_init(&diskctx->sysctl_ctx);
	}

	SYSCTL_ADD_PROC(&diskctx->sysctl_ctx,
	    SYSCTL_STATIC_CHILDREN(_dsched_policy),
	    OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
	    diskctx, 0, sysctl_dsched_policy, "A", "policy");
}