/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

TAILQ_HEAD(tdio_list_head, dsched_thread_io);

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t         noop_prepare;
static dsched_teardown_t        noop_teardown;
static dsched_cancel_t          noop_cancel;
static dsched_queue_t           noop_queue;

static void dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio);
static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
static void dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx);
static void dsched_thread_io_destroy(struct dsched_thread_io *tdio);
static void dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx);

static struct dsched_thread_io *dsched_thread_io_alloc(
        struct disk *dp, struct dsched_thread_ctx *tdctx,
        struct dsched_policy *pol, int tdctx_locked);

static int      dsched_inited = 0;
static int      default_set = 0;

struct lock     dsched_lock;
static int      dsched_debug_enable = 0;

struct dsched_stats    dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
        DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
        DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
        DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };
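
/*
 * The objcaches below provide the fixed-size backing store for all
 * diskctx, tdctx and tdio allocations; they are created at boot in
 * dsched_init().
 */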

static struct objcache  *dsched_diskctx_cache;
static struct objcache  *dsched_tdctx_cache;
static struct objcache  *dsched_tdio_cache;

struct lock     dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
        TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_noop_policy = {
        .name = "noop",

        .prepare = noop_prepare,
        .teardown = noop_teardown,
        .cancel_all = noop_cancel,
        .bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a debug output function whose verbosity is
 * controlled via a sysctl and tunable; it prints using kvprintf().
 */
int
dsched_debug(int level, char *fmt, ...)
{
        __va_list ap;

        __va_start(ap, fmt);
        if (level <= dsched_debug_enable)
                kvprintf(fmt, ap);
        __va_end(ap);

        return 0;
}

/*
 * Called on disk_create().  Tries to read the policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
        char tunable_key[SPECNAMELEN + 48];
        char sched_policy[DSCHED_POLICY_NAME_LENGTH];
        char *ptr;
        struct dsched_policy *policy = NULL;

        /* Also look for serno stuff? */
        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        ksnprintf(tunable_key, sizeof(tunable_key),
                  "dsched.policy.%s%d", head_name, unit);
        if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0) {
                policy = dsched_find_policy(sched_policy);
        }

        ksnprintf(tunable_key, sizeof(tunable_key),
                  "dsched.policy.%s", head_name);

        for (ptr = tunable_key; *ptr; ptr++) {
                if (*ptr == '/')
                        *ptr = '-';
        }
        if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0)) {
                policy = dsched_find_policy(sched_policy);
        }

        ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
        if (!policy && !default_set &&
            (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0)) {
                policy = dsched_find_policy(sched_policy);
        }

        if (!policy) {
                if (!default_set && bootverbose) {
                        dsched_debug(0,
                                     "No policy for %s%d specified, "
                                     "or policy not found\n",
                                     head_name, unit);
                }
                dsched_set_policy(dp, default_policy);
        } else {
                dsched_set_policy(dp, policy);
        }

        if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
                ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
        else
                ksnprintf(tunable_key, sizeof(tunable_key), "%s%d",
                          head_name, unit);
        for (ptr = tunable_key; *ptr; ptr++) {
                if (*ptr == '/')
                        *ptr = '-';
        }
        dsched_sysctl_add_disk(
            (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
            tunable_key);

        lockmgr(&dsched_lock, LK_RELEASE);
}
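
/*
 * Example loader.conf(5) settings matching the lookups above.  The
 * policy name "fq" is illustrative; any name registered through
 * dsched_register() works the same way:
 *
 *      dsched.policy.da0="fq"          # disk da0 only
 *      dsched.policy.da="fq"           # every disk with head name "da"
 *      dsched.policy.default="fq"      # fallback for all other disks
 */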

/*
 * Called from disk_setdiskinfo() (or rather _setdiskinfo()).  This
 * checks whether a policy is associated with the serial number of the
 * device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
        char tunable_key[SPECNAMELEN + 48];
        char sched_policy[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_policy *policy = NULL;

        if (info->d_serialno == NULL)
                return;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
                  info->d_serialno);

        if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0) {
                policy = dsched_find_policy(sched_policy);
        }

        if (policy) {
                dsched_switch(dp, policy);
        }

        dsched_sysctl_add_disk(
            (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
            info->d_serialno);

        lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy().  Shuts down the scheduler core and cancels
 * all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
        struct dsched_policy *old_policy;
        struct dsched_disk_ctx *diskctx;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        diskctx = dsched_get_disk_priv(dp);

        old_policy = dp->d_sched_policy;
        dp->d_sched_policy = &dsched_noop_policy;
        old_policy->cancel_all(dsched_get_disk_priv(dp));
        old_policy->teardown(dsched_get_disk_priv(dp));

        if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
                sysctl_ctx_free(&diskctx->sysctl_ctx);

        policy_destroy(dp);
        atomic_subtract_int(&old_policy->ref_count, 1);
        KKASSERT(old_policy->ref_count >= 0);

        lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Caller must have dp->diskctx locked.
 */
void
dsched_queue(struct disk *dp, struct bio *bio)
{
        struct dsched_thread_ctx *tdctx;
        struct dsched_thread_io *tdio;
        struct dsched_disk_ctx *diskctx;
        int found;
        int error;

        if (dp->d_sched_policy == &dsched_noop_policy) {
                dsched_clr_buf_priv(bio->bio_buf);
                atomic_add_int(&dsched_stats.no_tdctx, 1);
                dsched_strategy_raw(dp, bio);
                return;
        }

        found = 0;
        error = 0;
        tdctx = dsched_get_buf_priv(bio->bio_buf);
        if (tdctx == NULL) {
                /* We don't handle this case, let dsched dispatch */
                atomic_add_int(&dsched_stats.no_tdctx, 1);
                dsched_strategy_raw(dp, bio);
                return;
        }

        DSCHED_THREAD_CTX_LOCK(tdctx);

        /*
         * XXX:
         * iterate in reverse to make sure we find the most up-to-date
         * tdio for a given disk.  After a switch it may take some time
         * for everything to clean up.
         */
        TAILQ_FOREACH_REVERSE(tdio, &tdctx->tdio_list, tdio_list_head, link) {
                if (tdio->dp == dp) {
                        dsched_thread_io_ref(tdio);
                        found = 1;
                        break;
                }
        }
        if (tdio == NULL) {
                tdio = dsched_thread_io_alloc(dp, tdctx,
                                              dp->d_sched_policy, 1);
                dsched_thread_io_ref(tdio);
                found = 1;      /* tdio now established */
        }

        DSCHED_THREAD_CTX_UNLOCK(tdctx);
        dsched_clr_buf_priv(bio->bio_buf);
        dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

        KKASSERT(found == 1);
        diskctx = dsched_get_disk_priv(dp);
        dsched_disk_ctx_ref(diskctx);

        if (dp->d_sched_policy != &dsched_noop_policy)
                KKASSERT(tdio->debug_policy == dp->d_sched_policy);

        KKASSERT(tdio->debug_inited == 0xF00F1234);

        error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

        if (error) {
                dsched_strategy_raw(dp, bio);
        }
        dsched_disk_ctx_unref(diskctx);
        dsched_thread_io_unref(tdio);
}

/*
 * Called from the module_init or module_attach of each policy;
 * registers the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
        struct dsched_policy *policy;
        int error = 0;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        policy = dsched_find_policy(d_policy->name);

        if (!policy) {
                TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
                atomic_add_int(&d_policy->ref_count, 1);
        } else {
                dsched_debug(LOG_ERR,
                             "Policy with name %s already registered!\n",
                             d_policy->name);
                error = EEXIST;
        }

        lockmgr(&dsched_lock, LK_RELEASE);
        return error;
}

/*
 * Called from the module_detach of each policy; unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
        struct dsched_policy *policy;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);
        policy = dsched_find_policy(d_policy->name);

        if (policy) {
                if (policy->ref_count > 1) {
                        lockmgr(&dsched_lock, LK_RELEASE);
                        return EBUSY;
                }
                TAILQ_REMOVE(&dsched_policy_list, policy, link);
                atomic_subtract_int(&policy->ref_count, 1);
                KKASSERT(policy->ref_count == 0);
        }
        lockmgr(&dsched_lock, LK_RELEASE);

        return 0;
}
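
/*
 * A minimal sketch of how a policy module is expected to hook in,
 * modeled on the noop policy at the bottom of this file (the "foo"
 * names are illustrative):
 *
 *      static struct dsched_policy foo_policy = {
 *              .name = "foo",
 *              .prepare = foo_prepare,
 *              .teardown = foo_teardown,
 *              .cancel_all = foo_cancel,
 *              .bio_queue = foo_queue,
 *      };
 *
 * The module calls dsched_register(&foo_policy) from its module-init
 * path and dsched_unregister(&foo_policy) on detach.
 */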

/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
        struct dsched_policy *old_policy;

        /* If we are asked to set the same policy, do nothing */
        if (dp->d_sched_policy == new_policy)
                return 0;

        /* lock everything down, diskwise */
        lockmgr(&dsched_lock, LK_EXCLUSIVE);
        old_policy = dp->d_sched_policy;

        atomic_subtract_int(&old_policy->ref_count, 1);
        KKASSERT(old_policy->ref_count >= 0);

        dp->d_sched_policy = &dsched_noop_policy;
        old_policy->teardown(dsched_get_disk_priv(dp));
        policy_destroy(dp);

        /* Bring everything back to life */
        dsched_set_policy(dp, new_policy);
        lockmgr(&dsched_lock, LK_RELEASE);

        return 0;
}

/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
        int locked = 0;

        /* Check if it is locked already; if not, we acquire the dsched lock */
        if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
                lockmgr(&dsched_lock, LK_EXCLUSIVE);
                locked = 1;
        }

        DSCHED_GLOBAL_THREAD_CTX_LOCK();

        policy_new(dp, new_policy);
        new_policy->prepare(dsched_get_disk_priv(dp));
        dp->d_sched_policy = new_policy;
        atomic_add_int(&new_policy->ref_count, 1);

        DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

        kprintf("disk scheduler: set policy of %s to %s\n",
                dp->d_cdev->si_name, new_policy->name);

        /* If we acquired the lock, we also get rid of it */
        if (locked)
                lockmgr(&dsched_lock, LK_RELEASE);
}

struct dsched_policy *
dsched_find_policy(char *search)
{
        struct dsched_policy *policy;
        struct dsched_policy *policy_found = NULL;
        int locked = 0;

        /* Check if it is locked already; if not, we acquire the dsched lock */
        if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
                lockmgr(&dsched_lock, LK_EXCLUSIVE);
                locked = 1;
        }

        TAILQ_FOREACH(policy, &dsched_policy_list, link) {
                if (!strcmp(policy->name, search)) {
                        policy_found = policy;
                        break;
                }
        }

        /* If we acquired the lock, we also get rid of it */
        if (locked)
                lockmgr(&dsched_lock, LK_RELEASE);

        return policy_found;
}

/*
 * Returns ref'd disk.
 */
struct disk *
dsched_find_disk(char *search)
{
        struct disk marker;
        struct disk *dp = NULL;

        while ((dp = disk_enumerate(&marker, dp)) != NULL) {
                if (strcmp(dp->d_cdev->si_name, search) == 0) {
                        disk_enumerate_stop(&marker, NULL);
                        /* leave ref on dp */
                        break;
                }
        }
        return dp;
}

struct disk *
dsched_disk_enumerate(struct disk *marker, struct disk *dp,
                      struct dsched_policy *policy)
{
        while ((dp = disk_enumerate(marker, dp)) != NULL) {
                if (dp->d_sched_policy == policy)
                        break;
        }
        return dp;              /* NULL once the enumeration is exhausted */
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
        if (!pol)
                return (TAILQ_FIRST(&dsched_policy_list));
        else
                return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
        bp->bio_buf->b_error = ENXIO;
        bp->bio_buf->b_flags |= B_ERROR;
        bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

        biodone(bp);
}
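
/*
 * Dispatch helpers.  dsched_strategy_raw() passes a bio straight to the
 * raw device, dsched_strategy_sync() issues it synchronously via a
 * temporary pbuf, dsched_strategy_async() attaches a caller-supplied
 * biodone callback, and dsched_strategy_request_polling() further below
 * layers tag-queue accounting and a policy poll on top of the async
 * path.
 */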

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
        /*
         * Ideally, this stuff shouldn't be needed... but just in case,
         * we leave it in to avoid panics.
         */
        KASSERT(dp->d_rawdev != NULL,
                ("dsched_strategy_raw sees NULL d_rawdev!!"));
        if (bp->bio_track != NULL) {
                dsched_debug(LOG_INFO,
                             "dsched_strategy_raw sees non-NULL bio_track!! "
                             "bio: %p\n", bp);
                bp->bio_track = NULL;
        }
        dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
        struct buf *bp, *nbp;
        struct bio *nbio;

        bp = bio->bio_buf;

        nbp = getpbuf(NULL);
        nbio = &nbp->b_bio1;

        nbp->b_cmd = bp->b_cmd;
        nbp->b_bufsize = bp->b_bufsize;
        nbp->b_runningbufspace = bp->b_runningbufspace;
        nbp->b_bcount = bp->b_bcount;
        nbp->b_resid = bp->b_resid;
        nbp->b_data = bp->b_data;
#if 0
        /*
         * Buffers undergoing device I/O do not need a kvabase/size.
         */
        nbp->b_kvabase = bp->b_kvabase;
        nbp->b_kvasize = bp->b_kvasize;
#endif
        nbp->b_dirtyend = bp->b_dirtyend;

        nbio->bio_done = biodone_sync;
        nbio->bio_flags |= BIO_SYNC;
        nbio->bio_track = NULL;

        nbio->bio_caller_info1.ptr = dp;
        nbio->bio_offset = bio->bio_offset;

        dev_dstrategy(dp->d_rawdev, nbio);
        biowait(nbio, "dschedsync");
        bp->b_resid = nbp->b_resid;
        bp->b_error = nbp->b_error;
        biodone(bio);
#if 0
        nbp->b_kvabase = NULL;
        nbp->b_kvasize = 0;
#endif
        relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done,
                      void *priv)
{
        struct bio *nbio;

        nbio = push_bio(bio);
        nbio->bio_done = done;
        nbio->bio_offset = bio->bio_offset;

        dsched_set_bio_dp(nbio, dp);
        dsched_set_bio_priv(nbio, priv);

        getmicrotime(&nbio->bio_caller_info3.tv);
        dev_dstrategy(dp->d_rawdev, nbio);
}

/*
 * A special biodone callback used by policies that implement request
 * polling.
 */
static void
request_polling_biodone(struct bio *bp)
{
        struct dsched_disk_ctx *diskctx = NULL;
        struct disk *dp = NULL;
        struct bio *obio;
        struct dsched_policy *policy;

        dp = dsched_get_bio_dp(bp);
        policy = dp->d_sched_policy;
        diskctx = dsched_get_disk_priv(dp);
        KKASSERT(diskctx && policy);
        dsched_disk_ctx_ref(diskctx);

        /*
         * XXX:
         * the bio_done function must not block!
         */
        if (diskctx->dp->d_sched_policy->bio_done)
                diskctx->dp->d_sched_policy->bio_done(bp);

        obio = pop_bio(bp);
        biodone(obio);

        atomic_subtract_int(&diskctx->current_tag_queue_depth, 1);

        /*
         * Call the polling function.
         * XXX:
         * the polling function must not block!
         */
        if (policy->polling_func)
                policy->polling_func(diskctx);
        else
                dsched_debug(0, "dsched: the policy uses request polling "
                    "without a polling function!\n");
        dsched_disk_ctx_unref(diskctx);
}

/*
 * A special dsched strategy used by policies that implement request
 * polling (a polling function).
 *
 * The strategy works just like dsched_strategy_async(), but the
 * biodone callback is set to a preset one.
 *
 * If the policy needs its own biodone callback, it should register it
 * in the policy structure (bio_done field).
 *
 * The current_tag_queue_depth is maintained by this function and the
 * request_polling_biodone() function.
 */
void
dsched_strategy_request_polling(struct disk *dp, struct bio *bio,
                                struct dsched_disk_ctx *diskctx)
{
        atomic_add_int(&diskctx->current_tag_queue_depth, 1);
        dsched_strategy_async(dp, bio, request_polling_biodone,
                              dsched_get_bio_priv(bio));
}

/*
 * Ref and deref various structures.  The 1->0 transition of the
 * reference count actually transitions 1->0x80000000 and causes the
 * object to be destroyed.  It is possible for transitory references
 * to occur on the object while it is being destroyed.  We use bit 31
 * to indicate that destruction is in progress and to prevent nested
 * destructions.
 */
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
        atomic_fetchadd_int(&diskctx->refcount, 1);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
        atomic_fetchadd_int(&tdio->refcount, 1);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
        atomic_fetchadd_int(&tdctx->refcount, 1);
}

void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for diskctx and nested destruction
         * recursions.  If the refs are already in destruction mode
         * (bit 31 set) on the 1->0 transition we don't try to destruct
         * it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = diskctx->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs)) {
                        dsched_disk_ctx_destroy(diskctx);
                        break;
                }
        }
}

static
void
dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx)
{
        struct dsched_thread_io *tdio;
        int refs;
        int nrefs;

#if 0
        kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
        print_backtrace(4);
#endif
        lockmgr(&diskctx->lock, LK_EXCLUSIVE);
        while ((tdio = TAILQ_FIRST(&diskctx->tdio_list)) != NULL) {
                KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
                TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
                tdio->diskctx = NULL;
                /* XXX tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio); */
                lockmgr(&diskctx->lock, LK_RELEASE);
                dsched_thread_io_unref_destroy(tdio);
                lockmgr(&diskctx->lock, LK_EXCLUSIVE);
        }
        lockmgr(&diskctx->lock, LK_RELEASE);

        /*
         * Expect diskctx->refcount to be 0x80000000.  If it isn't,
         * someone else still has a temporary ref on the diskctx and we
         * have to transition it back to an undestroyed state (albeit
         * without any associations), so the other user destroys it
         * properly when the ref is released.
         */
        while ((refs = diskctx->refcount) != 0x80000000) {
                kprintf("dsched_disk_ctx: destroy race diskctx=%p\n", diskctx);
                cpu_ccfence();
                KKASSERT(refs & 0x80000000);
                nrefs = refs & 0x7FFFFFFF;
                if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
                        return;
        }

        /*
         * Really for sure now.
         */
        if (diskctx->dp->d_sched_policy->destroy_diskctx)
                diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
        objcache_put(dsched_diskctx_cache, diskctx);
        atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for tdio and nested destruction
         * recursions.  If the refs are already in destruction mode
         * (bit 31 set) on the 1->0 transition we don't try to destruct
         * it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = tdio->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
                        dsched_thread_io_destroy(tdio);
                        break;
                }
        }
}

/*
 * Unref and destroy the tdio even if additional refs are present.
 */
static
void
dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio)
{
        int refs;
        int nrefs;

        /*
         * If not already transitioned to destroy-in-progress we
         * transition to destroy-in-progress, clean up our ref, and
         * destroy the tdio.
         */
        for (;;) {
                refs = tdio->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs & 0x80000000) {
                        if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs |= 0x80000000;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
                        dsched_thread_io_destroy(tdio);
                        break;
                }
        }
}

static void
dsched_thread_io_destroy(struct dsched_thread_io *tdio)
{
        struct dsched_thread_ctx *tdctx;
        struct dsched_disk_ctx *diskctx;
        int refs;
        int nrefs;

#if 0
        kprintf("tdio (%p) destruction started, trace:\n", tdio);
        print_backtrace(8);
#endif
        KKASSERT(tdio->qlength == 0);

        while ((diskctx = tdio->diskctx) != NULL) {
                dsched_disk_ctx_ref(diskctx);
                lockmgr(&diskctx->lock, LK_EXCLUSIVE);
                if (diskctx != tdio->diskctx) {
                        lockmgr(&diskctx->lock, LK_RELEASE);
                        dsched_disk_ctx_unref(diskctx);
                        continue;
                }
                KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
                if (diskctx->dp->d_sched_policy->destroy_tdio)
                        diskctx->dp->d_sched_policy->destroy_tdio(tdio);
                TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
                tdio->diskctx = NULL;
                dsched_thread_io_unref(tdio);
                lockmgr(&diskctx->lock, LK_RELEASE);
                dsched_disk_ctx_unref(diskctx);
        }
        while ((tdctx = tdio->tdctx) != NULL) {
                dsched_thread_ctx_ref(tdctx);
                lockmgr(&tdctx->lock, LK_EXCLUSIVE);
                if (tdctx != tdio->tdctx) {
                        lockmgr(&tdctx->lock, LK_RELEASE);
                        dsched_thread_ctx_unref(tdctx);
                        continue;
                }
                KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
                TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                tdio->tdctx = NULL;
                dsched_thread_io_unref(tdio);
                lockmgr(&tdctx->lock, LK_RELEASE);
                dsched_thread_ctx_unref(tdctx);
        }

        /*
         * Expect tdio->refcount to be 0x80000000.  If it isn't, someone
         * else still has a temporary ref on the tdio and we have to
         * transition it back to an undestroyed state (albeit without
         * any associations) so the other user destroys it properly when
         * the ref is released.
         */
        while ((refs = tdio->refcount) != 0x80000000) {
                kprintf("dsched_thread_io: destroy race tdio=%p\n", tdio);
                cpu_ccfence();
                KKASSERT(refs & 0x80000000);
                nrefs = refs & 0x7FFFFFFF;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                        return;
        }

        /*
         * Really for sure now.
         */
        objcache_put(dsched_tdio_cache, tdio);
        atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
}
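
/*
 * Note the pattern used above when detaching the tdio from its diskctx
 * and tdctx: the containing object is ref'd first, then locked, and
 * tdio->diskctx (or tdio->tdctx) is re-checked under the lock in case
 * it was ripped out while we slept on the lock.
 */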

void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for tdctx and nested destruction
         * recursions.  If the refs are already in destruction mode
         * (bit 31 set) on the 1->0 transition we don't try to destruct
         * it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = tdctx->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs)) {
                        dsched_thread_ctx_destroy(tdctx);
                        break;
                }
        }
}

static void
dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx)
{
        struct dsched_thread_io *tdio;

        lockmgr(&tdctx->lock, LK_EXCLUSIVE);

        while ((tdio = TAILQ_FIRST(&tdctx->tdio_list)) != NULL) {
                KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
                TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                tdio->tdctx = NULL;
                lockmgr(&tdctx->lock, LK_RELEASE);      /* avoid deadlock */
                dsched_thread_io_unref_destroy(tdio);
                lockmgr(&tdctx->lock, LK_EXCLUSIVE);
        }
        KKASSERT(tdctx->refcount == 0x80000000);

        lockmgr(&tdctx->lock, LK_RELEASE);

        objcache_put(dsched_tdctx_cache, tdctx);
        atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
}

/*
 * Ensures that a tdio is assigned to tdctx and disk.
 */
static
struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
                       struct dsched_policy *pol, int tdctx_locked)
{
        struct dsched_thread_io *tdio;
#if 0
        dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
        tdio = objcache_get(dsched_tdio_cache, M_INTWAIT);
        bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

        dsched_thread_io_ref(tdio);     /* prevent ripout */
        dsched_thread_io_ref(tdio);     /* for diskctx ref */

        DSCHED_THREAD_IO_LOCKINIT(tdio);
        tdio->dp = dp;

        tdio->diskctx = dsched_get_disk_priv(dp);
        TAILQ_INIT(&tdio->queue);

        if (pol->new_tdio)
                pol->new_tdio(tdio);

        lockmgr(&tdio->diskctx->lock, LK_EXCLUSIVE);
        TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
        atomic_set_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
        lockmgr(&tdio->diskctx->lock, LK_RELEASE);

        if (tdctx) {
                /*
                 * Put the tdio in the tdctx list.  Inherit the
                 * temporary ref (one ref for each list).
                 */
                if (tdctx_locked == 0)
                        DSCHED_THREAD_CTX_LOCK(tdctx);
                tdio->tdctx = tdctx;
                tdio->p = tdctx->p;
                TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
                atomic_set_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                if (tdctx_locked == 0)
                        DSCHED_THREAD_CTX_UNLOCK(tdctx);
        } else {
                dsched_thread_io_unref(tdio);
        }

        tdio->debug_policy = pol;
        tdio->debug_inited = 0xF00F1234;

        atomic_add_int(&dsched_stats.tdio_allocations, 1);

        return (tdio);
}

struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
        bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
        dsched_disk_ctx_ref(diskctx);
        diskctx->dp = dp;
        DSCHED_DISK_CTX_LOCKINIT(diskctx);
        TAILQ_INIT(&diskctx->tdio_list);
        /*
         * XXX: magic number 32: most devices have a tag queue depth
         * of 32.  It would be better to retrieve a more precise value
         * from the driver.
         */
        diskctx->max_tag_queue_depth = 32;
        diskctx->current_tag_queue_depth = 0;

        atomic_add_int(&dsched_stats.diskctx_allocations, 1);
        if (pol->new_diskctx)
                pol->new_diskctx(diskctx);
        return diskctx;
}

struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
        struct dsched_thread_ctx *tdctx;

        tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
        bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
        dsched_thread_ctx_ref(tdctx);
#if 0
        kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
        DSCHED_THREAD_CTX_LOCKINIT(tdctx);
        TAILQ_INIT(&tdctx->tdio_list);
        tdctx->p = p;

        atomic_add_int(&dsched_stats.tdctx_allocations, 1);
        /* XXX: no callback here */

        return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = dsched_disk_ctx_alloc(dp, pol);
        dsched_disk_ctx_ref(diskctx);
        dsched_set_disk_priv(dp, diskctx);
}

void
policy_destroy(struct disk *dp)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = dsched_get_disk_priv(dp);
        KKASSERT(diskctx != NULL);

        dsched_disk_ctx_unref(diskctx); /* from prepare */
        dsched_disk_ctx_unref(diskctx); /* from alloc */

        dsched_set_disk_priv(dp, NULL);
}
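
/*
 * Example of the tdctx/buf reference flow: dsched_new_buf() below
 * attaches a ref'd tdctx to each new buf; that ref is released either
 * in dsched_queue() once a tdio has been looked up, or in
 * dsched_exit_buf() when the buf is torn down without being queued.
 */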

void
dsched_new_buf(struct buf *bp)
{
        struct dsched_thread_ctx *tdctx = NULL;

        if (dsched_inited == 0)
                return;

        if (curproc != NULL) {
                tdctx = dsched_get_proc_priv(curproc);
        } else {
                /* This is a kernel thread, so no proc info is available */
                tdctx = dsched_get_thread_priv(curthread);
        }

#if 0
        /*
         * XXX: hack.  We don't want this assert because we aren't
         * catching all threads.  mi_startup() is still getting away
         * without a tdctx.
         */

        /* By now we should have a tdctx; if not, something bad is going on */
        KKASSERT(tdctx != NULL);
#endif

        if (tdctx) {
                dsched_thread_ctx_ref(tdctx);
        }
        dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
        struct dsched_thread_ctx *tdctx;

        tdctx = dsched_get_buf_priv(bp);
        if (tdctx != NULL) {
                dsched_clr_buf_priv(bp);
                dsched_thread_ctx_unref(tdctx);
        }
}

void
dsched_new_proc(struct proc *p)
{
        struct dsched_thread_ctx *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(p != NULL);

        tdctx = dsched_thread_ctx_alloc(p);
        tdctx->p = p;
        dsched_thread_ctx_ref(tdctx);

        dsched_set_proc_priv(p, tdctx);
        atomic_add_int(&dsched_stats.nprocs, 1);
}

void
dsched_new_thread(struct thread *td)
{
        struct dsched_thread_ctx *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(td != NULL);

        tdctx = dsched_thread_ctx_alloc(NULL);
        tdctx->td = td;
        dsched_thread_ctx_ref(tdctx);

        dsched_set_thread_priv(td, tdctx);
        atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
        struct dsched_thread_ctx *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(p != NULL);

        tdctx = dsched_get_proc_priv(p);
        KKASSERT(tdctx != NULL);

        tdctx->dead = 0xDEAD;
        dsched_set_proc_priv(p, NULL);

        dsched_thread_ctx_unref(tdctx); /* one for alloc, */
        dsched_thread_ctx_unref(tdctx); /* one for ref */
        atomic_subtract_int(&dsched_stats.nprocs, 1);
}

void
dsched_exit_thread(struct thread *td)
{
        struct dsched_thread_ctx *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(td != NULL);

        tdctx = dsched_get_thread_priv(td);
        KKASSERT(tdctx != NULL);

        tdctx->dead = 0xDEAD;
        dsched_set_thread_priv(td, NULL);

        dsched_thread_ctx_unref(tdctx); /* one for alloc, */
        dsched_thread_ctx_unref(tdctx); /* one for ref */
        atomic_subtract_int(&dsched_stats.nthreads, 1);
}
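
/*
 * The lifecycle hooks above pair their references: dsched_new_proc()
 * and dsched_new_thread() take two refs on the tdctx (one for the
 * allocation, one for the proc/thread private pointer), and
 * dsched_exit_proc()/dsched_exit_thread() drop both.
 */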

/*
 * Returns a ref'd tdio.
 *
 * The tdio may have additional refs for the diskctx and tdctx it
 * resides on.
 */
struct dsched_thread_io *
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
                              struct dsched_policy *pol)
{
        struct dsched_thread_ctx *tdctx;

        tdctx = dsched_get_thread_priv(curthread);
        KKASSERT(tdctx != NULL);
        return (dsched_thread_io_alloc(diskctx->dp, tdctx, pol, 0));
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
        return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{

}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{

}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
           struct bio *bio)
{
        dsched_strategy_raw(diskctx->dp, bio);
#if 0
        dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
        return 0;
}

/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
        dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
                                            NULL, NULL, NULL,
                                            objcache_malloc_alloc,
                                            objcache_malloc_free,
                                            &dsched_thread_io_malloc_args);

        dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
                                             NULL, NULL, NULL,
                                             objcache_malloc_alloc,
                                             objcache_malloc_free,
                                             &dsched_thread_ctx_malloc_args);

        dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
                                               NULL, NULL, NULL,
                                               objcache_malloc_alloc,
                                               objcache_malloc_free,
                                               &dsched_disk_ctx_malloc_args);

        bzero(&dsched_stats, sizeof(struct dsched_stats));

        lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
        DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

        dsched_register(&dsched_noop_policy);

        dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT - 1, SI_ORDER_FIRST,
        dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT - 1, SI_ORDER_ANY,
        dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
        return (sysctl_handle_opaque(oidp, &dsched_stats,
            sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
        struct dsched_policy *pol = NULL;
        int error, first = 1;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        while ((pol = dsched_policy_enumerate(pol))) {
                if (!first) {
                        error = SYSCTL_OUT(req, " ", 1);
                        if (error)
                                break;
                } else {
                        first = 0;
                }
                error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
                if (error)
                        break;
        }

        lockmgr(&dsched_lock, LK_RELEASE);

        error = SYSCTL_OUT(req, "", 1);

        return error;
}
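
/*
 * Example sysctl(8) usage against the nodes declared further below,
 * again assuming an illustrative policy named "fq" is registered:
 *
 *      sysctl dsched.policies                  # list available policies
 *      sysctl dsched.policy.default=fq         # set the default policy
 *      sysctl dsched.policy.da0=fq             # switch disk da0 live
 */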

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
        char buf[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_disk_ctx *diskctx = arg1;
        struct dsched_policy *pol = NULL;
        int error;

        if (diskctx == NULL) {
                return 0;
        }

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        pol = diskctx->dp->d_sched_policy;
        memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

        error = sysctl_handle_string(oidp, buf,
            DSCHED_POLICY_NAME_LENGTH, req);
        if (error || req->newptr == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return (error);
        }

        pol = dsched_find_policy(buf);
        if (pol == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return 0;
        }

        dsched_switch(diskctx->dp, pol);

        lockmgr(&dsched_lock, LK_RELEASE);

        return error;
}

static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
        char buf[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_policy *pol = NULL;
        int error;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        pol = default_policy;
        memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

        error = sysctl_handle_string(oidp, buf,
            DSCHED_POLICY_NAME_LENGTH, req);
        if (error || req->newptr == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return (error);
        }

        pol = dsched_find_policy(buf);
        if (pol == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return 0;
        }

        default_set = 1;
        default_policy = pol;

        lockmgr(&dsched_lock, LK_RELEASE);

        return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
            "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
            "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
           0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
            0, sizeof(struct dsched_stats), sysctl_dsched_stats,
            "dsched_stats", "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
            NULL, 0, sysctl_dsched_list_policies, "A",
            "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
            NULL, 0, sysctl_dsched_default_policy, "A",
            "default dsched policy");

static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
        if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
                diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
                sysctl_ctx_init(&diskctx->sysctl_ctx);
        }

        SYSCTL_ADD_PROC(&diskctx->sysctl_ctx,
                        SYSCTL_STATIC_CHILDREN(_dsched_policy),
                        OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
                        diskctx, 0, sysctl_dsched_policy, "A", "policy");
}