/*
 * Copyright (c) 2009, 2010 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Alex Hornung <ahornung@gmail.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/buf.h>
#include <sys/conf.h>
#include <sys/diskslice.h>
#include <sys/disk.h>
#include <sys/malloc.h>
#include <machine/md_var.h>
#include <sys/ctype.h>
#include <sys/syslog.h>
#include <sys/device.h>
#include <sys/msgport.h>
#include <sys/msgport2.h>
#include <sys/buf2.h>
#include <sys/dsched.h>
#include <sys/fcntl.h>
#include <machine/varargs.h>

TAILQ_HEAD(tdio_list_head, dsched_thread_io);

MALLOC_DEFINE(M_DSCHED, "dsched", "dsched allocs");

static dsched_prepare_t noop_prepare;
static dsched_teardown_t noop_teardown;
static dsched_cancel_t noop_cancel;
static dsched_queue_t noop_queue;

static void dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio);
static void dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name);
static void dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx);
static void dsched_thread_io_destroy(struct dsched_thread_io *tdio);
static void dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx);

static struct dsched_thread_io *dsched_thread_io_alloc(
        struct disk *dp, struct dsched_thread_ctx *tdctx,
        struct dsched_policy *pol, int tdctx_locked);

static int dsched_inited = 0;
static int default_set = 0;

struct lock dsched_lock;
static int dsched_debug_enable = 0;

struct dsched_stats dsched_stats;

struct objcache_malloc_args dsched_disk_ctx_malloc_args = {
        DSCHED_DISK_CTX_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_io_malloc_args = {
        DSCHED_THREAD_IO_MAX_SZ, M_DSCHED };
struct objcache_malloc_args dsched_thread_ctx_malloc_args = {
        DSCHED_THREAD_CTX_MAX_SZ, M_DSCHED };
static struct objcache *dsched_diskctx_cache;
static struct objcache *dsched_tdctx_cache;
static struct objcache *dsched_tdio_cache;

struct lock dsched_tdctx_lock;

static struct dsched_policy_head dsched_policy_list =
        TAILQ_HEAD_INITIALIZER(dsched_policy_list);

static struct dsched_policy dsched_noop_policy = {
        .name = "noop",

        .prepare = noop_prepare,
        .teardown = noop_teardown,
        .cancel_all = noop_cancel,
        .bio_queue = noop_queue
};

static struct dsched_policy *default_policy = &dsched_noop_policy;

/*
 * dsched_debug() is a SYSCTL- and TUNABLE-controlled debug output function
 * using kvprintf.
 */
int
dsched_debug(int level, char *fmt, ...)
{
        __va_list ap;

        __va_start(ap, fmt);
        if (level <= dsched_debug_enable)
                kvprintf(fmt, ap);
        __va_end(ap);

        return 0;
}

/*
 * Called on disk_create().  Tries to read which policy to use from
 * loader.conf; if none is specified, the default policy is used.
 */
void
dsched_disk_create_callback(struct disk *dp, const char *head_name, int unit)
{
        char tunable_key[SPECNAMELEN + 48];
        char sched_policy[DSCHED_POLICY_NAME_LENGTH];
        char *ptr;
        struct dsched_policy *policy = NULL;

        /* Also look for serno stuff? */
        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        ksnprintf(tunable_key, sizeof(tunable_key),
            "dsched.policy.%s%d", head_name, unit);
        if (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0) {
                policy = dsched_find_policy(sched_policy);
        }

        ksnprintf(tunable_key, sizeof(tunable_key),
            "dsched.policy.%s", head_name);

        for (ptr = tunable_key; *ptr; ptr++) {
                if (*ptr == '/')
                        *ptr = '-';
        }
        if (!policy && (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0)) {
                policy = dsched_find_policy(sched_policy);
        }

        ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.default");
        if (!policy && !default_set &&
            (TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0)) {
                policy = dsched_find_policy(sched_policy);
        }

        if (!policy) {
                if (!default_set && bootverbose) {
                        dsched_debug(0,
                            "No policy for %s%d specified, "
                            "or policy not found\n",
                            head_name, unit);
                }
                dsched_set_policy(dp, default_policy);
        } else {
                dsched_set_policy(dp, policy);
        }

        if (strncmp(head_name, "mapper/", strlen("mapper/")) == 0)
                ksnprintf(tunable_key, sizeof(tunable_key), "%s", head_name);
        else
                ksnprintf(tunable_key, sizeof(tunable_key), "%s%d",
                    head_name, unit);
        for (ptr = tunable_key; *ptr; ptr++) {
                if (*ptr == '/')
                        *ptr = '-';
        }
        dsched_sysctl_add_disk(
            (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
            tunable_key);

        lockmgr(&dsched_lock, LK_RELEASE);
}
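/*
 * Example loader.conf(5) tunables consumed by the lookup above
 * (illustrative only; "fq" assumes such a policy module is available):
 *
 *      dsched.policy.da0="fq"          # policy for one device
 *      dsched.policy.da="fq"           # policy for a whole driver
 *      dsched.policy.default="fq"      # global default policy
 */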
/*
 * Called from disk_setdiskinfo (or rather _setdiskinfo).  This checks
 * whether a policy is associated with the serial number of the device.
 */
void
dsched_disk_update_callback(struct disk *dp, struct disk_info *info)
{
        char tunable_key[SPECNAMELEN + 48];
        char sched_policy[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_policy *policy = NULL;

        if (info->d_serialno == NULL)
                return;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        ksnprintf(tunable_key, sizeof(tunable_key), "dsched.policy.%s",
            info->d_serialno);

        if ((TUNABLE_STR_FETCH(tunable_key, sched_policy,
            sizeof(sched_policy)) != 0)) {
                policy = dsched_find_policy(sched_policy);
        }

        if (policy) {
                dsched_switch(dp, policy);
        }

        dsched_sysctl_add_disk(
            (struct dsched_disk_ctx *)dsched_get_disk_priv(dp),
            info->d_serialno);

        lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Called on disk_destroy().
 * Shuts down the scheduler core and cancels all remaining bios.
 */
void
dsched_disk_destroy_callback(struct disk *dp)
{
        struct dsched_policy *old_policy;
        struct dsched_disk_ctx *diskctx;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        diskctx = dsched_get_disk_priv(dp);

        old_policy = dp->d_sched_policy;
        dp->d_sched_policy = &dsched_noop_policy;
        old_policy->cancel_all(dsched_get_disk_priv(dp));
        old_policy->teardown(dsched_get_disk_priv(dp));

        if (diskctx->flags & DSCHED_SYSCTL_CTX_INITED)
                sysctl_ctx_free(&diskctx->sysctl_ctx);

        policy_destroy(dp);
        atomic_subtract_int(&old_policy->ref_count, 1);
        KKASSERT(old_policy->ref_count >= 0);

        lockmgr(&dsched_lock, LK_RELEASE);
}

/*
 * Caller must have dp->diskctx locked.
 */
void
dsched_queue(struct disk *dp, struct bio *bio)
{
        struct dsched_thread_ctx *tdctx;
        struct dsched_thread_io *tdio;
        struct dsched_disk_ctx *diskctx;
        int error;

        if (dp->d_sched_policy == &dsched_noop_policy) {
                dsched_clr_buf_priv(bio->bio_buf);
                atomic_add_int(&dsched_stats.no_tdctx, 1);
                dsched_strategy_raw(dp, bio);
                return;
        }

        error = 0;
        tdctx = dsched_get_buf_priv(bio->bio_buf);
        if (tdctx == NULL) {
                /* We don't handle this case, let dsched dispatch */
                atomic_add_int(&dsched_stats.no_tdctx, 1);
                dsched_strategy_raw(dp, bio);
                return;
        }

        DSCHED_THREAD_CTX_LOCK(tdctx);

        /*
         * XXX:
         * Iterate in reverse to make sure we find the most up-to-date
         * tdio for a given disk.  After a switch it may take some time
         * for everything to clean up.
         */
        TAILQ_FOREACH_REVERSE(tdio, &tdctx->tdio_list, tdio_list_head, link) {
                if (tdio->dp == dp) {
                        dsched_thread_io_ref(tdio);
                        break;
                }
        }
        if (tdio == NULL) {
                tdio = dsched_thread_io_alloc(dp, tdctx, dp->d_sched_policy, 1);
                dsched_thread_io_ref(tdio);
        }

        DSCHED_THREAD_CTX_UNLOCK(tdctx);
        dsched_clr_buf_priv(bio->bio_buf);
        dsched_thread_ctx_unref(tdctx); /* acquired on new_buf */

        diskctx = dsched_get_disk_priv(dp);
        dsched_disk_ctx_ref(diskctx);

        if (dp->d_sched_policy != &dsched_noop_policy)
                KKASSERT(tdio->debug_policy == dp->d_sched_policy);

        KKASSERT(tdio->debug_inited == 0xF00F1234);

        error = dp->d_sched_policy->bio_queue(diskctx, tdio, bio);

        if (error) {
                dsched_strategy_raw(dp, bio);
        }
        dsched_disk_ctx_unref(diskctx);
        dsched_thread_io_unref(tdio);
}
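/*
 * Illustrative sketch (disabled): the registration pattern a policy module
 * would use with dsched_register()/dsched_unregister() below, modeled on
 * the noop policy at the bottom of this file.  The "example" policy, its
 * callbacks and the module glue are hypothetical, not part of this file.
 */
#if 0
static struct dsched_policy example_policy = {
        .name = "example",
        .prepare = example_prepare,     /* hypothetical callbacks */
        .teardown = example_teardown,
        .cancel_all = example_cancel,
        .bio_queue = example_queue
};

static int
example_mod_handler(module_t mod, int type, void *unused)
{
        switch (type) {
        case MOD_LOAD:
                return dsched_register(&example_policy);
        case MOD_UNLOAD:
                return dsched_unregister(&example_policy);
        default:
                return 0;
        }
}
#endif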
/*
 * Called from each module_init or module_attach of each policy;
 * registers the policy in the local policy list.
 */
int
dsched_register(struct dsched_policy *d_policy)
{
        struct dsched_policy *policy;
        int error = 0;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        policy = dsched_find_policy(d_policy->name);

        if (!policy) {
                TAILQ_INSERT_TAIL(&dsched_policy_list, d_policy, link);
                atomic_add_int(&d_policy->ref_count, 1);
        } else {
                dsched_debug(LOG_ERR,
                    "Policy with name %s already registered!\n",
                    d_policy->name);
                error = EEXIST;
        }

        lockmgr(&dsched_lock, LK_RELEASE);
        return error;
}

/*
 * Called from each module_detach of each policy;
 * unregisters the policy.
 */
int
dsched_unregister(struct dsched_policy *d_policy)
{
        struct dsched_policy *policy;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);
        policy = dsched_find_policy(d_policy->name);

        if (policy) {
                if (policy->ref_count > 1) {
                        lockmgr(&dsched_lock, LK_RELEASE);
                        return EBUSY;
                }
                TAILQ_REMOVE(&dsched_policy_list, policy, link);
                atomic_subtract_int(&policy->ref_count, 1);
                KKASSERT(policy->ref_count == 0);
        }
        lockmgr(&dsched_lock, LK_RELEASE);

        return 0;
}

/*
 * Switches the policy by first removing the old one and then
 * enabling the new one.
 */
int
dsched_switch(struct disk *dp, struct dsched_policy *new_policy)
{
        struct dsched_policy *old_policy;

        /* If we are asked to set the same policy, do nothing */
        if (dp->d_sched_policy == new_policy)
                return 0;

        /* lock everything down, diskwise */
        lockmgr(&dsched_lock, LK_EXCLUSIVE);
        old_policy = dp->d_sched_policy;

        atomic_subtract_int(&old_policy->ref_count, 1);
        KKASSERT(old_policy->ref_count >= 0);

        dp->d_sched_policy = &dsched_noop_policy;
        old_policy->teardown(dsched_get_disk_priv(dp));
        policy_destroy(dp);

        /* Bring everything back to life */
        dsched_set_policy(dp, new_policy);
        lockmgr(&dsched_lock, LK_RELEASE);

        return 0;
}

/*
 * Loads a given policy and attaches it to the specified disk.
 * Also initializes the core for the policy.
 */
void
dsched_set_policy(struct disk *dp, struct dsched_policy *new_policy)
{
        int locked = 0;

        /* Check if it is locked already; if not, we acquire the dsched lock */
        if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
                lockmgr(&dsched_lock, LK_EXCLUSIVE);
                locked = 1;
        }

        DSCHED_GLOBAL_THREAD_CTX_LOCK();

        policy_new(dp, new_policy);
        new_policy->prepare(dsched_get_disk_priv(dp));
        dp->d_sched_policy = new_policy;
        atomic_add_int(&new_policy->ref_count, 1);

        DSCHED_GLOBAL_THREAD_CTX_UNLOCK();

        kprintf("disk scheduler: set policy of %s to %s\n",
            dp->d_cdev->si_name, new_policy->name);

        /* If we acquired the lock, we also get rid of it */
        if (locked)
                lockmgr(&dsched_lock, LK_RELEASE);
}
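/*
 * Note: dsched_set_policy() above and dsched_find_policy() below may be
 * entered with dsched_lock already held exclusively (e.g. from
 * dsched_disk_create_callback()), which is why they only acquire the lock
 * when lockstatus() reports the caller does not hold it yet.
 */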
struct dsched_policy *
dsched_find_policy(char *search)
{
        struct dsched_policy *policy;
        struct dsched_policy *policy_found = NULL;
        int locked = 0;

        /* Check if it is locked already; if not, we acquire the dsched lock */
        if ((lockstatus(&dsched_lock, curthread)) != LK_EXCLUSIVE) {
                lockmgr(&dsched_lock, LK_EXCLUSIVE);
                locked = 1;
        }

        TAILQ_FOREACH(policy, &dsched_policy_list, link) {
                if (!strcmp(policy->name, search)) {
                        policy_found = policy;
                        break;
                }
        }

        /* If we acquired the lock, we also get rid of it */
        if (locked)
                lockmgr(&dsched_lock, LK_RELEASE);

        return policy_found;
}

/*
 * Returns ref'd disk.
 */
struct disk *
dsched_find_disk(char *search)
{
        struct disk marker;
        struct disk *dp = NULL;

        while ((dp = disk_enumerate(&marker, dp)) != NULL) {
                if (strcmp(dp->d_cdev->si_name, search) == 0) {
                        disk_enumerate_stop(&marker, NULL);
                        /* leave ref on dp */
                        break;
                }
        }
        return dp;
}

struct disk *
dsched_disk_enumerate(struct disk *marker, struct disk *dp,
    struct dsched_policy *policy)
{
        while ((dp = disk_enumerate(marker, dp)) != NULL) {
                if (dp->d_sched_policy == policy)
                        break;
        }
        return dp;      /* NULL when the enumeration is exhausted */
}

struct dsched_policy *
dsched_policy_enumerate(struct dsched_policy *pol)
{
        if (!pol)
                return (TAILQ_FIRST(&dsched_policy_list));
        else
                return (TAILQ_NEXT(pol, link));
}

void
dsched_cancel_bio(struct bio *bp)
{
        bp->bio_buf->b_error = ENXIO;
        bp->bio_buf->b_flags |= B_ERROR;
        bp->bio_buf->b_resid = bp->bio_buf->b_bcount;

        biodone(bp);
}

void
dsched_strategy_raw(struct disk *dp, struct bio *bp)
{
        /*
         * Ideally, this stuff shouldn't be needed... but just in case,
         * we leave it in to avoid panics.
         */
        KASSERT(dp->d_rawdev != NULL,
            ("dsched_strategy_raw sees NULL d_rawdev!!"));
        if (bp->bio_track != NULL) {
                dsched_debug(LOG_INFO,
                    "dsched_strategy_raw sees non-NULL bio_track!! "
                    "bio: %p\n", bp);
                bp->bio_track = NULL;
        }
        dev_dstrategy(dp->d_rawdev, bp);
}

void
dsched_strategy_sync(struct disk *dp, struct bio *bio)
{
        struct buf *bp, *nbp;
        struct bio *nbio;

        bp = bio->bio_buf;

        nbp = getpbuf(NULL);
        nbio = &nbp->b_bio1;

        nbp->b_cmd = bp->b_cmd;
        nbp->b_bufsize = bp->b_bufsize;
        nbp->b_runningbufspace = bp->b_runningbufspace;
        nbp->b_bcount = bp->b_bcount;
        nbp->b_resid = bp->b_resid;
        nbp->b_data = bp->b_data;
#if 0
        /*
         * Buffers undergoing device I/O do not need a kvabase/size.
         */
        nbp->b_kvabase = bp->b_kvabase;
        nbp->b_kvasize = bp->b_kvasize;
#endif
        nbp->b_dirtyend = bp->b_dirtyend;

        nbio->bio_done = biodone_sync;
        nbio->bio_flags |= BIO_SYNC;
        nbio->bio_track = NULL;

        nbio->bio_caller_info1.ptr = dp;
        nbio->bio_offset = bio->bio_offset;

        dev_dstrategy(dp->d_rawdev, nbio);
        biowait(nbio, "dschedsync");
        bp->b_resid = nbp->b_resid;
        bp->b_error = nbp->b_error;
        biodone(bio);
#if 0
        nbp->b_kvabase = NULL;
        nbp->b_kvasize = 0;
#endif
        relpbuf(nbp, NULL);
}

void
dsched_strategy_async(struct disk *dp, struct bio *bio, biodone_t *done,
    void *priv)
{
        struct bio *nbio;

        nbio = push_bio(bio);
        nbio->bio_done = done;
        nbio->bio_offset = bio->bio_offset;

        dsched_set_bio_dp(nbio, dp);
        dsched_set_bio_priv(nbio, priv);

        getmicrotime(&nbio->bio_caller_info3.tv);
        dev_dstrategy(dp->d_rawdev, nbio);
}

/*
 * A special bio done callback function
 * used by policies that implement request polling.
 */
static void
request_polling_biodone(struct bio *bp)
{
        struct dsched_disk_ctx *diskctx = NULL;
        struct disk *dp = NULL;
        struct bio *obio;
        struct dsched_policy *policy;

        dp = dsched_get_bio_dp(bp);
        policy = dp->d_sched_policy;
        diskctx = dsched_get_disk_priv(dp);
        KKASSERT(diskctx && policy);
        dsched_disk_ctx_ref(diskctx);

        /*
         * XXX:
         * The bio_done function should not block!
         */
        if (diskctx->dp->d_sched_policy->bio_done)
                diskctx->dp->d_sched_policy->bio_done(bp);

        obio = pop_bio(bp);
        biodone(obio);

        atomic_subtract_int(&diskctx->current_tag_queue_depth, 1);

        /*
         * Call the polling function.
         * XXX:
         * The polling function should not block!
         */
        if (policy->polling_func)
                policy->polling_func(diskctx);
        else
                dsched_debug(0, "dsched: the policy uses request polling "
                    "without a polling function!\n");
        dsched_disk_ctx_unref(diskctx);
}

/*
 * A special dsched strategy used by policies that implement request
 * polling (a polling function).
 *
 * The strategy works just like dsched_strategy_async(), but the biodone
 * callback is set to a preset one.
 *
 * If the policy needs its own biodone callback, it should
 * register it in the policy structure (bio_done field).
 *
 * The current_tag_queue_depth is maintained by this function
 * and the request_polling_biodone() function.
 */
void
dsched_strategy_request_polling(struct disk *dp, struct bio *bio,
    struct dsched_disk_ctx *diskctx)
{
        atomic_add_int(&diskctx->current_tag_queue_depth, 1);
        dsched_strategy_async(dp, bio, request_polling_biodone,
            dsched_get_bio_priv(bio));
}

/*
 * Ref and deref various structures.  The 1->0 transition of the reference
 * count actually transitions 1->0x80000000 and causes the object to be
 * destroyed.  It is possible for transitory references to occur on the
 * object while it is being destroyed.  We use bit 31 to indicate that
 * destruction is in progress and to prevent nested destructions.
 */
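/*
 * Illustrative refcount transitions for the scheme described above
 * (assuming no concurrent modifications in between):
 *
 *      2          -> 1            normal deref
 *      1          -> 0x80000000   final deref, destruction starts
 *      0x80000001 -> 0x80000000   deref of a transitory ref taken while
 *                                 destruction is in progress; does not
 *                                 trigger a nested destruction
 */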
void
dsched_disk_ctx_ref(struct dsched_disk_ctx *diskctx)
{
        int refcount __unused;

        refcount = atomic_fetchadd_int(&diskctx->refcount, 1);
}

void
dsched_thread_io_ref(struct dsched_thread_io *tdio)
{
        int refcount __unused;

        refcount = atomic_fetchadd_int(&tdio->refcount, 1);
}

void
dsched_thread_ctx_ref(struct dsched_thread_ctx *tdctx)
{
        int refcount __unused;

        refcount = atomic_fetchadd_int(&tdctx->refcount, 1);
}

void
dsched_disk_ctx_unref(struct dsched_disk_ctx *diskctx)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for diskctx and nested destruction
         * recursions.  If the refs are already in destruction mode (bit 31
         * set) on the 1->0 transition we don't try to destruct it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = diskctx->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs)) {
                        dsched_disk_ctx_destroy(diskctx);
                        break;
                }
        }
}

static
void
dsched_disk_ctx_destroy(struct dsched_disk_ctx *diskctx)
{
        struct dsched_thread_io *tdio;
        int refs;
        int nrefs;

#if 0
        kprintf("diskctx (%p) destruction started, trace:\n", diskctx);
        print_backtrace(4);
#endif
        lockmgr(&diskctx->lock, LK_EXCLUSIVE);
        while ((tdio = TAILQ_FIRST(&diskctx->tdio_list)) != NULL) {
                KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
                TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
                tdio->diskctx = NULL;
                /* XXX tdio->diskctx->dp->d_sched_policy->destroy_tdio(tdio); */
                lockmgr(&diskctx->lock, LK_RELEASE);
                dsched_thread_io_unref_destroy(tdio);
                lockmgr(&diskctx->lock, LK_EXCLUSIVE);
        }
        lockmgr(&diskctx->lock, LK_RELEASE);

        /*
         * Expect diskctx->refcount to be 0x80000000.  If it isn't, someone
         * else still has a temporary ref on the diskctx and we have to
         * transition it back to an undestroyed-state (albeit without any
         * associations), so the other user destroys it properly when the
         * ref is released.
         */
        while ((refs = diskctx->refcount) != 0x80000000) {
                kprintf("dsched_disk_ctx: destroy race diskctx=%p\n", diskctx);
                cpu_ccfence();
                KKASSERT(refs & 0x80000000);
                nrefs = refs & 0x7FFFFFFF;
                if (atomic_cmpset_int(&diskctx->refcount, refs, nrefs))
                        return;
        }

        /*
         * Really for sure now.
         */
        if (diskctx->dp->d_sched_policy->destroy_diskctx)
                diskctx->dp->d_sched_policy->destroy_diskctx(diskctx);
        objcache_put(dsched_diskctx_cache, diskctx);
        atomic_subtract_int(&dsched_stats.diskctx_allocations, 1);
}

void
dsched_thread_io_unref(struct dsched_thread_io *tdio)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for tdio and nested destruction
         * recursions.  If the refs are already in destruction mode (bit 31
         * set) on the 1->0 transition we don't try to destruct it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = tdio->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
                        dsched_thread_io_destroy(tdio);
                        break;
                }
        }
}

/*
 * Unref and destroy the tdio even if additional refs are present.
 */
static
void
dsched_thread_io_unref_destroy(struct dsched_thread_io *tdio)
{
        int refs;
        int nrefs;

        /*
         * If not already transitioned to destroy-in-progress we transition
         * to destroy-in-progress, cleanup our ref, and destroy the tdio.
         */
        for (;;) {
                refs = tdio->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs & 0x80000000) {
                        if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs |= 0x80000000;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs)) {
                        dsched_thread_io_destroy(tdio);
                        break;
                }
        }
}

static void
dsched_thread_io_destroy(struct dsched_thread_io *tdio)
{
        struct dsched_thread_ctx *tdctx;
        struct dsched_disk_ctx *diskctx;
        int refs;
        int nrefs;

#if 0
        kprintf("tdio (%p) destruction started, trace:\n", tdio);
        print_backtrace(8);
#endif
        KKASSERT(tdio->qlength == 0);

        while ((diskctx = tdio->diskctx) != NULL) {
                dsched_disk_ctx_ref(diskctx);
                lockmgr(&diskctx->lock, LK_EXCLUSIVE);
                if (diskctx != tdio->diskctx) {
                        lockmgr(&diskctx->lock, LK_RELEASE);
                        dsched_disk_ctx_unref(diskctx);
                        continue;
                }
                KKASSERT(tdio->flags & DSCHED_LINKED_DISK_CTX);
                if (diskctx->dp->d_sched_policy->destroy_tdio)
                        diskctx->dp->d_sched_policy->destroy_tdio(tdio);
                TAILQ_REMOVE(&diskctx->tdio_list, tdio, dlink);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
                tdio->diskctx = NULL;
                dsched_thread_io_unref(tdio);
                lockmgr(&diskctx->lock, LK_RELEASE);
                dsched_disk_ctx_unref(diskctx);
        }
        while ((tdctx = tdio->tdctx) != NULL) {
                dsched_thread_ctx_ref(tdctx);
                lockmgr(&tdctx->lock, LK_EXCLUSIVE);
                if (tdctx != tdio->tdctx) {
                        lockmgr(&tdctx->lock, LK_RELEASE);
                        dsched_thread_ctx_unref(tdctx);
                        continue;
                }
                KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
                TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                tdio->tdctx = NULL;
                dsched_thread_io_unref(tdio);
                lockmgr(&tdctx->lock, LK_RELEASE);
                dsched_thread_ctx_unref(tdctx);
        }

        /*
         * Expect tdio->refcount to be 0x80000000.  If it isn't, someone else
         * still has a temporary ref on the tdio and we have to transition
         * it back to an undestroyed-state (albeit without any associations)
         * so the other user destroys it properly when the ref is released.
         */
        while ((refs = tdio->refcount) != 0x80000000) {
                kprintf("dsched_thread_io: destroy race tdio=%p\n", tdio);
                cpu_ccfence();
                KKASSERT(refs & 0x80000000);
                nrefs = refs & 0x7FFFFFFF;
                if (atomic_cmpset_int(&tdio->refcount, refs, nrefs))
                        return;
        }

        /*
         * Really for sure now.
         */
        objcache_put(dsched_tdio_cache, tdio);
        atomic_subtract_int(&dsched_stats.tdio_allocations, 1);
}
void
dsched_thread_ctx_unref(struct dsched_thread_ctx *tdctx)
{
        int refs;
        int nrefs;

        /*
         * Handle 1->0 transitions for tdctx and nested destruction
         * recursions.  If the refs are already in destruction mode (bit 31
         * set) on the 1->0 transition we don't try to destruct it again.
         *
         * 0x80000001->0x80000000 transitions are handled normally and
         * thus avoid nested destruction.
         */
        for (;;) {
                refs = tdctx->refcount;
                cpu_ccfence();
                nrefs = refs - 1;

                KKASSERT(((refs ^ nrefs) & 0x80000000) == 0);
                if (nrefs) {
                        if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs))
                                break;
                        continue;
                }
                nrefs = 0x80000000;
                if (atomic_cmpset_int(&tdctx->refcount, refs, nrefs)) {
                        dsched_thread_ctx_destroy(tdctx);
                        break;
                }
        }
}

static void
dsched_thread_ctx_destroy(struct dsched_thread_ctx *tdctx)
{
        struct dsched_thread_io *tdio;

        lockmgr(&tdctx->lock, LK_EXCLUSIVE);

        while ((tdio = TAILQ_FIRST(&tdctx->tdio_list)) != NULL) {
                KKASSERT(tdio->flags & DSCHED_LINKED_THREAD_CTX);
                TAILQ_REMOVE(&tdctx->tdio_list, tdio, link);
                atomic_clear_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                tdio->tdctx = NULL;
                lockmgr(&tdctx->lock, LK_RELEASE);      /* avoid deadlock */
                dsched_thread_io_unref_destroy(tdio);
                lockmgr(&tdctx->lock, LK_EXCLUSIVE);
        }
        KKASSERT(tdctx->refcount == 0x80000000);

        lockmgr(&tdctx->lock, LK_RELEASE);

        objcache_put(dsched_tdctx_cache, tdctx);
        atomic_subtract_int(&dsched_stats.tdctx_allocations, 1);
}

/*
 * Ensures that a tdio is assigned to tdctx and disk.
 */
static
struct dsched_thread_io *
dsched_thread_io_alloc(struct disk *dp, struct dsched_thread_ctx *tdctx,
    struct dsched_policy *pol, int tdctx_locked)
{
        struct dsched_thread_io *tdio;
#if 0
        dsched_disk_ctx_ref(dsched_get_disk_priv(dp));
#endif
        tdio = objcache_get(dsched_tdio_cache, M_INTWAIT);
        bzero(tdio, DSCHED_THREAD_IO_MAX_SZ);

        dsched_thread_io_ref(tdio);     /* prevent ripout */
        dsched_thread_io_ref(tdio);     /* for diskctx ref */

        DSCHED_THREAD_IO_LOCKINIT(tdio);
        tdio->dp = dp;

        tdio->diskctx = dsched_get_disk_priv(dp);
        TAILQ_INIT(&tdio->queue);

        if (pol->new_tdio)
                pol->new_tdio(tdio);

        DSCHED_DISK_CTX_LOCK(tdio->diskctx);
        TAILQ_INSERT_TAIL(&tdio->diskctx->tdio_list, tdio, dlink);
        atomic_set_int(&tdio->flags, DSCHED_LINKED_DISK_CTX);
        DSCHED_DISK_CTX_UNLOCK(tdio->diskctx);

        if (tdctx) {
                /*
                 * Put the tdio in the tdctx list.  Inherit the temporary
                 * ref (one ref for each list).
                 */
                if (tdctx_locked == 0)
                        DSCHED_THREAD_CTX_LOCK(tdctx);
                tdio->tdctx = tdctx;
                tdio->p = tdctx->p;
                TAILQ_INSERT_TAIL(&tdctx->tdio_list, tdio, link);
                atomic_set_int(&tdio->flags, DSCHED_LINKED_THREAD_CTX);
                if (tdctx_locked == 0)
                        DSCHED_THREAD_CTX_UNLOCK(tdctx);
        } else {
                dsched_thread_io_unref(tdio);
        }

        tdio->debug_policy = pol;
        tdio->debug_inited = 0xF00F1234;

        atomic_add_int(&dsched_stats.tdio_allocations, 1);

        return(tdio);
}
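/*
 * Note on the ref accounting above: the tdio is returned holding one ref
 * per list it was linked onto (the diskctx list, plus the tdctx list when
 * a tdctx is supplied; otherwise the temporary ref is dropped before
 * returning).  A caller that wants to hold onto the tdio must take its
 * own ref, as dsched_queue() does.
 */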
struct dsched_disk_ctx *
dsched_disk_ctx_alloc(struct disk *dp, struct dsched_policy *pol)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = objcache_get(dsched_diskctx_cache, M_WAITOK);
        bzero(diskctx, DSCHED_DISK_CTX_MAX_SZ);
        dsched_disk_ctx_ref(diskctx);
        diskctx->dp = dp;
        DSCHED_DISK_CTX_LOCKINIT(diskctx);
        TAILQ_INIT(&diskctx->tdio_list);
        /*
         * XXX: magic number 32: most devices have a tag queue
         * of depth 32.
         * It would be better to retrieve a more precise value from
         * the driver.
         */
        diskctx->max_tag_queue_depth = 32;
        diskctx->current_tag_queue_depth = 0;

        atomic_add_int(&dsched_stats.diskctx_allocations, 1);
        if (pol->new_diskctx)
                pol->new_diskctx(diskctx);
        return diskctx;
}

struct dsched_thread_ctx *
dsched_thread_ctx_alloc(struct proc *p)
{
        struct dsched_thread_ctx *tdctx;

        tdctx = objcache_get(dsched_tdctx_cache, M_WAITOK);
        bzero(tdctx, DSCHED_THREAD_CTX_MAX_SZ);
        dsched_thread_ctx_ref(tdctx);
#if 0
        kprintf("dsched_thread_ctx_alloc, new tdctx = %p\n", tdctx);
#endif
        DSCHED_THREAD_CTX_LOCKINIT(tdctx);
        TAILQ_INIT(&tdctx->tdio_list);
        tdctx->p = p;

        atomic_add_int(&dsched_stats.tdctx_allocations, 1);
        /* XXX: no callback here */

        return tdctx;
}

void
policy_new(struct disk *dp, struct dsched_policy *pol)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = dsched_disk_ctx_alloc(dp, pol);
        dsched_disk_ctx_ref(diskctx);
        dsched_set_disk_priv(dp, diskctx);
}

void
policy_destroy(struct disk *dp)
{
        struct dsched_disk_ctx *diskctx;

        diskctx = dsched_get_disk_priv(dp);
        KKASSERT(diskctx != NULL);

        dsched_disk_ctx_unref(diskctx); /* from prepare */
        dsched_disk_ctx_unref(diskctx); /* from alloc */

        dsched_set_disk_priv(dp, NULL);
}
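/*
 * Called whenever a new buf is set up: associate the buf with the current
 * thread's (or process's) tdctx and take a ref on it.  The ref travels
 * with the buf and is either consumed by dsched_queue() ("acquired on
 * new_buf") or dropped in dsched_exit_buf().
 */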
void
dsched_new_buf(struct buf *bp)
{
        struct dsched_thread_ctx *tdctx = NULL;

        if (dsched_inited == 0)
                return;

        if (curproc != NULL) {
                tdctx = dsched_get_proc_priv(curproc);
        } else {
                /* This is a kernel thread, so no proc info is available */
                tdctx = dsched_get_thread_priv(curthread);
        }

#if 0
        /*
         * XXX: hack.  We don't want this assert because we aren't catching
         * all threads.  mi_startup() is still getting away without a tdctx.
         */

        /* by now we should have a tdctx; if not, something bad is going on */
        KKASSERT(tdctx != NULL);
#endif

        if (tdctx) {
                dsched_thread_ctx_ref(tdctx);
        }
        dsched_set_buf_priv(bp, tdctx);
}

void
dsched_exit_buf(struct buf *bp)
{
        struct dsched_thread_ctx *tdctx;

        tdctx = dsched_get_buf_priv(bp);
        if (tdctx != NULL) {
                dsched_clr_buf_priv(bp);
                dsched_thread_ctx_unref(tdctx);
        }
}

void
dsched_new_proc(struct proc *p)
{
        struct dsched_thread_ctx *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(p != NULL);

        tdctx = dsched_thread_ctx_alloc(p);
        tdctx->p = p;
        dsched_thread_ctx_ref(tdctx);

        dsched_set_proc_priv(p, tdctx);
        atomic_add_int(&dsched_stats.nprocs, 1);
}

void
dsched_new_thread(struct thread *td)
{
        struct dsched_thread_ctx *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(td != NULL);

        tdctx = dsched_thread_ctx_alloc(NULL);
        tdctx->td = td;
        dsched_thread_ctx_ref(tdctx);

        dsched_set_thread_priv(td, tdctx);
        atomic_add_int(&dsched_stats.nthreads, 1);
}

void
dsched_exit_proc(struct proc *p)
{
        struct dsched_thread_ctx *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(p != NULL);

        tdctx = dsched_get_proc_priv(p);
        KKASSERT(tdctx != NULL);

        tdctx->dead = 0xDEAD;
        dsched_set_proc_priv(p, NULL);

        dsched_thread_ctx_unref(tdctx); /* one for alloc, */
        dsched_thread_ctx_unref(tdctx); /* one for ref */
        atomic_subtract_int(&dsched_stats.nprocs, 1);
}

void
dsched_exit_thread(struct thread *td)
{
        struct dsched_thread_ctx *tdctx;

        if (dsched_inited == 0)
                return;

        KKASSERT(td != NULL);

        tdctx = dsched_get_thread_priv(td);
        KKASSERT(tdctx != NULL);

        tdctx->dead = 0xDEAD;
        dsched_set_thread_priv(td, 0);

        dsched_thread_ctx_unref(tdctx); /* one for alloc, */
        dsched_thread_ctx_unref(tdctx); /* one for ref */
        atomic_subtract_int(&dsched_stats.nthreads, 1);
}
/*
 * Sets up a ref'd tdio for the calling thread on diskctx->dp.  The refs
 * are owned by the diskctx and tdctx lists the tdio is linked onto.
 */
void
dsched_new_policy_thread_tdio(struct dsched_disk_ctx *diskctx,
    struct dsched_policy *pol)
{
        struct dsched_thread_ctx *tdctx;

        tdctx = dsched_get_thread_priv(curthread);
        KKASSERT(tdctx != NULL);
        dsched_thread_io_alloc(diskctx->dp, tdctx, pol, 0);
}

/* DEFAULT NOOP POLICY */

static int
noop_prepare(struct dsched_disk_ctx *diskctx)
{
        return 0;
}

static void
noop_teardown(struct dsched_disk_ctx *diskctx)
{
}

static void
noop_cancel(struct dsched_disk_ctx *diskctx)
{
}

static int
noop_queue(struct dsched_disk_ctx *diskctx, struct dsched_thread_io *tdio,
    struct bio *bio)
{
        dsched_strategy_raw(diskctx->dp, bio);
#if 0
        dsched_strategy_async(diskctx->dp, bio, noop_completed, NULL);
#endif
        return 0;
}

/*
 * SYSINIT stuff
 */
static void
dsched_init(void)
{
        dsched_tdio_cache = objcache_create("dsched-tdio-cache", 0, 0,
            NULL, NULL, NULL,
            objcache_malloc_alloc,
            objcache_malloc_free,
            &dsched_thread_io_malloc_args);

        dsched_tdctx_cache = objcache_create("dsched-tdctx-cache", 0, 0,
            NULL, NULL, NULL,
            objcache_malloc_alloc,
            objcache_malloc_free,
            &dsched_thread_ctx_malloc_args);

        dsched_diskctx_cache = objcache_create("dsched-diskctx-cache", 0, 0,
            NULL, NULL, NULL,
            objcache_malloc_alloc,
            objcache_malloc_free,
            &dsched_disk_ctx_malloc_args);

        bzero(&dsched_stats, sizeof(struct dsched_stats));

        lockinit(&dsched_lock, "dsched lock", 0, LK_CANRECURSE);
        DSCHED_GLOBAL_THREAD_CTX_LOCKINIT();

        dsched_register(&dsched_noop_policy);

        dsched_inited = 1;
}

static void
dsched_uninit(void)
{
}

SYSINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_FIRST,
    dsched_init, NULL);
SYSUNINIT(subr_dsched_register, SI_SUB_CREATE_INIT-1, SI_ORDER_ANY,
    dsched_uninit, NULL);

/*
 * SYSCTL stuff
 */
static int
sysctl_dsched_stats(SYSCTL_HANDLER_ARGS)
{
        return (sysctl_handle_opaque(oidp, &dsched_stats,
            sizeof(struct dsched_stats), req));
}

static int
sysctl_dsched_list_policies(SYSCTL_HANDLER_ARGS)
{
        struct dsched_policy *pol = NULL;
        int error, first = 1;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        while ((pol = dsched_policy_enumerate(pol))) {
                if (!first) {
                        error = SYSCTL_OUT(req, " ", 1);
                        if (error)
                                break;
                } else {
                        first = 0;
                }
                error = SYSCTL_OUT(req, pol->name, strlen(pol->name));
                if (error)
                        break;
        }

        lockmgr(&dsched_lock, LK_RELEASE);

        error = SYSCTL_OUT(req, "", 1);

        return error;
}

static int
sysctl_dsched_policy(SYSCTL_HANDLER_ARGS)
{
        char buf[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_disk_ctx *diskctx = arg1;
        struct dsched_policy *pol = NULL;
        int error;

        if (diskctx == NULL) {
                return 0;
        }

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        pol = diskctx->dp->d_sched_policy;
        memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

        error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
        if (error || req->newptr == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return (error);
        }

        pol = dsched_find_policy(buf);
        if (pol == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return 0;
        }
        dsched_switch(diskctx->dp, pol);

        lockmgr(&dsched_lock, LK_RELEASE);

        return error;
}

static int
sysctl_dsched_default_policy(SYSCTL_HANDLER_ARGS)
{
        char buf[DSCHED_POLICY_NAME_LENGTH];
        struct dsched_policy *pol = NULL;
        int error;

        lockmgr(&dsched_lock, LK_EXCLUSIVE);

        pol = default_policy;
        memcpy(buf, pol->name, DSCHED_POLICY_NAME_LENGTH);

        error = sysctl_handle_string(oidp, buf, DSCHED_POLICY_NAME_LENGTH, req);
        if (error || req->newptr == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return (error);
        }

        pol = dsched_find_policy(buf);
        if (pol == NULL) {
                lockmgr(&dsched_lock, LK_RELEASE);
                return 0;
        }

        default_set = 1;
        default_policy = pol;

        lockmgr(&dsched_lock, LK_RELEASE);

        return error;
}

SYSCTL_NODE(, OID_AUTO, dsched, CTLFLAG_RD, NULL,
    "Disk Scheduler Framework (dsched) magic");
SYSCTL_NODE(_dsched, OID_AUTO, policy, CTLFLAG_RW, NULL,
    "List of disks and their policies");
SYSCTL_INT(_dsched, OID_AUTO, debug, CTLFLAG_RW, &dsched_debug_enable,
    0, "Enable dsched debugging");
SYSCTL_PROC(_dsched, OID_AUTO, stats, CTLTYPE_OPAQUE|CTLFLAG_RD,
    0, sizeof(struct dsched_stats), sysctl_dsched_stats, "dsched_stats",
    "dsched statistics");
SYSCTL_PROC(_dsched, OID_AUTO, policies, CTLTYPE_STRING|CTLFLAG_RD,
    NULL, 0, sysctl_dsched_list_policies, "A", "names of available policies");
SYSCTL_PROC(_dsched_policy, OID_AUTO, default, CTLTYPE_STRING|CTLFLAG_RW,
    NULL, 0, sysctl_dsched_default_policy, "A", "default dsched policy");

static void
dsched_sysctl_add_disk(struct dsched_disk_ctx *diskctx, char *name)
{
        if (!(diskctx->flags & DSCHED_SYSCTL_CTX_INITED)) {
                diskctx->flags |= DSCHED_SYSCTL_CTX_INITED;
                sysctl_ctx_init(&diskctx->sysctl_ctx);
        }

        SYSCTL_ADD_PROC(&diskctx->sysctl_ctx,
            SYSCTL_STATIC_CHILDREN(_dsched_policy),
            OID_AUTO, name, CTLTYPE_STRING|CTLFLAG_RW,
            diskctx, 0, sysctl_dsched_policy, "A", "policy");
}
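/*
 * Example sysctl(8) usage of the interface above (illustrative; "fq"
 * assumes such a policy module is loaded, and "da0" stands in for a
 * real disk name):
 *
 *      sysctl dsched.policies                  # list registered policies
 *      sysctl dsched.policy.default=fq         # change the default policy
 *      sysctl dsched.policy.da0=noop           # change the policy of da0
 */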