/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/usched_dummy.c,v 1.9 2008/04/21 15:24:46 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/resourcevar.h>
#include <sys/spinlock.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>
#include <sys/mplock2.h>

#define MAXPRI			128
#define PRIBASE_REALTIME	0
#define PRIBASE_NORMAL		MAXPRI
#define PRIBASE_IDLE		(MAXPRI * 2)
#define PRIBASE_THREAD		(MAXPRI * 3)
#define PRIBASE_NULL		(MAXPRI * 4)

#define lwp_priority	lwp_usdata.bsd4.priority
#define lwp_estcpu	lwp_usdata.bsd4.estcpu

static void dummy_acquire_curproc(struct lwp *lp);
static void dummy_release_curproc(struct lwp *lp);
static void dummy_select_curproc(globaldata_t gd);
static void dummy_setrunqueue(struct lwp *lp);
static void dummy_schedulerclock(struct lwp *lp, sysclock_t period,
				sysclock_t cpstamp);
static void dummy_recalculate_estcpu(struct lwp *lp);
static void dummy_resetpriority(struct lwp *lp);
static void dummy_forking(struct lwp *plp, struct lwp *lp);
static void dummy_exiting(struct lwp *plp, struct lwp *lp);
static void dummy_yield(struct lwp *lp);

struct usched usched_dummy = {
	{ NULL },
	"dummy", "Dummy DragonFly Scheduler",
	NULL,			/* default registration */
	NULL,			/* default deregistration */
	dummy_acquire_curproc,
	dummy_release_curproc,
	dummy_setrunqueue,
	dummy_schedulerclock,
	dummy_recalculate_estcpu,
	dummy_resetpriority,
	dummy_forking,
	dummy_exiting,
	NULL,			/* setcpumask not supported */
	dummy_yield
};
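/*
 * Illustrative sketch (not compiled): the kernel never calls the dummy_*
 * functions directly, it dispatches through the function pointer table
 * above via the usched pointer attached to each process.  The field names
 * used below are assumptions; see <sys/usched.h> for the real layout.
 */
#if 0
static __inline void
usched_dispatch_example(struct lwp *lp)
{
	/* resolves to dummy_acquire_curproc() when usched_dummy is active */
	lp->lwp_proc->p_usched->acquire_curproc(lp);
}
#endif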
struct usched_dummy_pcpu {
	int	rrcount;
	struct thread helper_thread;
	struct lwp *uschedcp;
};

typedef struct usched_dummy_pcpu *dummy_pcpu_t;

static struct usched_dummy_pcpu dummy_pcpu[MAXCPU];
static cpumask_t dummy_curprocmask = -1;
static cpumask_t dummy_rdyprocmask;
static struct spinlock dummy_spin;
static TAILQ_HEAD(rq, lwp) dummy_runq;
static int dummy_runqcount;

static int usched_dummy_rrinterval = (ESTCPUFREQ + 9) / 10;
SYSCTL_INT(_kern, OID_AUTO, usched_dummy_rrinterval, CTLFLAG_RW,
	&usched_dummy_rrinterval, 0, "");

/*
 * Initialize the run queue at boot time and clear cpu 0 in curprocmask
 * to allow dummy scheduling on cpu 0.
 */
static void
dummyinit(void *dummy)
{
	TAILQ_INIT(&dummy_runq);
	spin_init(&dummy_spin);
	atomic_clear_int(&dummy_curprocmask, 1);
}
SYSINIT(runqueue, SI_BOOT2_USCHED, SI_ORDER_FIRST, dummyinit, NULL)

/*
 * DUMMY_ACQUIRE_CURPROC
 *
 * This function is called when the kernel intends to return to userland.
 * It is responsible for making the thread the current designated userland
 * thread for this cpu, blocking if necessary.
 *
 * We are expected to handle userland reschedule requests here too.
 *
 * WARNING! THIS FUNCTION IS ALLOWED TO CAUSE THE CURRENT THREAD TO MIGRATE
 * TO ANOTHER CPU!  Because most of the kernel assumes that no migration
 * will occur, this function is called only under very controlled
 * circumstances.
 *
 * MPSAFE
 */
static void
dummy_acquire_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	thread_t td = lp->lwp_thread;

	/*
	 * Possibly select another thread
	 */
	if (user_resched_wanted())
		dummy_select_curproc(gd);

	/*
	 * If this cpu has no current thread, select ourself
	 */
	if (dd->uschedcp == lp ||
	    (dd->uschedcp == NULL && TAILQ_EMPTY(&dummy_runq))) {
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		dd->uschedcp = lp;
		return;
	}

	/*
	 * If this cpu's current user process thread is not our thread,
	 * deschedule ourselves and place us on the run queue, then
	 * switch away.
	 *
	 * We loop until we become the current process.  It's a good idea
	 * to run any passive release(s) before we mess with the scheduler
	 * so our thread is in the expected state.
	 */
	KKASSERT(dd->uschedcp != lp);
	if (td->td_release)
		td->td_release(lp->lwp_thread);
	do {
		crit_enter();
		lwkt_deschedule_self(td);
		dummy_setrunqueue(lp);
		if ((td->td_flags & TDF_RUNQ) == 0)
			++lp->lwp_ru.ru_nivcsw;
		lwkt_switch();		/* WE MAY MIGRATE TO ANOTHER CPU */
		crit_exit();
		gd = mycpu;
		dd = &dummy_pcpu[gd->gd_cpuid];
		KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
	} while (dd->uschedcp != lp);
}
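/*
 * Condensed sketch (not compiled) of the interlock in the loop above.
 * The helper names are hypothetical; the point is the ordering: we
 * deschedule ourselves *before* publishing the lwp on the runq, so if
 * another cpu's selector picks us up and lwkt_schedule()s our thread in
 * the window before lwkt_switch(), the pending deschedule is cancelled
 * and lwkt_switch() returns immediately instead of losing the wakeup.
 */
#if 0
do {
	lwkt_deschedule_self(td);	/* 1: arm the sleep */
	publish_on_runq(lp);		/* 2: now others may wake us */
	lwkt_switch();			/* 3: sleep, unless already woken */
} while (!we_are_uschedcp());
#endif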
/*
 * DUMMY_RELEASE_CURPROC
 *
 * This routine detaches the current thread from the userland scheduler,
 * usually because the thread needs to run in the kernel (at kernel
 * priority) for a while.
 *
 * This routine is also responsible for selecting a new thread to
 * make the current thread.
 *
 * WARNING!  The MP lock may be in an unsynchronized state due to the
 * way get_mplock() works and the fact that this function may be called
 * from a passive release during a lwkt_switch().  try_mplock() will deal
 * with this for us but you should be aware that td_mpcount may not be
 * usable.
 *
 * MPSAFE
 */
static void
dummy_release_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
	if (dd->uschedcp == lp) {
		dummy_select_curproc(gd);
	}
}

/*
 * DUMMY_SELECT_CURPROC
 *
 * Select a new current process for this cpu.  This satisfies a user
 * scheduler reschedule request so clear that too.
 *
 * This routine is also responsible for equal-priority round-robining,
 * typically triggered from dummy_schedulerclock().  In our dummy example
 * all the 'user' threads are LWKT scheduled all at once and we just
 * call lwkt_switch().
 *
 * MPSAFE
 */
static
void
dummy_select_curproc(globaldata_t gd)
{
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	struct lwp *lp;

	clear_user_resched();
	spin_lock(&dummy_spin);
	if ((lp = TAILQ_FIRST(&dummy_runq)) == NULL) {
		dd->uschedcp = NULL;
		atomic_clear_int(&dummy_curprocmask, gd->gd_cpumask);
		spin_unlock(&dummy_spin);
	} else {
		--dummy_runqcount;
		TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
		lp->lwp_flag &= ~LWP_ONRUNQ;
		dd->uschedcp = lp;
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		spin_unlock(&dummy_spin);
#ifdef SMP
		lwkt_acquire(lp->lwp_thread);
#endif
		lwkt_schedule(lp->lwp_thread);
	}
}

/*
 * DUMMY_SETRUNQUEUE
 *
 * This routine is called to schedule a new user process after a fork.
 * The scheduler module itself might also call this routine to place
 * the current process on the userland scheduler's run queue prior
 * to calling dummy_select_curproc().
 *
 * The caller may set P_PASSIVE_ACQ in p_flag to indicate that we should
 * attempt to leave the thread on the current cpu.
 *
 * MPSAFE
 */
static void
dummy_setrunqueue(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	cpumask_t mask;
	int cpuid;

	if (dd->uschedcp == NULL) {
		dd->uschedcp = lp;
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		lwkt_schedule(lp->lwp_thread);
	} else {
		/*
		 * Add to our global runq
		 */
		KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
		spin_lock(&dummy_spin);
		++dummy_runqcount;
		TAILQ_INSERT_TAIL(&dummy_runq, lp, lwp_procq);
		lp->lwp_flag |= LWP_ONRUNQ;
#ifdef SMP
		lwkt_giveaway(lp->lwp_thread);
#endif

		/* lp = TAILQ_FIRST(&dummy_runq); */

		/*
		 * Notify the next available cpu (some cpu affinity could
		 * be applied here).
		 *
		 * The rdyprocmask bit records the fact that there is a
		 * process on the runq that needs service.  If the helper
		 * thread cannot find a home for it, it will forward the
		 * request to another available cpu.
		 */
		mask = ~dummy_curprocmask & dummy_rdyprocmask &
		       gd->gd_other_cpus;
		if (mask) {
			cpuid = bsfl(mask);
			atomic_clear_int(&dummy_rdyprocmask, 1 << cpuid);
			spin_unlock(&dummy_spin);
			lwkt_schedule(&dummy_pcpu[cpuid].helper_thread);
		} else {
			spin_unlock(&dummy_spin);
		}
	}
}
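/*
 * Worked example (not compiled) of the cpu-selection math in
 * dummy_setrunqueue() above.  The concrete mask values are made up
 * purely for illustration.
 */
#if 0
static void
cpu_selection_example(void)
{
	cpumask_t curproc = 0x04;	/* cpu 2 already runs a user proc */
	cpumask_t rdy = 0x0e;		/* helpers ready on cpus 1, 2, 3 */
	cpumask_t others = 0x0e;	/* gd_other_cpus as seen from cpu 0 */
	int cpuid;

	/*
	 * ~curproc & rdy & others == 0x0a (cpus 1 and 3).  bsfl()
	 * returns the lowest set bit, so cpu 1 is chosen; its
	 * rdyprocmask bit is then cleared atomically and its helper
	 * thread is kicked.
	 */
	cpuid = bsfl(~curproc & rdy & others);	/* cpuid == 1 */
}
#endif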
/*
 * This routine is called from a systimer IPI.  Thus it is called with
 * a critical section held.  Any spinlocks we get here that are also
 * obtained in other procedures must be protected by a critical section
 * in those other procedures to avoid a deadlock.
 *
 * The MP lock may or may not be held on entry and cannot be obtained
 * by this routine (because it is called from a systimer IPI).  Additionally,
 * because this is equivalent to a FAST interrupt, spinlocks cannot be used
 * (or at least, you have to check that gd_spin* counts are 0 before you
 * can).
 *
 * This routine is called at ESTCPUFREQ on each cpu independently.
 *
 * This routine typically queues a reschedule request, which will cause
 * the scheduler's BLAH_select_curproc() to be called as soon as possible.
 *
 * MPSAFE
 */
static
void
dummy_schedulerclock(struct lwp *lp, sysclock_t period, sysclock_t cpstamp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	if (++dd->rrcount >= usched_dummy_rrinterval) {
		dd->rrcount = 0;
		need_user_resched();
	}
}

/*
 * DUMMY_RECALCULATE_ESTCPU
 *
 * Called once a second for any process that is running or has slept
 * for less than 2 seconds.
 *
 * MPSAFE
 */
static
void
dummy_recalculate_estcpu(struct lwp *lp)
{
}

/*
 * MPSAFE
 */
static
void
dummy_yield(struct lwp *lp)
{
	need_user_resched();
}

/*
 * DUMMY_RESETPRIORITY
 *
 * This routine is called after the kernel has potentially modified
 * the lwp_rtprio structure.  The target process may be running or
 * sleeping or scheduled but not yet running or owned by another cpu.
 * Basically, it can be in virtually any state.
 *
 * This routine is called by fork1() for initial setup with the process
 * off the run queue, and also may be called normally with the process
 * on or off the run queue.
 *
 * MPSAFE
 */
static void
dummy_resetpriority(struct lwp *lp)
{
	/* XXX spinlock usually needed */
	/*
	 * Set p_priority for general process comparisons
	 */
	switch(lp->lwp_rtprio.type) {
	case RTP_PRIO_REALTIME:
		lp->lwp_priority = PRIBASE_REALTIME + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_NORMAL:
		lp->lwp_priority = PRIBASE_NORMAL + lp->lwp_rtprio.prio;
		break;
	case RTP_PRIO_IDLE:
		lp->lwp_priority = PRIBASE_IDLE + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_THREAD:
		lp->lwp_priority = PRIBASE_THREAD + lp->lwp_rtprio.prio;
		return;
	}
	/* XXX spinlock usually needed */
}

/*
 * DUMMY_FORKING
 *
 * Called from fork1() when a new child process is being created.  Allows
 * the scheduler to predispose the child process before it gets scheduled.
 *
 * MPSAFE
 */
static void
dummy_forking(struct lwp *plp, struct lwp *lp)
{
	lp->lwp_estcpu = plp->lwp_estcpu;
#if 0
	++plp->lwp_estcpu;
#endif
}

/*
 * DUMMY_EXITING
 *
 * Called when the parent reaps a child.  Typically used to propagate cpu
 * use by the child back to the parent as part of a batch detection
 * heuristic.
 *
 * NOTE: cpu use is not normally back-propagated to PID 1.
 *
 * MPSAFE
 */
static void
dummy_exiting(struct lwp *plp, struct lwp *lp)
{
}
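/*
 * Worked example (not compiled) of the priority bands computed by
 * dummy_resetpriority() above.  With MAXPRI == 128 the bands stack as
 * realtime [0,128), normal [128,256), idle [256,384), thread [384,512),
 * so priorities never compare across bands.
 */
#if 0
static void
priority_band_example(void)
{
	struct lwp example;

	example.lwp_rtprio.type = RTP_PRIO_NORMAL;
	example.lwp_rtprio.prio = 10;
	dummy_resetpriority(&example);
	/* example.lwp_priority == PRIBASE_NORMAL + 10 == 138 */
}
#endif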
/*
 * SMP systems may need a scheduler helper thread.  This is how one can
 * be set up.
 *
 * We use a neat LWKT scheduling trick to interlock the helper thread.
 * It is possible to deschedule an LWKT thread and then do some work
 * before switching away.  The thread can be rescheduled at any time,
 * even before we switch away.
 *
 * MPSAFE
 */
#ifdef SMP

static void
dummy_sched_thread(void *dummy)
{
	globaldata_t gd;
	dummy_pcpu_t dd;
	struct lwp *lp;
	cpumask_t cpumask;
	cpumask_t tmpmask;
	int cpuid;
	int tmpid;

	gd = mycpu;
	cpuid = gd->gd_cpuid;
	dd = &dummy_pcpu[cpuid];
	cpumask = 1 << cpuid;

	for (;;) {
		lwkt_deschedule_self(gd->gd_curthread);	/* interlock */
		atomic_set_int(&dummy_rdyprocmask, cpumask);
		spin_lock(&dummy_spin);
		if (dd->uschedcp) {
			/*
			 * We raced another cpu trying to schedule a thread
			 * onto us.  If the runq isn't empty hit another
			 * free cpu.
			 */
			tmpmask = ~dummy_curprocmask & dummy_rdyprocmask &
				  gd->gd_other_cpus;
			if (tmpmask && dummy_runqcount) {
				tmpid = bsfl(tmpmask);
				KKASSERT(tmpid != cpuid);
				atomic_clear_int(&dummy_rdyprocmask,
						 1 << tmpid);
				spin_unlock(&dummy_spin);
				lwkt_schedule(&dummy_pcpu[tmpid].helper_thread);
			} else {
				spin_unlock(&dummy_spin);
			}
		} else if ((lp = TAILQ_FIRST(&dummy_runq)) != NULL) {
			--dummy_runqcount;
			TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
			lp->lwp_flag &= ~LWP_ONRUNQ;
			dd->uschedcp = lp;
			atomic_set_int(&dummy_curprocmask, cpumask);
			spin_unlock(&dummy_spin);
#ifdef SMP
			lwkt_acquire(lp->lwp_thread);
#endif
			lwkt_schedule(lp->lwp_thread);
		} else {
			spin_unlock(&dummy_spin);
		}
		lwkt_switch();
	}
}

/*
 * Set up our scheduler helpers.  Note that curprocmask bit 0 has already
 * been cleared by dummyinit() and we should not mess with it further.
 */
static void
dummy_sched_thread_cpu_init(void)
{
	int i;

	if (bootverbose)
		kprintf("start dummy scheduler helpers on cpus:");

	for (i = 0; i < ncpus; ++i) {
		dummy_pcpu_t dd = &dummy_pcpu[i];
		cpumask_t mask = 1 << i;

		if ((mask & smp_active_mask) == 0)
			continue;

		if (bootverbose)
			kprintf(" %d", i);

		lwkt_create(dummy_sched_thread, NULL, NULL, &dd->helper_thread,
			    TDF_STOPREQ, i, "dsched %d", i);

		/*
		 * Allow user scheduling on the target cpu.  cpu #0 has
		 * already been enabled in dummyinit().
		 */
		if (i)
			atomic_clear_int(&dummy_curprocmask, mask);
		atomic_set_int(&dummy_rdyprocmask, mask);
	}
	if (bootverbose)
		kprintf("\n");
}
SYSINIT(uschedtd, SI_BOOT2_USCHED, SI_ORDER_SECOND,
	dummy_sched_thread_cpu_init, NULL)

#endif
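/*
 * Condensed sketch (not compiled) of the LWKT interlock trick used by
 * dummy_sched_thread() above.  The helper names are hypothetical.
 */
#if 0
static void
helper_interlock_sketch(void)
{
	for (;;) {
		lwkt_deschedule_self(curthread);  /* arm the interlock */
		advertise_ready();	/* set our dummy_rdyprocmask bit */
		scan_runq();		/* do work while still runnable */
		/*
		 * If anyone lwkt_schedule()d us while we were working,
		 * the deschedule is cancelled and lwkt_switch() returns
		 * immediately; otherwise we sleep here until kicked.
		 */
		lwkt_switch();
	}
}
#endif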