/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/usched_dummy.c,v 1.9 2008/04/21 15:24:46 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/resourcevar.h>
#include <sys/spinlock.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>

#define MAXPRI			128
#define PRIBASE_REALTIME	0
#define PRIBASE_NORMAL		MAXPRI
#define PRIBASE_IDLE		(MAXPRI * 2)
#define PRIBASE_THREAD		(MAXPRI * 3)
#define PRIBASE_NULL		(MAXPRI * 4)

#define lwp_priority	lwp_usdata.bsd4.priority
#define lwp_estcpu	lwp_usdata.bsd4.estcpu

static void dummy_acquire_curproc(struct lwp *lp);
static void dummy_release_curproc(struct lwp *lp);
static void dummy_select_curproc(globaldata_t gd);
static void dummy_setrunqueue(struct lwp *lp);
static void dummy_schedulerclock(struct lwp *lp, sysclock_t period,
				sysclock_t cpstamp);
static void dummy_recalculate_estcpu(struct lwp *lp);
static void dummy_resetpriority(struct lwp *lp);
static void dummy_forking(struct lwp *plp, struct lwp *lp);
static void dummy_exiting(struct lwp *plp, struct lwp *lp);
static void dummy_yield(struct lwp *lp);

struct usched usched_dummy = {
	{ NULL },
	"dummy", "Dummy DragonFly Scheduler",
	NULL,			/* default registration */
	NULL,			/* default deregistration */
	dummy_acquire_curproc,
	dummy_release_curproc,
	dummy_setrunqueue,
	dummy_schedulerclock,
	dummy_recalculate_estcpu,
	dummy_resetpriority,
	dummy_forking,
	dummy_exiting,
	NULL,			/* setcpumask not supported */
	dummy_yield
};

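/*
 * Illustrative sketch only (assumed interface): a scheduler module such
 * as this is normally made known to the userland scheduler framework in
 * kern_usched.c.  Assuming the usched_ctl() interface declared in
 * <sys/usched.h>, registration would look roughly like:
 *
 *	usched_ctl(&usched_dummy, USCH_ADD);
 *
 * The NULL registration/deregistration entries in the structure above
 * select the framework's default hooks.
 */
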
struct usched_dummy_pcpu {
	int rrcount;
	struct thread helper_thread;
	struct lwp *uschedcp;
};

typedef struct usched_dummy_pcpu *dummy_pcpu_t;

static struct usched_dummy_pcpu dummy_pcpu[MAXCPU];
static cpumask_t dummy_curprocmask = -1;
static cpumask_t dummy_rdyprocmask;
static struct spinlock dummy_spin;
static TAILQ_HEAD(rq, lwp) dummy_runq;
static int dummy_runqcount;

static int usched_dummy_rrinterval = (ESTCPUFREQ + 9) / 10;
SYSCTL_INT(_kern, OID_AUTO, usched_dummy_rrinterval, CTLFLAG_RW,
	&usched_dummy_rrinterval, 0, "");

/*
 * Initialize the run queues at boot time, and clear cpu 0 in curprocmask
 * to allow dummy scheduling on cpu 0.
 */
static void
dummyinit(void *dummy)
{
	TAILQ_INIT(&dummy_runq);
	spin_init(&dummy_spin);
	atomic_clear_int(&dummy_curprocmask, 1);
}
SYSINIT(runqueue, SI_BOOT2_USCHED, SI_ORDER_FIRST, dummyinit, NULL)

/*
 * DUMMY_ACQUIRE_CURPROC
 *
 * This function is called when the kernel intends to return to userland.
 * It is responsible for making the thread the current designated userland
 * thread for this cpu, blocking if necessary.
 *
 * We are expected to handle userland reschedule requests here too.
 *
 * WARNING! THIS FUNCTION IS ALLOWED TO CAUSE THE CURRENT THREAD TO MIGRATE
 * TO ANOTHER CPU!  Because most of the kernel assumes that no migration will
 * occur, this function is called only under very controlled circumstances.
 *
 * MPSAFE
 */
static void
dummy_acquire_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	thread_t td = lp->lwp_thread;

	/*
	 * Possibly select another thread
	 */
	if (user_resched_wanted())
		dummy_select_curproc(gd);

	/*
	 * If this cpu has no current thread, select ourself
	 */
	if (dd->uschedcp == NULL && TAILQ_EMPTY(&dummy_runq)) {
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		dd->uschedcp = lp;
		return;
	}

	/*
	 * If this cpu's current user process thread is not our thread,
	 * deschedule ourselves and place us on the run queue, then
	 * switch away.
	 *
	 * We loop until we become the current process.  It's a good idea
	 * to run any passive release(s) before we mess with the scheduler
	 * so our thread is in the expected state.
	 */
	KKASSERT(dd->uschedcp != lp);
	if (td->td_release)
		td->td_release(lp->lwp_thread);
	do {
		crit_enter();
		lwkt_deschedule_self(td);
		dummy_setrunqueue(lp);
		if ((td->td_flags & TDF_RUNQ) == 0)
			++lp->lwp_ru.ru_nivcsw;
		lwkt_switch();		/* WE MAY MIGRATE TO ANOTHER CPU */
		crit_exit();
		gd = mycpu;
		dd = &dummy_pcpu[gd->gd_cpuid];
		KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
	} while (dd->uschedcp != lp);
}

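/*
 * Walk-through of the acquisition loop above: a thread returning to
 * userland that is not the designated user thread parks itself on the
 * global runq via dummy_setrunqueue() and switches away.  It resumes
 * only after dummy_select_curproc() (possibly running on another cpu)
 * has popped it off the runq and installed it as dd->uschedcp, which is
 * why LWP_ONRUNQ is asserted clear on each wakeup and the loop
 * terminates only when dd->uschedcp == lp.
 */
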
/*
 * DUMMY_RELEASE_CURPROC
 *
 * This routine detaches the current thread from the userland scheduler,
 * usually because the thread needs to run in the kernel (at kernel priority)
 * for a while.
 *
 * This routine is also responsible for selecting a new thread to
 * make the current thread.
 *
 * WARNING! The MP lock may be in an unsynchronized state due to the
 * way get_mplock() works and the fact that this function may be called
 * from a passive release during a lwkt_switch().  try_mplock() will deal
 * with this for us but you should be aware that td_mpcount may not be
 * usable.
 *
 * MPSAFE
 */
static void
dummy_release_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
	if (dd->uschedcp == lp) {
		dummy_select_curproc(gd);
	}
}

/*
 * DUMMY_SELECT_CURPROC
 *
 * Select a new current process for this cpu.  This satisfies a user
 * scheduler reschedule request so clear that too.
 *
 * This routine is also responsible for equal-priority round-robining,
 * typically triggered from dummy_schedulerclock().  In our dummy example
 * all the 'user' threads are LWKT scheduled all at once and we just
 * call lwkt_switch().
 *
 * MPSAFE
 */
static
void
dummy_select_curproc(globaldata_t gd)
{
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	struct lwp *lp;

	clear_user_resched();
	spin_lock_wr(&dummy_spin);
	if ((lp = TAILQ_FIRST(&dummy_runq)) == NULL) {
		dd->uschedcp = NULL;
		atomic_clear_int(&dummy_curprocmask, gd->gd_cpumask);
		spin_unlock_wr(&dummy_spin);
	} else {
		--dummy_runqcount;
		TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
		lp->lwp_flag &= ~LWP_ONRUNQ;
		dd->uschedcp = lp;
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		spin_unlock_wr(&dummy_spin);
#ifdef SMP
		lwkt_acquire(lp->lwp_thread);
#endif
		lwkt_schedule(lp->lwp_thread);
	}
}

/*
 * DUMMY_SETRUNQUEUE
 *
 * This routine is called to schedule a new user process after a fork.
 * The scheduler module itself might also call this routine to place
 * the current process on the userland scheduler's run queue prior
 * to calling dummy_select_curproc().
 *
 * The caller may set P_PASSIVE_ACQ in p_flag to indicate that we should
 * attempt to leave the thread on the current cpu.
 *
 * MPSAFE
 */
static void
dummy_setrunqueue(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	cpumask_t mask;
	int cpuid;

	if (dd->uschedcp == NULL) {
		dd->uschedcp = lp;
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		lwkt_schedule(lp->lwp_thread);
	} else {
		/*
		 * Add to our global runq
		 */
		KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
		spin_lock_wr(&dummy_spin);
		++dummy_runqcount;
		TAILQ_INSERT_TAIL(&dummy_runq, lp, lwp_procq);
		lp->lwp_flag |= LWP_ONRUNQ;
#ifdef SMP
		lwkt_giveaway(lp->lwp_thread);
#endif

		/* lp = TAILQ_FIRST(&dummy_runq); */

		/*
		 * Notify the next available cpu.  Some cpu affinity
		 * logic could be done here.
		 *
		 * The rdyprocmask bit records the fact that there
		 * is a process on the runq that needs service.  If the
		 * helper thread cannot find a home for it, it will forward
		 * the request to another available cpu.
		 */
		mask = ~dummy_curprocmask & dummy_rdyprocmask &
		       gd->gd_other_cpus;
		if (mask) {
			cpuid = bsfl(mask);
			atomic_clear_int(&dummy_rdyprocmask, 1 << cpuid);
			spin_unlock_wr(&dummy_spin);
			lwkt_schedule(&dummy_pcpu[cpuid].helper_thread);
		} else {
			spin_unlock_wr(&dummy_spin);
		}
	}
}

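/*
 * Worked example of the notification mask above (hypothetical values):
 * with dummy_curprocmask = 0x3 (cpus 0-1 own a user process),
 * dummy_rdyprocmask = 0xc (helpers on cpus 2-3 parked) and
 * gd_other_cpus = 0xe, the mask is ~0x3 & 0xc & 0xe = 0xc, and
 * bsfl(0xc) selects cpu 2.  Clearing cpu 2's rdyprocmask bit before
 * dropping the spinlock ensures no other cpu wakes the same helper
 * for the same runq entry.
 */
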
/*
 * This routine is called from a systimer IPI.  Thus it is called with
 * a critical section held.  Any spinlocks we get here that are also
 * obtained in other procedures must be protected by a critical section
 * in those other procedures to avoid a deadlock.
 *
 * The MP lock may or may not be held on entry and cannot be obtained
 * by this routine (because it is called from a systimer IPI).  Additionally,
 * because this is equivalent to a FAST interrupt, spinlocks cannot be used
 * (or at least, you have to check that gd_spin* counts are 0 before you
 * can).
 *
 * This routine is called at ESTCPUFREQ on each cpu independently.
 *
 * This routine typically queues a reschedule request, which will cause
 * the scheduler's BLAH_select_curproc() to be called as soon as possible.
 *
 * MPSAFE
 */
static
void
dummy_schedulerclock(struct lwp *lp, sysclock_t period, sysclock_t cpstamp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	if (++dd->rrcount >= usched_dummy_rrinterval) {
		dd->rrcount = 0;
		need_user_resched();
	}
}

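/*
 * Example of the round-robin arithmetic above (hypothetical ESTCPUFREQ
 * value): if ESTCPUFREQ were 50, usched_dummy_rrinterval would compute
 * to (50 + 9) / 10 = 5, so need_user_resched() fires on every 5th
 * scheduler clock tick, i.e. roughly 10 round-robin events per second
 * regardless of the underlying clock frequency.
 */
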
/*
 * DUMMY_RECALCULATE_ESTCPU
 *
 * Called once a second for any process that is running or has slept
 * for less than 2 seconds.
 *
 * MPSAFE
 */
static
void
dummy_recalculate_estcpu(struct lwp *lp)
{
}

static
void
dummy_yield(struct lwp *lp)
{
	need_user_resched();
}

/*
 * DUMMY_RESETPRIORITY
 *
 * This routine is called after the kernel has potentially modified
 * the lwp_rtprio structure.  The target process may be running or sleeping
 * or scheduled but not yet running or owned by another cpu.  Basically,
 * it can be in virtually any state.
 *
 * This routine is called by fork1() for initial setup with the process
 * off the run queue, and also may be called normally with the process on or
 * off the run queue.
 *
 * MPSAFE
 */
static void
dummy_resetpriority(struct lwp *lp)
{
	/* XXX spinlock usually needed */
	/*
	 * Set p_priority for general process comparisons
	 */
	switch(lp->lwp_rtprio.type) {
	case RTP_PRIO_REALTIME:
		lp->lwp_priority = PRIBASE_REALTIME + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_NORMAL:
		lp->lwp_priority = PRIBASE_NORMAL + lp->lwp_rtprio.prio;
		break;
	case RTP_PRIO_IDLE:
		lp->lwp_priority = PRIBASE_IDLE + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_THREAD:
		lp->lwp_priority = PRIBASE_THREAD + lp->lwp_rtprio.prio;
		return;
	}
	/* XXX spinlock usually needed */
}

/*
 * DUMMY_FORKING
 *
 * Called from fork1() when a new child process is being created.  Allows
 * the scheduler to predispose the child process before it gets scheduled.
 *
 * MPSAFE
 */
static void
dummy_forking(struct lwp *plp, struct lwp *lp)
{
	lp->lwp_estcpu = plp->lwp_estcpu;
#if 0
	++plp->lwp_estcpu;
#endif
}

/*
 * DUMMY_EXITING
 *
 * Called when the parent reaps a child.  Typically used to propagate cpu
 * use by the child back to the parent as part of a batch detection
 * heuristic.
 *
 * NOTE: cpu use is not normally back-propagated to PID 1.
 *
 * MPSAFE
 */
static void
dummy_exiting(struct lwp *plp, struct lwp *lp)
{
}

/*
 * SMP systems may need a scheduler helper thread.  This is how one can be
 * set up.
 *
 * We use a neat LWKT scheduling trick to interlock the helper thread.  It
 * is possible to deschedule an LWKT thread and then do some work before
 * switching away.  The thread can be rescheduled at any time, even before
 * we switch away.
 */
#ifdef SMP

static void
dummy_sched_thread(void *dummy)
{
	globaldata_t gd;
	dummy_pcpu_t dd;
	struct lwp *lp;
	cpumask_t cpumask;
	cpumask_t tmpmask;
	int cpuid;
	int tmpid;

	gd = mycpu;
	cpuid = gd->gd_cpuid;
	dd = &dummy_pcpu[cpuid];
	cpumask = 1 << cpuid;

	/*
	 * Our scheduler helper thread does not need to hold the MP lock
	 */
	rel_mplock();

	for (;;) {
		lwkt_deschedule_self(gd->gd_curthread);	/* interlock */
		atomic_set_int(&dummy_rdyprocmask, cpumask);
		spin_lock_wr(&dummy_spin);
		if (dd->uschedcp) {
			/*
			 * We raced another cpu trying to schedule a thread
			 * onto us.  If the runq isn't empty hit another
			 * free cpu.
			 */
			tmpmask = ~dummy_curprocmask & dummy_rdyprocmask &
				  gd->gd_other_cpus;
			if (tmpmask && dummy_runqcount) {
				tmpid = bsfl(tmpmask);
				KKASSERT(tmpid != cpuid);
				atomic_clear_int(&dummy_rdyprocmask,
						 1 << tmpid);
				spin_unlock_wr(&dummy_spin);
				lwkt_schedule(&dummy_pcpu[tmpid].helper_thread);
			} else {
				spin_unlock_wr(&dummy_spin);
			}
		} else if ((lp = TAILQ_FIRST(&dummy_runq)) != NULL) {
			--dummy_runqcount;
			TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
			lp->lwp_flag &= ~LWP_ONRUNQ;
			dd->uschedcp = lp;
			atomic_set_int(&dummy_curprocmask, cpumask);
			spin_unlock_wr(&dummy_spin);
#ifdef SMP
			lwkt_acquire(lp->lwp_thread);
#endif
			lwkt_schedule(lp->lwp_thread);
		} else {
			spin_unlock_wr(&dummy_spin);
		}
		lwkt_switch();
	}
}

/*
 * Set up our scheduler helpers.  Note that curprocmask bit 0 has already
 * been cleared by dummyinit() and we should not mess with it further.
 */
static void
dummy_sched_thread_cpu_init(void)
{
	int i;

	if (bootverbose)
		kprintf("start dummy scheduler helpers on cpus:");

	for (i = 0; i < ncpus; ++i) {
		dummy_pcpu_t dd = &dummy_pcpu[i];
		cpumask_t mask = 1 << i;

		if ((mask & smp_active_mask) == 0)
			continue;

		if (bootverbose)
			kprintf(" %d", i);

		lwkt_create(dummy_sched_thread, NULL, NULL, &dd->helper_thread,
			    TDF_STOPREQ, i, "dsched %d", i);

		/*
		 * Allow user scheduling on the target cpu.  cpu #0 has
		 * already been enabled in dummyinit().
		 */
		if (i)
			atomic_clear_int(&dummy_curprocmask, mask);
		atomic_set_int(&dummy_rdyprocmask, mask);
	}
	if (bootverbose)
		kprintf("\n");
}
SYSINIT(uschedtd, SI_BOOT2_USCHED, SI_ORDER_SECOND,
	dummy_sched_thread_cpu_init, NULL)

#endif
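
/*
 * A note on the LWKT interlock used by dummy_sched_thread() above:
 * lwkt_deschedule_self() runs before the cpu's bit is set in
 * dummy_rdyprocmask, so a remote cpu that sees the bit and calls
 * lwkt_schedule() on the helper simply re-marks the still-running
 * thread runnable.  The helper's eventual lwkt_switch() then returns
 * immediately and the loop rescans the runq, so a wakeup can never be
 * lost between the deschedule and the switch.
 */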