/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/usched_dummy.c,v 1.7 2007/02/18 16:16:11 corecode Exp $
 */

/*
 * usched_dummy - a minimal example userland scheduler module.
 *
 * It keeps one global FIFO run queue (dummy_runq) protected by a single
 * spinlock (dummy_spin), one designated userland thread per cpu
 * (dummy_pcpu[n].uschedcp), and per-cpu helper threads that pull work
 * off the global queue.  It serves as a template for writing real
 * userland schedulers.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/resourcevar.h>
#include <sys/spinlock.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>

/*
 * Priority bands: each rtprio class gets its own MAXPRI-wide band so
 * lwp_priority values from different classes compare sensibly.
 */
#define MAXPRI			128
#define PRIBASE_REALTIME	0
#define PRIBASE_NORMAL		MAXPRI
#define PRIBASE_IDLE		(MAXPRI * 2)
#define PRIBASE_THREAD		(MAXPRI * 3)
#define PRIBASE_NULL		(MAXPRI * 4)

/*
 * This dummy scheduler borrows the bsd4 scheduler's per-lwp usdata
 * fields rather than declaring its own.
 */
#define lwp_priority	lwp_usdata.bsd4.priority
#define lwp_estcpu	lwp_usdata.bsd4.estcpu

static void dummy_acquire_curproc(struct lwp *lp);
static void dummy_release_curproc(struct lwp *lp);
static void dummy_select_curproc(globaldata_t gd);
static void dummy_setrunqueue(struct lwp *lp);
static void dummy_schedulerclock(struct lwp *lp, sysclock_t period,
				sysclock_t cpstamp);
static void dummy_recalculate_estcpu(struct lwp *lp);
static void dummy_resetpriority(struct lwp *lp);
static void dummy_forking(struct lwp *plp, struct lwp *lp);
static void dummy_exiting(struct lwp *plp, struct lwp *lp);

/*
 * The exported scheduler ops vector.  The kernel dispatches all
 * userland scheduling decisions through this table.
 */
struct usched usched_dummy = {
	{ NULL },
	"dummy", "Dummy DragonFly Scheduler",
	NULL,			/* default registration */
	NULL,			/* default deregistration */
	dummy_acquire_curproc,
	dummy_release_curproc,
	dummy_setrunqueue,
	dummy_schedulerclock,
	dummy_recalculate_estcpu,
	dummy_resetpriority,
	dummy_forking,
	dummy_exiting,
	NULL			/* setcpumask not supported */
};

/*
 * Per-cpu scheduler state.
 */
struct usched_dummy_pcpu {
	int	rrcount;		/* ticks since last round-robin resched */
	struct thread helper_thread;	/* per-cpu helper (SMP only) */
	struct lwp *uschedcp;		/* current designated userland lwp */
};

typedef struct usched_dummy_pcpu *dummy_pcpu_t;

static struct usched_dummy_pcpu dummy_pcpu[MAXCPU];
static cpumask_t dummy_curprocmask = -1;	/* cpus with a uschedcp set */
static cpumask_t dummy_rdyprocmask;		/* cpus with an idle helper */
static struct spinlock dummy_spin;		/* protects runq + counts */
static TAILQ_HEAD(rq, lwp) dummy_runq;		/* global FIFO run queue */
static int dummy_runqcount;			/* #lwps on dummy_runq */

static int usched_dummy_rrinterval = (ESTCPUFREQ + 9) / 10;
SYSCTL_INT(_kern, OID_AUTO, usched_dummy_rrinterval, CTLFLAG_RW,
        &usched_dummy_rrinterval, 0, "");

/*
 * Initialize the run queues at boot time, clear cpu 0 in curprocmask
 * to allow dummy scheduling on cpu 0.
 */
static void
dummyinit(void *dummy)
{
	TAILQ_INIT(&dummy_runq);
	spin_init(&dummy_spin);
	atomic_clear_int(&dummy_curprocmask, 1);
}
SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, dummyinit, NULL)

/*
 * DUMMY_ACQUIRE_CURPROC
 *
 * This function is called when the kernel intends to return to userland.
 * It is responsible for making the thread the current designated userland
 * thread for this cpu, blocking if necessary.
 *
 * We are expected to handle userland reschedule requests here too.
 *
 * WARNING! THIS FUNCTION IS ALLOWED TO CAUSE THE CURRENT THREAD TO MIGRATE
 * TO ANOTHER CPU!  Because most of the kernel assumes that no migration will
 * occur, this function is called only under very controlled circumstances.
 *
 * MPSAFE
 */
static void
dummy_acquire_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	thread_t td = lp->lwp_thread;

	/*
	 * Possibly select another thread
	 */
	if (user_resched_wanted())
		dummy_select_curproc(gd);

	/*
	 * If this cpu has no current thread, select ourself
	 */
	if (dd->uschedcp == NULL && TAILQ_EMPTY(&dummy_runq)) {
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		dd->uschedcp = lp;
		return;
	}

	/*
	 * If this cpu's current user process thread is not our thread,
	 * deschedule ourselves and place us on the run queue, then
	 * switch away.
	 *
	 * We loop until we become the current process.  It's a good idea
	 * to run any passive release(s) before we mess with the scheduler
	 * so our thread is in the expected state.
	 */
	KKASSERT(dd->uschedcp != lp);
	if (td->td_release)
		td->td_release(lp->lwp_thread);
	do {
		crit_enter();
		lwkt_deschedule_self(td);
		dummy_setrunqueue(lp);
		if ((td->td_flags & TDF_RUNQ) == 0)
			++lp->lwp_ru.ru_nivcsw;	/* involuntary ctx switch */
		lwkt_switch();		/* WE MAY MIGRATE TO ANOTHER CPU */
		crit_exit();
		/* reload per-cpu pointers: we may be on a different cpu */
		gd = mycpu;
		dd = &dummy_pcpu[gd->gd_cpuid];
		KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
	} while (dd->uschedcp != lp);
}

/*
 * DUMMY_RELEASE_CURPROC
 *
 * This routine detaches the current thread from the userland scheduler,
 * usually because the thread needs to run in the kernel (at kernel priority)
 * for a while.
 *
 * This routine is also responsible for selecting a new thread to
 * make the current thread.
 *
 * WARNING!  The MP lock may be in an unsynchronized state due to the
 * way get_mplock() works and the fact that this function may be called
 * from a passive release during a lwkt_switch().  try_mplock() will deal
 * with this for us but you should be aware that td_mpcount may not be
 * usable.
 *
 * MPSAFE
 */
static void
dummy_release_curproc(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
	if (dd->uschedcp == lp) {
		dummy_select_curproc(gd);
	}
}

/*
 * DUMMY_SELECT_CURPROC
 *
 * Select a new current process for this cpu.  This satisfies a user
 * scheduler reschedule request so clear that too.
 *
 * This routine is also responsible for equal-priority round-robining,
 * typically triggered from dummy_schedulerclock().  In our dummy example
 * all the 'user' threads are LWKT scheduled all at once and we just
 * call lwkt_switch().
 *
 * MPSAFE
 */
static
void
dummy_select_curproc(globaldata_t gd)
{
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	struct lwp *lp;

	clear_user_resched();
	spin_lock_wr(&dummy_spin);
	if ((lp = TAILQ_FIRST(&dummy_runq)) == NULL) {
		/* nothing runnable: this cpu has no current process */
		dd->uschedcp = NULL;
		atomic_clear_int(&dummy_curprocmask, gd->gd_cpumask);
		spin_unlock_wr(&dummy_spin);
	} else {
		/* dequeue the head of the runq and make it our uschedcp */
		--dummy_runqcount;
		TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
		lp->lwp_flag &= ~LWP_ONRUNQ;
		dd->uschedcp = lp;
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		spin_unlock_wr(&dummy_spin);
#ifdef SMP
		lwkt_acquire(lp->lwp_thread);
#endif
		lwkt_schedule(lp->lwp_thread);
	}
}

/*
 * DUMMY_SETRUNQUEUE
 *
 * This routine is called to schedule a new user process after a fork.
 * The scheduler module itself might also call this routine to place
 * the current process on the userland scheduler's run queue prior
 * to calling dummy_select_curproc().
 *
 * The caller may set P_PASSIVE_ACQ in p_flag to indicate that we should
 * attempt to leave the thread on the current cpu.
 *
 * MPSAFE
 */
static void
dummy_setrunqueue(struct lwp *lp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
	cpumask_t mask;
	int cpuid;

	if (dd->uschedcp == NULL) {
		/* this cpu is free: take the slot directly */
		dd->uschedcp = lp;
		atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
		lwkt_schedule(lp->lwp_thread);
	} else {
		/*
		 * Add to our global runq
		 */
		KKASSERT((lp->lwp_flag & LWP_ONRUNQ) == 0);
		spin_lock_wr(&dummy_spin);
		++dummy_runqcount;
		TAILQ_INSERT_TAIL(&dummy_runq, lp, lwp_procq);
		lp->lwp_flag |= LWP_ONRUNQ;
#ifdef SMP
		lwkt_giveaway(lp->lwp_thread);
#endif

		/* lp = TAILQ_FIRST(&dummy_runq); */

		/*
		 * Notify the next available cpu.  P.S. some
		 * cpu affinity could be done here.
		 *
		 * The rdyprocmask bit placeholds the knowledge that there
		 * is a process on the runq that needs service.  If the
		 * helper thread cannot find a home for it it will forward
		 * the request to another available cpu.
		 */
		mask = ~dummy_curprocmask & dummy_rdyprocmask &
		       gd->gd_other_cpus;
		if (mask) {
			cpuid = bsfl(mask);	/* lowest ready cpu */
			atomic_clear_int(&dummy_rdyprocmask, 1 << cpuid);
			spin_unlock_wr(&dummy_spin);
			lwkt_schedule(&dummy_pcpu[cpuid].helper_thread);
		} else {
			spin_unlock_wr(&dummy_spin);
		}
	}
}

/*
 * This routine is called from a systimer IPI.  Thus it is called with
 * a critical section held.  Any spinlocks we get here that are also
 * obtained in other procedures must be protected by a critical section
 * in those other procedures to avoid a deadlock.
 *
 * The MP lock may or may not be held on entry and cannot be obtained
 * by this routine (because it is called from a systimer IPI).  Additionally,
 * because this is equivalent to a FAST interrupt, spinlocks cannot be used
 * (or at least, you have to check that gd_spin* counts are 0 before you
 * can).
 *
 * This routine is called at ESTCPUFREQ on each cpu independently.
 *
 * This routine typically queues a reschedule request, which will cause
 * the scheduler's BLAH_select_curproc() to be called as soon as possible.
 *
 * MPSAFE
 */
static
void
dummy_schedulerclock(struct lwp *lp, sysclock_t period, sysclock_t cpstamp)
{
	globaldata_t gd = mycpu;
	dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

	/* round-robin: request a user resched every rrinterval ticks */
	if (++dd->rrcount >= usched_dummy_rrinterval) {
		dd->rrcount = 0;
		need_user_resched();
	}
}

/*
 * DUMMY_RECALCULATE_ESTCPU
 *
 * Called once a second for any process that is running or has slept
 * for less than 2 seconds.
 *
 * The dummy scheduler does no cpu-usage estimation, so this is a no-op.
 *
 * MPSAFE
 */
static
void
dummy_recalculate_estcpu(struct lwp *lp)
{
}

/*
 * DUMMY_RESETPRIORITY
 *
 * This routine is called after the kernel has potentially modified
 * the lwp_rtprio structure.  The target process may be running or sleeping
 * or scheduled but not yet running or owned by another cpu.  Basically,
 * it can be in virtually any state.
 *
 * This routine is called by fork1() for initial setup with the process
 * of the run queue, and also may be called normally with the process on or
 * off the run queue.
 *
 * MPSAFE
 */
static void
dummy_resetpriority(struct lwp *lp)
{
	/* XXX spinlock usually needed */
	/*
	 * Set p_priority for general process comparisons
	 */
	switch(lp->lwp_rtprio.type) {
	case RTP_PRIO_REALTIME:
		lp->lwp_priority = PRIBASE_REALTIME + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_NORMAL:
		lp->lwp_priority = PRIBASE_NORMAL + lp->lwp_rtprio.prio;
		break;
	case RTP_PRIO_IDLE:
		lp->lwp_priority = PRIBASE_IDLE + lp->lwp_rtprio.prio;
		return;
	case RTP_PRIO_THREAD:
		lp->lwp_priority = PRIBASE_THREAD + lp->lwp_rtprio.prio;
		return;
	}
	/* XXX spinlock usually needed */
}


/*
 * DUMMY_FORKING
 *
 * Called from fork1() when a new child process is being created.  Allows
 * the scheduler to predispose the child process before it gets scheduled.
 *
 * The child simply inherits the parent's estcpu here.
 *
 * MPSAFE
 */
static void
dummy_forking(struct lwp *plp, struct lwp *lp)
{
	lp->lwp_estcpu = plp->lwp_estcpu;
#if 0
	++plp->lwp_estcpu;
#endif
}

/*
 * DUMMY_EXITING
 *
 * Called when the parent reaps a child.  Typically used to propagate cpu
 * use by the child back to the parent as part of a batch detection
 * heuristic.  The dummy scheduler does not implement this, so it is a no-op.
 *
 * NOTE: cpu use is not normally back-propagated to PID 1.
 *
 * MPSAFE
 */
static void
dummy_exiting(struct lwp *plp, struct lwp *lp)
{
}

/*
 * SMP systems may need a scheduler helper thread.  This is how one can be
 * setup.
 *
 * We use a neat LWKT scheduling trick to interlock the helper thread.  It
 * is possible to deschedule an LWKT thread and then do some work before
 * switching away.  The thread can be rescheduled at any time, even before
 * we switch away.
 */
#ifdef SMP

static void
dummy_sched_thread(void *dummy)
{
	globaldata_t gd;
	dummy_pcpu_t dd;
	struct lwp *lp;
	cpumask_t cpumask;
	cpumask_t tmpmask;
	int cpuid;
	int tmpid;

	gd = mycpu;
	cpuid = gd->gd_cpuid;
	dd = &dummy_pcpu[cpuid];
	cpumask = 1 << cpuid;

	/*
	 * Our Scheduler helper thread does not need to hold the MP lock
	 */
	rel_mplock();

	for (;;) {
		/*
		 * Deschedule-before-work interlock: anyone who
		 * lwkt_schedule()s us after this point re-arms the loop
		 * even if they do so before our lwkt_switch() below.
		 */
		lwkt_deschedule_self(gd->gd_curthread);	/* interlock */
		atomic_set_int(&dummy_rdyprocmask, cpumask);
		spin_lock_wr(&dummy_spin);
		if (dd->uschedcp) {
			/*
			 * We raced another cpu trying to schedule a thread onto us.
			 * If the runq isn't empty hit another free cpu.
			 */
			tmpmask = ~dummy_curprocmask & dummy_rdyprocmask &
				  gd->gd_other_cpus;
			if (tmpmask && dummy_runqcount) {
				tmpid = bsfl(tmpmask);
				KKASSERT(tmpid != cpuid);
				atomic_clear_int(&dummy_rdyprocmask, 1 << tmpid);
				spin_unlock_wr(&dummy_spin);
				lwkt_schedule(&dummy_pcpu[tmpid].helper_thread);
			} else {
				spin_unlock_wr(&dummy_spin);
			}
		} else if ((lp = TAILQ_FIRST(&dummy_runq)) != NULL) {
			/* adopt the head of the global runq on this cpu */
			--dummy_runqcount;
			TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
			lp->lwp_flag &= ~LWP_ONRUNQ;
			dd->uschedcp = lp;
			atomic_set_int(&dummy_curprocmask, cpumask);
			spin_unlock_wr(&dummy_spin);
#ifdef SMP
			lwkt_acquire(lp->lwp_thread);
#endif
			lwkt_schedule(lp->lwp_thread);
		} else {
			spin_unlock_wr(&dummy_spin);
		}
		lwkt_switch();
	}
}

/*
 * Setup our scheduler helpers.  Note that curprocmask bit 0 has already
 * been cleared by rqinit() and we should not mess with it further.
 */
static void
dummy_sched_thread_cpu_init(void)
{
	int i;

	if (bootverbose)
		kprintf("start dummy scheduler helpers on cpus:");

	for (i = 0; i < ncpus; ++i) {
		dummy_pcpu_t dd = &dummy_pcpu[i];
		cpumask_t mask = 1 << i;

		/* skip cpus that are not active */
		if ((mask & smp_active_mask) == 0)
			continue;

		if (bootverbose)
			kprintf(" %d", i);

		lwkt_create(dummy_sched_thread, NULL, NULL, &dd->helper_thread,
			    TDF_STOPREQ, i, "dsched %d", i);

		/*
		 * Allow user scheduling on the target cpu.  cpu #0 has already
		 * been enabled in rqinit().
		 */
		if (i)
			atomic_clear_int(&dummy_curprocmask, mask);
		atomic_set_int(&dummy_rdyprocmask, mask);
	}
	if (bootverbose)
		kprintf("\n");
}
SYSINIT(uschedtd, SI_SUB_FINISH_SMP, SI_ORDER_ANY,
	dummy_sched_thread_cpu_init, NULL)

#endif
