/*
 * Copyright (c) 2006 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $DragonFly: src/sys/kern/usched_dummy.c,v 1.3 2006/06/10 20:19:38 dillon Exp $
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/queue.h>
#include <sys/proc.h>
#include <sys/rtprio.h>
#include <sys/uio.h>
#include <sys/sysctl.h>
#include <sys/resourcevar.h>
#include <sys/spinlock.h>
#include <machine/ipl.h>
#include <machine/cpu.h>
#include <machine/smp.h>

#include <sys/thread2.h>
#include <sys/spinlock2.h>

#define MAXPRI                  128
#define PRIBASE_REALTIME        0
#define PRIBASE_NORMAL          MAXPRI
#define PRIBASE_IDLE            (MAXPRI * 2)
#define PRIBASE_THREAD          (MAXPRI * 3)
#define PRIBASE_NULL            (MAXPRI * 4)

#define lwp_priority    lwp_usdata.bsd4.priority
#define lwp_estcpu      lwp_usdata.bsd4.estcpu

static void dummy_acquire_curproc(struct lwp *lp);
static void dummy_release_curproc(struct lwp *lp);
static void dummy_select_curproc(globaldata_t gd);
static void dummy_setrunqueue(struct lwp *lp);
static void dummy_schedulerclock(struct lwp *lp, sysclock_t period,
                                sysclock_t cpstamp);
static void dummy_recalculate_estcpu(struct lwp *lp);
static void dummy_resetpriority(struct lwp *lp);
static void dummy_forking(struct lwp *plp, struct lwp *lp);
static void dummy_exiting(struct lwp *plp, struct lwp *lp);

struct usched usched_dummy = {
        { NULL },
        "dummy", "Dummy DragonFly Scheduler",
        NULL,                   /* default registration */
        NULL,                   /* default deregistration */
        dummy_acquire_curproc,
        dummy_release_curproc,
        dummy_setrunqueue,
        dummy_schedulerclock,
        dummy_recalculate_estcpu,
        dummy_resetpriority,
        dummy_forking,
        dummy_exiting,
        NULL                    /* setcpumask not supported */
};

struct usched_dummy_pcpu {
        int rrcount;
        struct thread helper_thread;
        struct lwp *uschedcp;
};

typedef struct usched_dummy_pcpu *dummy_pcpu_t;

static struct usched_dummy_pcpu dummy_pcpu[MAXCPU];
static cpumask_t dummy_curprocmask = -1;
static cpumask_t dummy_rdyprocmask;
static struct spinlock dummy_spin;
static TAILQ_HEAD(rq, lwp) dummy_runq;
static int dummy_runqcount;

static int usched_dummy_rrinterval = (ESTCPUFREQ + 9) / 10;
SYSCTL_INT(_kern, OID_AUTO, usched_dummy_rrinterval, CTLFLAG_RW,
        &usched_dummy_rrinterval, 0, "");

/*
 * Initialize the run queues at boot time, and clear cpu 0 in curprocmask
 * to allow dummy scheduling on cpu 0.
 */
static void
dummyinit(void *dummy)
{
        TAILQ_INIT(&dummy_runq);
        spin_init(&dummy_spin);
        atomic_clear_int(&dummy_curprocmask, 1);
}
SYSINIT(runqueue, SI_SUB_RUN_QUEUE, SI_ORDER_FIRST, dummyinit, NULL)
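/*
 * Illustrative sketch (compiled out, not part of the original file): the
 * kernel reaches this scheduler only indirectly, through the usched
 * function table defined above.  The snippet below shows the shape of
 * that dispatch; the field name is an assumption based on the prototype
 * order in usched_dummy, and the real hook-up lives elsewhere in the
 * kernel.
 */
#if 0
static void
example_usched_dispatch(struct usched *us, struct lwp *lp)
{
        /*
         * On return-to-userland the kernel would effectively invoke the
         * scheduler's acquire hook through the table (hypothetical
         * field name):
         */
        us->acquire_curproc(lp);
}
#endif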
/*
 * DUMMY_ACQUIRE_CURPROC
 *
 * This function is called when the kernel intends to return to userland.
 * It is responsible for making the thread the current designated userland
 * thread for this cpu, blocking if necessary.
 *
 * We are expected to handle userland reschedule requests here too.
 *
 * WARNING! THIS FUNCTION IS ALLOWED TO CAUSE THE CURRENT THREAD TO MIGRATE
 * TO ANOTHER CPU!  Because most of the kernel assumes that no migration will
 * occur, this function is called only under very controlled circumstances.
 *
 * MPSAFE
 */
static void
dummy_acquire_curproc(struct lwp *lp)
{
        globaldata_t gd = mycpu;
        dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
        thread_t td = lp->lwp_thread;

        /*
         * Possibly select another thread
         */
        if (user_resched_wanted())
                dummy_select_curproc(gd);

        /*
         * If this cpu has no current thread, select ourself
         */
        if (dd->uschedcp == NULL && TAILQ_EMPTY(&dummy_runq)) {
                atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
                dd->uschedcp = lp;
                return;
        }

        /*
         * If this cpu's current user process thread is not our thread,
         * deschedule ourselves and place us on the run queue, then
         * switch away.
         *
         * We loop until we become the current process.  It's a good idea
         * to run any passive release(s) before we mess with the scheduler
         * so our thread is in the expected state.
         */
        KKASSERT(dd->uschedcp != lp);
        if (td->td_release)
                td->td_release(lp->lwp_thread);
        do {
                crit_enter();
                lwkt_deschedule_self(td);
                dummy_setrunqueue(lp);
                if ((td->td_flags & TDF_RUNQ) == 0)
                        ++lp->lwp_stats->p_ru.ru_nivcsw;
                lwkt_switch();          /* WE MAY MIGRATE TO ANOTHER CPU */
                crit_exit();
                gd = mycpu;
                dd = &dummy_pcpu[gd->gd_cpuid];
                KKASSERT((lp->lwp_proc->p_flag & P_ONRUNQ) == 0);
        } while (dd->uschedcp != lp);
}

/*
 * DUMMY_RELEASE_CURPROC
 *
 * This routine detaches the current thread from the userland scheduler,
 * usually because the thread needs to run in the kernel (at kernel priority)
 * for a while.
 *
 * This routine is also responsible for selecting a new thread to
 * become the current thread.
 *
 * WARNING!  The MP lock may be in an unsynchronized state due to the
 * way get_mplock() works and the fact that this function may be called
 * from a passive release during a lwkt_switch().  try_mplock() will deal
 * with this for us but you should be aware that td_mpcount may not be
 * usable.
 *
 * MPSAFE
 */
static void
dummy_release_curproc(struct lwp *lp)
{
        globaldata_t gd = mycpu;
        dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

        KKASSERT((lp->lwp_proc->p_flag & P_ONRUNQ) == 0);
        if (dd->uschedcp == lp) {
                dummy_select_curproc(gd);
        }
}
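/*
 * Standalone userland model (compiled out): the acquire/release pair
 * above implements "loop until I am the designated current lwp for this
 * cpu".  The pthread sketch below mimics that handoff protocol with a
 * mutex and condvar standing in for the LWKT deschedule/switch
 * primitives.  All names here are hypothetical and the fast path is
 * simplified (the real code also requires the runq to be empty).
 */
#if 0
#include <pthread.h>

static pthread_mutex_t sched_mtx = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t  sched_cv  = PTHREAD_COND_INITIALIZER;
static void *designated;                /* analog of dd->uschedcp */

static void
acquire_curproc_model(void *self)
{
        pthread_mutex_lock(&sched_mtx);
        for (;;) {
                if (designated == NULL) {       /* cpu has no current lwp */
                        designated = self;
                        break;
                }
                if (designated == self)         /* we were selected */
                        break;
                /* analog of the deschedule + lwkt_switch() in the loop */
                pthread_cond_wait(&sched_cv, &sched_mtx);
        }
        pthread_mutex_unlock(&sched_mtx);
}

static void
release_curproc_model(void *self)
{
        pthread_mutex_lock(&sched_mtx);
        if (designated == self) {
                designated = NULL;      /* analog of dummy_select_curproc() */
                pthread_cond_broadcast(&sched_cv);
        }
        pthread_mutex_unlock(&sched_mtx);
}
#endif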
/*
 * DUMMY_SELECT_CURPROC
 *
 * Select a new current process for this cpu.  This satisfies a user
 * scheduler reschedule request so we clear that too.
 *
 * This routine is also responsible for equal-priority round-robining,
 * typically triggered from dummy_schedulerclock().  In our dummy example
 * all the 'user' threads are LWKT scheduled at once and we simply
 * call lwkt_switch().
 *
 * MPSAFE
 */
static
void
dummy_select_curproc(globaldata_t gd)
{
        dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
        struct lwp *lp;

        clear_user_resched();
        spin_lock_wr(&dummy_spin);
        if ((lp = TAILQ_FIRST(&dummy_runq)) == NULL) {
                dd->uschedcp = NULL;
                atomic_clear_int(&dummy_curprocmask, gd->gd_cpumask);
                spin_unlock_wr(&dummy_spin);
        } else {
                --dummy_runqcount;
                TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
                lp->lwp_proc->p_flag &= ~P_ONRUNQ;
                dd->uschedcp = lp;
                atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
                spin_unlock_wr(&dummy_spin);
#ifdef SMP
                lwkt_acquire(lp->lwp_thread);
#endif
                lwkt_schedule(lp->lwp_thread);
        }
}

/*
 * DUMMY_SETRUNQUEUE
 *
 * This routine is called to schedule a new user process after a fork.
 * The scheduler module itself might also call this routine to place
 * the current process on the userland scheduler's run queue prior
 * to calling dummy_select_curproc().
 *
 * The caller may set P_PASSIVE_ACQ in p_flag to indicate that we should
 * attempt to leave the thread on the current cpu.
 *
 * MPSAFE
 */
static void
dummy_setrunqueue(struct lwp *lp)
{
        globaldata_t gd = mycpu;
        dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];
        cpumask_t mask;
        int cpuid;

        if (dd->uschedcp == NULL) {
                dd->uschedcp = lp;
                atomic_set_int(&dummy_curprocmask, gd->gd_cpumask);
                lwkt_schedule(lp->lwp_thread);
        } else {
                /*
                 * Add to our global runq
                 */
                KKASSERT((lp->lwp_proc->p_flag & P_ONRUNQ) == 0);
                spin_lock_wr(&dummy_spin);
                ++dummy_runqcount;
                TAILQ_INSERT_TAIL(&dummy_runq, lp, lwp_procq);
                lp->lwp_proc->p_flag |= P_ONRUNQ;
#ifdef SMP
                lwkt_giveaway(lp->lwp_thread);
#endif

                /* lp = TAILQ_FIRST(&dummy_runq); */

                /*
                 * Notify the next available cpu.  Some cpu affinity
                 * could be done here.
                 *
                 * The rdyprocmask bit records the fact that there is a
                 * process on the runq that needs service.  If the helper
                 * thread cannot find a home for it, it will forward the
                 * request to another available cpu.
                 */
                mask = ~dummy_curprocmask & dummy_rdyprocmask &
                       gd->gd_other_cpus;
                if (mask) {
                        cpuid = bsfl(mask);
                        atomic_clear_int(&dummy_rdyprocmask, 1 << cpuid);
                        spin_unlock_wr(&dummy_spin);
                        lwkt_schedule(&dummy_pcpu[cpuid].helper_thread);
                } else {
                        spin_unlock_wr(&dummy_spin);
                }
        }
}
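/*
 * Standalone sketch (compiled out, userland) of the cpu selection
 * expression used in dummy_setrunqueue() above: a cpu is a candidate
 * when it has no current user process (~curprocmask), its helper thread
 * is parked and ready (rdyprocmask), and it is not the current cpu
 * (other_cpus).  ffs() stands in for bsfl() here and the mask values
 * are made up.
 */
#if 0
#include <stdio.h>
#include <strings.h>

int
main(void)
{
        unsigned int curprocmask = 0x0b;        /* cpus 0,1,3 have a uproc */
        unsigned int rdyprocmask = 0x0e;        /* helpers 1,2,3 are ready */
        unsigned int other_cpus  = 0x0e;        /* we are cpu 0 */
        unsigned int mask;

        mask = ~curprocmask & rdyprocmask & other_cpus;
        if (mask)
                printf("kick helper thread on cpu %d\n", ffs(mask) - 1);
        else
                printf("no idle ready cpu, lwp stays on the runq\n");
        return (0);
}
#endif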
/*
 * This routine is called from a systimer IPI.  Thus it is called with
 * a critical section held.  Any spinlocks we get here that are also
 * obtained in other procedures must be protected by a critical section
 * in those other procedures to avoid a deadlock.
 *
 * The MP lock may or may not be held on entry and cannot be obtained
 * by this routine (because it is called from a systimer IPI).  Additionally,
 * because this is equivalent to a FAST interrupt, spinlocks cannot be used
 * (or at least, you have to check that gd_spin* counts are 0 before you
 * can).
 *
 * This routine is called at ESTCPUFREQ on each cpu independently.
 *
 * This routine typically queues a reschedule request, which will cause
 * the scheduler's BLAH_select_curproc() to be called as soon as possible.
 *
 * MPSAFE
 */
static
void
dummy_schedulerclock(struct lwp *lp, sysclock_t period, sysclock_t cpstamp)
{
        globaldata_t gd = mycpu;
        dummy_pcpu_t dd = &dummy_pcpu[gd->gd_cpuid];

        if (++dd->rrcount >= usched_dummy_rrinterval) {
                dd->rrcount = 0;
                need_user_resched();
        }
}

/*
 * DUMMY_RECALCULATE_ESTCPU
 *
 * Called once a second for any process that is running or has slept
 * for less than 2 seconds.
 *
 * MPSAFE
 */
static
void
dummy_recalculate_estcpu(struct lwp *lp)
{
}

/*
 * DUMMY_RESETPRIORITY
 *
 * This routine is called after the kernel has potentially modified
 * the lwp_rtprio structure.  The target process may be running or sleeping
 * or scheduled but not yet running or owned by another cpu.  Basically,
 * it can be in virtually any state.
 *
 * This routine is called by fork1() for initial setup with the process
 * off the run queue, and may also be called normally with the process on or
 * off the run queue.
 *
 * MPSAFE
 */
static void
dummy_resetpriority(struct lwp *lp)
{
        /* XXX spinlock usually needed */
        /*
         * Set p_priority for general process comparisons
         */
        switch(lp->lwp_rtprio.type) {
        case RTP_PRIO_REALTIME:
                lp->lwp_priority = PRIBASE_REALTIME + lp->lwp_rtprio.prio;
                return;
        case RTP_PRIO_NORMAL:
                lp->lwp_priority = PRIBASE_NORMAL + lp->lwp_rtprio.prio;
                break;
        case RTP_PRIO_IDLE:
                lp->lwp_priority = PRIBASE_IDLE + lp->lwp_rtprio.prio;
                return;
        case RTP_PRIO_THREAD:
                lp->lwp_priority = PRIBASE_THREAD + lp->lwp_rtprio.prio;
                return;
        }
        /* XXX spinlock usually needed */
}

/*
 * DUMMY_FORKING
 *
 * Called from fork1() when a new child process is being created.  Allows
 * the scheduler to predispose the child process before it gets scheduled.
 *
 * MPSAFE
 */
static void
dummy_forking(struct lwp *plp, struct lwp *lp)
{
        lp->lwp_estcpu = plp->lwp_estcpu;
#if 0
        ++plp->lwp_estcpu;
#endif
}

/*
 * DUMMY_EXITING
 *
 * Called when the parent reaps a child.  Typically used to propagate cpu
 * use by the child back to the parent as part of a batch detection
 * heuristic.
 *
 * NOTE: cpu use is not normally back-propagated to PID 1.
 *
 * MPSAFE
 */
static void
dummy_exiting(struct lwp *plp, struct lwp *lp)
{
}
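/*
 * Worked example for dummy_resetpriority() above: with MAXPRI = 128 the
 * rtprio classes map into disjoint priority bands, so any realtime lwp
 * compares ahead of any normal lwp, which in turn compares ahead of any
 * idle-class lwp (lower lwp_priority values are more urgent).  For
 * rtprio.prio = 10:
 *
 *      RTP_PRIO_REALTIME:      0 + 10 =  10
 *      RTP_PRIO_NORMAL:      128 + 10 = 138
 *      RTP_PRIO_IDLE:        256 + 10 = 266
 *      RTP_PRIO_THREAD:      384 + 10 = 394
 */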
/*
 * SMP systems may need a scheduler helper thread.  This is how one can be
 * set up.
 *
 * We use a neat LWKT scheduling trick to interlock the helper thread.  It
 * is possible to deschedule an LWKT thread and then do some work before
 * switching away.  The thread can be rescheduled at any time, even before
 * we switch away.
 */
#ifdef SMP

static void
dummy_sched_thread(void *dummy)
{
        globaldata_t gd;
        dummy_pcpu_t dd;
        struct lwp *lp;
        cpumask_t cpumask;
        cpumask_t tmpmask;
        int cpuid;
        int tmpid;

        gd = mycpu;
        cpuid = gd->gd_cpuid;
        dd = &dummy_pcpu[cpuid];
        cpumask = 1 << cpuid;

        /*
         * Our scheduler helper thread does not need to hold the MP lock
         */
        rel_mplock();

        for (;;) {
                lwkt_deschedule_self(gd->gd_curthread); /* interlock */
                atomic_set_int(&dummy_rdyprocmask, cpumask);
                spin_lock_wr(&dummy_spin);
                if (dd->uschedcp) {
                        /*
                         * We raced another cpu trying to schedule a thread
                         * onto us.  If the runq isn't empty, hit another
                         * free cpu.
                         */
                        tmpmask = ~dummy_curprocmask & dummy_rdyprocmask &
                                  gd->gd_other_cpus;
                        if (tmpmask && dummy_runqcount) {
                                tmpid = bsfl(tmpmask);
                                KKASSERT(tmpid != cpuid);
                                atomic_clear_int(&dummy_rdyprocmask,
                                                 1 << tmpid);
                                spin_unlock_wr(&dummy_spin);
                                lwkt_schedule(&dummy_pcpu[tmpid].helper_thread);
                        } else {
                                spin_unlock_wr(&dummy_spin);
                        }
                } else if ((lp = TAILQ_FIRST(&dummy_runq)) != NULL) {
                        --dummy_runqcount;
                        TAILQ_REMOVE(&dummy_runq, lp, lwp_procq);
                        lp->lwp_proc->p_flag &= ~P_ONRUNQ;
                        dd->uschedcp = lp;
                        atomic_set_int(&dummy_curprocmask, cpumask);
                        spin_unlock_wr(&dummy_spin);
#ifdef SMP
                        lwkt_acquire(lp->lwp_thread);
#endif
                        lwkt_schedule(lp->lwp_thread);
                } else {
                        spin_unlock_wr(&dummy_spin);
                }
                lwkt_switch();
        }
}

/*
 * Set up our scheduler helpers.  Note that curprocmask bit 0 has already
 * been cleared by dummyinit() and we should not mess with it further.
 */
static void
dummy_sched_thread_cpu_init(void)
{
        int i;

        if (bootverbose)
                printf("start dummy scheduler helpers on cpus:");

        for (i = 0; i < ncpus; ++i) {
                dummy_pcpu_t dd = &dummy_pcpu[i];
                cpumask_t mask = 1 << i;

                if ((mask & smp_active_mask) == 0)
                        continue;

                if (bootverbose)
                        printf(" %d", i);

                lwkt_create(dummy_sched_thread, NULL, NULL, &dd->helper_thread,
                            TDF_STOPREQ, i, "dsched %d", i);

                /*
                 * Allow user scheduling on the target cpu.  cpu #0 has
                 * already been enabled in dummyinit().
                 */
                if (i)
                        atomic_clear_int(&dummy_curprocmask, mask);
                atomic_set_int(&dummy_rdyprocmask, mask);
        }
        if (bootverbose)
                printf("\n");
}
SYSINIT(uschedtd, SI_SUB_FINISH_SMP, SI_ORDER_ANY,
        dummy_sched_thread_cpu_init, NULL)

#endif
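/*
 * A note on the helper-thread interlock above: the helper deschedules
 * itself *before* advertising itself in dummy_rdyprocmask.  A remote cpu
 * that then clears the bit and calls lwkt_schedule() at any point
 * afterwards simply re-marks the helper runnable, so the final
 * lwkt_switch() returns immediately and the loop re-examines the runq.
 * A wakeup posted after the ready bit is set can therefore never be
 * lost, which is the "neat LWKT scheduling trick" referred to above.
 */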