1 /* $NetBSD: kern_synch.c,v 1.108 2002/05/21 01:38:27 thorpej Exp $ */ 2 3 /*- 4 * Copyright (c) 1999, 2000 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility, 9 * NASA Ames Research Center. 10 * 11 * Redistribution and use in source and binary forms, with or without 12 * modification, are permitted provided that the following conditions 13 * are met: 14 * 1. Redistributions of source code must retain the above copyright 15 * notice, this list of conditions and the following disclaimer. 16 * 2. Redistributions in binary form must reproduce the above copyright 17 * notice, this list of conditions and the following disclaimer in the 18 * documentation and/or other materials provided with the distribution. 19 * 3. All advertising materials mentioning features or use of this software 20 * must display the following acknowledgement: 21 * This product includes software developed by the NetBSD 22 * Foundation, Inc. and its contributors. 23 * 4. Neither the name of The NetBSD Foundation nor the names of its 24 * contributors may be used to endorse or promote products derived 25 * from this software without specific prior written permission. 26 * 27 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 28 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 29 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 30 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 31 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 32 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 33 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 34 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 35 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 36 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 37 * POSSIBILITY OF SUCH DAMAGE. 38 */ 39 40 /*- 41 * Copyright (c) 1982, 1986, 1990, 1991, 1993 42 * The Regents of the University of California. All rights reserved. 43 * (c) UNIX System Laboratories, Inc. 44 * All or some portions of this file are derived from material licensed 45 * to the University of California by American Telephone and Telegraph 46 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 47 * the permission of UNIX System Laboratories, Inc. 48 * 49 * Redistribution and use in source and binary forms, with or without 50 * modification, are permitted provided that the following conditions 51 * are met: 52 * 1. Redistributions of source code must retain the above copyright 53 * notice, this list of conditions and the following disclaimer. 54 * 2. Redistributions in binary form must reproduce the above copyright 55 * notice, this list of conditions and the following disclaimer in the 56 * documentation and/or other materials provided with the distribution. 57 * 3. All advertising materials mentioning features or use of this software 58 * must display the following acknowledgement: 59 * This product includes software developed by the University of 60 * California, Berkeley and its contributors. 61 * 4. Neither the name of the University nor the names of its contributors 62 * may be used to endorse or promote products derived from this software 63 * without specific prior written permission. 64 * 65 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 66 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 67 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 68 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 69 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 70 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 71 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 72 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 73 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 74 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 75 * SUCH DAMAGE. 76 * 77 * @(#)kern_synch.c 8.9 (Berkeley) 5/19/95 78 */ 79 80 #include <sys/cdefs.h> 81 __KERNEL_RCSID(0, "$NetBSD: kern_synch.c,v 1.108 2002/05/21 01:38:27 thorpej Exp $"); 82 83 #include "opt_ddb.h" 84 #include "opt_ktrace.h" 85 #include "opt_lockdebug.h" 86 #include "opt_multiprocessor.h" 87 88 #include <sys/param.h> 89 #include <sys/systm.h> 90 #include <sys/callout.h> 91 #include <sys/proc.h> 92 #include <sys/kernel.h> 93 #include <sys/buf.h> 94 #include <sys/signalvar.h> 95 #include <sys/resourcevar.h> 96 #include <sys/sched.h> 97 98 #include <uvm/uvm_extern.h> 99 100 #ifdef KTRACE 101 #include <sys/ktrace.h> 102 #endif 103 104 #include <machine/cpu.h> 105 106 int lbolt; /* once a second sleep address */ 107 int rrticks; /* number of hardclock ticks per roundrobin() */ 108 109 /* 110 * The global scheduler state. 111 */ 112 struct prochd sched_qs[RUNQUE_NQS]; /* run queues */ 113 __volatile u_int32_t sched_whichqs; /* bitmap of non-empty queues */ 114 struct slpque sched_slpque[SLPQUE_TABLESIZE]; /* sleep queues */ 115 116 struct simplelock sched_lock = SIMPLELOCK_INITIALIZER; 117 118 void schedcpu(void *); 119 void updatepri(struct proc *); 120 void endtsleep(void *); 121 122 __inline void awaken(struct proc *); 123 124 struct callout schedcpu_ch = CALLOUT_INITIALIZER; 125 126 /* 127 * Force switch among equal priority processes every 100ms. 128 * Called from hardclock every hz/10 == rrticks hardclock ticks. 129 */ 130 /* ARGSUSED */ 131 void 132 roundrobin(struct cpu_info *ci) 133 { 134 struct schedstate_percpu *spc = &ci->ci_schedstate; 135 136 spc->spc_rrticks = rrticks; 137 138 if (curproc != NULL) { 139 if (spc->spc_flags & SPCF_SEENRR) { 140 /* 141 * The process has already been through a roundrobin 142 * without switching and may be hogging the CPU. 143 * Indicate that the process should yield. 144 */ 145 spc->spc_flags |= SPCF_SHOULDYIELD; 146 } else 147 spc->spc_flags |= SPCF_SEENRR; 148 } 149 need_resched(curcpu()); 150 } 151 152 /* 153 * Constants for digital decay and forget: 154 * 90% of (p_estcpu) usage in 5 * loadav time 155 * 95% of (p_pctcpu) usage in 60 seconds (load insensitive) 156 * Note that, as ps(1) mentions, this can let percentages 157 * total over 100% (I've seen 137.9% for 3 processes). 158 * 159 * Note that hardclock updates p_estcpu and p_cpticks independently. 160 * 161 * We wish to decay away 90% of p_estcpu in (5 * loadavg) seconds. 162 * That is, the system wants to compute a value of decay such 163 * that the following for loop: 164 * for (i = 0; i < (5 * loadavg); i++) 165 * p_estcpu *= decay; 166 * will compute 167 * p_estcpu *= 0.1; 168 * for all values of loadavg: 169 * 170 * Mathematically this loop can be expressed by saying: 171 * decay ** (5 * loadavg) ~= .1 172 * 173 * The system computes decay as: 174 * decay = (2 * loadavg) / (2 * loadavg + 1) 175 * 176 * We wish to prove that the system's computation of decay 177 * will always fulfill the equation: 178 * decay ** (5 * loadavg) ~= .1 179 * 180 * If we compute b as: 181 * b = 2 * loadavg 182 * then 183 * decay = b / (b + 1) 184 * 185 * We now need to prove two things: 186 * 1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1) 187 * 2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg) 188 * 189 * Facts: 190 * For x close to zero, exp(x) =~ 1 + x, since 191 * exp(x) = 0! + x**1/1! + x**2/2! + ... . 192 * therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b. 193 * For x close to zero, ln(1+x) =~ x, since 194 * ln(1+x) = x - x**2/2 + x**3/3 - ... -1 < x < 1 195 * therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1). 196 * ln(.1) =~ -2.30 197 * 198 * Proof of (1): 199 * Solve (factor)**(power) =~ .1 given power (5*loadav): 200 * solving for factor, 201 * ln(factor) =~ (-2.30/5*loadav), or 202 * factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) = 203 * exp(-1/b) =~ (b-1)/b =~ b/(b+1). QED 204 * 205 * Proof of (2): 206 * Solve (factor)**(power) =~ .1 given factor == (b/(b+1)): 207 * solving for power, 208 * power*ln(b/(b+1)) =~ -2.30, or 209 * power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav. QED 210 * 211 * Actual power values for the implemented algorithm are as follows: 212 * loadav: 1 2 3 4 213 * power: 5.68 10.32 14.94 19.55 214 */ 215 216 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */ 217 #define loadfactor(loadav) (2 * (loadav)) 218 #define decay_cpu(loadfac, cpu) (((loadfac) * (cpu)) / ((loadfac) + FSCALE)) 219 220 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */ 221 fixpt_t ccpu = 0.95122942450071400909 * FSCALE; /* exp(-1/20) */ 222 223 /* 224 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the 225 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below 226 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT). 227 * 228 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used: 229 * 1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits). 230 * 231 * If you dont want to bother with the faster/more-accurate formula, you 232 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate 233 * (more general) method of calculating the %age of CPU used by a process. 234 */ 235 #define CCPU_SHIFT 11 236 237 /* 238 * Recompute process priorities, every hz ticks. 239 */ 240 /* ARGSUSED */ 241 void 242 schedcpu(void *arg) 243 { 244 fixpt_t loadfac = loadfactor(averunnable.ldavg[0]); 245 struct proc *p; 246 int s, s1; 247 unsigned int newcpu; 248 int clkhz; 249 250 proclist_lock_read(); 251 for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) { 252 /* 253 * Increment time in/out of memory and sleep time 254 * (if sleeping). We ignore overflow; with 16-bit int's 255 * (remember them?) overflow takes 45 days. 256 */ 257 p->p_swtime++; 258 if (p->p_stat == SSLEEP || p->p_stat == SSTOP) 259 p->p_slptime++; 260 p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT; 261 /* 262 * If the process has slept the entire second, 263 * stop recalculating its priority until it wakes up. 264 */ 265 if (p->p_slptime > 1) 266 continue; 267 s = splstatclock(); /* prevent state changes */ 268 /* 269 * p_pctcpu is only for ps. 270 */ 271 clkhz = stathz != 0 ? stathz : hz; 272 #if (FSHIFT >= CCPU_SHIFT) 273 p->p_pctcpu += (clkhz == 100)? 274 ((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT): 275 100 * (((fixpt_t) p->p_cpticks) 276 << (FSHIFT - CCPU_SHIFT)) / clkhz; 277 #else 278 p->p_pctcpu += ((FSCALE - ccpu) * 279 (p->p_cpticks * FSCALE / clkhz)) >> FSHIFT; 280 #endif 281 p->p_cpticks = 0; 282 newcpu = (u_int)decay_cpu(loadfac, p->p_estcpu); 283 p->p_estcpu = newcpu; 284 SCHED_LOCK(s1); 285 resetpriority(p); 286 if (p->p_priority >= PUSER) { 287 if (p->p_stat == SRUN && 288 (p->p_flag & P_INMEM) && 289 (p->p_priority / PPQ) != (p->p_usrpri / PPQ)) { 290 remrunqueue(p); 291 p->p_priority = p->p_usrpri; 292 setrunqueue(p); 293 } else 294 p->p_priority = p->p_usrpri; 295 } 296 SCHED_UNLOCK(s1); 297 splx(s); 298 } 299 proclist_unlock_read(); 300 uvm_meter(); 301 wakeup((caddr_t)&lbolt); 302 callout_reset(&schedcpu_ch, hz, schedcpu, NULL); 303 } 304 305 /* 306 * Recalculate the priority of a process after it has slept for a while. 307 * For all load averages >= 1 and max p_estcpu of 255, sleeping for at 308 * least six times the loadfactor will decay p_estcpu to zero. 309 */ 310 void 311 updatepri(struct proc *p) 312 { 313 unsigned int newcpu; 314 fixpt_t loadfac; 315 316 SCHED_ASSERT_LOCKED(); 317 318 newcpu = p->p_estcpu; 319 loadfac = loadfactor(averunnable.ldavg[0]); 320 321 if (p->p_slptime > 5 * loadfac) 322 p->p_estcpu = 0; 323 else { 324 p->p_slptime--; /* the first time was done in schedcpu */ 325 while (newcpu && --p->p_slptime) 326 newcpu = (int) decay_cpu(loadfac, newcpu); 327 p->p_estcpu = newcpu; 328 } 329 resetpriority(p); 330 } 331 332 /* 333 * During autoconfiguration or after a panic, a sleep will simply 334 * lower the priority briefly to allow interrupts, then return. 335 * The priority to be used (safepri) is machine-dependent, thus this 336 * value is initialized and maintained in the machine-dependent layers. 337 * This priority will typically be 0, or the lowest priority 338 * that is safe for use on the interrupt stack; it can be made 339 * higher to block network software interrupts after panics. 340 */ 341 int safepri; 342 343 /* 344 * General sleep call. Suspends the current process until a wakeup is 345 * performed on the specified identifier. The process will then be made 346 * runnable with the specified priority. Sleeps at most timo/hz seconds 347 * (0 means no timeout). If pri includes PCATCH flag, signals are checked 348 * before and after sleeping, else signals are not checked. Returns 0 if 349 * awakened, EWOULDBLOCK if the timeout expires. If PCATCH is set and a 350 * signal needs to be delivered, ERESTART is returned if the current system 351 * call should be restarted if possible, and EINTR is returned if the system 352 * call should be interrupted by the signal (return EINTR). 353 * 354 * The interlock is held until the scheduler_slock is acquired. The 355 * interlock will be locked before returning back to the caller 356 * unless the PNORELOCK flag is specified, in which case the 357 * interlock will always be unlocked upon return. 358 */ 359 int 360 ltsleep(void *ident, int priority, const char *wmesg, int timo, 361 __volatile struct simplelock *interlock) 362 { 363 struct proc *p = curproc; 364 struct slpque *qp; 365 int sig, s; 366 int catch = priority & PCATCH; 367 int relock = (priority & PNORELOCK) == 0; 368 369 /* 370 * XXXSMP 371 * This is probably bogus. Figure out what the right 372 * thing to do here really is. 373 * Note that not sleeping if ltsleep is called with curproc == NULL 374 * in the shutdown case is disgusting but partly necessary given 375 * how shutdown (barely) works. 376 */ 377 if (cold || (doing_shutdown && (panicstr || (p == NULL)))) { 378 /* 379 * After a panic, or during autoconfiguration, 380 * just give interrupts a chance, then just return; 381 * don't run any other procs or panic below, 382 * in case this is the idle process and already asleep. 383 */ 384 s = splhigh(); 385 splx(safepri); 386 splx(s); 387 if (interlock != NULL && relock == 0) 388 simple_unlock(interlock); 389 return (0); 390 } 391 392 KASSERT(p != NULL); 393 LOCK_ASSERT(interlock == NULL || simple_lock_held(interlock)); 394 395 #ifdef KTRACE 396 if (KTRPOINT(p, KTR_CSW)) 397 ktrcsw(p, 1, 0); 398 #endif 399 400 SCHED_LOCK(s); 401 402 #ifdef DIAGNOSTIC 403 if (ident == NULL) 404 panic("ltsleep: ident == NULL"); 405 if (p->p_stat != SONPROC) 406 panic("ltsleep: p_stat %d != SONPROC", p->p_stat); 407 if (p->p_back != NULL) 408 panic("ltsleep: p_back != NULL"); 409 #endif 410 411 p->p_wchan = ident; 412 p->p_wmesg = wmesg; 413 p->p_slptime = 0; 414 p->p_priority = priority & PRIMASK; 415 416 qp = SLPQUE(ident); 417 if (qp->sq_head == 0) 418 qp->sq_head = p; 419 else 420 *qp->sq_tailp = p; 421 *(qp->sq_tailp = &p->p_forw) = 0; 422 423 if (timo) 424 callout_reset(&p->p_tsleep_ch, timo, endtsleep, p); 425 426 /* 427 * We can now release the interlock; the scheduler_slock 428 * is held, so a thread can't get in to do wakeup() before 429 * we do the switch. 430 * 431 * XXX We leave the code block here, after inserting ourselves 432 * on the sleep queue, because we might want a more clever 433 * data structure for the sleep queues at some point. 434 */ 435 if (interlock != NULL) 436 simple_unlock(interlock); 437 438 /* 439 * We put ourselves on the sleep queue and start our timeout 440 * before calling CURSIG, as we could stop there, and a wakeup 441 * or a SIGCONT (or both) could occur while we were stopped. 442 * A SIGCONT would cause us to be marked as SSLEEP 443 * without resuming us, thus we must be ready for sleep 444 * when CURSIG is called. If the wakeup happens while we're 445 * stopped, p->p_wchan will be 0 upon return from CURSIG. 446 */ 447 if (catch) { 448 p->p_flag |= P_SINTR; 449 if ((sig = CURSIG(p)) != 0) { 450 if (p->p_wchan != NULL) 451 unsleep(p); 452 p->p_stat = SONPROC; 453 SCHED_UNLOCK(s); 454 goto resume; 455 } 456 if (p->p_wchan == NULL) { 457 catch = 0; 458 SCHED_UNLOCK(s); 459 goto resume; 460 } 461 } else 462 sig = 0; 463 p->p_stat = SSLEEP; 464 p->p_stats->p_ru.ru_nvcsw++; 465 466 SCHED_ASSERT_LOCKED(); 467 mi_switch(p); 468 469 #if defined(DDB) && !defined(GPROF) 470 /* handy breakpoint location after process "wakes" */ 471 __asm(".globl bpendtsleep ; bpendtsleep:"); 472 #endif 473 474 SCHED_ASSERT_UNLOCKED(); 475 splx(s); 476 477 resume: 478 KDASSERT(p->p_cpu != NULL); 479 KDASSERT(p->p_cpu == curcpu()); 480 p->p_cpu->ci_schedstate.spc_curpriority = p->p_usrpri; 481 482 p->p_flag &= ~P_SINTR; 483 if (p->p_flag & P_TIMEOUT) { 484 p->p_flag &= ~P_TIMEOUT; 485 if (sig == 0) { 486 #ifdef KTRACE 487 if (KTRPOINT(p, KTR_CSW)) 488 ktrcsw(p, 0, 0); 489 #endif 490 if (relock && interlock != NULL) 491 simple_lock(interlock); 492 return (EWOULDBLOCK); 493 } 494 } else if (timo) 495 callout_stop(&p->p_tsleep_ch); 496 if (catch && (sig != 0 || (sig = CURSIG(p)) != 0)) { 497 #ifdef KTRACE 498 if (KTRPOINT(p, KTR_CSW)) 499 ktrcsw(p, 0, 0); 500 #endif 501 if (relock && interlock != NULL) 502 simple_lock(interlock); 503 if ((SIGACTION(p, sig).sa_flags & SA_RESTART) == 0) 504 return (EINTR); 505 return (ERESTART); 506 } 507 #ifdef KTRACE 508 if (KTRPOINT(p, KTR_CSW)) 509 ktrcsw(p, 0, 0); 510 #endif 511 if (relock && interlock != NULL) 512 simple_lock(interlock); 513 return (0); 514 } 515 516 /* 517 * Implement timeout for tsleep. 518 * If process hasn't been awakened (wchan non-zero), 519 * set timeout flag and undo the sleep. If proc 520 * is stopped, just unsleep so it will remain stopped. 521 */ 522 void 523 endtsleep(void *arg) 524 { 525 struct proc *p; 526 int s; 527 528 p = (struct proc *)arg; 529 530 SCHED_LOCK(s); 531 if (p->p_wchan) { 532 if (p->p_stat == SSLEEP) 533 setrunnable(p); 534 else 535 unsleep(p); 536 p->p_flag |= P_TIMEOUT; 537 } 538 SCHED_UNLOCK(s); 539 } 540 541 /* 542 * Remove a process from its wait queue 543 */ 544 void 545 unsleep(struct proc *p) 546 { 547 struct slpque *qp; 548 struct proc **hp; 549 550 SCHED_ASSERT_LOCKED(); 551 552 if (p->p_wchan) { 553 hp = &(qp = SLPQUE(p->p_wchan))->sq_head; 554 while (*hp != p) 555 hp = &(*hp)->p_forw; 556 *hp = p->p_forw; 557 if (qp->sq_tailp == &p->p_forw) 558 qp->sq_tailp = hp; 559 p->p_wchan = 0; 560 } 561 } 562 563 /* 564 * Optimized-for-wakeup() version of setrunnable(). 565 */ 566 __inline void 567 awaken(struct proc *p) 568 { 569 570 SCHED_ASSERT_LOCKED(); 571 572 if (p->p_slptime > 1) 573 updatepri(p); 574 p->p_slptime = 0; 575 p->p_stat = SRUN; 576 577 /* 578 * Since curpriority is a user priority, p->p_priority 579 * is always better than curpriority. 580 */ 581 if (p->p_flag & P_INMEM) { 582 setrunqueue(p); 583 KASSERT(p->p_cpu != NULL); 584 need_resched(p->p_cpu); 585 } else 586 sched_wakeup(&proc0); 587 } 588 589 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG) 590 void 591 sched_unlock_idle(void) 592 { 593 594 simple_unlock(&sched_lock); 595 } 596 597 void 598 sched_lock_idle(void) 599 { 600 601 simple_lock(&sched_lock); 602 } 603 #endif /* MULTIPROCESSOR || LOCKDEBUG */ 604 605 /* 606 * Make all processes sleeping on the specified identifier runnable. 607 */ 608 609 void 610 wakeup(void *ident) 611 { 612 int s; 613 614 SCHED_ASSERT_UNLOCKED(); 615 616 SCHED_LOCK(s); 617 sched_wakeup(ident); 618 SCHED_UNLOCK(s); 619 } 620 621 void 622 sched_wakeup(void *ident) 623 { 624 struct slpque *qp; 625 struct proc *p, **q; 626 627 SCHED_ASSERT_LOCKED(); 628 629 qp = SLPQUE(ident); 630 restart: 631 for (q = &qp->sq_head; (p = *q) != NULL; ) { 632 #ifdef DIAGNOSTIC 633 if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP)) 634 panic("wakeup"); 635 #endif 636 if (p->p_wchan == ident) { 637 p->p_wchan = 0; 638 *q = p->p_forw; 639 if (qp->sq_tailp == &p->p_forw) 640 qp->sq_tailp = q; 641 if (p->p_stat == SSLEEP) { 642 awaken(p); 643 goto restart; 644 } 645 } else 646 q = &p->p_forw; 647 } 648 } 649 650 /* 651 * Make the highest priority process first in line on the specified 652 * identifier runnable. 653 */ 654 void 655 wakeup_one(void *ident) 656 { 657 struct slpque *qp; 658 struct proc *p, **q; 659 struct proc *best_sleepp, **best_sleepq; 660 struct proc *best_stopp, **best_stopq; 661 int s; 662 663 best_sleepp = best_stopp = NULL; 664 best_sleepq = best_stopq = NULL; 665 666 SCHED_LOCK(s); 667 668 qp = SLPQUE(ident); 669 670 for (q = &qp->sq_head; (p = *q) != NULL; q = &p->p_forw) { 671 #ifdef DIAGNOSTIC 672 if (p->p_back || (p->p_stat != SSLEEP && p->p_stat != SSTOP)) 673 panic("wakeup_one"); 674 #endif 675 if (p->p_wchan == ident) { 676 if (p->p_stat == SSLEEP) { 677 if (best_sleepp == NULL || 678 p->p_priority < best_sleepp->p_priority) { 679 best_sleepp = p; 680 best_sleepq = q; 681 } 682 } else { 683 if (best_stopp == NULL || 684 p->p_priority < best_stopp->p_priority) { 685 best_stopp = p; 686 best_stopq = q; 687 } 688 } 689 } 690 } 691 692 /* 693 * Consider any SSLEEP process higher than the highest priority SSTOP 694 * process. 695 */ 696 if (best_sleepp != NULL) { 697 p = best_sleepp; 698 q = best_sleepq; 699 } else { 700 p = best_stopp; 701 q = best_stopq; 702 } 703 704 if (p != NULL) { 705 p->p_wchan = NULL; 706 *q = p->p_forw; 707 if (qp->sq_tailp == &p->p_forw) 708 qp->sq_tailp = q; 709 if (p->p_stat == SSLEEP) 710 awaken(p); 711 } 712 SCHED_UNLOCK(s); 713 } 714 715 /* 716 * General yield call. Puts the current process back on its run queue and 717 * performs a voluntary context switch. 718 */ 719 void 720 yield(void) 721 { 722 struct proc *p = curproc; 723 int s; 724 725 SCHED_LOCK(s); 726 p->p_priority = p->p_usrpri; 727 p->p_stat = SRUN; 728 setrunqueue(p); 729 p->p_stats->p_ru.ru_nvcsw++; 730 mi_switch(p); 731 SCHED_ASSERT_UNLOCKED(); 732 splx(s); 733 } 734 735 /* 736 * General preemption call. Puts the current process back on its run queue 737 * and performs an involuntary context switch. If a process is supplied, 738 * we switch to that process. Otherwise, we use the normal process selection 739 * criteria. 740 */ 741 void 742 preempt(struct proc *newp) 743 { 744 struct proc *p = curproc; 745 int s; 746 747 /* 748 * XXX Switching to a specific process is not supported yet. 749 */ 750 if (newp != NULL) 751 panic("preempt: cpu_preempt not yet implemented"); 752 753 SCHED_LOCK(s); 754 p->p_priority = p->p_usrpri; 755 p->p_stat = SRUN; 756 setrunqueue(p); 757 p->p_stats->p_ru.ru_nivcsw++; 758 mi_switch(p); 759 SCHED_ASSERT_UNLOCKED(); 760 splx(s); 761 } 762 763 /* 764 * The machine independent parts of context switch. 765 * Must be called at splsched() (no higher!) and with 766 * the sched_lock held. 767 */ 768 void 769 mi_switch(struct proc *p) 770 { 771 struct schedstate_percpu *spc; 772 struct rlimit *rlim; 773 long s, u; 774 struct timeval tv; 775 #if defined(MULTIPROCESSOR) 776 int hold_count; 777 #endif 778 779 SCHED_ASSERT_LOCKED(); 780 781 #if defined(MULTIPROCESSOR) 782 /* 783 * Release the kernel_lock, as we are about to yield the CPU. 784 * The scheduler lock is still held until cpu_switch() 785 * selects a new process and removes it from the run queue. 786 */ 787 if (p->p_flag & P_BIGLOCK) 788 hold_count = spinlock_release_all(&kernel_lock); 789 #endif 790 791 KDASSERT(p->p_cpu != NULL); 792 KDASSERT(p->p_cpu == curcpu()); 793 794 spc = &p->p_cpu->ci_schedstate; 795 796 #if defined(LOCKDEBUG) || defined(DIAGNOSTIC) 797 spinlock_switchcheck(); 798 #endif 799 #ifdef LOCKDEBUG 800 simple_lock_switchcheck(); 801 #endif 802 803 /* 804 * Compute the amount of time during which the current 805 * process was running, and add that to its total so far. 806 */ 807 microtime(&tv); 808 u = p->p_rtime.tv_usec + (tv.tv_usec - spc->spc_runtime.tv_usec); 809 s = p->p_rtime.tv_sec + (tv.tv_sec - spc->spc_runtime.tv_sec); 810 if (u < 0) { 811 u += 1000000; 812 s--; 813 } else if (u >= 1000000) { 814 u -= 1000000; 815 s++; 816 } 817 p->p_rtime.tv_usec = u; 818 p->p_rtime.tv_sec = s; 819 820 /* 821 * Check if the process exceeds its cpu resource allocation. 822 * If over max, kill it. In any case, if it has run for more 823 * than 10 minutes, reduce priority to give others a chance. 824 */ 825 rlim = &p->p_rlimit[RLIMIT_CPU]; 826 if (s >= rlim->rlim_cur) { 827 /* 828 * XXXSMP: we're inside the scheduler lock perimeter; 829 * use sched_psignal. 830 */ 831 if (s >= rlim->rlim_max) 832 sched_psignal(p, SIGKILL); 833 else { 834 sched_psignal(p, SIGXCPU); 835 if (rlim->rlim_cur < rlim->rlim_max) 836 rlim->rlim_cur += 5; 837 } 838 } 839 if (autonicetime && s > autonicetime && p->p_ucred->cr_uid && 840 p->p_nice == NZERO) { 841 p->p_nice = autoniceval + NZERO; 842 resetpriority(p); 843 } 844 845 /* 846 * Process is about to yield the CPU; clear the appropriate 847 * scheduling flags. 848 */ 849 spc->spc_flags &= ~SPCF_SWITCHCLEAR; 850 851 /* 852 * Pick a new current process and switch to it. When we 853 * run again, we'll return back here. 854 */ 855 uvmexp.swtch++; 856 cpu_switch(p); 857 858 /* 859 * Make sure that MD code released the scheduler lock before 860 * resuming us. 861 */ 862 SCHED_ASSERT_UNLOCKED(); 863 864 /* 865 * We're running again; record our new start time. We might 866 * be running on a new CPU now, so don't use the cache'd 867 * schedstate_percpu pointer. 868 */ 869 KDASSERT(p->p_cpu != NULL); 870 KDASSERT(p->p_cpu == curcpu()); 871 microtime(&p->p_cpu->ci_schedstate.spc_runtime); 872 873 #if defined(MULTIPROCESSOR) 874 /* 875 * Reacquire the kernel_lock now. We do this after we've 876 * released the scheduler lock to avoid deadlock, and before 877 * we reacquire the interlock. 878 */ 879 if (p->p_flag & P_BIGLOCK) 880 spinlock_acquire_count(&kernel_lock, hold_count); 881 #endif 882 } 883 884 /* 885 * Initialize the (doubly-linked) run queues 886 * to be empty. 887 */ 888 void 889 rqinit() 890 { 891 int i; 892 893 for (i = 0; i < RUNQUE_NQS; i++) 894 sched_qs[i].ph_link = sched_qs[i].ph_rlink = 895 (struct proc *)&sched_qs[i]; 896 } 897 898 /* 899 * Change process state to be runnable, 900 * placing it on the run queue if it is in memory, 901 * and awakening the swapper if it isn't in memory. 902 */ 903 void 904 setrunnable(struct proc *p) 905 { 906 907 SCHED_ASSERT_LOCKED(); 908 909 switch (p->p_stat) { 910 case 0: 911 case SRUN: 912 case SONPROC: 913 case SZOMB: 914 case SDEAD: 915 default: 916 panic("setrunnable"); 917 case SSTOP: 918 /* 919 * If we're being traced (possibly because someone attached us 920 * while we were stopped), check for a signal from the debugger. 921 */ 922 if ((p->p_flag & P_TRACED) != 0 && p->p_xstat != 0) { 923 sigaddset(&p->p_sigctx.ps_siglist, p->p_xstat); 924 CHECKSIGS(p); 925 } 926 case SSLEEP: 927 unsleep(p); /* e.g. when sending signals */ 928 break; 929 930 case SIDL: 931 break; 932 } 933 p->p_stat = SRUN; 934 if (p->p_flag & P_INMEM) 935 setrunqueue(p); 936 937 if (p->p_slptime > 1) 938 updatepri(p); 939 p->p_slptime = 0; 940 if ((p->p_flag & P_INMEM) == 0) 941 sched_wakeup((caddr_t)&proc0); 942 else if (p->p_priority < curcpu()->ci_schedstate.spc_curpriority) { 943 /* 944 * XXXSMP 945 * This is not exactly right. Since p->p_cpu persists 946 * across a context switch, this gives us some sort 947 * of processor affinity. But we need to figure out 948 * at what point it's better to reschedule on a different 949 * CPU than the last one. 950 */ 951 need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu()); 952 } 953 } 954 955 /* 956 * Compute the priority of a process when running in user mode. 957 * Arrange to reschedule if the resulting priority is better 958 * than that of the current process. 959 */ 960 void 961 resetpriority(struct proc *p) 962 { 963 unsigned int newpriority; 964 965 SCHED_ASSERT_LOCKED(); 966 967 newpriority = PUSER + p->p_estcpu + NICE_WEIGHT * (p->p_nice - NZERO); 968 newpriority = min(newpriority, MAXPRI); 969 p->p_usrpri = newpriority; 970 if (newpriority < curcpu()->ci_schedstate.spc_curpriority) { 971 /* 972 * XXXSMP 973 * Same applies as in setrunnable() above. 974 */ 975 need_resched((p->p_cpu != NULL) ? p->p_cpu : curcpu()); 976 } 977 } 978 979 /* 980 * We adjust the priority of the current process. The priority of a process 981 * gets worse as it accumulates CPU time. The cpu usage estimator (p_estcpu) 982 * is increased here. The formula for computing priorities (in kern_synch.c) 983 * will compute a different value each time p_estcpu increases. This can 984 * cause a switch, but unless the priority crosses a PPQ boundary the actual 985 * queue will not change. The cpu usage estimator ramps up quite quickly 986 * when the process is running (linearly), and decays away exponentially, at 987 * a rate which is proportionally slower when the system is busy. The basic 988 * principle is that the system will 90% forget that the process used a lot 989 * of CPU time in 5 * loadav seconds. This causes the system to favor 990 * processes which haven't run much recently, and to round-robin among other 991 * processes. 992 */ 993 994 void 995 schedclock(struct proc *p) 996 { 997 int s; 998 999 p->p_estcpu = ESTCPULIM(p->p_estcpu + 1); 1000 1001 SCHED_LOCK(s); 1002 resetpriority(p); 1003 SCHED_UNLOCK(s); 1004 1005 if (p->p_priority >= PUSER) 1006 p->p_priority = p->p_usrpri; 1007 } 1008 1009 void 1010 suspendsched() 1011 { 1012 struct proc *p; 1013 int s; 1014 1015 /* 1016 * Convert all non-P_SYSTEM SSLEEP or SRUN processes to SSTOP. 1017 */ 1018 proclist_lock_read(); 1019 SCHED_LOCK(s); 1020 for (p = LIST_FIRST(&allproc); p != NULL; p = LIST_NEXT(p, p_list)) { 1021 if ((p->p_flag & P_SYSTEM) != 0) 1022 continue; 1023 switch (p->p_stat) { 1024 case SRUN: 1025 if ((p->p_flag & P_INMEM) != 0) 1026 remrunqueue(p); 1027 /* FALLTHROUGH */ 1028 case SSLEEP: 1029 p->p_stat = SSTOP; 1030 break; 1031 case SONPROC: 1032 /* 1033 * XXX SMP: we need to deal with processes on 1034 * others CPU ! 1035 */ 1036 break; 1037 default: 1038 break; 1039 } 1040 } 1041 SCHED_UNLOCK(s); 1042 proclist_unlock_read(); 1043 } 1044