xref: /original-bsd/sys/kern/kern_synch.c (revision 3d11fed5)
1 /*
2  * Copyright (c) 1982, 1986, 1990 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)kern_synch.c	7.14 (Berkeley) 03/17/91
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "user.h"
12 #include "proc.h"
13 #include "kernel.h"
14 #include "buf.h"
15 
16 #include "machine/cpu.h"
17 
18 /*
19  * Force switch among equal priority processes every 100ms.
20  */
21 roundrobin()
22 {
23 
24 	need_resched();
25 	timeout(roundrobin, (caddr_t)0, hz / 10);
26 }
27 
28 /*
29  * constants for digital decay and forget
30  *	90% of (p_cpu) usage in 5*loadav time
31  *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
32  *          Note that, as ps(1) mentions, this can let percentages
33  *          total over 100% (I've seen 137.9% for 3 processes).
34  *
35  * Note that hardclock updates p_cpu and p_cpticks independently.
36  *
37  * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
38  * That is, the system wants to compute a value of decay such
39  * that the following for loop:
40  * 	for (i = 0; i < (5 * loadavg); i++)
41  * 		p_cpu *= decay;
42  * will compute
43  * 	p_cpu *= 0.1;
44  * for all values of loadavg:
45  *
46  * Mathematically this loop can be expressed by saying:
47  * 	decay ** (5 * loadavg) ~= .1
48  *
49  * The system computes decay as:
50  * 	decay = (2 * loadavg) / (2 * loadavg + 1)
51  *
52  * We wish to prove that the system's computation of decay
53  * will always fulfill the equation:
54  * 	decay ** (5 * loadavg) ~= .1
55  *
56  * If we compute b as:
57  * 	b = 2 * loadavg
58  * then
59  * 	decay = b / (b + 1)
60  *
61  * We now need to prove two things:
62  *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
63  *	2) Given b/(b+1) ** power ~= .1, prove power == (5 * loadavg)
64  *
65  * Facts:
66  *         For x close to zero, exp(x) =~ 1 + x, since
67  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
68  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
69  *         For x close to zero, ln(1+x) =~ x, since
70  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
71  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
72  *         ln(.1) =~ -2.30
73  *
74  * Proof of (1):
75  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
76  *	solving for factor,
77  *      ln(factor) =~ (-2.30/5*loadav), or
78  *      factor =~ exp(-1/((5/2.30)*loadav)) =~ exp(-1/(2*loadav)) =
79  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
80  *
81  * Proof of (2):
82  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
83  *	solving for power,
84  *      power*ln(b/(b+1)) =~ -2.30, or
85  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
86  *
87  * Actual power values for the implemented algorithm are as follows:
88  *      loadav: 1       2       3       4
89  *      power:  5.68    10.32   14.94   19.55
90  */
91 
92 /* calculations for digital decay to forget 90% of usage in 5*loadav sec */
93 #define	loadfactor(loadav)	(2 * (loadav))
94 #define	decay_cpu(loadfac, cpu)	(((loadfac) * (cpu)) / ((loadfac) + FSCALE))
95 
96 /* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
97 fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */
98 
99 /*
100  * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
101  * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
102  * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
103  *
104  * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
105  *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
106  *
107  * If you dont want to bother with the faster/more-accurate formula, you
108  * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
109  * (more general) method of calculating the %age of CPU used by a process.
110  */
111 #define	CCPU_SHIFT	11
112 
113 /*
114  * Recompute process priorities, once a second
115  */
116 schedcpu()
117 {
118 	register fixpt_t loadfac = loadfactor(averunnable[0]);
119 	register struct proc *p;
120 	register int s;
121 	register unsigned int newcpu;
122 
123 	wakeup((caddr_t)&lbolt);
124 	for (p = allproc; p != NULL; p = p->p_nxt) {
125 		/*
126 		 * Increment time in/out of memory and sleep time
127 		 * (if sleeping).  We ignore overflow; with 16-bit int's
128 		 * (remember them?) overflow takes 45 days.
129 		 */
130 		p->p_time++;
131 		if (p->p_stat == SSLEEP || p->p_stat == SSTOP)
132 			p->p_slptime++;
133 		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
134 		/*
135 		 * If the process has slept the entire second,
136 		 * stop recalculating its priority until it wakes up.
137 		 */
138 		if (p->p_slptime > 1)
139 			continue;
140 		/*
141 		 * p_pctcpu is only for ps.
142 		 */
143 #if	(FSHIFT >= CCPU_SHIFT)
144 		p->p_pctcpu += (hz == 100)?
145 			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
146                 	100 * (((fixpt_t) p->p_cpticks)
147 				<< (FSHIFT - CCPU_SHIFT)) / hz;
148 #else
149 		p->p_pctcpu += ((FSCALE - ccpu) *
150 			(p->p_cpticks * FSCALE / hz)) >> FSHIFT;
151 #endif
152 		p->p_cpticks = 0;
153 		newcpu = (u_int) decay_cpu(loadfac, p->p_cpu) + p->p_nice;
154 		p->p_cpu = min(newcpu, UCHAR_MAX);
155 		setpri(p);
156 		s = splhigh();	/* prevent state changes */
157 		if (p->p_pri >= PUSER) {
158 #define	PPQ	(128 / NQS)		/* priorities per queue */
159 			if ((p != curproc || noproc) &&
160 			    p->p_stat == SRUN &&
161 			    (p->p_flag & SLOAD) &&
162 			    (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
163 				remrq(p);
164 				p->p_pri = p->p_usrpri;
165 				setrq(p);
166 			} else
167 				p->p_pri = p->p_usrpri;
168 		}
169 		splx(s);
170 	}
171 	vmmeter();
172 	if (bclnlist != NULL)
173 		wakeup((caddr_t)pageproc);
174 	timeout(schedcpu, (caddr_t)0, hz);
175 }
176 
177 /*
178  * Recalculate the priority of a process after it has slept for a while.
179  * For all load averages >= 1 and max p_cpu of 255, sleeping for at least
180  * six times the loadfactor will decay p_cpu to zero.
181  */
182 updatepri(p)
183 	register struct proc *p;
184 {
185 	register unsigned int newcpu = p->p_cpu;
186 	register fixpt_t loadfac = loadfactor(averunnable[0]);
187 
188 	if (p->p_slptime > 5 * loadfac)
189 		p->p_cpu = 0;
190 	else {
191 		p->p_slptime--;	/* the first time was done in schedcpu */
192 		while (newcpu && --p->p_slptime)
193 			newcpu = (int) decay_cpu(loadfac, newcpu);
194 		p->p_cpu = min(newcpu, UCHAR_MAX);
195 	}
196 	setpri(p);
197 }
198 
199 #define SQSIZE 0100	/* Must be power of 2 */
200 #define HASH(x)	(( (int) x >> 5) & (SQSIZE-1))
201 struct slpque {
202 	struct proc *sq_head;
203 	struct proc **sq_tailp;
204 } slpque[SQSIZE];
205 
206 /*
207  * During autoconfiguration or after a panic, a sleep will simply
208  * lower the priority briefly to allow interrupts, then return.
209  * The priority to be used (safepri) is machine-dependent, thus this
210  * value is initialized and maintained in the machine-dependent layers.
211  * This priority will typically be 0, or the lowest priority
212  * that is safe for use on the interrupt stack; it can be made
213  * higher to block network software interrupts after panics.
214  */
215 int safepri;
216 
217 /*
218  * General sleep call.
219  * Suspends current process until a wakeup is made on chan.
220  * The process will then be made runnable with priority pri.
221  * Sleeps at most timo/hz seconds (0 means no timeout).
222  * If pri includes PCATCH flag, signals are checked
223  * before and after sleeping, else signals are not checked.
224  * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
225  * If PCATCH is set and a signal needs to be delivered,
226  * ERESTART is returned if the current system call should be restarted
227  * if possible, and EINTR is returned if the system call should
228  * be interrupted by the signal (return EINTR).
229  */
230 tsleep(chan, pri, wmesg, timo)
231 	caddr_t chan;
232 	int pri;
233 	char *wmesg;
234 	int timo;
235 {
236 	register struct proc *p = curproc;		/* XXX */
237 	register struct slpque *qp;
238 	register s;
239 	int sig, catch = pri & PCATCH;
240 	extern int cold;
241 	int endtsleep();
242 
243 	s = splhigh();
244 	if (cold || panicstr) {
245 		/*
246 		 * After a panic, or during autoconfiguration,
247 		 * just give interrupts a chance, then just return;
248 		 * don't run any other procs or panic below,
249 		 * in case this is the idle process and already asleep.
250 		 */
251 		splx(safepri);
252 		splx(s);
253 		return (0);
254 	}
255 #ifdef DIAGNOSTIC
256 	if (chan == 0 || p->p_stat != SRUN || p->p_rlink)
257 		panic("tsleep");
258 #endif
259 	p->p_wchan = chan;
260 	p->p_wmesg = wmesg;
261 	p->p_slptime = 0;
262 	p->p_pri = pri & PRIMASK;
263 	qp = &slpque[HASH(chan)];
264 	if (qp->sq_head == 0)
265 		qp->sq_head = p;
266 	else
267 		*qp->sq_tailp = p;
268 	*(qp->sq_tailp = &p->p_link) = 0;
269 	if (timo)
270 		timeout(endtsleep, (caddr_t)p, timo);
271 	/*
272 	 * We put ourselves on the sleep queue and start our timeout
273 	 * before calling CURSIG, as we could stop there, and a wakeup
274 	 * or a SIGCONT (or both) could occur while we were stopped.
275 	 * A SIGCONT would cause us to be marked as SSLEEP
276 	 * without resuming us, thus we must be ready for sleep
277 	 * when CURSIG is called.  If the wakeup happens while we're
278 	 * stopped, p->p_wchan will be 0 upon return from CURSIG.
279 	 */
280 	if (catch) {
281 		p->p_flag |= SSINTR;
282 		if (sig = CURSIG(p)) {
283 			if (p->p_wchan)
284 				unsleep(p);
285 			p->p_stat = SRUN;
286 			goto resume;
287 		}
288 		if (p->p_wchan == 0) {
289 			catch = 0;
290 			goto resume;
291 		}
292 	}
293 	p->p_stat = SSLEEP;
294 	(void) spl0();
295 	p->p_stats->p_ru.ru_nvcsw++;
296 	swtch();
297 resume:
298 	curpri = p->p_usrpri;
299 	splx(s);
300 	p->p_flag &= ~SSINTR;
301 	if (p->p_flag & STIMO) {
302 		p->p_flag &= ~STIMO;
303 		if (catch == 0 || sig == 0)
304 			return (EWOULDBLOCK);
305 	} else if (timo)
306 		untimeout(endtsleep, (caddr_t)p);
307 	if (catch && (sig != 0 || (sig = CURSIG(p)))) {
308 		if (p->p_sigacts->ps_sigintr & sigmask(sig))
309 			return (EINTR);
310 		return (ERESTART);
311 	}
312 	return (0);
313 }
314 
315 /*
316  * Implement timeout for tsleep.
317  * If process hasn't been awakened (wchan non-zero),
318  * set timeout flag and undo the sleep.  If proc
319  * is stopped, just unsleep so it will remain stopped.
320  */
321 endtsleep(p)
322 	register struct proc *p;
323 {
324 	int s = splhigh();
325 
326 	if (p->p_wchan) {
327 		if (p->p_stat == SSLEEP)
328 			setrun(p);
329 		else
330 			unsleep(p);
331 		p->p_flag |= STIMO;
332 	}
333 	splx(s);
334 }
335 
336 /*
337  * Short-term, non-interruptable sleep.
338  */
339 sleep(chan, pri)
340 	caddr_t chan;
341 	int pri;
342 {
343 	register struct proc *p = curproc;		/* XXX */
344 	register struct slpque *qp;
345 	register s;
346 	extern int cold;
347 
348 #ifdef DIAGNOSTIC
349 	if (pri > PZERO) {
350 		printf("sleep called with pri %d > PZERO, wchan: %x\n",
351 			pri, chan);
352 		panic("old sleep");
353 	}
354 #endif
355 	s = splhigh();
356 	if (cold || panicstr) {
357 		/*
358 		 * After a panic, or during autoconfiguration,
359 		 * just give interrupts a chance, then just return;
360 		 * don't run any other procs or panic below,
361 		 * in case this is the idle process and already asleep.
362 		 */
363 		splx(safepri);
364 		splx(s);
365 		return;
366 	}
367 #ifdef DIAGNOSTIC
368 	if (chan==0 || p->p_stat != SRUN || p->p_rlink)
369 		panic("sleep");
370 #endif
371 	p->p_wchan = chan;
372 	p->p_wmesg = NULL;
373 	p->p_slptime = 0;
374 	p->p_pri = pri;
375 	qp = &slpque[HASH(chan)];
376 	if (qp->sq_head == 0)
377 		qp->sq_head = p;
378 	else
379 		*qp->sq_tailp = p;
380 	*(qp->sq_tailp = &p->p_link) = 0;
381 	p->p_stat = SSLEEP;
382 	(void) spl0();
383 	p->p_stats->p_ru.ru_nvcsw++;
384 	swtch();
385 	curpri = p->p_usrpri;
386 	splx(s);
387 }
388 
389 /*
390  * Remove a process from its wait queue
391  */
392 unsleep(p)
393 	register struct proc *p;
394 {
395 	register struct slpque *qp;
396 	register struct proc **hp;
397 	int s;
398 
399 	s = splhigh();
400 	if (p->p_wchan) {
401 		hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
402 		while (*hp != p)
403 			hp = &(*hp)->p_link;
404 		*hp = p->p_link;
405 		if (qp->sq_tailp == &p->p_link)
406 			qp->sq_tailp = hp;
407 		p->p_wchan = 0;
408 	}
409 	splx(s);
410 }
411 
412 /*
413  * Wakeup on "chan"; set all processes
414  * sleeping on chan to run state.
415  */
416 wakeup(chan)
417 	register caddr_t chan;
418 {
419 	register struct slpque *qp;
420 	register struct proc *p, **q;
421 	int s;
422 
423 	s = splhigh();
424 	qp = &slpque[HASH(chan)];
425 restart:
426 	for (q = &qp->sq_head; p = *q; ) {
427 #ifdef DIAGNOSTIC
428 		if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
429 			panic("wakeup");
430 #endif
431 		if (p->p_wchan == chan) {
432 			p->p_wchan = 0;
433 			*q = p->p_link;
434 			if (qp->sq_tailp == &p->p_link)
435 				qp->sq_tailp = q;
436 			if (p->p_stat == SSLEEP) {
437 				/* OPTIMIZED INLINE EXPANSION OF setrun(p) */
438 				if (p->p_slptime > 1)
439 					updatepri(p);
440 				p->p_slptime = 0;
441 				p->p_stat = SRUN;
442 				if (p->p_flag & SLOAD)
443 					setrq(p);
444 				/*
445 				 * Since curpri is a usrpri,
446 				 * p->p_pri is always better than curpri.
447 				 */
448 				if ((p->p_flag&SLOAD) == 0)
449 					wakeup((caddr_t)&proc0);
450 				else
451 					need_resched();
452 				/* END INLINE EXPANSION */
453 				goto restart;
454 			}
455 		} else
456 			q = &p->p_link;
457 	}
458 	splx(s);
459 }
460 
461 /*
462  * Initialize the (doubly-linked) run queues
463  * to be empty.
464  */
465 rqinit()
466 {
467 	register int i;
468 
469 	for (i = 0; i < NQS; i++)
470 		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
471 }
472 
473 /*
474  * Change process state to be runnable,
475  * placing it on the run queue if it is in memory,
476  * and awakening the swapper if it isn't in memory.
477  */
478 setrun(p)
479 	register struct proc *p;
480 {
481 	register int s;
482 
483 	s = splhigh();
484 	switch (p->p_stat) {
485 
486 	case 0:
487 	case SWAIT:
488 	case SRUN:
489 	case SZOMB:
490 	default:
491 		panic("setrun");
492 
493 	case SSTOP:
494 	case SSLEEP:
495 		unsleep(p);		/* e.g. when sending signals */
496 		break;
497 
498 	case SIDL:
499 		break;
500 	}
501 	p->p_stat = SRUN;
502 	if (p->p_flag & SLOAD)
503 		setrq(p);
504 	splx(s);
505 	if (p->p_slptime > 1)
506 		updatepri(p);
507 	p->p_slptime = 0;
508 	if ((p->p_flag&SLOAD) == 0)
509 		wakeup((caddr_t)&proc0);
510 	else if (p->p_pri < curpri)
511 		need_resched();
512 }
513 
514 /*
515  * Compute priority of process when running in user mode.
516  * Arrange to reschedule if the resulting priority
517  * is better than that of the current process.
518  */
519 setpri(p)
520 	register struct proc *p;
521 {
522 	register unsigned int newpri;
523 
524 	newpri = PUSER + p->p_cpu / 4 + 2 * p->p_nice;
525 	newpri = min(newpri, MAXPRI);
526 	p->p_usrpri = newpri;
527 	if (newpri < curpri)
528 		need_resched();
529 }
530