xref: /original-bsd/sys/kern/kern_synch.c (revision abd50c55)
1 /*
2  * Copyright (c) 1982, 1986, 1990 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)kern_synch.c	7.11 (Berkeley) 04/03/90
7  */
8 
9 #include "machine/pte.h"
10 #include "machine/psl.h"
11 #include "machine/mtpr.h"
12 
13 #include "param.h"
14 #include "systm.h"
15 #include "user.h"
16 #include "proc.h"
17 #include "vm.h"
18 #include "kernel.h"
19 #include "buf.h"
20 
21 /*
22  * Force switch among equal priority processes every 100ms.
23  */
roundrobin()
{

	/*
	 * Ask for a reschedule: runrun tells swtch() to choose a new
	 * process, aston() posts an AST so the return-to-user path
	 * notices the request.
	 */
	runrun++;
	aston();
	/* Re-arm ourselves to fire again in hz/10 ticks (100ms). */
	timeout(roundrobin, (caddr_t)0, hz / 10);
}
31 
32 /*
33  * constants for digital decay and forget
34  *	90% of (p_cpu) usage in 5*loadav time
35  *	95% of (p_pctcpu) usage in 60 seconds (load insensitive)
36  *          Note that, as ps(1) mentions, this can let percentages
37  *          total over 100% (I've seen 137.9% for 3 processes).
38  *
39  * Note that hardclock updates p_cpu and p_cpticks independently.
40  *
41  * We wish to decay away 90% of p_cpu in (5 * loadavg) seconds.
42  * That is, the system wants to compute a value of decay such
43  * that the following for loop:
44  * 	for (i = 0; i < (5 * loadavg); i++)
45  * 		p_cpu *= decay;
46  * will compute
47  * 	p_cpu *= 0.1;
48  * for all values of loadavg:
49  *
50  * Mathematically this loop can be expressed by saying:
51  * 	decay ** (5 * loadavg) ~= .1
52  *
53  * The system computes decay as:
54  * 	decay = (2 * loadavg) / (2 * loadavg + 1)
55  *
56  * We wish to prove that the system's computation of decay
57  * will always fulfill the equation:
58  * 	decay ** (5 * loadavg) ~= .1
59  *
60  * If we compute b as:
61  * 	b = 2 * loadavg
62  * then
63  * 	decay = b / (b + 1)
64  *
65  * We now need to prove two things:
66  *	1) Given factor ** (5 * loadavg) ~= .1, prove factor == b/(b+1)
67  *	2) Given (b/(b+1)) ** power ~= .1, prove power == (5 * loadavg)
68  *
69  * Facts:
70  *         For x close to zero, exp(x) =~ 1 + x, since
71  *              exp(x) = 0! + x**1/1! + x**2/2! + ... .
72  *              therefore exp(-1/b) =~ 1 - (1/b) = (b-1)/b.
73  *         For x close to zero, ln(1+x) =~ x, since
74  *              ln(1+x) = x - x**2/2 + x**3/3 - ...     -1 < x < 1
75  *              therefore ln(b/(b+1)) = ln(1 - 1/(b+1)) =~ -1/(b+1).
76  *         ln(.1) =~ -2.30
77  *
78  * Proof of (1):
79  *    Solve (factor)**(power) =~ .1 given power (5*loadav):
80  *	solving for factor,
81  *      ln(factor) =~ (-2.30/5*loadav), or
82  *      factor =~ exp(-1/((5/2.30)*loadav) =~ exp(-1/(2*loadav)) =
83  *          exp(-1/b) =~ (b-1)/b =~ b/(b+1).                    QED
84  *
85  * Proof of (2):
86  *    Solve (factor)**(power) =~ .1 given factor == (b/(b+1)):
87  *	solving for power,
88  *      power*ln(b/(b+1)) =~ -2.30, or
89  *      power =~ 2.3 * (b + 1) = 4.6*loadav + 2.3 =~ 5*loadav.  QED
90  *
91  * Actual power values for the implemented algorithm are as follows:
92  *      loadav: 1       2       3       4
93  *      power:  5.68    10.32   14.94   19.55
94  */
95 
/* calculations for digital decay to forget 90% of usage in 5*loadav sec */
/* get_b yields the fixed-point decay numerator: twice the load average */
#define	get_b(loadav)		(2 * (loadav))
/* one second of decay: cpu * b/(b+FSCALE), i.e. 2l/(2l+1) in fixed point */
#define	get_pcpu(b, cpu)	(((b) * ((cpu) & 0377)) / ((b) + FSCALE))

/* decay 95% of `p_pctcpu' in 60 seconds; see CCPU_SHIFT before changing */
fixpt_t	ccpu = 0.95122942450071400909 * FSCALE;		/* exp(-1/20) */

/*
 * If `ccpu' is not equal to `exp(-1/20)' and you still want to use the
 * faster/more-accurate formula, you'll have to estimate CCPU_SHIFT below
 * and possibly adjust FSHIFT in "param.h" so that (FSHIFT >= CCPU_SHIFT).
 *
 * To estimate CCPU_SHIFT for exp(-1/20), the following formula was used:
 *	1 - exp(-1/20) ~= 0.0487 ~= 0.0488 == 1 (fixed pt, *11* bits).
 *
 * If you don't want to bother with the faster/more-accurate formula, you
 * can set CCPU_SHIFT to (FSHIFT + 1) which will use a slower/less-accurate
 * (more general) method of calculating the %age of CPU used by a process.
 */
#define	CCPU_SHIFT	11
116 
117 /*
118  * Recompute process priorities, once a second
119  */
schedcpu()
{
	/* b is the fixed-point decay numerator, 2 * (1-minute load avg) */
	register fixpt_t b = get_b(averunnable[0]);
	register struct proc *p;
	register int s, a;

	/* once-a-second wakeup for anyone sleeping on lbolt */
	wakeup((caddr_t)&lbolt);
	for (p = allproc; p != NULL; p = p->p_nxt) {
		/* p_time and p_slptime saturate at 127 rather than wrap */
		if (p->p_time != 127)
			p->p_time++;
		if (p->p_stat==SSLEEP || p->p_stat==SSTOP)
			if (p->p_slptime != 127)
				p->p_slptime++;
		/* decay %cpu by ccpu = exp(-1/20): 95% forgotten in 60s */
		p->p_pctcpu = (p->p_pctcpu * ccpu) >> FSHIFT;
		/*
		 * If the process has slept the entire second,
		 * stop recalculating its priority until it wakes up.
		 */
		if (p->p_slptime > 1)
			continue;
		/*
		 * p_pctcpu is only for ps.
		 */
#if	(FSHIFT >= CCPU_SHIFT)
		p->p_pctcpu += (hz == 100)?
			((fixpt_t) p->p_cpticks) << (FSHIFT - CCPU_SHIFT):
                	100 * (((fixpt_t) p->p_cpticks)
				<< (FSHIFT - CCPU_SHIFT)) / hz;
#else
		p->p_pctcpu += ((FSCALE - ccpu) *
			(p->p_cpticks * FSCALE / hz)) >> FSHIFT;
#endif
		p->p_cpticks = 0;
		/* decay p_cpu, fold in nice, and clamp to 0..255 */
		a = (int) get_pcpu(b, p->p_cpu) + p->p_nice;
		if (a < 0)
			a = 0;
		if (a > 255)
			a = 255;
		p->p_cpu = a;
		(void) setpri(p);
		s = splhigh();	/* prevent state changes */
		if (p->p_pri >= PUSER) {
#define	PPQ	(128 / NQS)
			/*
			 * Move the process to its new run queue only if
			 * the new priority lands in a different queue
			 * (PPQ priority levels share each queue).
			 */
			if ((p != u.u_procp || noproc) &&
			    p->p_stat == SRUN &&
			    (p->p_flag & SLOAD) &&
			    (p->p_pri / PPQ) != (p->p_usrpri / PPQ)) {
				remrq(p);
				p->p_pri = p->p_usrpri;
				setrq(p);
			} else
				p->p_pri = p->p_usrpri;
		}
		splx(s);
	}
	vmmeter();
	if (runin!=0) {
		/* someone is waiting on runin (swap-in path); wake them */
		runin = 0;
		wakeup((caddr_t)&runin);
	}
	/* NOTE(review): proc[2] is presumably the daemon that drains bclnlist — confirm */
	if (bclnlist != NULL)
		wakeup((caddr_t)&proc[2]);
	/* re-arm for the next once-a-second pass */
	timeout(schedcpu, (caddr_t)0, hz);
}
184 
185 /*
186  * Recalculate the priority of a process after it has slept for a while.
187  */
updatepri(p)
	register struct proc *p;
{
	register int a = p->p_cpu & 0377;
	register fixpt_t b = get_b(averunnable[0]);

	p->p_slptime--;		/* the first time was done in schedcpu */
	/* apply one second's decay for each remaining second slept */
	while (a && --p->p_slptime)
		a = (int) get_pcpu(b, a) /* + p->p_nice */;
	p->p_slptime = 0;
	/* clamp to the 0..255 range p_cpu can hold */
	if (a < 0)
		a = 0;
	if (a > 255)
		a = 255;
	p->p_cpu = a;
	(void) setpri(p);
}
205 
#define SQSIZE 0100	/* Must be power of 2 */
/*
 * Hash a wait channel address to a sleep-queue index.
 * The argument is fully parenthesized so that expression
 * arguments (e.g. HASH(base + off)) are cast as a whole;
 * the old form cast only the first operand.
 */
#define HASH(x)	(((int)(x) >> 5) & (SQSIZE-1))
/* hashed sleep queues: head pointer plus tail link for O(1) append */
struct slpque {
	struct proc *sq_head;
	struct proc **sq_tailp;
} slpque[SQSIZE];
212 
213 /*
214  * General sleep call.
215  * Suspends current process until a wakeup is made on chan.
216  * The process will then be made runnable with priority pri.
217  * Sleeps at most timo/hz seconds (0 means no timeout).
218  * If pri includes PCATCH flag, signals are checked
219  * before and after sleeping, else signals are not checked.
220  * Returns 0 if awakened, EWOULDBLOCK if the timeout expires.
221  * If PCATCH is set and a signal needs to be delivered,
222  * ERESTART is returned if the current system call should be restarted
223  * if possible, and EINTR is returned if the system call should
224  * be interrupted by the signal (return EINTR).
225  */
tsleep(chan, pri, wmesg, timo)
	caddr_t chan;
	int pri;
	char *wmesg;
	int timo;
{
	register struct proc *rp;
	register struct slpque *qp;
	register s;
	int sig, catch = pri & PCATCH;
	extern int cold;
	int endtsleep();

	rp = u.u_procp;
	s = splhigh();		/* block interrupts while we change state */
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		(void) spl0();
		splx(s);
		return (0);
	}
#ifdef DIAGNOSTIC
	if (chan == 0 || rp->p_stat != SRUN || rp->p_rlink)
		panic("tsleep");
#endif
	rp->p_wchan = chan;
	rp->p_wmesg = wmesg;
	rp->p_slptime = 0;
	rp->p_pri = pri & PRIMASK;	/* strip PCATCH from the priority */
	/* append to the tail of the hashed sleep queue for this channel */
	qp = &slpque[HASH(chan)];
	if (qp->sq_head == 0)
		qp->sq_head = rp;
	else
		*qp->sq_tailp = rp;
	*(qp->sq_tailp = &rp->p_link) = 0;
	/*
	 * If we stop in CURSIG/issig(), wakeup may already
	 * have happened when we return.
	 * rp->p_wchan will then be 0.
	 */
	if (catch) {
		if (sig = CURSIG(rp)) {
			/* signal already pending: abort the sleep now */
			if (rp->p_wchan)
				unsleep(rp);
			rp->p_stat = SRUN;
			splx(s);
			if (u.u_sigintr & sigmask(sig))
				return (EINTR);
			return (ERESTART);
		}
		if (rp->p_wchan == 0) {
			/* awakened while checking for signals */
			splx(s);
			return (0);
		}
		rp->p_flag |= SSINTR;	/* mark sleep signal-interruptible */
	}
	rp->p_stat = SSLEEP;
	if (timo)
		timeout(endtsleep, (caddr_t)rp, timo);
	(void) spl0();
	u.u_ru.ru_nvcsw++;	/* charge a voluntary context switch */
	swtch();
	curpri = rp->p_usrpri;
	splx(s);
	rp->p_flag &= ~SSINTR;
	if (rp->p_flag & STIMO) {
		/* endtsleep already fired, so no untimeout is needed */
		rp->p_flag &= ~STIMO;
		return (EWOULDBLOCK);
	}
	if (timo)
		untimeout(endtsleep, (caddr_t)rp);	/* cancel pending timeout */
	if (catch && (sig = CURSIG(rp))) {
		/* awakened by (or with) a pending signal */
		if (u.u_sigintr & sigmask(sig))
			return (EINTR);
		return (ERESTART);
	}
	return (0);
}
309 
310 /*
311  * Implement timeout for tsleep.
312  * If process hasn't been awakened (wchan non-zero),
313  * set timeout flag and undo the sleep.  If proc
314  * is stopped, just unsleep so it will remain stopped.
315  */
316 endtsleep(p)
317 	register struct proc *p;
318 {
319 	int s = splhigh();
320 
321 	if (p->p_wchan) {
322 		if (p->p_stat == SSLEEP)
323 			setrun(p);
324 		else
325 			unsleep(p);
326 		p->p_flag |= STIMO;
327 	}
328 	splx(s);
329 }
330 
331 /*
332  * Short-term, non-interruptable sleep.
333  */
sleep(chan, pri)
	caddr_t chan;
	int pri;
{
	register struct proc *rp;
	register struct slpque *qp;
	register s;
	extern int cold;

#ifdef DIAGNOSTIC
	/* interruptible priorities are no longer allowed through here */
	if (pri > PZERO) {
		printf("sleep called with pri %d > PZERO, wchan: %x\n",
			pri, chan);
		panic("old sleep");
	}
#endif
	rp = u.u_procp;
	s = splhigh();		/* block interrupts while we change state */
	if (cold || panicstr) {
		/*
		 * After a panic, or during autoconfiguration,
		 * just give interrupts a chance, then just return;
		 * don't run any other procs or panic below,
		 * in case this is the idle process and already asleep.
		 */
		(void) spl0();
		splx(s);
		return;
	}
#ifdef DIAGNOSTIC
	if (chan==0 || rp->p_stat != SRUN || rp->p_rlink)
		panic("sleep");
#endif
	rp->p_wchan = chan;
	rp->p_wmesg = NULL;
	rp->p_slptime = 0;
	rp->p_pri = pri;
	/* append to the tail of the hashed sleep queue for this channel */
	qp = &slpque[HASH(chan)];
	if (qp->sq_head == 0)
		qp->sq_head = rp;
	else
		*qp->sq_tailp = rp;
	*(qp->sq_tailp = &rp->p_link) = 0;
	rp->p_stat = SSLEEP;
	(void) spl0();
	u.u_ru.ru_nvcsw++;	/* charge a voluntary context switch */
	swtch();
	curpri = rp->p_usrpri;
	splx(s);
}
384 
385 /*
386  * Remove a process from its wait queue
387  */
388 unsleep(p)
389 	register struct proc *p;
390 {
391 	register struct slpque *qp;
392 	register struct proc **hp;
393 	int s;
394 
395 	s = splhigh();
396 	if (p->p_wchan) {
397 		hp = &(qp = &slpque[HASH(p->p_wchan)])->sq_head;
398 		while (*hp != p)
399 			hp = &(*hp)->p_link;
400 		*hp = p->p_link;
401 		if (qp->sq_tailp == &p->p_link)
402 			qp->sq_tailp = hp;
403 		p->p_wchan = 0;
404 	}
405 	splx(s);
406 }
407 
408 /*
409  * Wake up all processes sleeping on chan.
410  */
wakeup(chan)
	register caddr_t chan;
{
	register struct slpque *qp;
	register struct proc *p, **q;
	int s;

	s = splhigh();
	qp = &slpque[HASH(chan)];
restart:
	/* scan the hash chain; other channels may collide onto this queue */
	for (q = &qp->sq_head; p = *q; ) {
#ifdef DIAGNOSTIC
		if (p->p_rlink || p->p_stat != SSLEEP && p->p_stat != SSTOP)
			panic("wakeup");
#endif
		if (p->p_wchan==chan) {
			/* unlink from the queue, fixing the tail pointer */
			p->p_wchan = 0;
			*q = p->p_link;
			if (qp->sq_tailp == &p->p_link)
				qp->sq_tailp = q;
			if (p->p_stat == SSLEEP) {
				/* OPTIMIZED INLINE EXPANSION OF setrun(p) */
				if (p->p_slptime > 1)
					updatepri(p);
				p->p_stat = SRUN;
				if (p->p_flag & SLOAD)
					setrq(p);
				/*
				 * Since curpri is a usrpri,
				 * p->p_pri is always better than curpri.
				 */
				runrun++;
				aston();
				if ((p->p_flag&SLOAD) == 0) {
					/* not in core: notify the swap-in path */
					if (runout != 0) {
						runout = 0;
						wakeup((caddr_t)&runout);
					}
					wantin++;
				}
				/* END INLINE EXPANSION */
				/*
				 * The recursive wakeup(&runout) above may
				 * have modified this queue; rescan it.
				 */
				goto restart;
			}
		} else
			q = &p->p_link;
	}
	splx(s);
}
459 
460 /*
461  * Initialize the (doubly-linked) run queues
462  * to be empty.
463  */
464 rqinit()
465 {
466 	register int i;
467 
468 	for (i = 0; i < NQS; i++)
469 		qs[i].ph_link = qs[i].ph_rlink = (struct proc *)&qs[i];
470 }
471 
472 /*
473  * Set the process running;
474  * arrange for it to be swapped in if necessary.
475  */
setrun(p)
	register struct proc *p;
{
	register int s;

	s = splhigh();
	switch (p->p_stat) {

	/* these states can never legitimately become runnable */
	case 0:
	case SWAIT:
	case SRUN:
	case SZOMB:
	default:
		panic("setrun");

	case SSTOP:
	case SSLEEP:
		unsleep(p);		/* e.g. when sending signals */
		break;

	case SIDL:
		break;
	}
	p->p_stat = SRUN;
	if (p->p_flag & SLOAD)
		setrq(p);	/* in core: place on a run queue */
	splx(s);
	/* recompute priority if schedcpu stopped decaying it while asleep */
	if (p->p_slptime > 1)
		updatepri(p);
	if (p->p_pri < curpri) {
		/* better priority than the running process: reschedule */
		runrun++;
		aston();
	}
	if ((p->p_flag&SLOAD) == 0) {
		/* not in core: notify the swap-in path of the demand */
		if (runout != 0) {
			runout = 0;
			wakeup((caddr_t)&runout);
		}
		wantin++;
	}
}
517 
518 /*
519  * Set user priority.
520  * The rescheduling flag (runrun)
521  * is set if the priority is better
522  * than the currently running process.
523  */
524 setpri(pp)
525 	register struct proc *pp;
526 {
527 	register int p;
528 
529 	p = (pp->p_cpu & 0377)/4;
530 	p += PUSER + 2 * pp->p_nice;
531 	if (pp->p_rssize > pp->p_maxrss && freemem < desfree)
532 		p += 2*4;	/* effectively, nice(4) */
533 	if (p > 127)
534 		p = 127;
535 	if (p < curpri) {
536 		runrun++;
537 		aston();
538 	}
539 	pp->p_usrpri = p;
540 	return (p);
541 }
542