xref: /original-bsd/sys/kern/kern_clock.c (revision fbed46ce)
1 /*	kern_clock.c	4.30	81/12/19	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/dk.h"
6 #include "../h/callout.h"
7 #include "../h/seg.h"
8 #include "../h/dir.h"
9 #include "../h/user.h"
10 #include "../h/proc.h"
11 #include "../h/reg.h"
12 #include "../h/psl.h"
13 #include "../h/vm.h"
14 #include "../h/buf.h"
15 #include "../h/text.h"
16 #include "../h/vlimit.h"
17 #include "../h/mtpr.h"
18 #include "../h/clock.h"
19 #include "../h/cpu.h"
20 #include "../h/protosw.h"
21 
22 #include "bk.h"
23 #include "dh.h"
24 #include "dz.h"
25 
26 /*
27  * Hardclock is called straight from
28  * the real time clock interrupt.
29  * We limit the work we do at real clock interrupt time to:
30  *	reloading clock
31  *	decrementing time to callouts
32  *	recording cpu time usage
33  *	modifying priority of current process
34  *	arrange for soft clock interrupt
35  *	kernel pc profiling
36  *
37  * At software (softclock) interrupt time we:
38  *	implement callouts
39  *	maintain date
40  *	lightning bolt wakeup (every second)
41  *	alarm clock signals
42  *	jab the scheduler
43  *
44  * On the vax softclock interrupts are implemented by
45  * software interrupts.  Note that we may have multiple softclock
46  * interrupts compressed into one (due to excessive interrupt load),
47  * but that hardclock interrupts should never be lost.
48  */
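
/*
 * Illustrative sketch (not part of this revision): the hard/soft split in
 * miniature, at user level.  The names tick_hard() and tick_soft() are
 * hypothetical stand-ins for hardclock() and softclock(); the hard half only
 * counts the tick and posts a request (the way hardclock() ends with
 * setsoftclock()), while the soft half may absorb several posted ticks in
 * one pass, so soft work can be compressed but no hard tick is lost.
 */
#ifdef notdef
#include <stdio.h>

static int softpending;			/* analogue of the softclock request */
static long hardticks, softticks;

static void
tick_hard(void)				/* the cheap half: count and post */
{
	hardticks++;
	softpending = 1;		/* like setsoftclock() */
}

static void
tick_soft(void)				/* the slow half: catch up later */
{
	if (!softpending)
		return;
	softpending = 0;
	while (softticks < hardticks)
		softticks++;		/* per-tick bookkeeping would go here */
}

int
main(void)
{
	int i;

	/* three hard ticks arrive before the soft half gets to run */
	for (i = 0; i < 3; i++)
		tick_hard();
	tick_soft();			/* one soft pass absorbs all three */
	printf("hard ticks %ld, absorbed %ld\n", hardticks, softticks);
	return 0;
}
#endif
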
49 #ifdef KPROF
50 int	kcounts[20000];
51 #endif
52 
53 /*
54  * Protoslow is like lbolt, but for slow protocol timeouts: it counts
55  * down hz/PR_SLOWHZ ticks and then causes a pfslowtimo().
56  * Protofast is like lbolt, but for fast protocol timeouts: it counts
57  * down hz/PR_FASTHZ ticks and then causes a pffasttimo().
58  */
59 int	protoslow;
60 int	protofast;
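
/*
 * Illustrative sketch (not part of this revision): one simulated second of
 * the countdown these counters follow.  HZ, SLOWHZ and FASTHZ are stand-ins
 * for hz, PR_SLOWHZ and PR_FASTHZ (the values 60, 2 and 5 here are only
 * illustrative); hardclock() does the decrements and softclock() does the
 * reload-and-call, so the slow timer fires SLOWHZ times a second and the
 * fast timer FASTHZ times a second.
 */
#ifdef notdef
#include <stdio.h>

#define HZ	60
#define SLOWHZ	2
#define FASTHZ	5

int
main(void)
{
	int slow = 0, fast = 0;		/* like protoslow and protofast */
	int nslow = 0, nfast = 0;
	int tick;

	for (tick = 0; tick < HZ; tick++) {
		--slow; --fast;		/* hardclock()'s share */
		if (fast <= 0) {	/* softclock()'s share */
			fast = HZ / FASTHZ;
			nfast++;	/* would call pffasttimo() */
		}
		if (slow <= 0) {
			slow = HZ / SLOWHZ;
			nslow++;	/* would call pfslowtimo() */
		}
	}
	printf("slow timeouts %d, fast timeouts %d\n", nslow, nfast);
	return 0;
}
#endif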
61 
62 /*ARGSUSED*/
63 hardclock(pc, ps)
64 	caddr_t pc;
65 {
66 	register struct callout *p1;
67 	register struct proc *pp;
68 	register int s, cpstate;
69 
70 	/*
71 	 * reprime clock
72 	 */
73 	clkreld();
74 
75 	/*
76 	 * update callout times
77 	 */
78 	for (p1 = calltodo.c_next; p1 && p1->c_time <= 0; p1 = p1->c_next)
79 		;
80 	if (p1)
81 		p1->c_time--;
82 
83 	/*
84 	 * Maintain per-process memory usage statistics and enforce the cpu time limit
85 	 */
86 	if (!noproc) {
87 		s = u.u_procp->p_rssize;
88 		u.u_vm.vm_idsrss += s;
89 		if (u.u_procp->p_textp) {
90 			register int xrss = u.u_procp->p_textp->x_rssize;
91 
92 			s += xrss;
93 			u.u_vm.vm_ixrss += xrss;
94 		}
95 		if (s > u.u_vm.vm_maxrss)
96 			u.u_vm.vm_maxrss = s;
97 		if ((u.u_vm.vm_utime+u.u_vm.vm_stime+1)/hz > u.u_limit[LIM_CPU]) {
98 			psignal(u.u_procp, SIGXCPU);
99 			if (u.u_limit[LIM_CPU] < INFINITY - 5)
100 				u.u_limit[LIM_CPU] += 5;
101 		}
102 	}
103 	/*
104 	 * Update iostat information.
105 	 */
106 	if (USERMODE(ps)) {
107 		u.u_vm.vm_utime++;
108 		if(u.u_procp->p_nice > NZERO)
109 			cpstate = CP_NICE;
110 		else
111 			cpstate = CP_USER;
112 	} else {
113 #ifdef KPROF
114 		int k = ((int)pc & 0x7fffffff) / 8;
115 		if (k < 20000)
116 			kcounts[k]++;
117 #endif
118 		cpstate = CP_SYS;
119 		if (noproc)
120 			cpstate = CP_IDLE;
121 		else
122 			u.u_vm.vm_stime++;
123 	}
124 	cp_time[cpstate]++;
125 	for (s = 0; s < DK_NDRIVE; s++)
126 		if (dk_busy&(1<<s))
127 			dk_time[s]++;
128 	/*
129 	 * Adjust priority of current process.
130 	 */
131 	if (!noproc) {
132 		pp = u.u_procp;
133 		pp->p_cpticks++;
134 		if(++pp->p_cpu == 0)
135 			pp->p_cpu--;
136 		if(pp->p_cpu % 4 == 0) {
137 			(void) setpri(pp);
138 			if (pp->p_pri >= PUSER)
139 				pp->p_pri = pp->p_usrpri;
140 		}
141 	}
142 	/*
143 	 * Time moves on.
144 	 */
145 	++lbolt;
146 
147 	/*
148 	 * Time moves on for protocols.
149 	 */
150 	--protoslow; --protofast;
151 
152 #if VAX780
153 	/*
154 	 * On 780's, implement a fast UBA watcher,
155 	 * to make sure uba's don't get stuck.
156 	 */
157 	if (cpu == VAX_780 && panicstr == 0 && !BASEPRI(ps))
158 		unhang();
159 #endif
160 	/*
161 	 * Schedule a software interrupt for the rest
162 	 * of clock activities.
163 	 */
164 	setsoftclock();
165 }
166 
167 /*
168  * The digital decay cpu usage priority assignment is scaled to run in
169  * time as expanded by the 1 minute load average.  Each second we
170  * multiply the previous cpu usage estimate by
171  *		nrscale*avenrun[0]
172  * The following relates the load average to the period over which
173  * cpu usage is 90% forgotten:
174  *	loadav 1	 5 seconds
175  *	loadav 5	24 seconds
176  *	loadav 10	47 seconds
177  *	loadav 20	93 seconds
178  * This is a great improvement on the previous algorithm which
179  * decayed the priorities by a constant, and decayed away all knowledge
180  * of previous activity in about 20 seconds.  Under heavy load,
181  * the previous algorithm degenerated to round-robin and gave
182  * poor response time.
183  */
184 #undef ave
185 #define	ave(a,b) ((int)(((int)(a*b))/(b+1)))
186 int	nrscale = 2;
187 double	avenrun[];
188 
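/*
 * Illustrative sketch (not part of this revision): the decay rate the table
 * above describes, computed directly.  Ignoring the integer truncation in
 * ave(), each second multiplies the estimate by b/(b+1), where
 * b = nrscale*avenrun[0]; 90% of the history is gone after about
 * log(10)/log((b+1)/b) seconds, which reproduces the table above to within
 * a second for load averages 1, 5, 10 and 20.
 */
#ifdef notdef
#include <stdio.h>
#include <math.h>

int
main(void)
{
	static double loadav[] = { 1.0, 5.0, 10.0, 20.0 };
	int nrscale = 2;
	int i;

	for (i = 0; i < 4; i++) {
		double b = nrscale * loadav[i];	/* ave() multiplies by b/(b+1) */
		double secs = log(10.0) / log((b + 1.0) / b);

		printf("loadav %2.0f: 90%% forgotten in about %2.0f seconds\n",
		    loadav[i], secs);
	}
	return 0;
}
#endif
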
189 /*
190  * Constant for decay filter for cpu usage field
191  * in process table (used by ps au).
192  */
193 double	ccpu = 0.95122942450071400909;		/* exp(-1/20) */
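
/*
 * Illustrative sketch (not part of this revision): what the ccpu constant
 * means for the p_pctcpu filter applied below in softclock().  Once a
 * process stops accumulating ticks, each second multiplies its %cpu by
 * ccpu = exp(-1/20), so the value falls to about 37% of itself for every
 * 20 seconds of idleness.
 */
#ifdef notdef
#include <stdio.h>
#include <math.h>

int
main(void)
{
	double ccpu = exp(-1.0 / 20.0);	/* the constant above */
	double pctcpu = 1.0;		/* pretend 100% cpu up to now */
	int sec;

	/* the process goes idle: p_cpticks stays 0, only the decay remains */
	for (sec = 1; sec <= 60; sec++) {
		pctcpu = ccpu * pctcpu + (1.0 - ccpu) * 0.0;
		if (sec % 20 == 0)
			printf("after %2d idle seconds: %4.1f%%\n",
			    sec, 100.0 * pctcpu);
	}
	return 0;
}
#endif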
194 
195 /*
196  * Software clock interrupt.
197  * This routine runs at lower priority than device interrupts.
198  */
199 /*ARGSUSED*/
200 softclock(pc, ps)
201 	caddr_t pc;
202 {
203 	register struct callout *p1;
204 	register struct proc *pp;
205 	register int a, s;
206 	caddr_t arg;
207 	int (*func)();
208 
209 	/*
210 	 * Perform callouts (but not after panics!)
211 	 */
212 	if (panicstr == 0) {
213 		for (;;) {
214 			s = spl7();
215 			if ((p1 = calltodo.c_next) == 0 || p1->c_time > 0) {
216 				splx(s);
217 				break;
218 			}
219 			calltodo.c_next = p1->c_next;
220 			arg = p1->c_arg;
221 			func = p1->c_func;
222 			p1->c_next = callfree;
223 			callfree = p1;
224 			(void) splx(s);
225 			(*func)(arg);
226 		}
227 	}
228 
229 	/*
230 	 * Drain silos.
231 	 */
232 #if NDH > 0
233 	s = spl5(); dhtimer(); splx(s);
234 #endif
235 #if NDZ > 0
236 	s = spl5(); dztimer(); splx(s);
237 #endif
238 
239 	/*
240 	 * If idling and processes are waiting to swap in,
241 	 * check on them.
242 	 */
243 	if (noproc && runin) {
244 		runin = 0;
245 		wakeup((caddr_t)&runin);
246 	}
247 
248 	/*
249 	 * Run paging daemon every 1/4 sec.
250 	 */
251 	if (lbolt % (hz/4) == 0) {
252 		vmpago();
253 	}
254 
255 	/*
256 	 * Reschedule every 1/10 sec.
257 	 */
258 	if (lbolt % (hz/10) == 0) {
259 		runrun++;
260 		aston();
261 	}
262 
263 	/*
264 	 * Run network slow and fast timeouts.
265 	 */
266 	if (protofast <= 0) {
267 		protofast = hz / PR_FASTHZ;
268 		pffasttimo();
269 	}
270 	if (protoslow <= 0) {
271 		protoslow = hz / PR_SLOWHZ;
272 		pfslowtimo();
273 	}
274 
275 	/*
276 	 * Lightning bolt every second:
277 	 *	sleep timeouts
278 	 *	process priority recomputation
279 	 *	process %cpu averaging
280 	 *	virtual memory metering
281 	 *	kick swapper if processes want in
282 	 */
283 	if (lbolt >= hz) {
284 		/*
285 		 * This doesn't mean much on the VAX, since we run at
286 		 * software interrupt time... but if hardclock()
287 		 * calls softclock() directly, this prevents the
288 		 * once-a-second work from running when the priority
289 		 * was already raised at the time of the clock interrupt.
290 		 */
291 		if (BASEPRI(ps))
292 			return;
293 
294 		/*
295 		 * If we didn't run a few times because of
296 		 * long blockage at high ipl, we don't
297 		 * really want to run this code several times,
298 		 * so squish out all multiples of hz here.
299 		 */
300 		time += lbolt / hz;
301 		lbolt %= hz;
302 
303 		/*
304 		 * Wakeup lightning bolt sleepers.
305 		 * Processes sleep on lbolt to wait
306 		 * for short amounts of time (e.g. 1 second).
307 		 */
308 		wakeup((caddr_t)&lbolt);
309 
310 		/*
311 		 * Recompute process priorities, and handle
312 		 * sleep() system calls as well as internal
313 		 * sleeps with timeouts (the tsleep() kernel routine).
314 		 */
315 		for (pp = proc; pp < procNPROC; pp++)
316 		if (pp->p_stat && pp->p_stat!=SZOMB) {
317 			/*
318 			 * Increase resident time, to max of 127 seconds
319 			 * (it is kept in a character).  For
320 			 * loaded processes this is time in core; for
321 			 * swapped processes, this is time on drum.
322 			 */
323 			if (pp->p_time != 127)
324 				pp->p_time++;
325 			/*
326 			 * If process has clock counting down, and it
327 			 * expires, set it running (if this is a tsleep()),
328 			 * or give it a SIGALRM (if the user process
329 			 * is using alarm signals).
330 			 */
331 			if (pp->p_clktim && --pp->p_clktim == 0)
332 				if (pp->p_flag & STIMO) {
333 					s = spl6();
334 					switch (pp->p_stat) {
335 
336 					case SSLEEP:
337 						setrun(pp);
338 						break;
339 
340 					case SSTOP:
341 						unsleep(pp);
342 						break;
343 					}
344 					pp->p_flag &= ~STIMO;
345 					splx(s);
346 				} else
347 					psignal(pp, SIGALRM);
348 			/*
349 			 * If process is blocked, increment computed
350 			 * time blocked.  This is used in swap scheduling.
351 			 */
352 			if (pp->p_stat==SSLEEP || pp->p_stat==SSTOP)
353 				if (pp->p_slptime != 127)
354 					pp->p_slptime++;
355 			/*
356 			 * Update digital filter estimation of process
357 			 * cpu utilization for loaded processes.
358 			 */
359 			if (pp->p_flag&SLOAD)
360 				pp->p_pctcpu = ccpu * pp->p_pctcpu +
361 				    (1.0 - ccpu) * (pp->p_cpticks/(float)hz);
362 			/*
363 			 * Recompute process priority.  The number p_cpu
364 			 * is a weighted estimate of cpu time consumed.
365 			 * A process which consumes cpu time has this
366 			 * increase regularly.  We here decrease it by
367 			 * a fraction based on load average giving a digital
368 			 * decay filter which damps out in about 5 seconds
369 			 * when seconds are measured in time expanded by the
370 			 * load average.
371 			 *
372 			 * If a process is niced, then the nice directly
373 			 * affects the new priority.  The final priority
374 			 * is in the range 0 to 255, to fit in a character.
375 			 */
376 			pp->p_cpticks = 0;
377 			a = ave((pp->p_cpu & 0377), avenrun[0]*nrscale) +
378 			     pp->p_nice - NZERO;
379 			if (a < 0)
380 				a = 0;
381 			if (a > 255)
382 				a = 255;
383 			pp->p_cpu = a;
384 			(void) setpri(pp);
385 			/*
386 			 * We have now computed the new process priority
387 			 * in pp->p_usrpri.  Carefully change pp->p_pri.
388 			 * A process is on a run queue associated with
389 			 * this priority, so we must block out process
390 			 * state changes during the transition.
391 			 */
392 			s = spl6();
393 			if (pp->p_pri >= PUSER) {
394 				if ((pp != u.u_procp || noproc) &&
395 				    pp->p_stat == SRUN &&
396 				    (pp->p_flag & SLOAD) &&
397 				    pp->p_pri != pp->p_usrpri) {
398 					remrq(pp);
399 					pp->p_pri = pp->p_usrpri;
400 					setrq(pp);
401 				} else
402 					pp->p_pri = pp->p_usrpri;
403 			}
404 			splx(s);
405 		}
406 
407 		/*
408 		 * Perform virtual memory metering.
409 		 */
410 		vmmeter();
411 
412 		/*
413 		 * If the swap process is trying to bring
414 		 * a process in, have it look again to see
415 		 * if it is possible now.
416 		 */
417 		if (runin!=0) {
418 			runin = 0;
419 			wakeup((caddr_t)&runin);
420 		}
421 
422 		/*
423 		 * If there are pages that have been cleaned,
424 		 * jolt the pageout daemon to process them.
425 		 * We do this here so that these pages will be
426 		 * freed if there is an abundance of memory and the
427 		 * daemon would not be awakened otherwise.
428 		 */
429 		if (bclnlist != NULL)
430 			wakeup((caddr_t)&proc[2]);
431 
432 		/*
433 		 * If the trap occurred from user mode,
434 		 * check whether the current process has now
435 		 * run more than 10 minutes of user time
436 		 * and should thus run with reduced priority
437 		 * to give other processes a chance.
438 		 */
439 		if (USERMODE(ps)) {
440 			pp = u.u_procp;
441 			if (pp->p_uid && pp->p_nice == NZERO &&
442 			    u.u_vm.vm_utime > 600 * hz)
443 				pp->p_nice = NZERO+4;
444 			(void) setpri(pp);
445 			pp->p_pri = pp->p_usrpri;
446 		}
447 	}
448 	/*
449 	 * If we trapped from user mode, give the process a profiling tick.
450 	 */
451 	if (USERMODE(ps) && u.u_prof.pr_scale) {
452 		u.u_procp->p_flag |= SOWEUPC;
453 		aston();
454 	}
455 }
456 
457 /*
458  * Timeout is called to arrange that
459  * fun(arg) is called in tim/hz seconds.
460  * An entry is linked into the callout
461  * structure.  The time in each structure
462  * entry is the number of hz's more
463  * than the previous entry.
464  * In this way, decrementing the
465  * first entry has the effect of
466  * updating all entries.
467  *
468  * The panic is there because there is nothing
469  * intelligent to be done if an entry won't fit.
470  */
471 timeout(fun, arg, tim)
472 	int (*fun)();
473 	caddr_t arg;
474 {
475 	register struct callout *p1, *p2, *pnew;
476 	register int t;
477 	int s;
478 
479 /* DEBUGGING CODE */
480 	int ttrstrt();
481 
482 	if (fun == ttrstrt && arg == 0)
483 		panic("timeout ttrstr arg");
484 /* END DEBUGGING CODE */
485 	t = tim;
486 	s = spl7();
487 	pnew = callfree;
488 	if (pnew == NULL)
489 		panic("timeout table overflow");
490 	callfree = pnew->c_next;
491 	pnew->c_arg = arg;
492 	pnew->c_func = fun;
493 	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
494 		t -= p2->c_time;
495 	p1->c_next = pnew;
496 	pnew->c_next = p2;
497 	pnew->c_time = t;
498 	if (p2)
499 		p2->c_time -= t;
500 	splx(s);
501 }
502
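
/*
 * Illustrative sketch (not part of this revision): the differential
 * bookkeeping described above, redone at user level.  Requests due at 5, 12
 * and 20 ticks are stored with deltas 5, 7 and 8, so the per-tick work is a
 * single decrement of the head entry.  The free list, spl protection and
 * panic of the real timeout() are omitted; enter() and tick() are
 * hypothetical names.
 */
#ifdef notdef
#include <stdio.h>
#include <stdlib.h>

struct ent {
	int delta;			/* ticks beyond the previous entry */
	int id;
	struct ent *next;
};

static struct ent *head;

static void
enter(int id, int ticks)		/* like timeout(), minus the free list */
{
	struct ent **pp, *p, *new;

	if ((new = malloc(sizeof(*new))) == NULL)
		abort();
	for (pp = &head; (p = *pp) && p->delta <= ticks; pp = &p->next)
		ticks -= p->delta;
	new->id = id;
	new->delta = ticks;
	new->next = p;
	if (p)
		p->delta -= ticks;	/* keep the rest of the list relative */
	*pp = new;
}

static void
tick(int now)				/* hardclock()'s decrement plus softclock()'s expiry */
{
	struct ent *p;

	if (head && head->delta > 0)
		head->delta--;
	while ((p = head) && p->delta <= 0) {
		printf("tick %d: entry %d expires\n", now, p->id);
		head = p->next;
		free(p);
	}
}

int
main(void)
{
	int now;

	enter(1, 5); enter(3, 20); enter(2, 12);
	printf("deltas after insertion: %d %d %d\n",
	    head->delta, head->next->delta, head->next->next->delta);
	for (now = 1; now <= 20; now++)
		tick(now);
	return 0;
}
#endif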