/*-
 * Copyright (c) 1982, 1986, 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)kern_clock.c	7.30 (Berkeley) 03/04/93
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/dkstat.h>
#include <sys/callout.h>
#include <sys/kernel.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>

#include <machine/cpu.h>

#ifdef GPROF
#include <sys/gmon.h>
extern u_short *kcount;
#endif

/*
 * Clock handling routines.
 *
 * This code is written to operate with two timers that run independently of
 * each other.  The main clock, running hz times per second, is used to keep
 * track of real time.  The second timer handles kernel and user profiling,
 * and does resource use estimation.  If the second timer is programmable,
 * it is randomized to avoid aliasing between the two clocks.  For example,
 * the randomization prevents an adversary from always giving up the cpu
 * just before its quantum expires.  Otherwise, it would never accumulate
 * cpu ticks.  The mean frequency of the second timer is stathz.
 *
 * If no second timer exists, stathz will be zero; in this case we drive
 * profiling and statistics off the main clock.  This WILL NOT be accurate;
 * do not do it unless absolutely necessary.
 *
 * The statistics clock may (or may not) be run at a higher rate while
 * profiling.  This profile clock runs at profhz.  We require that profhz
 * be an integral multiple of stathz.
 *
 * If the statistics clock is running fast, it must be divided by the ratio
 * profhz/stathz for statistics.  (For profiling, every tick counts.)
 */
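/*
 * Illustrative numbers (not taken from this file): a machine with hz = 100
 * might run its statistics timer at stathz = 128 and switch it to
 * profhz = 1024 while profiling, giving psratio = profhz / stathz = 8;
 * statistics are then tallied only on every psratio'th tick.
 */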

/*
 * TODO:
 *	allocate more timeout table slots when table overflows.
 */

/*
 * Bump a timeval by a small number of usec's.
 */
#define BUMPTIME(t, usec) { \
	register volatile struct timeval *tp = (t); \
	register long us; \
 \
	tp->tv_usec = us = tp->tv_usec + (usec); \
	if (us >= 1000000) { \
		tp->tv_usec = us - 1000000; \
		tp->tv_sec++; \
	} \
}
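/*
 * Note that BUMPTIME performs at most one carry into tv_sec, so the
 * increment is assumed to be less than 1000000 usec (one second).
 */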

int	stathz;
int	profhz;
int	profprocs;
int	ticks;
static int psdiv, pscnt;	/* prof => stat divider */
int	psratio;		/* ratio: prof / stat */

volatile struct	timeval time;
volatile struct	timeval mono_time;

/*
 * Initialize clock frequencies and start both clocks running.
 */
void
initclocks()
{
	register int i;

	/*
	 * Set divisors to 1 (normal case) and let the machine-specific
	 * code do its bit.
	 */
	psdiv = pscnt = 1;
	cpu_initclocks();

	/*
	 * Compute profhz/stathz, and fix profhz if needed.
	 */
	i = stathz ? stathz : hz;
	if (profhz == 0)
		profhz = i;
	psratio = profhz / i;
}

/*
 * The real-time timer, interrupting hz times per second.
 */
void
hardclock(frame)
	register struct clockframe *frame;
{
	register struct callout *p1;
	register struct proc *p;
	register int delta, needsoft;
	extern int tickdelta;
	extern long timedelta;

	/*
	 * Update real-time timeout queue.
	 * At front of queue are some number of events which are ``due''.
	 * The time to these is <= 0 and if negative represents the
	 * number of ticks which have passed since it was supposed to happen.
	 * The rest of the q elements (times > 0) are events yet to happen,
	 * where the time for each is given as a delta from the previous.
	 * Decrementing just the first of these serves to decrement the time
	 * to all events.
	 */
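	/*
	 * For example, timeouts due in 3, 5, and 12 ticks are queued with
	 * c_time values of 3, 2, and 7; decrementing only the head entry
	 * on each hardclock ages all three.
	 */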
	needsoft = 0;
	for (p1 = calltodo.c_next; p1 != NULL; p1 = p1->c_next) {
		if (--p1->c_time > 0)
			break;
		needsoft = 1;
		if (p1->c_time == 0)
			break;
	}

	p = curproc;
	if (p) {
		register struct pstats *pstats;

		/*
		 * Run current process's virtual and profile time, as needed.
		 */
		pstats = p->p_stats;
		if (CLKF_USERMODE(frame) &&
		    timerisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0)
			psignal(p, SIGVTALRM);
		if (timerisset(&pstats->p_timer[ITIMER_PROF].it_value) &&
		    itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0)
			psignal(p, SIGPROF);
	}

	/*
	 * If no separate statistics clock is available, run it from here.
	 */
	if (stathz == 0)
		statclock(frame);

	/*
	 * Increment the time-of-day.  The increment is just ``tick'' unless
	 * we are still adjusting the clock; see adjtime().
	 */
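	/*
	 * adjtime() leaves the outstanding correction in timedelta and the
	 * per-tick adjustment in tickdelta; the clock is thus slewed by
	 * tickdelta microseconds each tick until the correction is used up.
	 */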
	ticks++;
	if (timedelta == 0)
		delta = tick;
	else {
		delta = tick + tickdelta;
		timedelta -= tickdelta;
	}
	BUMPTIME(&time, delta);
	BUMPTIME(&mono_time, delta);

	/*
	 * Process callouts at a very low cpu priority, so we don't keep the
	 * relatively high clock interrupt priority any longer than necessary.
	 */
	if (needsoft) {
		if (CLKF_BASEPRI(frame)) {
			/*
			 * Save the overhead of a software interrupt;
			 * it will happen as soon as we return, so do it now.
			 */
			(void)splsoftclock();
			softclock();
		} else
			setsoftclock();
	}
}

/*
 * Software (low priority) clock interrupt.
 * Run periodic events from timeout queue.
 */
/*ARGSUSED*/
void
softclock()
{
	register struct callout *c;
	register void *arg;
	register void (*func) __P((void *));
	register int s;

	s = splhigh();
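	/*
	 * splhigh() protects the queue manipulation; the saved (soft-clock)
	 * priority is restored around each handler call, so the handlers
	 * themselves run at the lower priority.
	 */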
	while ((c = calltodo.c_next) != NULL && c->c_time <= 0) {
		func = c->c_func;
		arg = c->c_arg;
		calltodo.c_next = c->c_next;
		c->c_next = callfree;
		callfree = c;
		splx(s);
		(*func)(arg);
		(void) splhigh();
	}
	splx(s);
}

/*
 * Arrange that (*func)(arg) is called in t/hz seconds.
 */
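/*
 * Typical (hypothetical) usage from a driver: schedule a watchdog that
 * fires in about one second with
 *	timeout(xxwatchdog, (void *)sc, hz);
 * and cancel it before it expires with
 *	untimeout(xxwatchdog, (void *)sc);
 * where xxwatchdog and sc are the caller's own handler and argument.
 */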
void
timeout(func, arg, t)
	void (*func) __P((void *));
	void *arg;
	register int t;
{
	register struct callout *p1, *p2, *pnew;
	register int s;

	s = splhigh();
	if (t <= 0)
		t = 1;
	pnew = callfree;
	if (pnew == NULL)
		panic("timeout table overflow");
	callfree = pnew->c_next;
	pnew->c_arg = arg;
	pnew->c_func = func;
	for (p1 = &calltodo; (p2 = p1->c_next) && p2->c_time < t; p1 = p2)
		if (p2->c_time > 0)
			t -= p2->c_time;
	p1->c_next = pnew;
	pnew->c_next = p2;
	pnew->c_time = t;
	if (p2)
		p2->c_time -= t;
	splx(s);
}

/*
 * untimeout is called to remove a function timeout call
 * from the callout structure.
 */
void
untimeout(func, arg)
	void (*func) __P((void *));
	void *arg;
{
	register struct callout *p1, *p2;
	register int s;

	s = splhigh();
	for (p1 = &calltodo; (p2 = p1->c_next) != NULL; p1 = p2) {
		if (p2->c_func == func && p2->c_arg == arg) {
			if (p2->c_next && p2->c_time > 0)
				p2->c_next->c_time += p2->c_time;
			p1->c_next = p2->c_next;
			p2->c_next = callfree;
			callfree = p2;
			break;
		}
	}
	splx(s);
}

/*
 * Compute number of hz until specified time.
 * Used to compute third argument to timeout() from an
 * absolute time.
 */
int
hzto(tv)
	struct timeval *tv;
{
	register long ticks, sec;
	int s;

	/*
	 * If number of milliseconds will fit in 32 bit arithmetic,
	 * then compute number of milliseconds to time and scale to
	 * ticks.  Otherwise just compute number of hz in time, rounding
	 * times greater than representable to maximum value.
	 *
	 * Delta times less than 25 days can be computed ``exactly''.
	 * Maximum value for any timeout in 10ms ticks is 250 days.
	 */
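	/*
	 * Roughly: 0x7fffffff / 1000 - 1000 is about 2.1 million seconds,
	 * i.e. just under 25 days, and 0x7fffffff ticks at hz = 100 is
	 * about 248 days, hence the limits quoted above.
	 */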
	s = splhigh();
	sec = tv->tv_sec - time.tv_sec;
	if (sec <= 0x7fffffff / 1000 - 1000)
		ticks = ((tv->tv_sec - time.tv_sec) * 1000 +
			(tv->tv_usec - time.tv_usec) / 1000) / (tick / 1000);
	else if (sec <= 0x7fffffff / hz)
		ticks = sec * hz;
	else
		ticks = 0x7fffffff;
	splx(s);
	return (ticks);
}

/*
 * Start profiling on a process.
 *
 * Kernel profiling passes proc0 which never exits and hence
 * keeps the profile clock running constantly.
 */
void
startprofclock(p)
	register struct proc *p;
{
	int s;

	if ((p->p_flag & SPROFIL) == 0) {
		p->p_flag |= SPROFIL;
		if (++profprocs == 1 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = psratio;
			setstatclockrate(profhz);
			splx(s);
		}
	}
}

/*
 * Stop profiling on a process.
 */
void
stopprofclock(p)
	register struct proc *p;
{
	int s;

	if (p->p_flag & SPROFIL) {
		p->p_flag &= ~SPROFIL;
		if (--profprocs == 0 && stathz != 0) {
			s = splstatclock();
			psdiv = pscnt = 1;
			setstatclockrate(stathz);
			splx(s);
		}
	}
}

int	dk_ndrive = DK_NDRIVE;

/*
 * Statistics clock.  Grab profile sample, and if divider reaches 0,
 * do process and kernel statistics.
 */
void
statclock(frame)
	register struct clockframe *frame;
{
#ifdef GPROF
	register struct gmonparam *g;
#endif
	register struct proc *p;
	register int i;

	if (CLKF_USERMODE(frame)) {
		p = curproc;
		if (p->p_flag & SPROFIL)
			addupc_intr(p, CLKF_PC(frame), 1);
		if (--pscnt > 0)
			return;
		/*
		 * Came from user mode; CPU was in user state.
		 * If this process is being profiled, record the tick.
		 */
		p->p_uticks++;
		if (p->p_nice > NZERO)
			cp_time[CP_NICE]++;
		else
			cp_time[CP_USER]++;
	} else {
#ifdef GPROF
		/*
		 * Kernel statistics are just like addupc_intr, only easier.
		 */
		g = &_gmonparam;
		if (g->state == GMON_PROF_ON) {
			i = CLKF_PC(frame) - g->lowpc;
			if (i < g->textsize)
				kcount[i / (HISTFRACTION * sizeof(*kcount))]++;
		}
#endif
		if (--pscnt > 0)
			return;
		/*
		 * Came from kernel mode, so we were:
		 * - handling an interrupt,
		 * - doing syscall or trap work on behalf of the current
		 *   user process, or
		 * - spinning in the idle loop.
		 * Whichever it is, charge the time as appropriate.
		 * Note that we charge interrupts to the current process,
		 * regardless of whether they are ``for'' that process,
		 * so that we know how much of its real time was spent
		 * in ``non-process'' (i.e., interrupt) work.
		 */
		p = curproc;
		if (CLKF_INTR(frame)) {
			if (p != NULL)
				p->p_iticks++;
			cp_time[CP_INTR]++;
		} else if (p != NULL) {
			p->p_sticks++;
			cp_time[CP_SYS]++;
		} else
			cp_time[CP_IDLE]++;
	}
	pscnt = psdiv;

	/*
	 * We maintain statistics shown by user-level statistics
	 * programs:  the amount of time in each cpu state, and
	 * the amount of time each of DK_NDRIVE ``drives'' is busy.
	 *
	 * XXX	should either run linked list of drives, or (better)
	 *	grab timestamps in the start & done code.
	 */
	for (i = 0; i < DK_NDRIVE; i++)
		if (dk_busy & (1 << i))
			dk_time[i]++;

	/*
	 * We adjust the priority of the current process.
	 * The priority of a process gets worse as it accumulates
	 * CPU time.  The cpu usage estimator (p_cpu) is increased here
	 * and the formula for computing priorities (in kern_synch.c)
	 * will compute a different value each time the p_cpu increases
	 * by 4.  The cpu usage estimator ramps up quite quickly when
	 * the process is running (linearly), and decays away
	 * exponentially, at a rate which is proportionally slower
	 * when the system is busy.  The basic principle is that the
	 * system will 90% forget that a process used a lot of CPU
	 * time in 5*loadav seconds.  This causes the system to favor
	 * processes which haven't run much recently, and to
	 * round-robin among other processes.
	 */
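	/*
	 * (The decay itself happens once a second in schedcpu() in
	 * kern_synch.c, which scales p_cpu by roughly 2*loadav/(2*loadav+1);
	 * that filter is what yields the 90%-in-5*loadav-seconds behavior
	 * described above.)
	 */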
	if (p != NULL) {
		p->p_cpticks++;
		if (++p->p_cpu == 0)
			p->p_cpu--;
		if ((p->p_cpu & 3) == 0) {
			setpri(p);
			if (p->p_pri >= PUSER)
				p->p_pri = p->p_usrpri;
		}
	}
}

/*
 * Return information about system clocks.
 */
sysctl_clockrate(where, sizep)
	register char *where;
	size_t *sizep;
{
	struct clockinfo clkinfo;

	/*
	 * Construct clockinfo structure.
	 */
	clkinfo.hz = hz;
	clkinfo.tick = tick;
	clkinfo.profhz = profhz;
	clkinfo.stathz = stathz ? stathz : hz;
	return (sysctl_rdstruct(where, sizep, NULL, &clkinfo, sizeof(clkinfo)));
}
477