xref: /original-bsd/sys/vm/vm_meter.c (revision b3b53e97)
1 /*	vm_meter.c	4.14	82/03/31	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/seg.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/proc.h"
9 #include "../h/text.h"
10 #include "../h/vm.h"
11 #include "../h/cmap.h"
12 
13 int	maxslp = MAXSLP;
14 int	saferss = SAFERSS;
15 
16 /*
17  * The following parameters control operation of the page replacement
18  * algorithm.  They are initialized to 0, and then computed at boot time
19  * based on the size of the system.  If they are patched non-zero in
20  * a loaded vmunix they are left alone and may thus be changed per system
21  * using adb on the loaded system.
22  */
23 int	maxpgio = 0;
24 int	minfree = 0;
25 int	desfree = 0;
26 int	lotsfree = 0;
27 int	slowscan = 0;
28 int	fastscan = 0;
29 int	klin = KLIN;
30 int	klseql = KLSEQL;
31 int	klsdist = KLSDIST;
32 int	kltxt = KLTXT;
33 int	klout = KLOUT;
34 int	multprog = -1;		/* so we don't count process 2 */
35 
36 double	avenrun[3];		/* load average, of runnable procs */
37 
38 /*
39  * Setup the paging constants for the clock algorithm.
40  * Called after the system is initialized and the amount of memory
41  * and number of paging devices is known.
42  */
43 setupclock()
44 {
45 
46 	/*
47 	 * Setup thresholds for paging:
48 	 *	lotsfree	is threshold where paging daemon turns on
49 	 *	desfree		is amount of memory desired free.  if less
50 	 *			than this for extended period, do swapping
51 	 *	minfree		is minimal amount of free memory which is
52 	 *			tolerable.
53 	 *
54 	 * Strategy of 4/22/81:
55 	 *	lotsfree is 1/4 of memory free.
56 	 *	desfree is 200k bytes, but at most 1/8 of memory
57 	 *	minfree is 64k bytes, but at most 1/2 of desfree
58 	 */
59 	if (lotsfree == 0)
60 		lotsfree = LOOPPAGES / 4;
61 	if (desfree == 0) {
62 		desfree = (200*1024) / NBPG;
63 		if (desfree > LOOPPAGES / 8)
64 			desfree = LOOPPAGES / 8;
65 	}
66 	if (minfree == 0) {
67 		minfree = (64*1024) / NBPG;
68 		if (minfree > desfree/2)
69 			minfree = desfree / 2;
70 	}
71 
72 	/*
73 	 * Maxpgio thresholds how much paging is acceptable.
74 	 * This figures that 2/3 busy on an arm is all that is
75 	 * tolerable for paging.  We assume one operation per disk rev.
76 	 */
77 	if (maxpgio == 0)
78 		maxpgio = (DISKRPM * 2) / 3;
79 
80 	/*
81 	 * Clock to scan using max of ~~10% of processor time for sampling,
82 	 *     this estimated to allow maximum of 200 samples per second.
83 	 * This yields a ``fastscan'' of roughly (with CLSIZE=2):
84 	 *	<=1m	2m	3m	4m	8m
85 	 * 	5s	10s	15s	20s	40s
86 	 */
87 	if (nswdev == 1 && physmem*NBPG > 2*1024*(1024-16))
88 		printf("WARNING: should run interleaved swap with >= 2Mb\n");
89 	if (fastscan == 0)
90 		fastscan = (LOOPPAGES/CLSIZE) / 200;
91 	if (fastscan < 5)
92 		fastscan = 5;
93 	if (nswdev >= 2)
94 		maxpgio = (maxpgio * 3) / 2;
95 
96 	/*
97 	 * Set slow scan time to 1/2 the fast scan time.
98 	 */
99 	if (slowscan == 0)
100 		slowscan = 2 * fastscan;
101 #ifdef notdef
102 	printf("slowscan %d, fastscan %d, maxpgio %d\n",
103 	    slowscan, fastscan, maxpgio);
104 	printf("lotsfree %d, desfree %d, minfree %d\n",
105 	    lotsfree, desfree, minfree);
106 #endif
107 }
108 
109 /*
110  * The main loop of the scheduling (swapping) process.
111  *
112  * The basic idea is:
113  *	see if anyone wants to be swapped in;
114  *	swap out processes until there is room;
115  *	swap him in;
116  *	repeat.
117  * If the paging rate is too high, or the average free memory
118  * is very low, then we do not consider swapping anyone in,
119  * but rather look for someone to swap out.
120  *
121  * The runout flag is set whenever someone is swapped out.
122  * Sched sleeps on it awaiting work.
123  *
124  * Sched sleeps on runin whenever it cannot find enough
125  * core (by swapping out or otherwise) to fit the
126  * selected swapped process.  It is awakened when the
127  * core situation changes and in any case once per second.
128  *
129  * sched DOESN'T ACCOUNT FOR PAGE TABLE SIZE IN CALCULATIONS.
130  */
131 
132 #define	swappable(p) \
133 	(((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT|SPHYSIO))==SLOAD)
134 
135 /* insure non-zero */
136 #define	nz(x)	(x != 0 ? x : 1)
137 
138 #define	NBIG	4
139 #define	MAXNBIG	10
140 int	nbig = NBIG;
141 
142 struct bigp {
143 	struct	proc *bp_proc;
144 	int	bp_pri;
145 	struct	bigp *bp_link;
146 } bigp[MAXNBIG], bplist;
147 
148 sched()
149 {
150 	register struct proc *rp, *p, *inp;
151 	int outpri, inpri, rppri;
152 	int sleeper, desperate, deservin, needs, divisor;
153 	register struct bigp *bp, *nbp;
154 	int biggot, gives;
155 
156 loop:
157 	wantin = 0;
158 	deservin = 0;
159 	sleeper = 0;
160 	p = 0;
161 	/*
162 	 * See if paging system is overloaded; if so swap someone out.
163 	 * Conditions for hard outswap are:
164 	 *	if need kernel map (mix it up).
165 	 * or
166 	 *	1. if there are at least 2 runnable processes (on the average)
167 	 * and	2. the paging rate is excessive or memory is now VERY low.
168 	 * and	3. the short (5-second) and longer (30-second) average
169 	 *	   memory is less than desirable.
170 	 */
171 	if (kmapwnt || (avenrun[0] >= 2 && imax(avefree, avefree30) < desfree &&
172 	    (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) {
173 		desperate = 1;
174 		goto hardswap;
175 	}
176 	desperate = 0;
177 	/*
178 	 * Not desperate for core,
179 	 * look for someone who deserves to be brought in.
180 	 */
181 	outpri = -20000;
182 	for (rp = proc; rp < procNPROC; rp++) switch(rp->p_stat) {
183 
184 	case SRUN:
185 		if ((rp->p_flag&SLOAD) == 0) {
186 			rppri = rp->p_time -
187 			    rp->p_swrss / nz((maxpgio/2) * (klin * CLSIZE)) +
188 			    rp->p_slptime - (rp->p_nice-NZERO)*8;
189 			if (rppri > outpri) {
190 				if (rp->p_poip)
191 					continue;
192 				if (rp->p_textp && rp->p_textp->x_poip)
193 					continue;
194 				p = rp;
195 				outpri = rppri;
196 			}
197 		}
198 		continue;
199 
200 	case SSLEEP:
201 	case SSTOP:
202 		if ((freemem < desfree || rp->p_rssize == 0) &&
203 		    rp->p_slptime > maxslp &&
204 		    (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) &&
205 		    swappable(rp)) {
206 			/*
207 			 * Kick out deadwood.
208 			 */
209 			(void) spl6();
210 			rp->p_flag &= ~SLOAD;
211 			if (rp->p_stat == SRUN)
212 				remrq(rp);
213 			(void) spl0();
214 			(void) swapout(rp, rp->p_dsize, rp->p_ssize);
215 			goto loop;
216 		}
217 		continue;
218 	}
219 
220 	/*
221 	 * No one wants in, so nothing to do.
222 	 */
223 	if (outpri == -20000) {
224 		(void) spl6();
225 		if (wantin) {
226 			wantin = 0;
227 			sleep((caddr_t)&lbolt, PSWP);
228 		} else {
229 			runout++;
230 			sleep((caddr_t)&runout, PSWP);
231 		}
232 		(void) spl0();
233 		goto loop;
234 	}
235 	/*
236 	 * Decide how deserving this guy is.  If he is deserving
237 	 * we will be willing to work harder to bring him in.
238 	 * Needs is an estimate of how much core he will need.
239 	 * If he has been out for a while, then we will
240 	 * bring him in with 1/2 the core he will need, otherwise
241 	 * we are conservative.
242 	 */
243 	deservin = 0;
244 	divisor = 1;
245 	if (outpri > maxslp/2) {
246 		deservin = 1;
247 		divisor = 2;
248 	}
249 	needs = p->p_swrss;
250 	if (p->p_textp && p->p_textp->x_ccount == 0)
251 		needs += p->p_textp->x_swrss;
252 	needs = imin(needs, lotsfree);
253 	if (freemem - deficit > needs / divisor) {
254 		deficit += needs;
255 		if (swapin(p))
256 			goto loop;
257 		deficit -= imin(needs, deficit);
258 	}
259 
260 hardswap:
261 	/*
262 	 * Need resources (kernel map or memory), swap someone out.
263 	 * Select the nbig largest jobs, then the oldest of these
264 	 * is ``most likely to get booted.''
265 	 */
266 	inp = p;
267 	sleeper = 0;
268 	if (nbig > MAXNBIG)
269 		nbig = MAXNBIG;
270 	if (nbig < 1)
271 		nbig = 1;
272 	biggot = 0;
273 	bplist.bp_link = 0;
274 	for (rp = proc; rp < procNPROC; rp++) {
275 		if (!swappable(rp))
276 			continue;
277 		if (rp->p_stat==SZOMB)
278 			continue;
279 		if (rp == inp)
280 			continue;
281 		if (rp->p_textp && rp->p_textp->x_flag&XLOCK)
282 			continue;
283 		if (rp->p_slptime > maxslp &&
284 		    (rp->p_stat==SSLEEP&&rp->p_pri>PZERO||rp->p_stat==SSTOP)) {
285 			if (sleeper < rp->p_slptime) {
286 				p = rp;
287 				sleeper = rp->p_slptime;
288 			}
289 		} else if (!sleeper && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) {
290 			rppri = rp->p_rssize;
291 			if (rp->p_textp)
292 				rppri += rp->p_textp->x_rssize/rp->p_textp->x_ccount;
293 			if (biggot < nbig)
294 				nbp = &bigp[biggot++];
295 			else {
296 				nbp = bplist.bp_link;
297 				if (nbp->bp_pri > rppri)
298 					continue;
299 				bplist.bp_link = nbp->bp_link;
300 			}
301 			for (bp = &bplist; bp->bp_link; bp = bp->bp_link)
302 				if (rppri < bp->bp_link->bp_pri)
303 					break;
304 			nbp->bp_link = bp->bp_link;
305 			bp->bp_link = nbp;
306 			nbp->bp_pri = rppri;
307 			nbp->bp_proc = rp;
308 		}
309 	}
310 	if (!sleeper) {
311 		p = NULL;
312 		inpri = -1000;
313 		for (bp = bplist.bp_link; bp; bp = bp->bp_link) {
314 			rp = bp->bp_proc;
315 			rppri = rp->p_time+rp->p_nice-NZERO;
316 			if (rppri >= inpri) {
317 				p = rp;
318 				inpri = rppri;
319 			}
320 		}
321 	}
322 	/*
323 	 * If we found a long-time sleeper, or we are desperate and
324 	 * found anyone to swap out, or if someone deserves to come
325 	 * in and we didn't find a sleeper, but found someone who
326 	 * has been in core for a reasonable length of time, then
327 	 * we kick the poor luser out.
328 	 */
329 	if (sleeper || desperate && p || deservin && inpri > maxslp) {
330 		(void) spl6();
331 		p->p_flag &= ~SLOAD;
332 		if (p->p_stat == SRUN)
333 			remrq(p);
334 		(void) spl0();
335 		if (desperate) {
336 			/*
337 			 * Want to give this space to the rest of
338 			 * the processes in core so give them a chance
339 			 * by increasing the deficit.
340 			 */
341 			gives = p->p_rssize;
342 			if (p->p_textp)
343 				gives += p->p_textp->x_rssize / p->p_textp->x_ccount;
344 			gives = imin(gives, lotsfree);
345 			deficit += gives;
346 		} else
347 			gives = 0;	/* someone else taketh away */
348 		if (swapout(p, p->p_dsize, p->p_ssize) == 0)
349 			deficit -= imin(gives, deficit);
350 		goto loop;
351 	}
352 	/*
353 	 * Want to swap someone in, but can't
354 	 * so wait on runin.
355 	 */
356 	(void) spl6();
357 	runin++;
358 	sleep((caddr_t)&runin, PSWP);
359 	(void) spl0();
360 	goto loop;
361 }
362 
363 vmmeter()
364 {
365 	register unsigned *cp, *rp, *sp;
366 
367 	deficit -= imin(deficit,
368 	    imax(deficit / 10, ((klin * CLSIZE) / 2) * maxpgio / 2));
369 	ave(avefree, freemem, 5);
370 	ave(avefree30, freemem, 30);
371 	/* v_pgin is maintained by clock.c */
372 	cp = &cnt.v_first; rp = &rate.v_first; sp = &sum.v_first;
373 	while (cp <= &cnt.v_last) {
374 		ave(*rp, *cp, 5);
375 		*sp += *cp;
376 		*cp = 0;
377 		rp++, cp++, sp++;
378 	}
379 	if (time % 5 == 0) {
380 		vmtotal();
381 		rate.v_swpin = cnt.v_swpin;
382 		sum.v_swpin += cnt.v_swpin;
383 		cnt.v_swpin = 0;
384 		rate.v_swpout = cnt.v_swpout;
385 		sum.v_swpout += cnt.v_swpout;
386 		cnt.v_swpout = 0;
387 	}
388 	if (avefree < minfree && runout || proc[0].p_slptime > maxslp/2) {
389 		runout = 0;
390 		runin = 0;
391 		wakeup((caddr_t)&runin);
392 		wakeup((caddr_t)&runout);
393 	}
394 }
395 
396 vmpago()
397 {
398 	register int vavail;
399 	register int scanrate;
400 
401 	/*
402 	 * Compute new rate for clock; if
403 	 * nonzero, restart clock.
404 	 * Rate ranges linearly from one rev per
405 	 * slowscan seconds when there is lotsfree memory
406 	 * available to one rev per fastscan seconds when
407 	 * there is no memory available.
408 	 */
409 	nscan = desscan = 0;
410 	vavail = freemem - deficit;
411 	if (vavail < 0)
412 		vavail = 0;
413 	if (freemem >= lotsfree)
414 		return;
415 	scanrate = (slowscan * vavail + fastscan * (lotsfree - vavail)) / nz(lotsfree);
416 	desscan = (LOOPPAGES / CLSIZE) / nz(scanrate);
417 	/*
418 	 * DIVIDE BY 4 TO ACCOUNT FOR RUNNING 4* A SECOND (see clock.c)
419 	 */
420 	desscan /= 4;
421 	wakeup((caddr_t)&proc[2]);
422 }
423 
424 vmtotal()
425 {
426 	register struct proc *p;
427 	register struct text *xp;
428 	int nrun = 0;
429 
430 	total.t_vmtxt = 0;
431 	total.t_avmtxt = 0;
432 	total.t_rmtxt = 0;
433 	total.t_armtxt = 0;
434 	for (xp = text; xp < textNTEXT; xp++)
435 		if (xp->x_iptr) {
436 			total.t_vmtxt += xp->x_size;
437 			total.t_rmtxt += xp->x_rssize;
438 			for (p = xp->x_caddr; p; p = p->p_xlink)
439 			switch (p->p_stat) {
440 
441 			case SSTOP:
442 			case SSLEEP:
443 				if (p->p_slptime >= maxslp)
444 					continue;
445 				/* fall into... */
446 
447 			case SRUN:
448 			case SIDL:
449 				total.t_avmtxt += xp->x_size;
450 				total.t_armtxt += xp->x_rssize;
451 				goto next;
452 			}
453 next:
454 			;
455 		}
456 	total.t_vm = 0;
457 	total.t_avm = 0;
458 	total.t_rm = 0;
459 	total.t_arm = 0;
460 	total.t_rq = 0;
461 	total.t_dw = 0;
462 	total.t_pw = 0;
463 	total.t_sl = 0;
464 	total.t_sw = 0;
465 	for (p = proc; p < procNPROC; p++) {
466 		if (p->p_flag & SSYS)
467 			continue;
468 		if (p->p_stat) {
469 			total.t_vm += p->p_dsize + p->p_ssize;
470 			total.t_rm += p->p_rssize;
471 			switch (p->p_stat) {
472 
473 			case SSLEEP:
474 			case SSTOP:
475 				if (p->p_pri <= PZERO)
476 					nrun++;
477 				if (p->p_flag & SPAGE)
478 					total.t_pw++;
479 				else if (p->p_flag & SLOAD) {
480 					if (p->p_pri <= PZERO)
481 						total.t_dw++;
482 					else if (p->p_slptime < maxslp)
483 						total.t_sl++;
484 				} else if (p->p_slptime < maxslp)
485 					total.t_sw++;
486 				if (p->p_slptime < maxslp)
487 					goto active;
488 				break;
489 
490 			case SRUN:
491 			case SIDL:
492 				nrun++;
493 				if (p->p_flag & SLOAD)
494 					total.t_rq++;
495 				else
496 					total.t_sw++;
497 active:
498 				total.t_avm += p->p_dsize + p->p_ssize;
499 				total.t_arm += p->p_rssize;
500 				break;
501 			}
502 		}
503 	}
504 	total.t_vm += total.t_vmtxt;
505 	total.t_avm += total.t_avmtxt;
506 	total.t_rm += total.t_rmtxt;
507 	total.t_arm += total.t_armtxt;
508 	total.t_free = avefree;
509 	loadav(avenrun, nrun);
510 }
511 
512 /*
513  * Constants for averages over 1, 5, and 15 minutes
514  * when sampling at 5 second intervals.
515  */
516 double	cexp[3] = {
517 	0.9200444146293232,	/* exp(-1/12) */
518 	0.9834714538216174,	/* exp(-1/60) */
519 	0.9944598480048967,	/* exp(-1/180) */
520 };
521 
522 /*
523  * Compute a tenex style load average of a quantity on
524  * 1, 5 and 15 minute intervals.
525  */
526 loadav(avg, n)
527 	register double *avg;
528 	int n;
529 {
530 	register int i;
531 
532 	for (i = 0; i < 3; i++)
533 		avg[i] = cexp[i] * avg[i] + n * (1.0 - cexp[i]);
534 }
535