xref: /original-bsd/sys/vm/vm_meter.c (revision 6c57d260)
1 /*	vm_meter.c	4.11	81/04/28	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/seg.h"
6 #include "../h/dir.h"
7 #include "../h/user.h"
8 #include "../h/proc.h"
9 #include "../h/text.h"
10 #include "../h/vm.h"
11 #include "../h/cmap.h"
12 
13 int	maxslp = MAXSLP;
14 int	saferss = SAFERSS;
15 
16 /*
17  * The following parameters control operation of the page replacement
18  * algorithm.  They are initialized to 0, and then computed at boot time
19  * based on the size of the system.  If they are patched non-zero in
20  * a loaded vmunix they are left alone and may thus be changed per system
21  * using adb on the loaded system.
22  */
23 int	maxpgio = 0;
24 int	minfree = 0;
25 int	desfree = 0;
26 int	lotsfree = 0;
27 int	slowscan = 0;
28 int	fastscan = 0;
29 int	klin = KLIN;
30 int	klseql = KLSEQL;
31 int	klsdist = KLSDIST;
32 int	kltxt = KLTXT;
33 int	klout = KLOUT;
34 int	multprog = -1;		/* so we don't count process 2 */
35 
36 double	avenrun[3];		/* load average, of runnable procs */
37 
38 /*
39  * Setup the paging constants for the clock algorithm.
40  * Called after the system is initialized and the amount of memory
41  * and number of paging devices is known.
42  */
43 setupclock()
44 {
45 
46 	/*
47 	 * Setup thresholds for paging:
48 	 *	lotsfree	is threshold where paging daemon turns on
49 	 *	desfree		is amount of memory desired free.  if less
50 	 *			than this for extended period, do swapping
51 	 *	minfree		is minimal amount of free memory which is
52 	 *			tolerable.
53 	 *
54 	 * Strategy of 4/22/81:
55 	 *	lotsfree is 1/4 of memory free.
56 	 *	desfree is 200k bytes, but at most 1/8 of memory
57 	 *	minfree is 64k bytes, but at most 1/2 of desfree
58 	 */
59 	if (lotsfree == 0)
60 		lotsfree = LOOPPAGES / 4;
61 	if (desfree == 0) {
62 		desfree = (200*1024) / NBPG;
63 		if (desfree > LOOPPAGES / 8)
64 			desfree = LOOPPAGES / 8;
65 	}
66 	if (minfree == 0) {
67 		minfree = (64*1024) / NBPG;
68 		if (minfree > desfree/2)
69 			minfree = desfree / 2;
70 	}
71 
72 	/*
73 	 * Maxpgio thresholds how much paging is acceptable.
74 	 * This figures that 2/3 busy on an arm is all that is
75 	 * tolerable for paging.  We assume one operation per disk rev.
76 	 */
77 	if (maxpgio == 0)
78 		maxpgio = (DISKRPM * 2) / 3;
79 
80 	/*
81 	 * Clock to scan using max of ~~15% of processor time for sampling,
82 	 *     this estimated to allow maximum of 300 samples per second.
83 	 * This yields a ``fastscan'' of roughly (with CLSIZE=2):
84 	 *	<=1m	2m	3m	4m	>=6m
85 	 * 	5s	6s	7s	13s	20s
86 	 */
87 	if (nswdev == 1 && physmem*NBPG > 2*1024*(1024-16))
88 		printf("WARNING: should run interleaved swap with >= 2Mb\n");
89 	if (fastscan == 0)
90 		fastscan = (LOOPPAGES/CLSIZE) / 300;
91 	if (fastscan < 5)
92 		fastscan = 5;
93 	if (fastscan > maxslp)
94 		fastscan = maxslp;
95 	if (nswdev == 2)
96 		maxpgio = (maxpgio * 3) / 2;
97 
98 	/*
99 	 * Set slow scan time to 1/3 the fast scan time but at most
100 	 * maxslp (a macroscopic slow).
101 	 */
102 	if (slowscan == 0)
103 		slowscan = 3 * fastscan;
104 	if (slowscan > maxslp)
105 		slowscan = maxslp;
106 #if defined(BERT) || defined(ERNIE)
107 	printf("slowscan %d, fastscan %d, maxpgio %d\n",
108 	    slowscan, fastscan, maxpgio);
109 	printf("lotsfree %d, desfree %d, minfree %d\n",
110 	    lotsfree, desfree, minfree);
111 #endif
112 }
113 
114 /*
115  * The main loop of the scheduling (swapping) process.
116  *
117  * The basic idea is:
118  *	see if anyone wants to be swapped in;
119  *	swap out processes until there is room;
120  *	swap him in;
121  *	repeat.
122  * If the paging rate is too high, or the average free memory
123  * is very low, then we do not consider swapping anyone in,
124  * but rather look for someone to swap out.
125  *
126  * The runout flag is set whenever someone is swapped out.
127  * Sched sleeps on it awaiting work.
128  *
129  * Sched sleeps on runin whenever it cannot find enough
130  * core (by swapping out or otherwise) to fit the
131  * selected swapped process.  It is awakened when the
132  * core situation changes and in any case once per second.
133  *
134  * sched DOESN'T ACCOUNT FOR PAGE TABLE SIZE IN CALCULATIONS.
135  */
136 
/*
 * A process is a swapout candidate only if it is loaded (SLOAD set)
 * and none of the other flags tested here is set.
 */
#define	swappable(p) \
	(((p)->p_flag&(SSYS|SLOCK|SULOCK|SLOAD|SPAGE|SKEEP|SWEXIT|SPHYSIO))==SLOAD)

/*
 * insure non-zero: yields x, or 1 when x is zero, so that the result
 * is safe to divide by.  The argument is parenthesized so expressions
 * using operators that bind more loosely than != (e.g. nz(a & b)) are
 * tested as a whole; note that it is evaluated twice.
 */
#define	nz(x)	((x) != 0 ? (x) : 1)
142 
/*
 * Bookkeeping used by sched() when it must pick a process to swap out.
 * sched() collects up to nbig of the largest candidates; nbig may be
 * patched, and sched() forces it into the range 1..MAXNBIG before use.
 */
#define	NBIG	4
#define	MAXNBIG	10
int	nbig = NBIG;

/*
 * One candidate.  sched() keeps the entries in use on a list headed by
 * bplist.bp_link, linked in increasing bp_pri order, so the head is the
 * smallest candidate and the first to be displaced.
 */
struct bigp {
	struct	proc *bp_proc;		/* the candidate process */
	int	bp_pri;			/* its size as computed by sched() */
	struct	bigp *bp_link;		/* next (larger) candidate, or 0 */
} bigp[MAXNBIG], bplist;
152 
153 sched()
154 {
155 	register struct proc *rp, *p, *inp;
156 	int outpri, inpri, rppri;
157 	int sleeper, desperate, deservin, needs, divisor;
158 	register struct bigp *bp, *nbp;
159 	int biggot, gives;
160 
161 loop:
162 	wantin = 0;
163 	deservin = 0;
164 	sleeper = 0;
165 	p = 0;
166 	/*
167 	 * See if paging system is overloaded; if so swap someone out.
168 	 * Conditions for hard outswap are:
169 	 *	if need kernel map (mix it up).
170 	 * or
171 	 *	1. if there are at least 2 runnable processes (on the average)
172 	 * and	2. the paging rate is excessive or memory is now VERY low.
173 	 * and	3. the short (5-second) and longer (30-second) average
174 	 *	   memory is less than desirable.
175 	 */
176 	if (kmapwnt || (avenrun[0] >= 2 && imax(avefree, avefree30) < desfree &&
177 	    (rate.v_pgin + rate.v_pgout > maxpgio || avefree < minfree))) {
178 		desperate = 1;
179 		goto hardswap;
180 	}
181 	desperate = 0;
182 	/*
183 	 * Not desperate for core,
184 	 * look for someone who deserves to be brought in.
185 	 */
186 	outpri = -20000;
187 	for (rp = proc; rp < procNPROC; rp++) switch(rp->p_stat) {
188 
189 	case SRUN:
190 		if ((rp->p_flag&SLOAD) == 0) {
191 			rppri = rp->p_time -
192 			    rp->p_swrss / nz((maxpgio/2) * (klin * CLSIZE)) +
193 			    rp->p_slptime - (rp->p_nice-NZERO)*8;
194 			if (rppri > outpri) {
195 				if (rp->p_poip)
196 					continue;
197 				if (rp->p_textp && rp->p_textp->x_poip)
198 					continue;
199 				p = rp;
200 				outpri = rppri;
201 			}
202 		}
203 		continue;
204 
205 	case SSLEEP:
206 	case SSTOP:
207 		if ((freemem < desfree || rp->p_rssize == 0) &&
208 		    rp->p_slptime > maxslp &&
209 		    (!rp->p_textp || (rp->p_textp->x_flag&XLOCK)==0) &&
210 		    swappable(rp)) {
211 			/*
212 			 * Kick out deadwood.
213 			 */
214 			(void) spl6();
215 			rp->p_flag &= ~SLOAD;
216 			if (rp->p_stat == SRUN)
217 				remrq(rp);
218 			(void) spl0();
219 			(void) swapout(rp, rp->p_dsize, rp->p_ssize);
220 			goto loop;
221 		}
222 		continue;
223 	}
224 
225 	/*
226 	 * No one wants in, so nothing to do.
227 	 */
228 	if (outpri == -20000) {
229 		(void) spl6();
230 		if (wantin) {
231 			wantin = 0;
232 			sleep((caddr_t)&lbolt, PSWP);
233 		} else {
234 			runout++;
235 			sleep((caddr_t)&runout, PSWP);
236 		}
237 		(void) spl0();
238 		goto loop;
239 	}
240 	/*
241 	 * Decide how deserving this guy is.  If he is deserving
242 	 * we will be willing to work harder to bring him in.
243 	 * Needs is an estimate of how much core he will need.
244 	 * If he has been out for a while, then we will
245 	 * bring him in with 1/2 the core he will need, otherwise
246 	 * we are conservative.
247 	 */
248 	deservin = 0;
249 	divisor = 1;
250 	if (outpri > maxslp/2) {
251 		deservin = 1;
252 		divisor = 2;
253 	}
254 	needs = p->p_swrss;
255 	if (p->p_textp && p->p_textp->x_ccount == 0)
256 		needs += p->p_textp->x_swrss;
257 	needs = imin(needs, lotsfree);
258 	if (freemem - deficit > needs / divisor) {
259 		deficit += needs;
260 		if (swapin(p))
261 			goto loop;
262 		deficit -= imin(needs, deficit);
263 	}
264 
265 hardswap:
266 	/*
267 	 * Need resources (kernel map or memory), swap someone out.
268 	 * Select the nbig largest jobs, then the oldest of these
269 	 * is ``most likely to get booted.''
270 	 */
271 	inp = p;
272 	sleeper = 0;
273 	if (nbig > MAXNBIG)
274 		nbig = MAXNBIG;
275 	if (nbig < 1)
276 		nbig = 1;
277 	biggot = 0;
278 	bplist.bp_link = 0;
279 	for (rp = proc; rp < procNPROC; rp++) {
280 		if (!swappable(rp))
281 			continue;
282 		if (rp->p_stat==SZOMB)
283 			continue;
284 		if (rp == inp)
285 			continue;
286 		if (rp->p_textp && rp->p_textp->x_flag&XLOCK)
287 			continue;
288 		if (rp->p_slptime > maxslp &&
289 		    (rp->p_stat==SSLEEP&&rp->p_pri>PZERO||rp->p_stat==SSTOP)) {
290 			if (sleeper < rp->p_slptime) {
291 				p = rp;
292 				sleeper = rp->p_slptime;
293 			}
294 		} else if (!sleeper && (rp->p_stat==SRUN||rp->p_stat==SSLEEP)) {
295 			rppri = rp->p_rssize;
296 			if (rp->p_textp)
297 				rppri += rp->p_textp->x_rssize/rp->p_textp->x_ccount;
298 			if (biggot < nbig)
299 				nbp = &bigp[biggot++];
300 			else {
301 				nbp = bplist.bp_link;
302 				if (nbp->bp_pri > rppri)
303 					continue;
304 				bplist.bp_link = nbp->bp_link;
305 			}
306 			for (bp = &bplist; bp->bp_link; bp = bp->bp_link)
307 				if (rppri < bp->bp_link->bp_pri)
308 					break;
309 			nbp->bp_link = bp->bp_link;
310 			bp->bp_link = nbp;
311 			nbp->bp_pri = rppri;
312 			nbp->bp_proc = rp;
313 		}
314 	}
315 	if (!sleeper) {
316 		p = NULL;
317 		inpri = -1000;
318 		for (bp = bplist.bp_link; bp; bp = bp->bp_link) {
319 			rp = bp->bp_proc;
320 			rppri = rp->p_time+rp->p_nice-NZERO;
321 			if (rppri >= inpri) {
322 				p = rp;
323 				inpri = rppri;
324 			}
325 		}
326 	}
327 	/*
328 	 * If we found a long-time sleeper, or we are desperate and
329 	 * found anyone to swap out, or if someone deserves to come
330 	 * in and we didn't find a sleeper, but found someone who
331 	 * has been in core for a reasonable length of time, then
332 	 * we kick the poor luser out.
333 	 */
334 	if (sleeper || desperate && p || deservin && inpri > maxslp) {
335 		(void) spl6();
336 		p->p_flag &= ~SLOAD;
337 		if (p->p_stat == SRUN)
338 			remrq(p);
339 		(void) spl0();
340 		if (desperate) {
341 			/*
342 			 * Want to give this space to the rest of
343 			 * the processes in core so give them a chance
344 			 * by increasing the deficit.
345 			 */
346 			gives = p->p_rssize;
347 			if (p->p_textp)
348 				gives += p->p_textp->x_rssize / p->p_textp->x_ccount;
349 			gives = imin(gives, lotsfree);
350 			deficit += gives;
351 		} else
352 			gives = 0;	/* someone else taketh away */
353 		if (swapout(p, p->p_dsize, p->p_ssize) == 0)
354 			deficit -= imin(gives, deficit);
355 		goto loop;
356 	}
357 	/*
358 	 * Want to swap someone in, but can't
359 	 * so wait on runin.
360 	 */
361 	(void) spl6();
362 	runin++;
363 	sleep((caddr_t)&runin, PSWP);
364 	(void) spl0();
365 	goto loop;
366 }
367 
368 vmmeter()
369 {
370 	register unsigned *cp, *rp, *sp;
371 
372 	deficit -= imin(deficit,
373 	    imax(deficit / 10, ((klin * CLSIZE) / 2) * maxpgio / 2));
374 	ave(avefree, freemem, 5);
375 	ave(avefree30, freemem, 30);
376 	/* v_pgin is maintained by clock.c */
377 	cp = &cnt.v_first; rp = &rate.v_first; sp = &sum.v_first;
378 	while (cp <= &cnt.v_last) {
379 		ave(*rp, *cp, 5);
380 		*sp += *cp;
381 		*cp = 0;
382 		rp++, cp++, sp++;
383 	}
384 	if (time % 5 == 0) {
385 		vmtotal();
386 		rate.v_swpin = cnt.v_swpin;
387 		sum.v_swpin += cnt.v_swpin;
388 		cnt.v_swpin = 0;
389 		rate.v_swpout = cnt.v_swpout;
390 		sum.v_swpout += cnt.v_swpout;
391 		cnt.v_swpout = 0;
392 	}
393 	if (avefree < minfree && runout || proc[0].p_slptime > maxslp/2) {
394 		runout = 0;
395 		runin = 0;
396 		wakeup((caddr_t)&runin);
397 		wakeup((caddr_t)&runout);
398 	}
399 }
400 
401 vmpago()
402 {
403 	register int vavail;
404 	register int scanrate;
405 
406 	/*
407 	 * Compute new rate for clock; if
408 	 * nonzero, restart clock.
409 	 * Rate ranges linearly from one rev per
410 	 * slowscan seconds when there is lotsfree memory
411 	 * available to one rev per fastscan seconds when
412 	 * there is no memory available.
413 	 */
414 	nscan = desscan = 0;
415 	vavail = freemem - deficit;
416 	if (vavail < 0)
417 		vavail = 0;
418 	if (freemem >= lotsfree)
419 		return;
420 	scanrate = (slowscan * vavail + fastscan * (lotsfree - vavail)) / nz(lotsfree);
421 	desscan = (LOOPPAGES / CLSIZE) / nz(scanrate);
422 	/*
423 	 * DIVIDE BY 4 TO ACCOUNT FOR RUNNING 4* A SECOND (see clock.c)
424 	 */
425 	desscan /= 4;
426 	wakeup((caddr_t)&proc[2]);
427 }
428 
429 vmtotal()
430 {
431 	register struct proc *p;
432 	register struct text *xp;
433 	int nrun = 0;
434 
435 	total.t_vmtxt = 0;
436 	total.t_avmtxt = 0;
437 	total.t_rmtxt = 0;
438 	total.t_armtxt = 0;
439 	for (xp = text; xp < textNTEXT; xp++)
440 		if (xp->x_iptr) {
441 			total.t_vmtxt += xp->x_size;
442 			total.t_rmtxt += xp->x_rssize;
443 			for (p = xp->x_caddr; p; p = p->p_xlink)
444 			switch (p->p_stat) {
445 
446 			case SSTOP:
447 			case SSLEEP:
448 				if (p->p_slptime >= maxslp)
449 					continue;
450 				/* fall into... */
451 
452 			case SRUN:
453 			case SIDL:
454 				total.t_avmtxt += xp->x_size;
455 				total.t_armtxt += xp->x_rssize;
456 				goto next;
457 			}
458 next:
459 			;
460 		}
461 	total.t_vm = 0;
462 	total.t_avm = 0;
463 	total.t_rm = 0;
464 	total.t_arm = 0;
465 	total.t_rq = 0;
466 	total.t_dw = 0;
467 	total.t_pw = 0;
468 	total.t_sl = 0;
469 	total.t_sw = 0;
470 	for (p = proc; p < procNPROC; p++) {
471 		if (p->p_flag & SSYS)
472 			continue;
473 		if (p->p_stat) {
474 			total.t_vm += p->p_dsize + p->p_ssize;
475 			total.t_rm += p->p_rssize;
476 			switch (p->p_stat) {
477 
478 			case SSLEEP:
479 			case SSTOP:
480 				if (p->p_pri <= PZERO)
481 					nrun++;
482 				if (p->p_flag & SPAGE)
483 					total.t_pw++;
484 				else if (p->p_flag & SLOAD) {
485 					if (p->p_pri <= PZERO)
486 						total.t_dw++;
487 					else if (p->p_slptime < maxslp)
488 						total.t_sl++;
489 				} else if (p->p_slptime < maxslp)
490 					total.t_sw++;
491 				if (p->p_slptime < maxslp)
492 					goto active;
493 				break;
494 
495 			case SRUN:
496 			case SIDL:
497 				nrun++;
498 				if (p->p_flag & SLOAD)
499 					total.t_rq++;
500 				else
501 					total.t_sw++;
502 active:
503 				total.t_avm += p->p_dsize + p->p_ssize;
504 				total.t_arm += p->p_rssize;
505 				break;
506 			}
507 		}
508 	}
509 	total.t_vm += total.t_vmtxt;
510 	total.t_avm += total.t_avmtxt;
511 	total.t_rm += total.t_rmtxt;
512 	total.t_arm += total.t_armtxt;
513 	total.t_free = avefree;
514 	loadav(avenrun, nrun);
515 }
516 
/*
 * Constants for averages over 1, 5, and 15 minutes
 * when sampling at 5 second intervals.
 */
double	cexp[3] = {
	0.9200444146293232,	/* exp(-1/12) */
	0.9834714538216174,	/* exp(-1/60) */
	0.9944598480048967,	/* exp(-1/180) */
};

/*
 * Compute a tenex style load average of a quantity on
 * 1, 5 and 15 minute intervals.
 *
 * avg points to the three running averages, which are updated in
 * place; n is the new sample.  Each average decays by its cexp[]
 * factor and moves toward n by the complementary weight.
 *
 * No value is returned.  The return type is spelled out as int, the
 * type already assumed by the call in vmtotal(), which precedes this
 * definition.
 */
int
loadav(avg, n)
	register double *avg;
	int n;
{
	register int i;

	for (i = 0; i < 3; i++)
		avg[i] = cexp[i] * avg[i] + n * (1.0 - cexp[i]);
}
540