xref: /original-bsd/sys/vm/vm_glue.c (revision 95ecee29)
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_glue.c	8.4 (Berkeley) 09/23/93
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

int	avefree = 0;		/* XXX */
unsigned maxdmap = MAXDSIZ;	/* XXX */
int	readbuffers = 0;	/* XXX allow kgdb to read kernel buffer pool */

int
kernacc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_offset_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page(addr);
	eaddr = round_page(addr+len);
	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
	/*
	 * XXX there are still some things (e.g. the buffer cache) that
	 * are managed behind the VM system's back, so even though an
	 * address is accessible in the mind of the VM system, there may
	 * not be physical pages where the VM system thinks there are.
	 * This can lead to bogus allocation of pages in the kernel
	 * address space or worse, inconsistencies at the pmap level.
	 * We only worry about the buffer cache for now.
	 */
	if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
		   saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
		rv = FALSE;
	return(rv == TRUE);
}

int
useracc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr+len), prot);
	return(rv == TRUE);
}

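/*
 * Illustrative only -- not part of the original file.  A caller
 * validating a user buffer before touching it directly would use
 * useracc() roughly as below; the names "base" and "count" are
 * hypothetical locals:
 *
 *	if (useracc(base, count, B_WRITE) == 0)
 *		return (EFAULT);
 */
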
#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so debugger can plant a breakpoint).
 *
 * We force the protection change at the pmap level.  If we were
 * to use vm_map_protect, a change to allow writing would be lazily
 * applied, meaning we would still take a protection fault, something
 * we really don't want to do.  It would also fragment the kernel
 * map unnecessarily.  We cannot use pmap_protect since it also won't
 * enforce a write-enable request.  Using pmap_enter is the only way
 * we can ensure the change takes place properly.
 */
void
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot;
	vm_offset_t pa, sva, eva;

	prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
	eva = round_page(addr + len);
	for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
		/*
		 * Extract the physical address for the page.
		 * We use a cheesy hack (setting the low bit) to
		 * differentiate physical page 0 from an invalid
		 * mapping, not that it really matters...
		 */
		pa = pmap_extract(kernel_pmap, sva|1);
		if (pa == 0)
			panic("chgkprot: invalid page");
		pmap_enter(kernel_pmap, sva, pa&~1, prot, TRUE);
	}
}
#endif

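/*
 * Illustrative only -- not part of the original file.  A kernel
 * debugger planting a breakpoint would bracket its store with the
 * routine above, roughly ("bpt" and BPT_INST are hypothetical names):
 *
 *	chgkprot((caddr_t)bpt, sizeof(int), B_WRITE);
 *	*(int *)bpt = BPT_INST;
 *	chgkprot((caddr_t)bpt, sizeof(int), B_READ);
 */
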
void
vslock(addr, len)
	caddr_t	addr;
	u_int	len;
{
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
			round_page(addr+len), FALSE);
}

void
vsunlock(addr, len, dirtied)
	caddr_t	addr;
	u_int	len;
	int dirtied;
{
#ifdef	lint
	dirtied++;
#endif
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
			round_page(addr+len), TRUE);
}

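/*
 * Illustrative only -- not part of the original file.  These two are
 * normally paired around raw I/O on a user buffer so the pages stay
 * resident for the duration of the transfer; "base", "count" and "rw"
 * are hypothetical locals:
 *
 *	vslock(base, count);
 *	... run the device transfer on the wired pages ...
 *	vsunlock(base, count, rw == B_READ);
 *
 * (a B_READ transfer writes into the user's pages, hence "dirtied".)
 */
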
/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.
 * NOTE: the kernel stack may be at a different location in the child
 * process, and thus addresses of automatic variables may be invalid
 * after cpu_fork returns in the child process.  We do nothing here
 * after cpu_fork returns.
 */
int
vm_fork(p1, p2, isvfork)
	register struct proc *p1, *p2;
	int isvfork;
{
	register struct user *up;
	vm_offset_t addr;

#ifdef i386
	/*
	 * Avoid copying any of the parent's pagetables or other
	 * per-process objects that reside in the map by marking
	 * all of them non-inheritable.
	 */
	(void)vm_map_inherit(&p1->p_vmspace->vm_map,
		UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
#endif
	p2->p_vmspace = vmspace_fork(p1->p_vmspace);

#ifdef SYSVSHM
	if (p1->p_vmspace->vm_shm)
		shmfork(p1, p2, isvfork);
#endif

#ifndef	i386
	/*
	 * Allocate a wired-down (for now) pcb and kernel stack for the process
	 */
	addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
	if (addr == 0)
		panic("vm_fork: no more kernel virtual memory");
	vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
#else
	/*
	 * XXX somehow, on 386, pageout occasionally removes the active,
	 * wired-down kstack and pagetables WITHOUT going through
	 * vm_page_unwire!  Why this appears to work is not yet clear,
	 * yet it does...
	 */
	addr = kmem_alloc(kernel_map, ctob(UPAGES));
	if (addr == 0)
		panic("vm_fork: no more kernel virtual memory");
#endif
	up = (struct user *)addr;
	p2->p_addr = up;

	/*
	 * p_stats and p_sigacts currently point at fields in the user
	 * struct, but are referenced through p_addr rather than through
	 * the mapped &u.  Copy p_sigacts and parts of p_stats; zero the
	 * rest of p_stats (statistics).
	 */
	p2->p_stats = &up->u_stats;
	p2->p_sigacts = &up->u_sigacts;
	up->u_sigacts = *p1->p_sigacts;
	bzero(&up->u_stats.pstat_startzero,
	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
	    (caddr_t)&up->u_stats.pstat_startzero));
	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
	    ((caddr_t)&up->u_stats.pstat_endcopy -
	     (caddr_t)&up->u_stats.pstat_startcopy));

#ifdef i386
	{
		u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG;
		struct vm_map *vp;

		vp = &p2->p_vmspace->vm_map;
		(void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
		(void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
		(void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
	}
#endif
	/*
	 * cpu_fork will copy and update the kernel stack and pcb,
	 * and make the child ready to run.  It marks the child
	 * so that it can return differently than the parent.
	 * It returns twice, once in the parent process and
	 * once in the child.
	 */
	return (cpu_fork(p1, p2));
}

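/*
 * Illustrative only -- not part of the original file.  Since cpu_fork
 * returns twice, the caller (fork1() in kern_fork.c) separates the two
 * returns roughly like this:
 *
 *	if (vm_fork(p1, p2, isvfork))
 *		return (0);		(child, on the new kernel stack)
 *	(parent continues here)
 */
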
/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 */
void
vm_init_limits(p)
	register struct proc *p;
{

	/*
	 * Set up the initial limits on process VM.
	 * Set the maximum resident set size to be all
	 * of (reasonably) available memory.  This causes
	 * any single, large process to start random page
	 * replacement once it fills memory.
	 */
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
}

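/*
 * Illustrative arithmetic -- not from the original source.  ptoa()
 * converts a page count to bytes, so on a machine with (for example)
 * 4096-byte pages and 2048 free pages at this point, the initial RSS
 * soft limit would be 2048 * 4096 = 8MB.  The page size is
 * machine-dependent; 4096 is only an assumption here.
 */
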
#include <vm/vm_pageout.h>

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define SDB_SWAPIN	2
#define SDB_SWAPOUT	4
#endif

/*
 * Brutally simple:
 *	1. Attempt to swap in every swapped-out, runnable process in
 *	   order of priority.
 *	2. If not enough memory, wake the pageout daemon and let it
 *	   clear some space.
 */
void
scheduler()
{
	register struct proc *p;
	register int pri;
	struct proc *pp;
	int ppri;
	vm_offset_t addr;
	vm_size_t size;

loop:
#ifdef DEBUG
	while (!enableswap)
		sleep((caddr_t)&proc0, PVM);
#endif
	pp = NULL;
	ppri = INT_MIN;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
		if (p->p_stat == SRUN && (p->p_flag & P_INMEM) == 0) {
			pri = p->p_swtime + p->p_slptime - p->p_nice * 8;
			if (pri > ppri) {
				pp = p;
				ppri = pri;
			}
		}
	}
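	/*
	 * Illustrative arithmetic -- not from the original source.
	 * A process with p_swtime 20, p_slptime 5 and p_nice 0 scores
	 * 20 + 5 - 0 = 25; each point of p_nice costs 8, so of two
	 * equally long swapped-out candidates the nicer one is brought
	 * back in later.
	 */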
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: running, procp %x pri %d\n", pp, ppri);
#endif
	/*
	 * Nothing to do, back to sleep
	 */
	if ((p = pp) == NULL) {
		sleep((caddr_t)&proc0, PVM);
		goto loop;
	}

	/*
	 * We would like to bring someone in.
	 * This part is really bogus because we could deadlock on memory
	 * despite our feeble check.
	 */
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
	if (cnt.v_free_count > atop(size)) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPIN)
			printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
			       p->p_pid, p->p_comm, p->p_addr,
			       ppri, cnt.v_free_count);
#endif
		vm_map_pageable(kernel_map, addr, addr+size, FALSE);
		(void) splstatclock();
		if (p->p_stat == SRUN)
			setrunqueue(p);
		p->p_flag |= P_INMEM;
		(void) spl0();
		p->p_swtime = 0;
		goto loop;
	}
	/*
	 * Not enough memory, jab the pageout daemon and wait until the
	 * coast is clear.
	 */
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: no room for pid %d(%s), free %d\n",
		       p->p_pid, p->p_comm, cnt.v_free_count);
#endif
	(void) splhigh();
	VM_WAIT;
	(void) spl0();
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: room again, free %d\n", cnt.v_free_count);
#endif
	goto loop;
}

#define	swappable(p)							\
	(((p)->p_flag &							\
	    (P_SYSTEM | P_INMEM | P_NOSWAP | P_WEXIT | P_PHYSIO)) == P_INMEM)

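/*
 * Illustrative reading -- not from the original source: the mask-and-
 * compare above means a process is swappable only if P_INMEM is set
 * and none of P_SYSTEM, P_NOSWAP, P_WEXIT or P_PHYSIO is, i.e. it is
 * resident and is not a system process, nailed down, exiting, or in
 * the middle of raw I/O.
 */
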
/*
 * Swapout is driven by the pageout daemon.  Very simple, we find eligible
 * procs and unwire their u-areas.  We try to always "swap" at least one
 * process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Else, we swap the longest-sleeping or stopped process,
 * if any, otherwise the longest-resident process.
 */
void
swapout_threads()
{
	register struct proc *p;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;
	extern int maxslp;

#ifdef DEBUG
	if (!enableswap)
		return;
#endif
	outp = outp2 = NULL;
	outpri = outpri2 = 0;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_next) {
		if (!swappable(p))
			continue;
		switch (p->p_stat) {
		case SRUN:
			if (p->p_swtime > outpri2) {
				outp2 = p;
				outpri2 = p->p_swtime;
			}
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_slptime >= maxslp) {
				swapout(p);
				didswap++;
			} else if (p->p_slptime > outpri) {
				outp = p;
				outpri = p->p_slptime;
			}
			continue;
		}
	}
	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are really low on memory since we don't gain much by
	 * doing it (UPAGES pages).
	 */
	if (didswap == 0 &&
	    cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
		if ((p = outp) == 0)
			p = outp2;
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procp %x\n", p);
#endif
		if (p)
			swapout(p);
	}
}

void
swapout(p)
	register struct proc *p;
{
	vm_offset_t addr;
	vm_size_t size;

#ifdef DEBUG
	if (swapdebug & SDB_SWAPOUT)
		printf("swapout: pid %d(%s)@%x, stat %x slptime %d free %d\n",
		       p->p_pid, p->p_comm, p->p_addr, p->p_stat,
		       p->p_slptime, cnt.v_free_count);
#endif
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
#if defined(hp300) || defined(luna68k)
	/*
	 * Ugh!  u-area is double mapped to a fixed address behind the
	 * back of the VM system and accesses are usually through that
	 * address rather than the per-process address.  Hence reference
	 * and modify information are recorded at the fixed address and
	 * lost at context switch time.  We assume the u-struct and
	 * kernel stack are always accessed/modified and force it to be so.
	 */
	{
		register int i;
		volatile long tmp;

		for (i = 0; i < UPAGES; i++) {
			tmp = *(long *)addr; *(long *)addr = tmp;
			addr += NBPG;
		}
		addr = (vm_offset_t) p->p_addr;
	}
#endif
#ifdef mips
	/*
	 * Be sure to save the floating point coprocessor state before
	 * paging out the u-struct.
	 */
	{
		extern struct proc *machFPCurProcPtr;

		if (p == machFPCurProcPtr) {
			MachSaveCurFPState(p);
			machFPCurProcPtr = (struct proc *)0;
		}
	}
#endif
#ifndef	i386 /* temporary measure till we find spontaneous unwire of kstack */
	vm_map_pageable(kernel_map, addr, addr+size, TRUE);
	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
#endif
	(void) splhigh();
	p->p_flag &= ~P_INMEM;
	if (p->p_stat == SRUN)
		remrq(p);
	(void) spl0();
	p->p_swtime = 0;
}

/*
 * The rest of these routines fake thread handling
 */

void
assert_wait(event, ruptible)
	int event;
	boolean_t ruptible;
{
#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
}

void
thread_block()
{
	int s = splhigh();

	if (curproc->p_thread)
		sleep((caddr_t)curproc->p_thread, PVM);
	splx(s);
}

void
thread_sleep(event, lock, ruptible)
	int event;
	simple_lock_t lock;
	boolean_t ruptible;
{
	int s = splhigh();

#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
	simple_unlock(lock);
	if (curproc->p_thread)
		sleep((caddr_t)event, PVM);
	splx(s);
}

void
thread_wakeup(event)
	int event;
{
	int s = splhigh();

	wakeup((caddr_t)event);
	splx(s);
}

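/*
 * Illustrative only -- not part of the original file.  The stubs above
 * emulate the Mach wait protocol that machine-independent VM code uses,
 * roughly ("object" and "must_wait" are hypothetical):
 *
 *	assert_wait((int)object, TRUE);		(announce intent to sleep)
 *	if (must_wait)
 *		thread_block();			(really go to sleep)
 *
 * while the waking side calls:
 *
 *	thread_wakeup((int)object);
 */
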
/*
 * DEBUG stuff
 */

int indent = 0;

#include <machine/stdarg.h>		/* see subr_prf.c */

/*ARGSUSED2*/
void
#if __STDC__
iprintf(const char *fmt, ...)
#else
iprintf(fmt /* , va_alist */)
	char *fmt;
	/* va_dcl */
#endif
{
	register int i;
	va_list ap;

	for (i = indent; i >= 8; i -= 8)
		printf("\t");
	while (--i >= 0)
		printf(" ");
	va_start(ap, fmt);
	printf("%r", fmt, ap);
	va_end(ap);
}
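
/*
 * Illustrative only -- not part of the original file.  iprintf()
 * prepends "indent" columns before formatting, so recursive dump
 * routines in the VM debug code nest their output roughly like:
 *
 *	indent += 2;
 *	iprintf("map %x\n", map);	(printed two columns in)
 *	indent -= 2;
 */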