xref: /original-bsd/sys/vm/vm_glue.c (revision bacd16ee)
/*
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * The Mach Operating System project at Carnegie-Mellon University.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_glue.c	8.1 (Berkeley) 07/15/93
 *
 *
 * Copyright (c) 1987, 1990 Carnegie-Mellon University.
 * All rights reserved.
 *
 * Permission to use, copy, modify and distribute this software and
 * its documentation is hereby granted, provided that both the copyright
 * notice and this permission notice appear in all copies of the
 * software, derivative works or modified versions, and any portions
 * thereof, and that both notices appear in supporting documentation.
 *
 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
 * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
 * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
 *
 * Carnegie Mellon requests users of this software to return to
 *
 *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
 *  School of Computer Science
 *  Carnegie Mellon University
 *  Pittsburgh PA 15213-3890
 *
 * any improvements or extensions that they make and grant Carnegie the
 * rights to redistribute these changes.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/buf.h>
#include <sys/user.h>

#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/vm_kern.h>

int	avefree = 0;		/* XXX */
unsigned maxdmap = MAXDSIZ;	/* XXX */
int	readbuffers = 0;	/* XXX allow kgdb to read kernel buffer pool */
int
kernacc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_offset_t saddr, eaddr;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	saddr = trunc_page(addr);
	eaddr = round_page(addr+len);
	rv = vm_map_check_protection(kernel_map, saddr, eaddr, prot);
	/*
	 * XXX there are still some things (e.g. the buffer cache) that
	 * are managed behind the VM system's back, so even though an
	 * address is accessible in the mind of the VM system, there may
	 * not be physical pages where the VM system thinks there are.
	 * This can lead to bogus allocation of pages in the kernel
	 * address space or, worse, to inconsistencies at the pmap level.
	 * We only worry about the buffer cache for now.
	 */
	if (!readbuffers && rv && (eaddr > (vm_offset_t)buffers &&
		   saddr < (vm_offset_t)buffers + MAXBSIZE * nbuf))
		rv = FALSE;
	return(rv == TRUE);
}

int
useracc(addr, len, rw)
	caddr_t addr;
	int len, rw;
{
	boolean_t rv;
	vm_prot_t prot = rw == B_READ ? VM_PROT_READ : VM_PROT_WRITE;

	rv = vm_map_check_protection(&curproc->p_vmspace->vm_map,
	    trunc_page(addr), round_page(addr+len), prot);
	return(rv == TRUE);
}
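
/*
 * Usage sketch (illustrative, not part of the original source): a
 * character-device driver would typically validate a raw-I/O buffer
 * with useracc() before starting the transfer.  A device read fills
 * user memory, so the user pages must be writable:
 *
 *	if (!useracc(ubuf, ulen, B_WRITE))
 *		return (EFAULT);
 *
 * ubuf and ulen are hypothetical locals of the caller.
 */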

#ifdef KGDB
/*
 * Change protections on kernel pages from addr to addr+len
 * (presumably so a debugger can plant a breakpoint).
 *
 * We force the protection change at the pmap level.  If we were to use
 * vm_map_protect, a change to allow writing would be lazily applied,
 * meaning we would still take a protection fault, something we really
 * don't want to do.  It would also fragment the kernel map
 * unnecessarily.  We cannot use pmap_protect since it also won't
 * enforce a write-enable request.  Using pmap_enter is the only way
 * we can ensure the change takes place properly.
 */
void
chgkprot(addr, len, rw)
	register caddr_t addr;
	int len, rw;
{
	vm_prot_t prot;
	vm_offset_t pa, sva, eva;

	prot = rw == B_READ ? VM_PROT_READ : VM_PROT_READ|VM_PROT_WRITE;
	eva = round_page(addr + len);
	for (sva = trunc_page(addr); sva < eva; sva += PAGE_SIZE) {
		/*
		 * Extract the physical address for the page.
		 * We use a cheesy hack to differentiate physical
		 * page 0 from an invalid mapping: by or'ing the low
		 * bit into the virtual address, a valid mapping of
		 * physical page 0 comes back as 1 rather than 0.
		 * Not that it really matters...
		 */
		pa = pmap_extract(kernel_pmap, sva|1);
		if (pa == 0)
			panic("chgkprot: invalid page");
		pmap_enter(kernel_pmap, sva, pa&~1, prot, TRUE);
	}
}
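
/*
 * Usage sketch (illustrative only): a kernel debugger planting a
 * breakpoint would briefly write-enable the page, store the breakpoint
 * instruction, and then restore read-only protection:
 *
 *	chgkprot((caddr_t)pc, sizeof(bpt), B_WRITE);
 *	store bpt at pc
 *	chgkprot((caddr_t)pc, sizeof(bpt), B_READ);
 *
 * pc and bpt are hypothetical names.
 */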
#endif

void
vslock(addr, len)
	caddr_t	addr;
	u_int	len;
{
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
			round_page(addr+len), FALSE);
}

void
vsunlock(addr, len, dirtied)
	caddr_t	addr;
	u_int	len;
	int dirtied;
{
#ifdef	lint
	dirtied++;
#endif
	vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page(addr),
			round_page(addr+len), TRUE);
}
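
/*
 * Usage sketch (illustrative, not from this file): raw-I/O paths such
 * as physio() wire the user buffer for the duration of the transfer so
 * it cannot be paged out while a device is using it:
 *
 *	vslock(base, count);
 *	perform the transfer
 *	vsunlock(base, count, (flags & B_READ) != 0);
 *
 * base, count and flags are hypothetical locals of the caller; the
 * third argument to vsunlock() says whether the pages were dirtied
 * (a device read into user memory dirties them).
 */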

/*
 * Implement fork's actions on an address space.
 * Here we arrange for the address space to be copied or referenced,
 * allocate a user struct (pcb and kernel stack), then call the
 * machine-dependent layer to fill those in and make the new process
 * ready to run.
 * NOTE: the kernel stack may be at a different location in the child
 * process, and thus addresses of automatic variables may be invalid
 * after cpu_fork returns in the child process.  We do nothing here
 * after cpu_fork returns.
 */
int
vm_fork(p1, p2, isvfork)
	register struct proc *p1, *p2;
	int isvfork;
{
	register struct user *up;
	vm_offset_t addr;

#ifdef i386
	/*
	 * Avoid copying any of the parent's pagetables or other
	 * per-process objects that reside in the map by marking
	 * all of them non-inheritable.
	 */
	(void)vm_map_inherit(&p1->p_vmspace->vm_map,
		UPT_MIN_ADDRESS-UPAGES*NBPG, VM_MAX_ADDRESS, VM_INHERIT_NONE);
#endif
	p2->p_vmspace = vmspace_fork(p1->p_vmspace);

#ifdef SYSVSHM
	if (p1->p_vmspace->vm_shm)
		shmfork(p1, p2, isvfork);
#endif

#ifndef	i386
	/*
	 * Allocate a wired-down (for now) pcb and kernel stack for the
	 * process.
	 */
	addr = kmem_alloc_pageable(kernel_map, ctob(UPAGES));
	if (addr == 0)
		panic("vm_fork: no more kernel virtual memory");
	vm_map_pageable(kernel_map, addr, addr + ctob(UPAGES), FALSE);
#else
	/*
	 * XXX somehow, on the 386, pageout occasionally removes the
	 * active, wired-down kernel stack and pagetables WITHOUT going
	 * through vm_page_unwire!  Why this appears to work is not yet
	 * clear, yet it does...
	 */
	addr = kmem_alloc(kernel_map, ctob(UPAGES));
	if (addr == 0)
		panic("vm_fork: no more kernel virtual memory");
#endif
	up = (struct user *)addr;
	p2->p_addr = up;

	/*
	 * p_stats and p_sigacts currently point at fields in the user
	 * struct, referenced through p_addr rather than through &u.
	 * Copy p_sigacts and parts of p_stats; zero the rest of
	 * p_stats (statistics).
	 */
	p2->p_stats = &up->u_stats;
	p2->p_sigacts = &up->u_sigacts;
	up->u_sigacts = *p1->p_sigacts;
	bzero(&up->u_stats.pstat_startzero,
	    (unsigned) ((caddr_t)&up->u_stats.pstat_endzero -
	    (caddr_t)&up->u_stats.pstat_startzero));
	bcopy(&p1->p_stats->pstat_startcopy, &up->u_stats.pstat_startcopy,
	    ((caddr_t)&up->u_stats.pstat_endcopy -
	     (caddr_t)&up->u_stats.pstat_startcopy));

#ifdef i386
	{ u_int addr = UPT_MIN_ADDRESS - UPAGES*NBPG; struct vm_map *vp;

	vp = &p2->p_vmspace->vm_map;
	(void)vm_deallocate(vp, addr, UPT_MAX_ADDRESS - addr);
	(void)vm_allocate(vp, &addr, UPT_MAX_ADDRESS - addr, FALSE);
	(void)vm_map_inherit(vp, addr, UPT_MAX_ADDRESS, VM_INHERIT_NONE);
	}
#endif
	/*
	 * cpu_fork will copy and update the kernel stack and pcb, and
	 * make the child ready to run.  It marks the child so that it
	 * can return differently than the parent.  It returns twice,
	 * once in the parent process and once in the child.
	 */
	return (cpu_fork(p1, p2));
}
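
/*
 * Caller's view (sketch; assumes the fork1() convention in
 * kern_fork.c): because cpu_fork() returns in both processes, the
 * value vm_fork() hands back lets the caller tell them apart:
 *
 *	if (vm_fork(p1, p2, isvfork)) {
 *		child process: finish setup and return to user mode
 *	}
 *	parent continues here
 */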

/*
 * Set default limits for VM system.
 * Called for proc 0, and then inherited by all others.
 */
void
vm_init_limits(p)
	register struct proc *p;
{

	/*
	 * Set up the initial limits on process VM.  Set the maximum
	 * resident set size to be all of (reasonably) available memory.
	 * This causes any single, large process to start random page
	 * replacement once it fills memory.
	 */
	p->p_rlimit[RLIMIT_STACK].rlim_cur = DFLSSIZ;
	p->p_rlimit[RLIMIT_STACK].rlim_max = MAXSSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_cur = DFLDSIZ;
	p->p_rlimit[RLIMIT_DATA].rlim_max = MAXDSIZ;
	p->p_rlimit[RLIMIT_RSS].rlim_cur = ptoa(cnt.v_free_count);
}

#include <vm/vm_pageout.h>

#ifdef DEBUG
int	enableswap = 1;
int	swapdebug = 0;
#define	SDB_FOLLOW	1
#define SDB_SWAPIN	2
#define SDB_SWAPOUT	4
#endif

/*
 * Brutally simple:
 *	1. Attempt to swap in every swapped-out, runnable process in
 *	   order of priority.
 *	2. If not enough memory, wake the pageout daemon and let it
 *	   clear some space.
 */
void
sched()
{
	register struct proc *p;
	register int pri;
	struct proc *pp;
	int ppri;
	vm_offset_t addr;
	vm_size_t size;

loop:
#ifdef DEBUG
	while (!enableswap)
		sleep((caddr_t)&proc0, PVM);
#endif
	pp = NULL;
	ppri = INT_MIN;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		if (p->p_stat == SRUN && (p->p_flag & SLOAD) == 0) {
			pri = p->p_time + p->p_slptime - p->p_nice * 8;
			if (pri > ppri) {
				pp = p;
				ppri = pri;
			}
		}
	}
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: running, procp %x pri %d\n", pp, ppri);
#endif
	/*
	 * Nothing to do, back to sleep.
	 */
	if ((p = pp) == NULL) {
		sleep((caddr_t)&proc0, PVM);
		goto loop;
	}

	/*
	 * We would like to bring someone in.
	 * This part is really bogus because we could deadlock on memory
	 * despite our feeble check.
	 */
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
	if (cnt.v_free_count > atop(size)) {
#ifdef DEBUG
		if (swapdebug & SDB_SWAPIN)
			printf("swapin: pid %d(%s)@%x, pri %d free %d\n",
			       p->p_pid, p->p_comm, p->p_addr,
			       ppri, cnt.v_free_count);
#endif
		vm_map_pageable(kernel_map, addr, addr+size, FALSE);
		(void) splstatclock();
		if (p->p_stat == SRUN)
			setrq(p);
		p->p_flag |= SLOAD;
		(void) spl0();
		p->p_time = 0;
		goto loop;
	}
	/*
	 * Not enough memory, jab the pageout daemon and wait until the
	 * coast is clear.
	 */
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: no room for pid %d(%s), free %d\n",
		       p->p_pid, p->p_comm, cnt.v_free_count);
#endif
	(void) splhigh();
	VM_WAIT;
	(void) spl0();
#ifdef DEBUG
	if (swapdebug & SDB_FOLLOW)
		printf("sched: room again, free %d\n", cnt.v_free_count);
#endif
	goto loop;
}

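/*
 * Descriptive note: a process is eligible for swapping only if its
 * u-area is resident (SLOAD set) and none of the disqualifying flags
 * are set: system process (SSYS), temporarily nailed in core (SKEEP),
 * exiting (SWEXIT), or doing physical I/O (SPHYSIO).
 */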
#define	swappable(p) \
	(((p)->p_flag & (SSYS|SLOAD|SKEEP|SWEXIT|SPHYSIO)) == SLOAD)

/*
 * Swapout is driven by the pageout daemon.  Very simple: we find
 * eligible procs and unwire their u-areas.  We try to always "swap"
 * at least one process in case we need the room for a swapin.
 * If any procs have been sleeping/stopped for at least maxslp seconds,
 * they are swapped.  Otherwise, we swap the longest-sleeping or
 * stopped process, if any, and failing that the longest-resident
 * process.
 */
void
swapout_threads()
{
	register struct proc *p;
	struct proc *outp, *outp2;
	int outpri, outpri2;
	int didswap = 0;
	extern int maxslp;

#ifdef DEBUG
	if (!enableswap)
		return;
#endif
	outp = outp2 = NULL;
	outpri = outpri2 = 0;
	for (p = (struct proc *)allproc; p != NULL; p = p->p_nxt) {
		if (!swappable(p))
			continue;
		switch (p->p_stat) {
		case SRUN:
			if (p->p_time > outpri2) {
				outp2 = p;
				outpri2 = p->p_time;
			}
			continue;

		case SSLEEP:
		case SSTOP:
			if (p->p_slptime >= maxslp) {
				swapout(p);
				didswap++;
			} else if (p->p_slptime > outpri) {
				outp = p;
				outpri = p->p_slptime;
			}
			continue;
		}
	}
	/*
	 * If we didn't get rid of any real duds, toss out the next most
	 * likely sleeping/stopped or running candidate.  We only do this
	 * if we are really low on memory since we don't gain much by
	 * doing it (UPAGES pages).
	 */
	if (didswap == 0 &&
	    cnt.v_free_count <= atop(round_page(ctob(UPAGES)))) {
		if ((p = outp) == 0)
			p = outp2;
#ifdef DEBUG
		if (swapdebug & SDB_SWAPOUT)
			printf("swapout_threads: no duds, try procp %x\n", p);
#endif
		if (p)
			swapout(p);
	}
}

void
swapout(p)
	register struct proc *p;
{
	vm_offset_t addr;
	vm_size_t size;

#ifdef DEBUG
	if (swapdebug & SDB_SWAPOUT)
		printf("swapout: pid %d(%s)@%x, stat %x pri %d free %d\n",
		       p->p_pid, p->p_comm, p->p_addr, p->p_stat,
		       p->p_slptime, cnt.v_free_count);
#endif
	size = round_page(ctob(UPAGES));
	addr = (vm_offset_t) p->p_addr;
#if defined(hp300) || defined(luna68k)
	/*
	 * Ugh!  The u-area is double mapped to a fixed address behind
	 * the back of the VM system, and accesses are usually through
	 * that address rather than the per-process address.  Hence
	 * reference and modify information are recorded at the fixed
	 * address and lost at context switch time.  We assume the
	 * u-struct and kernel stack are always accessed/modified and
	 * force it to be so.
	 */
	{
		register int i;
		volatile long tmp;

		for (i = 0; i < UPAGES; i++) {
			tmp = *(long *)addr; *(long *)addr = tmp;
			addr += NBPG;
		}
		addr = (vm_offset_t) p->p_addr;
	}
#endif
#ifdef mips
	/*
	 * Be sure to save the floating point coprocessor state before
	 * paging out the u-struct.
	 */
	{
		extern struct proc *machFPCurProcPtr;

		if (p == machFPCurProcPtr) {
			MachSaveCurFPState(p);
			machFPCurProcPtr = (struct proc *)0;
		}
	}
#endif
#ifndef	i386 /* temporary measure till we find the spontaneous unwire of the kstack */
	vm_map_pageable(kernel_map, addr, addr+size, TRUE);
	pmap_collect(vm_map_pmap(&p->p_vmspace->vm_map));
#endif
	(void) splhigh();
	p->p_flag &= ~SLOAD;
	if (p->p_stat == SRUN)
		remrq(p);
	(void) spl0();
	p->p_time = 0;
}

/*
 * The rest of these routines fake thread handling.
 */

void
assert_wait(event, ruptible)
	int event;
	boolean_t ruptible;
{
#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
}

void
thread_block()
{
	int s = splhigh();

	if (curproc->p_thread)
		sleep((caddr_t)curproc->p_thread, PVM);
	splx(s);
}

void
thread_sleep(event, lock, ruptible)
	int event;
	simple_lock_t lock;
	boolean_t ruptible;
{
	int s = splhigh();

#ifdef lint
	ruptible++;
#endif
	curproc->p_thread = event;
	simple_unlock(lock);
	if (curproc->p_thread)
		sleep((caddr_t)event, PVM);
	splx(s);
}

void
thread_wakeup(event)
	int event;
{
	int s = splhigh();

	wakeup((caddr_t)event);
	splx(s);
}
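
/*
 * Usage sketch (illustrative only): the Mach-style protocol these
 * shims emulate is "announce the event, then block", with a matching
 * wakeup on the same event value from elsewhere:
 *
 *	assert_wait((int)&object, FALSE);
 *	thread_block();
 *
 * and, from whoever makes the event true:
 *
 *	thread_wakeup((int)&object);
 *
 * object stands for whatever address is used as the event;
 * thread_sleep() combines the announce-and-block sequence with
 * dropping a simple lock.
 */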

/*
 * DEBUG stuff
 */

int indent = 0;

#include <machine/stdarg.h>		/* see subr_prf.c */

/*ARGSUSED2*/
void
#if __STDC__
iprintf(const char *fmt, ...)
#else
iprintf(fmt /* , va_alist */)
	char *fmt;
	/* va_dcl */
#endif
{
	register int i;
	va_list ap;

	for (i = indent; i >= 8; i -= 8)
		printf("\t");
	while (--i >= 0)
		printf(" ");
	va_start(ap, fmt);
	printf("%r", fmt, ap);
	va_end(ap);
}
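
/*
 * Usage sketch (illustrative): debug printers adjust the global indent
 * around nested structures, e.g.
 *
 *	indent += 2;
 *	iprintf("map %x\n", map);
 *	indent -= 2;
 *
 * map is a hypothetical argument.  The "%r" directive, handled by the
 * kernel printf in subr_prf.c, formats the remaining arguments
 * recursively using the caller-supplied format string.
 */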