/*-
 * Copyright (c) 1982, 1986, 1993
 *	The Regents of the University of California.  All rights reserved.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department, and William Jolitz.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)vm_machdep.c	8.2 (Berkeley) 09/23/93
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
#include <sys/buf.h>
#include <sys/vnode.h>
#include <sys/user.h>

#include <machine/cpu.h>

#include <vm/vm.h>
#include <vm/vm_kern.h>

/*
 * Finish a fork operation, with process p2 nearly set up.
 * Copy and update the kernel stack and pcb, making the child
 * ready to run, and marking it so that it can return differently
 * than the parent.  Returns 1 in the child process, 0 in the parent.
 * We currently double-map the user area so that the stack is at the same
 * address in each process; in the future we will probably relocate
 * the frame pointers on the stack after copying.
 */
cpu_fork(p1, p2)
	register struct proc *p1, *p2;
{
	register struct user *up = p2->p_addr;
	int offset, addr, i;
	extern char kstack[];
	extern int mvesp();

	/*
	 * Copy pcb and stack from proc p1 to p2.
	 * We do this as cheaply as possible, copying only the active
	 * part of the stack.  The stack and pcb need to agree;
	 * this is tricky, as the final pcb is constructed by savectx,
	 * but its frame isn't yet on the stack when the stack is copied.
	 * mi_switch compensates for this when the child eventually runs.
	 * This should be done differently, with a single call
	 * that copies and updates the pcb+stack,
	 * replacing the bcopy and savectx.
	 */
	p2->p_addr->u_pcb = p1->p_addr->u_pcb;
	offset = mvesp() - (int)kstack;
	bcopy((caddr_t)kstack + offset, (caddr_t)p2->p_addr + offset,
	    (unsigned) ctob(UPAGES) - offset);
	p2->p_md.md_regs = p1->p_md.md_regs;

	/*
	 * Wire top of address space of child to its u.
	 * First, fault in a page of pte's to map it.
	 */
	addr = trunc_page((u_int)vtopte(kstack));
	(void)vm_map_pageable(&p2->p_vmspace->vm_map, addr, addr+NBPG, FALSE);
	for (i = 0; i < UPAGES; i++)
		pmap_enter(&p2->p_vmspace->vm_pmap, (vm_offset_t)kstack+i*NBPG,
			pmap_extract(kernel_pmap, ((int)p2->p_addr)+i*NBPG),
			VM_PROT_READ, 1);
	pmap_activate(&p2->p_vmspace->vm_pmap, &up->u_pcb);

	/*
	 * Arrange for a non-local goto when the new process
	 * is started, to resume here, returning nonzero from setjmp.
	 */
	if (savectx(up, 1)) {
		/*
		 * Return 1 in child.
		 */
		return (1);
	}
	return (0);
}

#include "npx.h"
#if NNPX > 0
extern struct proc *npxproc;	/* process currently using the coprocessor */
#endif

#ifdef notyet
/*
 * cpu_exit is called as the last action during exit.
 *
 * We change to an inactive address space and a "safe" stack,
 * passing through an argument to the new stack. Now, safely isolated
 * from the resources we're shedding, we release the address space
 * and any remaining machine-dependent resources, including the
 * memory for the user structure and kernel stack.
 *
 * Next, we assign a dummy context to be written over by mi_switch,
 * calling it to send this process off to oblivion.
 * [The nullpcb allows us to minimize cost in mi_switch() by not having
 * a special case].
 */
struct proc *switch_to_inactive();
cpu_exit(p)
	register struct proc *p;
{
	static struct pcb nullpcb;	/* pcb to overwrite on last switch */

#if NNPX > 0
	/* free coprocessor (if we have it) */
	if (p == npxproc)
		npxproc = 0;
#endif

	/* move to inactive space and stack, passing arg across */
	p = switch_to_inactive(p);

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));

	p->p_addr = (struct user *) &nullpcb;
	mi_switch();
	/* NOTREACHED */
}
#else
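/*
 * Interim cpu_exit: release the coprocessor (if this process owns it)
 * and switch away.  The address space and u-area are reclaimed later,
 * in cpu_wait(), since we are still running on this process's kernel
 * stack here.
 */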
cpu_exit(p)
	register struct proc *p;
{

	/* free coprocessor (if we have it) */
#if NNPX > 0
	if (p == npxproc)
		npxproc = 0;
#endif

	curproc = p;
	mi_switch();
}

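/*
 * cpu_wait is called once the exited process has been reaped; it frees
 * the machine-dependent resources that cpu_exit could not release while
 * still running on them: the address space and the u-area/kernel stack.
 */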
cpu_wait(p)
	struct proc *p;
{

	/* drop per-process resources */
	vmspace_free(p->p_vmspace);
	kmem_free(kernel_map, (vm_offset_t)p->p_addr, ctob(UPAGES));
}
#endif

/*
 * Dump the machine-specific header information at the start of a core dump.
 */
cpu_coredump(p, vp, cred)
	struct proc *p;
	struct vnode *vp;
	struct ucred *cred;
{

	return (vn_rdwr(UIO_WRITE, vp, (caddr_t) p->p_addr, ctob(UPAGES),
	    (off_t)0, UIO_SYSSPACE, IO_NODELOCKED|IO_UNIT, cred, (int *)NULL,
	    p));
}

/*
 * Set a red zone in the kernel stack after the u. area.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = kvtopte(from);
	tpte = kvtopte(to);
	while (size > 0) {
		*tpte++ = *fpte;	/* copy the mapping to its new slot */
		*(int *)fpte++ = 0;	/* and invalidate the old entry */
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	tlbflush();
}

/*
 * Convert kernel VA to physical address.
 */
kvtop(addr)
	register caddr_t addr;
{
	vm_offset_t pa;

	pa = pmap_extract(kernel_pmap, (vm_offset_t)addr);
	if (pa == 0)
		panic("kvtop: zero page frame");
	return((int)pa);
}

#ifdef notdef
/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return(1);
	return(0);
}

probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	return(0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 *     (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	if (count <= 0)
		return(0);
	pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	/* first make sure the page directory entries covering the range are valid */
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0)
			return(0);
	ix = btop(addr-0xfe000000);
	cnt = btop(addr-0xfe000000+count+NBPG-1);
	if (cnt > (int)&Syssize)
		return(0);
	cnt -= ix;
	/* then check the individual page table entries */
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/)
			return(0);
	return(1);
}

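/*
 * Check whether a range of user addresses is accessible for the given
 * kind of access, probing it a page at a time with prober/probew.
 */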
useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	if (count <= 0)
		return(0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0)
			return(0);
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	return(1);
}
#endif

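/*
 * Kernel map in which vmapbuf()/vunmapbuf() below allocate temporary
 * virtual addresses for physical IO.
 */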
extern vm_map_t phys_map;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap").
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr;
	register long flags = bp->b_flags;
	struct proc *p;
	int off;
	vm_offset_t kva;
	register vm_offset_t pa;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	addr = bp->b_saveaddr = bp->b_un.b_addr;
	off = (int)addr & PGOFSET;
	p = bp->b_proc;
	npf = btoc(round_page(bp->b_bcount + off));
	kva = kmem_alloc_wait(phys_map, ctob(npf));
	bp->b_un.b_addr = (caddr_t) (kva + off);
	/*
	 * Enter a kernel mapping for each page of the user buffer,
	 * taking the physical addresses from the process's pmap.
	 */
	while (npf--) {
		pa = pmap_extract(&p->p_vmspace->vm_pmap, (vm_offset_t)addr);
		if (pa == 0)
			panic("vmapbuf: null page frame");
		pmap_enter(vm_map_pmap(phys_map), kva, trunc_page(pa),
			   VM_PROT_READ|VM_PROT_WRITE, TRUE);
		addr += PAGE_SIZE;
		kva += PAGE_SIZE;
	}
}

/*
 * Free the IO map PTEs associated with this IO operation.
 * We also invalidate the TLB entries and restore the original b_addr.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int npf;
	register caddr_t addr = bp->b_un.b_addr;
	vm_offset_t kva;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	npf = btoc(round_page(bp->b_bcount + ((int)addr & PGOFSET)));
	kva = (vm_offset_t)((int)addr & ~PGOFSET);
	kmem_free_wakeup(phys_map, kva, ctob(npf));
	bp->b_un.b_addr = bp->b_saveaddr;
	bp->b_saveaddr = NULL;
}
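
#ifdef notdef
/*
 * Illustrative sketch only, not part of the original sources: the usual
 * caller of vmapbuf()/vunmapbuf() is the raw-IO path, which maps the
 * user buffer around the driver strategy call.  The function name and
 * strategy argument below are hypothetical.
 */
example_rawio(bp, strat)
	register struct buf *bp;
	void (*strat)();
{

	vmapbuf(bp);		/* double-map the user pages into phys_map */
	(*strat)(bp);		/* start the transfer on the kernel mapping */
	biowait(bp);		/* wait for the transfer to complete */
	vunmapbuf(bp);		/* tear down the temporary mapping */
}
#endif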