xref: /original-bsd/sys/i386/i386/vm_machdep.c (revision 23c6a147)
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * %sccs.include.386.c%
 *
 *	@(#)vm_machdep.c	5.1 (Berkeley) 04/24/90
 */


#include "pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "proc.h"
#include "cmap.h"
#include "mount.h"
#include "vm.h"
#include "text.h"

#include "buf.h"

#include "dbg.h"
/*
 * Set a red zone in the kernel stack after the u. area.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
	/*
	 * Eventually do this by setting up an expand-down stack
	 * segment for the ss0: selector, allowing stack access down
	 * to the top of the u. area.  This means, though, that
	 * protection violations need to be handled through a double
	 * fault exception that must do an integral task switch to a
	 * known good context, within which a dump can be taken.  A
	 * sensible scheme might be to save the initial context used
	 * by sched (which has physical memory mapped 1:1 at the
	 * bottom) and take the dump while still in mapped mode.
	 */
}
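
/*
 * Illustrative only (not in the original): until the expand-down
 * segment scheme sketched above exists, a red zone could be
 * approximated the way other ports do it, by invalidating the guard
 * page just below the u. area so that kernel stack overflow faults
 * immediately.  A minimal sketch, assuming `pte' maps that guard page:
 */
#ifdef notdef
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
	*(u_int *)pte &= ~PG_V;		/* guard page now faults on access */
	load_cr3(_cr3());		/* flush any stale translation */
}
#endif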

/*
 * Check for valid program size.
 * NB - Check data and data growth separately as they may overflow
 * when summed together.
 */
chksize(ts, ids, uds, ss)
	unsigned ts, ids, uds, ss;
{
	extern unsigned maxtsize;

	if (ctob(ts) > maxtsize ||
	    ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
		u.u_error = ENOMEM;
		return (1);
	}
	return (0);
}
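
/*
 * Worked example of the overflow noted in the comment above
 * (hypothetical numbers, assuming 4096-byte clicks so that
 * ctob(x) == x << 12, and 32-bit unsigned arithmetic): with
 * ids == uds == 0x80000 clicks, ctob(ids) == ctob(uds) == 0x80000000
 * and each trips its own limit check, but ctob(ids + uds) ==
 * 0x100000 << 12 wraps to 0 and would slip past a check on the
 * sum alone.
 */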

/*
 * Validate `size' new PTEs starting at virtual page `v'.  The 386
 * has no per-page TLB invalidate, so simply reload %cr3 to flush
 * the entire translation buffer.
 */
/*ARGSUSED*/
newptes(pte, v, size)
	struct pte *pte;
	u_int v;
	register int size;
{
#ifdef lint
	pte = pte;
#endif
	load_cr3(_cr3());
}

/*
 * Change protection codes of the text segment.
 * We have to flush the translation buffer since this affects the
 * virtual memory mapping of the current process.
 */
chgprot(addr, tprot)
	caddr_t addr;
	long tprot;
{
	unsigned v;
	int tp;
	register struct pte *pte;
	register struct cmap *c;

	v = clbase(btop(addr));
	if (!isatsv(u.u_procp, v)) {
		u.u_error = EFAULT;
		return (0);
	}
	tp = vtotp(u.u_procp, v);
	pte = tptopte(u.u_procp, tp);
	if (pte->pg_fod == 0 && pte->pg_pfnum) {
		c = &cmap[pgtocm(pte->pg_pfnum)];
		if (c->c_blkno && c->c_mdev != MSWAPX)
			munhash(mount[c->c_mdev].m_dev,
			    (daddr_t)(u_long)c->c_blkno);
	}
	*(u_int *)pte &= ~PG_PROT;
	*(u_int *)pte |= tprot;
	load_cr3(_cr3());
	return (1);
}
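
/*
 * Hypothetical illustration (not in the original): chgprot is the
 * sort of hook a ptrace-style text write would use, making the page
 * writable long enough to deposit a breakpoint and then restoring
 * read-only protection.  PG_UW/PG_URKR stand in for whatever
 * protection codes the port defines, and poke_text() is an assumed
 * helper.
 */
#ifdef notdef
chgprot_demo(addr, ins)
	caddr_t addr;
	int ins;
{
	if (chgprot(addr, PG_UW) == 0)		/* make text writable */
		return (0);
	poke_text(addr, ins);			/* assumed write step */
	return (chgprot(addr, PG_URKR));	/* back to read-only */
}
#endif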

settprot(tprot)
	long tprot;
{
	register u_int *ptaddr, i;

	ptaddr = (u_int *)u.u_procp->p_p0br;
	for (i = 0; i < u.u_tsize; i++) {
		ptaddr[i] &= ~PG_PROT;
		ptaddr[i] |= tprot;
	}
	load_cr3(_cr3());
}

/*
 * Simulate the effect of the VAX region length registers.
 * The one case where we must do anything is if a region has shrunk.
 * In that case we must invalidate all the PTEs for the no longer
 * valid VAs.
 */
setptlr(region, nlen)
	int region, nlen;
{
	register struct pte *pte;
	register int change;
	int olen;

	if (region == 0) {
		olen = u.u_pcb.pcb_p0lr;
		u.u_pcb.pcb_p0lr = nlen;
	} else {
		olen = P1PAGES - u.u_pcb.pcb_p1lr;
		u.u_pcb.pcb_p1lr = nlen;
		nlen = P1PAGES - nlen;
	}
	/*pg("setptlr(%x,%x), was %d", region, nlen, olen);*/
	if ((change = olen - nlen) <= 0)
		return;
	if (region == 0)
		pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
	else
		pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
	/*printf("p0b %x p0l %x", u.u_pcb.pcb_p0br, u.u_pcb.pcb_p0lr);
	printf("p1b %x p1l %x pte %x", u.u_pcb.pcb_p1br, u.u_pcb.pcb_p1lr, pte);*/
	do {
		*(u_int *)pte++ = 0;
	} while (--change);
	/* short cut newptes */
	load_cr3(_cr3());
}
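
/*
 * Hypothetical illustration (not in the original): if expand() were
 * to shrink the P0 (text+data) region from 100 page-table entries to
 * 60, the call below would zap entries 60..99 so that the stale
 * virtual addresses fault instead of referencing freed pages.
 */
#ifdef notdef
setptlr_demo()
{
	u.u_pcb.pcb_p0lr = 100;		/* assumed old region length */
	setptlr(0, 60);			/* shrink; invalidates PTEs 60..99 */
}
#endif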

/*
 * Map `size' bytes of physical memory starting at `paddr' into
 * kernel VA space using PTEs starting at `pte'.  Read/write and
 * cache-inhibit status are specified by `prot'.
 */
physaccess(pte, paddr, size, prot)
	register struct pte *pte;
	caddr_t paddr;
	register int size;
	int prot;
{
	register u_int page;

	page = (u_int)paddr & PG_FRAME;
	for (size = btoc(size); size; size--) {
		*(int *)pte = PG_V | prot | page;
		page += NBPG;
		pte++;
	}
	load_cr3(_cr3());
}
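
/*
 * Hypothetical illustration (not in the original): map the 64KB VGA
 * frame buffer at physical address 0xA0000 into kernel VA space,
 * writable and cache-inhibited.  `vgapte' is an assumed pointer to
 * reserved Sysmap PTEs, and PG_KW/PG_N are assumed names for the
 * kernel-write and cache-inhibit bits.
 */
#ifdef notdef
physaccess_demo(vgapte)
	register struct pte *vgapte;
{
	physaccess(vgapte, (caddr_t)0xA0000, 64 * 1024, PG_KW | PG_N);
}
#endif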

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = &Sysmap[btop(from)];
	tpte = &Sysmap[btop(to)];
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = 0;
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	load_cr3(_cr3());
}
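
/*
 * Hypothetical illustration (not in the original), in the style of
 * allocbuf(): when resizing buffer-cache buffers, the pages backing
 * the tail of one buffer can be slid onto another buffer's address
 * range, one click at a time, without copying the data.
 */
#ifdef notdef
pagemove_demo(from, to)
	caddr_t from, to;
{
	pagemove(from, to, CLBYTES);	/* one click's worth of pages */
}
#endif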

/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return (0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return (1);
	return (0);
}

probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return (0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize)) {
		dprintf(DPHYS, "vtopte %x %x\n",
		    vtopte(p, page), *(int *)vtopte(p, page));
		return ((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	}
	return (0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 *     (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	dprintf(DPHYS, "kernacc %x count %d rw %d", addr, count, rw);
	if (count <= 0)
		return (0);
	pde = (struct pde *)
	    ((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0) {
			dprintf(DPHYS, "nope pde %x, idx %x\n", pde, ix);
			return (0);
		}
	/* 0xfe000000 is the kernel virtual base address on this port */
	ix = btop(addr - 0xfe000000);
	cnt = btop(addr - 0xfe000000 + count + NBPG - 1);
	if (cnt > (int)&Syssize) {
		dprintf(DPHYS, "nope cnt %x\n", cnt);
		return (0);
	}
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/) {
			dprintf(DPHYS, "nope pte %x %x, idx %x\n",
			    pte, *(int *)pte, ix);
			return (0);
		}
	dprintf(DPHYS, "yup\n");
	return (1);
}

useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	dprintf(DPHYS, "useracc %x count %d rw %d", addr, count, rw);
	if (count <= 0)
		return (0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0) {
			dprintf(DPHYS, "nope %x\n", addr2);
			return (0);
		}
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	dprintf(DPHYS, "yup\n");
	return (1);
}
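
/*
 * Hypothetical illustration (not in the original), in the style of
 * physio(): before starting raw IO on a user buffer, verify it is
 * addressable for the direction of the transfer.  Note the inversion:
 * a device read (B_READ) writes into user memory, so it must probe
 * for writability.
 */
#ifdef notdef
useracc_demo(bp)
	register struct buf *bp;
{
	if (useracc((u_int)bp->b_un.b_addr, bp->b_bcount,
	    (bp->b_flags & B_READ) ? B_WRITE : B_READ) == 0) {
		u.u_error = EFAULT;
		return (0);
	}
	return (1);
}
#endif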

/*
 * Convert kernel VA to physical address
 */
kvtop(addr)
	register u_int addr;
{
	register int pf;

	pf = Sysmap[btop(addr - 0xfe000000)].pg_pfnum;
	if (pf == 0)
		panic("kvtop: zero page frame");
	return ((u_int)ptob(pf) + (addr & PGOFSET));
}
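
/*
 * Hypothetical illustration (not in the original): a driver
 * programming an ISA DMA controller must hand the hardware a
 * physical address, not a kernel virtual one.  dma_setup() is an
 * assumed helper.
 */
#ifdef notdef
kvtop_demo(buf, len)
	caddr_t buf;
	int len;
{
	dma_setup(kvtop((u_int)buf), len);	/* assumed driver routine */
}
#endif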

struct pde *
vtopde(p, va)
	register struct proc *p;
	register u_int va;
{
	register struct pde *pde;

	pde = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
	return (pde + ((va & PD_MASK) >> PD_SHIFT));
}

/*
 * Return the page frame number of the process's page directory,
 * which sits just past its page tables in the kernel map.
 */
initcr3(p)
	register struct proc *p;
{
	return ((int)Usrptmap[btokmx(p->p_p0br) + p->p_szpt].pg_pfnum);
}

/*
 * Initialize the page directory table to reflect the PTEs in Usrptmap.
 * The page directory table address is given by the Usrptmap index of
 * p_szpt.  [Used by vgetpt for kernel mode entries, and by ptexpand
 * for user mode entries.]
 */
initpdt(p, usr)
	register struct proc *p;
	int usr;
{
	register int i, k, sz;
	register struct pde *pde, *toppde;
	extern struct pde *vtopde();
	extern Sysbase;

	/*pg("initpdt");*/
	if (!usr) {
		/* clear entire map */
		pde = vtopde(p, 0);
		bzero(pde, NBPG);
		/* map kernel */
		pde = vtopde(p, &Sysbase);
		for (i = 0; i < 5; i++, pde++) {
			*(int *)pde = PG_UW | PG_V;
			pde->pd_pfnum = btoc((unsigned)Sysmap & ~0xfe000000) + i;
		}
		/* map u dot */
		pde = vtopde(p, &u);
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[btokmx(p->p_addr)].pg_pfnum;
		/*printf("%d.u. pde %x pfnum %x virt %x\n", p->p_pid, pde,
		pde->pd_pfnum, p->p_addr);*/
		return;
	}

	/* otherwise, fill in user map */
	k = btokmx(p->p_p0br);
	pde = vtopde(p, 0);
	toppde = vtopde(p, &u);

	/* text and data */
	sz = ctopt(p->p_tsize + p->p_dsize);
	dprintf(DEXPAND, "textdata 0 to %d\n", sz - 1);
	for (i = 0; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
		/*dprintf(DEXPAND,"%d.pde %x pf %x\n", p->p_pid, pde, *(int *)pde);*/
	}
	/*
	 * Bogus!  The kernelmap may map unused PT pages
	 * (since we don't shrink PTs) so we need to skip over
	 * those PDEs.  We should really free the unused PT
	 * pages in expand().
	 */
	sz += ctopt(p->p_ssize + UPAGES);
	if (sz < p->p_szpt)
		k += p->p_szpt - sz;
	/* hole */
	sz = NPTEPG - ctopt(p->p_ssize + UPAGES + btoc(&Sysbase));
	dprintf(DEXPAND, "zero %d upto %d\n", i, sz - 1);
	/*
	 * Definite bug here... this does not hit all entries, but the
	 * point is moot due to the bzero above.  XXX
	 */
	for ( ; i < sz; i++, pde++) {
		*(int *)pde = 0;
		/*pg("pde %x pf %x", pde, *(int *)pde);*/
	}
	/* stack and u-area */
	sz = NPTEPG - ctopt(UPAGES + btoc(&Sysbase));
	dprintf(DEXPAND, "stack %d upto %d\n", i, sz - 1);
	for ( ; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
		/*pg("pde %x pf %x", pde, *(int *)pde);*/
	}
}

#ifdef notdef
/*
 * Allocate wired-down, non-paged, cache-inhibited pages in kernel
 * virtual memory and clear them
 */
caddr_t
cimemall(n)
	int n;
{
	register int npg, a;
	register struct pte *pte;
	extern struct map *kernelmap;

	npg = clrnd(btoc(n));
	a = rmalloc(kernelmap, (long)npg);
	if (a == 0)
		return ((caddr_t)0);
	pte = &Usrptmap[a];
	(void) vmemall(pte, npg, &proc[0], CSYS);
	while (--npg >= 0) {
		*(int *)pte |= (PG_V|PG_KW|PG_CI);
		clearseg((unsigned)pte->pg_pfnum);
		pte++;
	}
	TBIAS();
	return ((caddr_t)kmxtob(a));
}
#endif

extern char usrio[];
extern struct pte Usriomap[];
struct map *useriomap;
int usriowanted;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap").
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf, a;
	register caddr_t addr;
	register struct pte *pte, *iopte;
	register long flags = bp->b_flags;
	struct proc *p;
	int off, s;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	/*
	 * Find PTEs for the area to be mapped
	 */
	p = flags & B_DIRTY ? &proc[2] : bp->b_proc;
	addr = bp->b_un.b_addr;
	if (flags & B_UAREA)
		pte = &p->p_addr[btop(addr)];
	else if (flags & B_PAGET)
		pte = &Usrptmap[btokmx((struct pte *)addr)];
	else
		pte = vtopte(p, btop(addr));
	/*
	 * Allocate some kernel PTEs and load them
	 */
	off = (int)addr & PGOFSET;
	npf = btoc(bp->b_bcount + off);
	s = splbio();
	while ((a = rmalloc(useriomap, npf)) == 0) {
		usriowanted = 1;
		sleep((caddr_t)useriomap, PSWP);
	}
	splx(s);
	iopte = &Usriomap[a];
	addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
	a = btop(addr);
	while (npf--) {
		mapin(iopte, a, pte->pg_pfnum, PG_V);
		iopte++, pte++;
		a++;
	}
}
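
/*
 * Hypothetical illustration (not in the original), in the style of
 * physio(): vmapbuf/vunmapbuf bracket the driver strategy call, so
 * the driver sees a kernel-addressable buffer for the whole transfer.
 * iowait() is assumed to block until the IO completes.
 */
#ifdef notdef
physio_fragment(strat, bp)
	int (*strat)();
	register struct buf *bp;
{
	vmapbuf(bp);
	(*strat)(bp);
	iowait(bp);		/* assumed wait for completion */
	vunmapbuf(bp);
}
#endif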

/*
 * Free the IO map PTEs associated with this IO operation.
 * We also invalidate the TLB entries.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int a, npf;
	register caddr_t addr = bp->b_un.b_addr;
	register struct pte *pte;
	int s;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	a = (int)(addr - usrio) >> PGSHIFT;
	npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
	s = splbio();
	rmfree(useriomap, npf, a);
	if (usriowanted) {
		usriowanted = 0;
		wakeup((caddr_t)useriomap);
	}
	splx(s);
	pte = &Usriomap[a];
	while (npf--) {
		*(int *)pte = 0;
		addr += NBPG;
		pte++;
	}
	/*
	 * If we just completed a dirty page push, we must reconstruct
	 * the original b_addr since cleanup() needs it.
	 */
	if (bp->b_flags & B_DIRTY) {
		a = ((bp - swbuf) * CLSIZE) * KLMAX;
		bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], a));
	}
	load_cr3(_cr3());
}