xref: /original-bsd/sys/i386/i386/vm_machdep.c (revision e718337e)
/*-
 * Copyright (c) 1990 The Regents of the University of California.
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the University of Utah, and William Jolitz.
 *
 * %sccs.include.386.c%
 *
 *	@(#)vm_machdep.c	5.5 (Berkeley) 11/25/90
 */

/*
 * Copyright (c) 1989, 1990 William F. Jolitz
 */

/*
 * Copyright (c) 1988 University of Utah.
 * All rights reserved.  The Utah Software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	Utah $Hdr: vm_machdep.c 1.16.1.1 89/06/23$
 */
/*
 * Copyright (c) 1982, 1986 Regents of the University of California.
 * All rights reserved.  The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 *
 *	@(#)vm_machdep.c	7.1 (Berkeley) 6/5/86
 */

#include "pte.h"

#include "param.h"
#include "systm.h"
#include "dir.h"
#include "user.h"
#include "proc.h"
#include "cmap.h"
#include "mount.h"
#include "vm.h"
#include "text.h"

#include "buf.h"

/*
 * Set a red zone in the kernel stack after the u. area.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
/* eventually do this by setting up an expand-down stack segment
   for ss0: selector, allowing stack access down to top of u.
   this means though that protection violations need to be handled
   thru a double fault exception that must do an integral task
   switch to a known good context, within which a dump can be
   taken. a sensible scheme might be to save the initial context
   used by sched (that has physical memory mapped 1:1 at bottom)
   and take the dump while still in mapped mode */
}
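
#ifdef notdef
/*
 * Illustrative sketch only, not part of the original sources: until the
 * expand-down ss0 scheme described above is implemented, a red zone can be
 * approximated by clearing the valid bit in the PTE of the page just below
 * the u. area, so a kernel stack overrun takes a fault instead of silently
 * corrupting whatever lies beneath it.  The pte/vaddr arguments are assumed
 * to describe that guard page.
 */
setredzone(pte, vaddr)
	u_short *pte;
	caddr_t vaddr;
{
	*pte &= ~PG_V;			/* invalidate the guard page */
	load_cr3(u.u_pcb.pcb_ptd);	/* flush any stale translation */
}
#endif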

/*
 * Check for valid program size.
 * NB - Check data and data growth separately as they may overflow
 * when summed together.
 */
chksize(ts, ids, uds, ss)
	unsigned ts, ids, uds, ss;
{
	extern unsigned maxtsize;

	if (ctob(ts) > maxtsize ||
	    ctob(ids) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ids + uds) > u.u_rlimit[RLIMIT_DATA].rlim_cur ||
	    ctob(ss) > u.u_rlimit[RLIMIT_STACK].rlim_cur) {
		u.u_error = ENOMEM;
		return (1);
	}
	return (0);
}
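
/*
 * Worked example (illustrative, not from the original sources): with the
 * data limit at 0x7fffffff, an ids just under 2GB passes its own ctob()
 * test and a uds just over 2GB fails its own test, yet ctob(ids + uds)
 * wraps past 2^32 to zero and would slip through a summed-only check;
 * hence the three separate RLIMIT_DATA comparisons above.  A hypothetical
 * caller in the expand/exec path might look like:
 *
 *	if (chksize(u.u_tsize, u.u_dsize, btoc(incr), u.u_ssize))
 *		return;		(u.u_error is already set to ENOMEM)
 */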

/*ARGSUSED*/
newptes(pte, v, size)
	struct pte *pte;
	u_int v;
	register int size;
{
	register caddr_t a;

#ifdef lint
	pte = pte;
#endif
	/*
	 * The PTEs have already been updated in place; reloading %cr3
	 * is all that is needed to flush the stale TLB entries.
	 */
	load_cr3(u.u_pcb.pcb_ptd);
}

/*
 * Change protection codes of text segment.
 * Have to flush the translation buffer since this affects the
 * virtual memory mapping of the current process.
 */
chgprot(addr, tprot)
	caddr_t addr;
	long tprot;
{
	unsigned v;
	int tp;
	register struct pte *pte;
	register struct cmap *c;

	v = clbase(btop(addr));
	if (!isatsv(u.u_procp, v)) {
		u.u_error = EFAULT;
		return (0);
	}
	tp = vtotp(u.u_procp, v);
	pte = tptopte(u.u_procp, tp);
	if (pte->pg_fod == 0 && pte->pg_pfnum) {
		c = &cmap[pgtocm(pte->pg_pfnum)];
		if (c->c_blkno && c->c_mdev != MSWAPX)
			munhash(mount[c->c_mdev].m_dev,
			    (daddr_t)(u_long)c->c_blkno);
	}
	*(u_int *)pte &= ~PG_PROT;
	*(u_int *)pte |= tprot;
	load_cr3(u.u_pcb.pcb_ptd);
	return (1);
}

/*
 * Set the protection on every page of the text segment, then flush
 * the TLB so the change takes effect for the current process.
 */
settprot(tprot)
	long tprot;
{
	register u_int *ptaddr, i;

	ptaddr = (u_int *)u.u_procp->p_p0br;
	for (i = 0; i < u.u_tsize; i++) {
		ptaddr[i] &= ~PG_PROT;
		ptaddr[i] |= tprot;
	}
	load_cr3(u.u_pcb.pcb_ptd);
}
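
/*
 * Example (hypothetical caller; the protection names are assumptions and
 * may differ in pte.h): ptrace-style text patching makes one page writable,
 * stores the new instruction, and restores read-only protection:
 *
 *	chgprot(addr, (long)PG_UW);	writable for the patch
 *	...store the breakpoint instruction...
 *	chgprot(addr, (long)PG_URKR);	back to read-only
 *
 * settprot() makes the same kind of change, but to every text page at once.
 */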

/*
 * Simulate effect of VAX region length registers.
 * The one case where we must do anything is if a region has shrunk.
 * In that case we must invalidate all the PTEs for the no longer valid VAs.
 */
setptlr(region, nlen)
	int region, nlen;
{
	register struct pte *pte;
	register int change;
	int olen;

	if (region == 0) {
		olen = u.u_pcb.pcb_p0lr;
		u.u_pcb.pcb_p0lr = nlen;
	} else {
		olen = P1PAGES - u.u_pcb.pcb_p1lr;
		u.u_pcb.pcb_p1lr = nlen;
		nlen = P1PAGES - nlen;
	}
	if ((change = olen - nlen) <= 0)
		return;
	if (region == 0)
		pte = u.u_pcb.pcb_p0br + u.u_pcb.pcb_p0lr;
	else
		pte = u.u_pcb.pcb_p1br + u.u_pcb.pcb_p1lr - change;
	do {
		*(u_int *)pte++ = 0;
	} while (--change);
	/* short cut newptes */
	load_cr3(u.u_pcb.pcb_ptd);
}

/*
 * Map `size' bytes of physical memory starting at `paddr' into
 * kernel VA space using PTEs starting at `pte'.  Read/write and
 * cache-inhibit status are specified by `prot'.
 */
physaccess(pte, paddr, size, prot)
	register struct pte *pte;
	caddr_t paddr;
	register int size;
	int prot;
{
	register u_int page;

	page = (u_int)paddr & PG_FRAME;
	for (size = btoc(size); size; size--) {
		*(int *)pte = PG_V | prot | page;
		page += NBPG;
		pte++;
	}
	load_cr3(u.u_pcb.pcb_ptd);
}
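
/*
 * Example (hypothetical caller; the PTE pointer and addresses are made up):
 * a console or display driver could hand a reserved range of kernel PTEs to
 * physaccess() to map a device frame buffer at physical 0xA0000, e.g.
 *
 *	physaccess(vgapte, (caddr_t)0xA0000, 64*1024, PG_KW);
 *
 * after which the frame buffer is addressable at the kernel VA those PTEs
 * cover.
 */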

/*
 * Move pages from one kernel virtual address to another.
 * Both addresses are assumed to reside in the Sysmap,
 * and size must be a multiple of CLBYTES.
 */
pagemove(from, to, size)
	register caddr_t from, to;
	int size;
{
	register struct pte *fpte, *tpte;

	if (size % CLBYTES)
		panic("pagemove");
	fpte = &Sysmap[btop(from-0xfe000000)];
	tpte = &Sysmap[btop(to-0xfe000000)];
	while (size > 0) {
		*tpte++ = *fpte;
		*(int *)fpte++ = 0;
		from += NBPG;
		to += NBPG;
		size -= NBPG;
	}
	load_cr3(u.u_pcb.pcb_ptd);
}
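
/*
 * Example (hypothetical caller): the buffer cache uses a routine like this
 * when resizing buffers, stealing the pages backing one buffer for another:
 *
 *	pagemove(otherbp->b_un.b_addr, bp->b_un.b_addr, CLBYTES);
 *
 * Only the Sysmap PTEs move; no data is copied.
 */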

/*
 * The probe[rw] routines should probably be redone in assembler
 * for efficiency.
 *
 * prober/probew return nonzero if the page containing `addr' lies within
 * the user's data or stack region and (for probew) is user-writable.
 */
prober(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return(1);
	return(0);
}

probew(addr)
	register u_int addr;
{
	register int page;
	register struct proc *p;

	if (addr >= USRSTACK)
		return(0);
	p = u.u_procp;
	page = btop(addr);
	if (page < dptov(p, p->p_dsize) || page > sptov(p, p->p_ssize))
		return((*(int *)vtopte(p, page) & PG_PROT) == PG_UW);
	return(0);
}

/*
 * NB: assumes a physically contiguous kernel page table
 *     (makes life a LOT simpler).
 */
kernacc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register struct pde *pde;
	register struct pte *pte;
	register int ix, cnt;
	extern long Syssize;

	if (count <= 0)
		return(0);
	pde = (struct pde *)((u_int)u.u_procp->p_p0br + u.u_procp->p_szpt * NBPG);
	ix = (addr & PD_MASK) >> PD_SHIFT;
	cnt = ((addr + count + (1 << PD_SHIFT) - 1) & PD_MASK) >> PD_SHIFT;
	cnt -= ix;
	for (pde += ix; cnt; cnt--, pde++)
		if (pde->pd_v == 0)
			return(0);
	ix = btop(addr-0xfe000000);
	cnt = btop(addr-0xfe000000+count+NBPG-1);
	if (cnt > (int)&Syssize)
		return(0);
	cnt -= ix;
	for (pte = &Sysmap[ix]; cnt; cnt--, pte++)
		if (pte->pg_v == 0 /*|| (rw == B_WRITE && pte->pg_prot == 1)*/)
			return(0);
	return(1);
}

useracc(addr, count, rw)
	register u_int addr;
	int count, rw;
{
	register int (*func)();
	register u_int addr2;
	extern int prober(), probew();

	if (count <= 0)
		return(0);
	addr2 = addr;
	addr += count;
	func = (rw == B_READ) ? prober : probew;
	do {
		if ((*func)(addr2) == 0)
			return(0);
		addr2 = (addr2 + NBPG) & ~PGOFSET;
	} while (addr2 < addr);
	return(1);
}
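
/*
 * Example (hypothetical caller): a character driver doing raw I/O would
 * verify the user's buffer page by page before starting the transfer:
 *
 *	if (useracc(bp->b_un.b_addr, bp->b_bcount,
 *	    (bp->b_flags & B_READ) ? B_WRITE : B_READ) == 0) {
 *		u.u_error = EFAULT;
 *		return;
 *	}
 *
 * Note the direction flip: a device read deposits data into user memory,
 * so the buffer must be checked for writability, and vice versa.
 */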

/*
 * Convert kernel VA to physical address
 */
kvtop(addr)
	register u_int addr;
{
	register int pf;

	pf = Sysmap[btop(addr-0xfe000000)].pg_pfnum;
	if (pf == 0)
		panic("kvtop: zero page frame");
	return((u_int)ptob(pf) + (addr & PGOFSET));
}
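
/*
 * Worked example (illustrative): with the kernel based at VA 0xfe000000,
 * a VA of 0xfe003456 indexes Sysmap entry btop(0x3456) == 3; if that PTE
 * holds page frame 0x123, the result is ptob(0x123) + 0x456 == 0x123456.
 * A driver needing a DMA address for a kernel buffer would use it as
 *
 *	physaddr = kvtop(bp->b_un.b_addr);	(hypothetical caller)
 */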

/*
 * Return a pointer to the page directory entry that maps virtual
 * address `va' in process `p'.  The page directory sits just past
 * the process's page tables (p_p0br + p_szpt pages).
 */
struct pde *
vtopde(p, va)
	register struct proc *p;
	register u_int va;
{
	register struct pde *pde;

	pde = (struct pde *)((u_int)p->p_p0br + p->p_szpt * NBPG);
	return(pde + ((va & PD_MASK) >> PD_SHIFT));
}
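
/*
 * Worked example (illustrative): each PDE maps 4MB of VA, so the index is
 * simply va >> PD_SHIFT (PD_SHIFT is 22 here).  The kernel base 0xfe000000
 * lands at index 0xfe000000 >> 22 == 1016, and user VA 0 at index 0.
 */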

/*
 * Return the physical address of the process's page directory,
 * suitable for loading into %cr3.
 */
initcr3(p)
	register struct proc *p;
{
	return(ctob(Usrptmap[btokmx(p->p_p0br+p->p_szpt*NPTEPG)].pg_pfnum));
	/*return((int)Usrptmap[btokmx(p->p_p0br) + p->p_szpt].pg_pfnum);*/
}

/*
 * Initialize page directory table to reflect PTEs in Usrptmap.
 * Page directory table address is given by Usrptmap index of p_szpt.
 * [used by vgetpt for kernel mode entries, and ptexpand for user mode entries]
 */
initpdt(p)
	register struct proc *p;
{
	register int i, k, sz;
	register struct pde *pde, *toppde;
	extern struct pde *vtopde();
	extern Sysbase;

	/* clear entire map */
	pde = vtopde(p, 0);
	/*bzero(pde, NBPG); */
	/* map kernel */
	pde = vtopde(p, &Sysbase);
	for (i = 0; i < 5; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = btoc((unsigned) Sysmap & ~0xfe000000)+i;
	}
	/* map u dot */
	pde = vtopde(p, &u);
	*(int *)pde = PG_UW | PG_V;
	pde->pd_pfnum = Usrptmap[btokmx(p->p_addr)].pg_pfnum;

	/* otherwise, fill in user map */
	k = btokmx(p->p_p0br);
	pde = vtopde(p, 0);
	toppde = vtopde(p, &u);

	/* text and data */
	sz = ctopt(p->p_tsize + p->p_dsize);
	for (i = 0; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
	}
	/*
	 * Bogus!  The kernelmap may map unused PT pages
	 * (since we don't shrink PTs) so we need to skip over
	 * those PDEs.  We should really free the unused PT
	 * pages in expand().
	 */
	sz += ctopt(p->p_ssize+UPAGES);
	if (sz < p->p_szpt)
		k += p->p_szpt - sz;
	/* hole */
	sz = NPTEPG - ctopt(p->p_ssize + UPAGES + btoc(&Sysbase));
	for ( ; i < sz; i++, pde++)
		*(int *)pde = 0;
	/* stack and u-area */
	sz = NPTEPG - ctopt(UPAGES + btoc(&Sysbase));
	for ( ; i < sz; i++, pde++) {
		*(int *)pde = PG_UW | PG_V;
		pde->pd_pfnum = Usrptmap[k++].pg_pfnum;
	}
	return(initcr3(p));
}

#ifdef notdef
/*
 * Allocate wired-down, non-paged, cache-inhibited pages in kernel
 * virtual memory and clear them
 */
caddr_t
cimemall(n)
	int n;
{
	register int npg, a;
	register struct pte *pte;
	extern struct map *kernelmap;

	npg = clrnd(btoc(n));
	a = rmalloc(kernelmap, (long)npg);
	if (a == 0)
		return ((caddr_t)0);
	pte = &Usrptmap[a];
	(void) vmemall(pte, npg, &proc[0], CSYS);
	while (--npg >= 0) {
		*(int *)pte |= (PG_V|PG_KW|PG_CI);
		clearseg((unsigned)pte->pg_pfnum);
		pte++;
	}
	TBIAS();
	return ((caddr_t)kmxtob(a));
}
#endif

extern char usrio[];
extern struct pte Usriomap[];
struct map *useriomap;
int usriowanted;

/*
 * Map an IO request into kernel virtual address space.  Requests fall into
 * one of five categories:
 *
 *	B_PHYS|B_UAREA:	User u-area swap.
 *			Address is relative to start of u-area (p_addr).
 *	B_PHYS|B_PAGET:	User page table swap.
 *			Address is a kernel VA in usrpt (Usrptmap).
 *	B_PHYS|B_DIRTY:	Dirty page push.
 *			Address is a VA in proc2's address space.
 *	B_PHYS|B_PGIN:	Kernel pagein of user pages.
 *			Address is VA in user's address space.
 *	B_PHYS:		User "raw" IO request.
 *			Address is VA in user's address space.
 *
 * All requests are (re)mapped into kernel VA space via the useriomap
 * (a name with only slightly more meaning than "kernelmap").
 */
vmapbuf(bp)
	register struct buf *bp;
{
	register int npf, a;
	register caddr_t addr;
	register struct pte *pte, *iopte;
	register long flags = bp->b_flags;
	struct proc *p;
	int off, s;

	if ((flags & B_PHYS) == 0)
		panic("vmapbuf");
	/*
	 * Find PTEs for the area to be mapped
	 */
	p = flags&B_DIRTY ? &proc[2] : bp->b_proc;
	addr = bp->b_un.b_addr;
	if (flags & B_UAREA)
		pte = &p->p_addr[btop(addr)];
	else if (flags & B_PAGET)
		pte = &Usrptmap[btokmx((struct pte *)addr)];
	else
		pte = vtopte(p, btop(addr));
	/*
	 * Allocate some kernel PTEs and load them
	 */
	off = (int)addr & PGOFSET;
	npf = btoc(bp->b_bcount + off);
	s = splbio();
	while ((a = rmalloc(useriomap, npf)) == 0) {
		usriowanted = 1;
		sleep((caddr_t)useriomap, PSWP);
	}
	splx(s);
	iopte = &Usriomap[a];
	addr = bp->b_un.b_addr = (caddr_t)(usrio + (a << PGSHIFT)) + off;
	a = btop(addr);
	while (npf--) {
		mapin(iopte, a, pte->pg_pfnum, PG_V);
		iopte++, pte++;
		a++;
	}
	load_cr3(u.u_pcb.pcb_ptd);
}

/*
 * Free the io map PTEs associated with this IO operation.
 * We also invalidate the TLB entries.
 */
vunmapbuf(bp)
	register struct buf *bp;
{
	register int a, npf;
	register caddr_t addr = bp->b_un.b_addr;
	register struct pte *pte;
	int s;

	if ((bp->b_flags & B_PHYS) == 0)
		panic("vunmapbuf");
	a = (int)(addr - usrio) >> PGSHIFT;
	npf = btoc(bp->b_bcount + ((int)addr & PGOFSET));
	s = splbio();
	rmfree(useriomap, npf, a);
	if (usriowanted) {
		usriowanted = 0;
		wakeup((caddr_t)useriomap);
	}
	splx(s);
	pte = &Usriomap[a];
	while (npf--) {
		*(int *)pte = 0;
		addr += NBPG;
		pte++;
	}
	/*
	 * If we just completed a dirty page push, we must reconstruct
	 * the original b_addr since cleanup() needs it.
	 */
	if (bp->b_flags & B_DIRTY) {
		a = ((bp - swbuf) * CLSIZE) * KLMAX;
		bp->b_un.b_addr = (caddr_t)ctob(dptov(&proc[2], a));
	}
	load_cr3(u.u_pcb.pcb_ptd);
}
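
/*
 * Example (illustrative pairing, not from the original sources; `strategy'
 * is a placeholder for the driver's strategy routine): physio-style users
 * bracket the actual transfer with these two routines,
 *
 *	vmapbuf(bp);		map the pages into kernel VA, rewrite b_addr
 *	(*strategy)(bp);	start the I/O on the remapped buffer
 *	...wait for the I/O to complete...
 *	vunmapbuf(bp);		release the io map PTEs, restore b_addr
 */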