xref: /original-bsd/sys/sparc/sparc/pmap.c (revision 086b3864)
1 /*
2  * Copyright (c) 1992 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * This software was developed by the Computer Systems Engineering group
6  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7  * contributed to Berkeley.
8  *
9  * All advertising materials mentioning features or use of this software
10  * must display the following acknowledgement:
11  *	This product includes software developed by the University of
12  *	California, Lawrence Berkeley Laboratories.
13  *
14  * %sccs.include.redist.c%
15  *
16  *	@(#)pmap.c	7.3 (Berkeley) 10/11/92
17  *
18  * from: $Header: pmap.c,v 1.36 92/07/10 00:03:10 torek Exp $
19  */
20 
21 /*
22  * SPARC physical map management code.
23  * Does not function on multiprocessors (yet).
24  */
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/device.h>
29 #include <sys/proc.h>
30 #include <sys/malloc.h>
31 
32 #include <vm/vm.h>
33 #include <vm/vm_kern.h>
34 #include <vm/vm_prot.h>
35 #include <vm/vm_page.h>
36 
37 #include <machine/autoconf.h>
38 #include <machine/bsd_openprom.h>
39 #include <machine/cpu.h>
40 
41 #include <sparc/sparc/asm.h>
42 #include <sparc/sparc/cache.h>
43 #include <sparc/sparc/ctlreg.h>
44 
45 #ifdef DEBUG
46 #define PTE_BITS "\20\40V\37W\36S\35NC\33IO\32U\31M"
47 #endif
48 
49 extern struct promvec *promvec;
50 
51 /*
52  * The SPARCstation offers us the following challenges:
53  *
54  *   1. A virtual address cache.  This is, strictly speaking, not
55  *	part of the architecture, but the code below assumes one.
56  *	This is a write-through cache on the 4c and a write-back cache
57  *	on others.
58  *
59  *   2. An MMU that acts like a cache.  There is not enough space
60  *	in the MMU to map everything all the time.  Instead, we need
61  *	to load MMU with the `working set' of translations for each
62  *	process.
63  *
64  *   3.	Segmented virtual and physical spaces.  The upper 12 bits of
65  *	a virtual address (the virtual segment) index a segment table,
66  *	giving a physical segment.  The physical segment selects a
67  *	`Page Map Entry Group' (PMEG) and the virtual page number---the
68  *	next 5 or 6 bits of the virtual address---select the particular
69  *	`Page Map Entry' for the page.  We call the latter a PTE and
70  *	call each Page Map Entry Group a pmeg (for want of a better name).
71  *
72  *	Since there are no valid bits in the segment table, the only way
73  *	to have an invalid segment is to make one full pmeg of invalid PTEs.
74  *	We use the last one (since the ROM does as well).
75  *
76  *   4. Discontiguous physical pages.  The Mach VM expects physical pages
77  *	to be in one sequential lump.
78  *
79  *   5. The MMU is always on: it is not possible to disable it.  This is
80  *	mainly a startup hassle.
81  */
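
/*
 * For concreteness, a sun4c virtual address decomposes roughly as
 * sketched below.  The authoritative macros are VA_VSEG and VA_VPG
 * (and VSTOVA to go back); the widths shown assume 4 KB pages and
 * 64 PTEs per segment, and differ on the Sun-4.
 */
#if 0	/* illustration only */
	u_int va   = 0x00345678;
	u_int vseg = (va >> 18) & 0xfff;	/* upper 12 bits: virtual segment */
	u_int vpg  = (va >> 12) & 0x3f;		/* next 6 bits: page within segment */
	u_int off  = va & 0xfff;		/* low 12 bits: offset within page */
#endif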
82 
83 struct pmap_stats {
84 	int	ps_unlink_pvfirst;	/* # of pv_unlinks on head */
85 	int	ps_unlink_pvsearch;	/* # of pv_unlink searches */
86 	int	ps_changeprots;		/* # of calls to changeprot */
87 	int	ps_useless_changeprots;	/* # of changeprots for wiring */
88 	int	ps_enter_firstpv;	/* pv heads entered */
89 	int	ps_enter_secondpv;	/* pv nonheads entered */
90 	int	ps_useless_changewire;	/* useless wiring changes */
91 	int	ps_npg_prot_all;	/* # of active pages protected */
92 	int	ps_npg_prot_actual;	/* # pages actually affected */
93 } pmap_stats;
94 
95 #ifdef DEBUG
96 #define	PDB_CREATE	0x0001
97 #define	PDB_DESTROY	0x0002
98 #define	PDB_REMOVE	0x0004
99 #define	PDB_CHANGEPROT	0x0008
100 #define	PDB_ENTER	0x0010
101 
102 #define	PDB_MMU_ALLOC	0x0100
103 #define	PDB_MMU_STEAL	0x0200
104 #define	PDB_CTX_ALLOC	0x0400
105 #define	PDB_CTX_STEAL	0x0800
106 int	pmapdebug = 0x0;
107 #endif
108 
109 #define	splpmap() splbio()
110 
111 /*
112  * First and last managed physical addresses.
113  */
114 #if 0
115 vm_offset_t	vm_first_phys, vm_last_phys;
116 #define	managed(pa)	((pa) >= vm_first_phys && (pa) < vm_last_phys)
117 #else
118 vm_offset_t	vm_first_phys, vm_num_phys;
119 #define	managed(pa)	((unsigned)((pa) - vm_first_phys) < vm_num_phys)
120 #endif
121 
122 /*
123  * For each managed physical page, there is a list of all currently
124  * valid virtual mappings of that page.  Since there is usually one
125  * (or zero) mapping per page, the table begins with an initial entry,
126  * rather than a pointer; this head entry is empty iff its pv_pmap
127  * field is NULL.
128  *
129  * Note that there is one of these per machine-independent page (e.g.,
130  * there may be only one for every two hardware pages).  Since the virtual
131  * address is aligned on a page boundary, the low order bits are free
132  * for storing flags.  Only the head of each list has flags.
133  *
134  * THIS SHOULD BE PART OF THE CORE MAP
135  */
136 struct pvlist {
137 	struct	pvlist *pv_next;	/* next pvlist, if any */
138 	struct	pmap *pv_pmap;		/* pmap of this va */
139 	int	pv_va;			/* virtual address */
140 	int	pv_flags;		/* flags (below) */
141 };
142 
143 /*
144  * Flags in pv_flags.  Note that PV_MOD must be 1 and PV_REF must be 2
145  * since they must line up with the bits in the hardware PTEs (see pte.h).
146  */
147 #define PV_MOD	1		/* page modified */
148 #define PV_REF	2		/* page referenced */
149 #define PV_NC	4		/* page cannot be cached */
150 /* #define	PV_ALLF	7		-- all of the above */
151 
152 struct pvlist *pv_table;	/* array of entries, one per physical page */
153 
154 #define pvhead(pa)	(&pv_table[atop((pa) - vm_first_phys)])
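
/*
 * A minimal sketch of examining the mappings of a managed page pa
 * (the head entry is embedded in pv_table; any further entries are
 * allocated separately):
 */
#if 0	/* illustration only */
	struct pvlist *pv = pvhead(pa);

	if (pv->pv_pmap != NULL)	/* empty head <=> no mappings at all */
		for (; pv != NULL; pv = pv->pv_next)
			/* examine (pv->pv_pmap, pv->pv_va) */;
#endif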
155 
156 /*
157  * Each virtual segment within each pmap is either valid or invalid.
158  * It is valid if pm_npte[VA_VSEG(va)] is not 0.  This does not mean
159  * it is in the MMU, however; that is true iff pm_segmap[VA_VSEG(va)]
160  * does not point to the invalid PMEG.
161  *
162  * If a virtual segment is valid and loaded, the correct PTEs appear
163  * in the MMU only.  If it is valid and unloaded, the correct PTEs appear
164  * in the pm_pte[VA_VSEG(va)] only.  However, some effort is made to keep
165  * the software copies consistent enough with the MMU so that libkvm can
166  * do user address translations.  In particular, pv_changepte() and
167  * pmap_enu() maintain consistency, while less critical changes are
168  * not maintained.  pm_pte[VA_VSEG(va)] always points to space for those
169  * PTEs, unless this is the kernel pmap, in which case pm_pte[x] is not
170  * used (sigh).
171  *
172  * Each PMEG in the MMU is either free or contains PTEs corresponding to
173  * some pmap and virtual segment.  If it contains some PTEs, it also contains
174  * reference and modify bits that belong in the pv_table.  If we need
175  * to steal a PMEG from some process (if we need one and none are free)
176  * we must copy the ref and mod bits, and update pm_segmap in the other
177  * pmap to show that its virtual segment is no longer in the MMU.
178  *
179  * There are 128 PMEGs in a small Sun-4, of which only a few dozen are
180  * tied down permanently, leaving `about' 100 to be spread among
181  * running processes.  These are managed as an LRU cache.  Before
182  * calling the VM paging code for a user page fault, the fault handler
183  * calls mmu_load(pmap, va) to try to get a set of PTEs put into the
184  * MMU.  mmu_load will check the validity of the segment and tell whether
185  * it did something.
186  *
187  * Since I hate the name PMEG I call this data structure an `mmu entry'.
188  * Each mmuentry is on exactly one of three `usage' lists: free, LRU,
189  * or locked.  The LRU list is for user processes; the locked list is
190  * for kernel entries; both are doubly linked queues headed by `mmuhd's.
191  * The free list is a simple list, headed by a free list pointer.
192  */
193 struct mmuhd {
194 	struct	mmuentry *mh_next;
195 	struct	mmuentry *mh_prev;
196 };
197 struct mmuentry {
198 	struct	mmuentry *me_next;	/* queue (MUST BE FIRST) or next free */
199 	struct	mmuentry *me_prev;	/* queue (MUST BE FIRST) */
200 	struct	pmap *me_pmap;		/* pmap, if in use */
201 	struct	mmuentry *me_pmforw;	/* pmap pmeg chain */
202 	struct	mmuentry **me_pmback;	/* pmap pmeg chain */
203 	u_short	me_vseg;		/* virtual segment number in pmap */
204 	pmeg_t	me_pmeg;		/* hardware PMEG number */
205 };
206 struct mmuentry *mmuentry;	/* allocated in pmap_bootstrap */
207 
208 struct mmuentry *me_freelist;	/* free list (not a queue) */
209 struct mmuhd me_lru = {		/* LRU (user) entries */
210 	(struct mmuentry *)&me_lru, (struct mmuentry *)&me_lru
211 };
212 struct mmuhd me_locked = {	/* locked (kernel) entries */
213 	(struct mmuentry *)&me_locked, (struct mmuentry *)&me_locked
214 };
215 
216 int	seginval;		/* the invalid segment number */
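
/*
 * The three states of a virtual segment (see the comment above
 * struct mmuhd), in code form; pm is a user pmap here, since the
 * kernel pmap does not use pm_pte:
 */
#if 0	/* illustration only */
	int vs = VA_VSEG(va);

	if (pm->pm_npte[vs] == 0)
		/* invalid: no mappings in this segment at all */ ;
	else if (pm->pm_segmap[vs] == seginval)
		/* valid but unloaded: PTEs live in pm->pm_pte[vs] */ ;
	else
		/* valid and loaded: the MMU holds the PTEs */ ;
#endif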
217 
218 /*
219  * A context is simply a small number that dictates which set of 4096
220  * segment map entries the MMU uses.  The Sun 4c has eight such sets.
221  * These are allotted in an `almost MRU' fashion.
222  *
223  * Each context is either free or attached to a pmap.
224  *
225  * Since the virtual address cache is tagged by context, when we steal
226  * a context we have to flush (that part of) the cache.
227  */
228 union ctxinfo {
229 	union	ctxinfo *c_nextfree;	/* free list (if free) */
230 	struct	pmap *c_pmap;		/* pmap (if busy) */
231 };
232 union ctxinfo *ctxinfo;		/* allocated in pmap_bootstrap */
233 int	ncontext;
234 
235 union	ctxinfo *ctx_freelist;	/* context free list */
236 int	ctx_kick;		/* allocation rover when none free */
237 int	ctx_kickdir;		/* ctx_kick roves both directions */
238 
239 /* XXX need per-cpu vpage[]s (and vmempage, unless we lock in /dev/mem) */
240 caddr_t	vpage[2];		/* two reserved MD virtual pages */
241 caddr_t	vmempage;		/* one reserved MI vpage for /dev/mem */
242 caddr_t vdumppages;		/* 32KB worth of reserved dump pages */
243 
244 struct kpmap kernel_pmap_store;	/* the kernel's pmap */
245 
246 /*
247  * We need to know real physical memory ranges (for /dev/mem).
248  */
249 #define	MA_SIZE	32		/* size of memory descriptor arrays */
250 struct	memarr pmemarr[MA_SIZE];/* physical memory regions */
251 int	npmemarr;		/* number of entries in pmemarr */
252 
253 /*
254  * The following four global variables are set in pmap_bootstrap
255  * for the vm code to find.  This is Wrong.
256  */
257 vm_offset_t	avail_start;	/* first free physical page number */
258 vm_offset_t	avail_end;	/* last free physical page number */
259 vm_offset_t	virtual_avail;	/* first free virtual page number */
260 vm_offset_t	virtual_end;	/* last free virtual page number */
261 
262 /*
263  * pseudo-functions for mnemonic value
264 #ifdef notyet
265  * NB: setsegmap should be stba for 4c, but stha works and makes the
266  * code right for the Sun-4 as well.
267 #endif
268  */
269 #define	getcontext()		lduba(AC_CONTEXT, ASI_CONTROL)
270 #define	setcontext(c)		stba(AC_CONTEXT, ASI_CONTROL, c)
271 #ifdef notyet
272 #define	getsegmap(va)		lduha(va, ASI_SEGMAP)
273 #define	setsegmap(va, pmeg)	stha(va, ASI_SEGMAP, pmeg)
274 #else
275 #define	getsegmap(va)		lduba(va, ASI_SEGMAP)
276 #define	setsegmap(va, pmeg)	stba(va, ASI_SEGMAP, pmeg)
277 #endif
278 
279 #define	getpte(va)		lda(va, ASI_PTE)
280 #define	setpte(va, pte)		sta(va, ASI_PTE, pte)
281 
282 /*----------------------------------------------------------------*/
283 
284 #ifdef	sun4c
285 /*
286  * Translations from dense (contiguous) pseudo physical addresses
287  * (fed to the VM code, to keep it happy) to sparse (real, hardware)
288  * physical addresses.  We call the former `software' page frame
289  * numbers and the latter `hardware' page frame numbers.  The
290  * translation is done on a `per bank' basis.
291  *
292  * The HWTOSW and SWTOHW macros handle the actual translation.
293  * They are defined as no-ops on Sun-4s.
294  *
295  * SHOULD DO atop AND ptoa DIRECTLY IN THESE MACROS SINCE ALL CALLERS
296  * ALWAYS NEED THAT ANYWAY ... CAN JUST PRECOOK THE TABLES	(TODO)
297  *
298  * Since we cannot use the memory allocated to the ROM monitor, and
299  * this happens to be just under 64K, I have chosen a bank size of
300  * 64K.  This is necessary since all banks must be completely full.
301  * I have also chosen a physical memory limit of 128 MB.  The 4c is
302  * architecturally limited to 256 MB, but 128 MB is more than will
303  * fit on present hardware.
304  *
305  * XXX	FIX THIS: just make all of each bank available and then
306  *	take out the pages reserved to the monitor!!
307  */
308 #define MAXMEM 	(128 * 1024 * 1024)	/* no more than 128 MB phys mem */
309 #define NPGBANK	16			/* 2^4 pages per bank (64K / bank) */
310 #define	BSHIFT	4			/* log2(NPGBANK) */
311 #define BOFFSET	(NPGBANK - 1)
312 #define BTSIZE 	(MAXMEM / NBPG / NPGBANK)
313 
314 int	pmap_dtos[BTSIZE];		/* dense to sparse */
315 int	pmap_stod[BTSIZE];		/* sparse to dense */
316 
317 #define	HWTOSW(pg) (pmap_stod[(pg) >> BSHIFT] | ((pg) & BOFFSET))
318 #define	SWTOHW(pg) (pmap_dtos[(pg) >> BSHIFT] | ((pg) & BOFFSET))
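
/*
 * Worked example: with 4 KB pages, hardware page 0x1234 lies in
 * 64 KB bank 0x123 (0x1234 >> BSHIFT) at offset 4.  If init_translations
 * assigned that bank dense bank 7, then pmap_stod[0x123] == (7 << BSHIFT)
 * == 0x70, so HWTOSW(0x1234) == 0x70 | 4 == 0x74, and SWTOHW(0x74)
 * maps back to 0x1234.
 */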
319 
320 #ifdef DEBUG
321 struct	memarr pmap_ama[MA_SIZE];
322 int	pmap_nama;
323 #define ama pmap_ama
324 #endif
325 
326 /*
327  * init_translations sets up pmap_dtos[] and pmap_stod[], and
328  * returns the number of usable physical pages.
329  */
330 int
331 init_translations()
332 {
333 	register struct memarr *mp;
334 	register int n, nmem;
335 	register u_int vbank = 0, pbank, v, a;
336 	register u_int pages = 0, lost = 0;
337 #ifndef DEBUG
338 	struct memarr ama[MA_SIZE];	/* available memory array */
339 #endif
340 
341 	nmem = makememarr(ama, MA_SIZE, MEMARR_AVAILPHYS);
342 #ifdef DEBUG
343 	pmap_nama = nmem;
344 #endif
345 	for (mp = ama; --nmem >= 0; mp++) {
346 		a = mp->addr >> PGSHIFT;
347 		v = mp->len >> PGSHIFT;
348 		if ((n = a & BOFFSET) != 0) {
349 			/* round up to next bank */
350 			n = NPGBANK - n;
351 			if (v < n) {	/* not a whole bank: skip it */
352 				lost += v;
353 				continue;
354 			}
355 			lost += n;	/* lose n pages from front */
356 			a += n;
357 			v -= n;
358 		}
359 		n = v >> BSHIFT;	/* calculate number of banks */
360 		pbank = a >> BSHIFT;	/* and the bank itself */
361 		if (pbank + n >= BTSIZE)
362 			n = BTSIZE - pbank;
363 		pages += n;		/* off by a factor of 2^BSHIFT */
364 		lost += v - (n << BSHIFT);
365 		while (--n >= 0) {
366 			pmap_dtos[vbank] = pbank << BSHIFT;
367 			pmap_stod[pbank] = vbank << BSHIFT;
368 			pbank++;
369 			vbank++;
370 		}
371 	}
372 	/* adjust page count */
373 	pages <<= BSHIFT;
374 #ifdef DEBUG
375 	printf("note: lost %d pages in translation\n", lost);
376 #endif
377 	return (pages);
378 }
379 
380 #else /* sun4c */
381 
382 /*
383  * Pages are physically contiguous, and hardware PFN == software PFN.
384  *
385  * XXX assumes PAGE_SIZE == NBPG (???)
386  */
387 #define	HWTOSW(pg)	(pg)
388 #define	SWTOHW(pg)	(pg)
389 
390 #endif /* sun4c */
391 
392 /* update pv_flags given a valid pte */
393 #define	MR(pte) (((pte) >> PG_M_SHIFT) & (PV_MOD | PV_REF))
394 
395 /*----------------------------------------------------------------*/
396 
397 /*
398  * Agree with the monitor ROM as to how many MMU entries are
399  * to be reserved, and map all of its segments into all contexts.
400  *
401  * Unfortunately, while the Version 0 PROM had a nice linked list of
402  * taken virtual memory, the Version 2 PROM provides instead a convoluted
403  * description of *free* virtual memory.  Rather than invert this, we
404  * resort to two magic constants from the PROM vector description file.
405  */
406 int
407 mmu_reservemon(nmmu)
408 	register int nmmu;
409 {
410 	register u_int va, eva;
411 	register int mmuseg, i;
412 
413 	va = OPENPROM_STARTVADDR;
414 	eva = OPENPROM_ENDVADDR;
415 	while (va < eva) {
416 		mmuseg = getsegmap(va);
417 		if (mmuseg < nmmu)
418 			nmmu = mmuseg;
419 		for (i = ncontext; --i > 0;)
420 			(*promvec->pv_setctxt)(i, (caddr_t)va, mmuseg);
421 		if (mmuseg == seginval) {
422 			va += NBPSG;
423 			continue;
424 		}
425 		/* PROM maps its memory user-accessible: fix it. */
426 		for (i = NPTESG; --i >= 0; va += NBPG)
427 			setpte(va, getpte(va) | PG_S);
428 	}
429 	return (nmmu);
430 }
431 
432 /*
433  * TODO: agree with the ROM on physical pages by taking them away
434  * from the page list, rather than having a dinky BTSIZE above.
435  */
436 
437 /*----------------------------------------------------------------*/
438 
439 /*
440  * MMU management.
441  */
442 
443 /*
444  * Change contexts.  We need the old context number as well as the new
445  * one.  If the context is changing, we must write all user windows
446  * first, lest an interrupt cause them to be written to the (other)
447  * user whose context we set here.
448  */
449 #define	CHANGE_CONTEXTS(old, new) \
450 	if ((old) != (new)) { \
451 		write_user_windows(); \
452 		setcontext(new); \
453 	}
454 
455 /*
456  * Allocate an MMU entry (i.e., a PMEG).
457  * If necessary, steal one from someone else.
458  * Put it on the tail of the given queue
459  * (which is either the LRU list or the locked list).
460  * The locked list is not actually ordered, but this is easiest.
461  * Also put it on the given (new) pmap's chain,
462  * enter its pmeg number into that pmap's segmap,
463  * and store the pmeg's new virtual segment number (me->me_vseg).
464  *
465  * This routine is large and complicated, but it must be fast
466  * since it implements the dynamic allocation of MMU entries.
467  */
468 struct mmuentry *
469 me_alloc(mh, newpm, newvseg)
470 	register struct mmuhd *mh;
471 	register struct pmap *newpm;
472 	register int newvseg;
473 {
474 	register struct mmuentry *me;
475 	register struct pmap *pm;
476 	register int i, va, pa, *pte, tpte;
477 	int ctx;
478 
479 	/* try free list first */
480 	if ((me = me_freelist) != NULL) {
481 		me_freelist = me->me_next;
482 #ifdef DEBUG
483 		if (me->me_pmap != NULL)
484 			panic("me_alloc: freelist entry has pmap");
485 		if (pmapdebug & PDB_MMU_ALLOC)
486 			printf("me_alloc: got pmeg %x\n", me->me_pmeg);
487 #endif
488 		insque(me, mh->mh_prev);	/* onto end of queue */
489 
490 		/* onto pmap chain; pmap is already locked, if needed */
491 		me->me_pmforw = NULL;
492 		me->me_pmback = newpm->pm_mmuback;
493 		*newpm->pm_mmuback = me;
494 		newpm->pm_mmuback = &me->me_pmforw;
495 
496 		/* into pmap segment table, with backpointers */
497 		newpm->pm_segmap[newvseg] = me->me_pmeg;
498 		me->me_pmap = newpm;
499 		me->me_vseg = newvseg;
500 
501 		return (me);
502 	}
503 
504 	/* no luck, take head of LRU list */
505 	if ((me = me_lru.mh_next) == (struct mmuentry *)&me_lru)
506 		panic("me_alloc: all pmegs gone");
507 	pm = me->me_pmap;
508 #ifdef DEBUG
509 	if (pm == NULL)
510 		panic("me_alloc: LRU entry has no pmap");
511 	if (pm == kernel_pmap)
512 		panic("me_alloc: stealing from kernel");
513 	pte = pm->pm_pte[me->me_vseg];
514 	if (pte == NULL)
515 		panic("me_alloc: LRU entry's pmap has no ptes");
516 	if (pmapdebug & (PDB_MMU_ALLOC | PDB_MMU_STEAL))
517 		printf("me_alloc: stealing pmeg %x from pmap %x\n",
518 		    me->me_pmeg, pm);
519 #endif
520 	/*
521 	 * Remove from LRU list, and insert at end of new list
522 	 * (probably the LRU list again, but so what?).
523 	 */
524 	remque(me);
525 	insque(me, mh->mh_prev);
526 
527 	/*
528 	 * The PMEG must be mapped into some context so that we can
529 	 * read its PTEs.  Use its current context if it has one;
530 	 * if not, and since context 0 is reserved for the kernel,
531 	 * the simplest method is to switch to 0 and map the PMEG
532 	 * to virtual address 0---which, being a user space address,
533 	 * is by definition not in use.
534 	 *
535 	 * XXX for ncpus>1 must use per-cpu VA?
536 	 * XXX do not have to flush cache immediately
537 	 */
538 	ctx = getcontext();
539 	if (pm->pm_ctx) {
540 		CHANGE_CONTEXTS(ctx, pm->pm_ctxnum);
541 #ifdef notdef
542 		if (vactype != VAC_NONE)
543 #endif
544 			cache_flush_segment(me->me_vseg);
545 		va = VSTOVA(me->me_vseg);
546 	} else {
547 		CHANGE_CONTEXTS(ctx, 0);
548 		setsegmap(0, me->me_pmeg);
549 		/*
550 		 * No cache flush needed: it happened earlier when
551 		 * the old context was taken.
552 		 */
553 		va = 0;
554 	}
555 
556 	/*
557 	 * Record reference and modify bits for each page,
558 	 * and copy PTEs into kernel memory so that they can
559 	 * be reloaded later.
560 	 */
561 	i = NPTESG;
562 	do {
563 		tpte = getpte(va);
564 		if (tpte & PG_V) {
565 			pa = ptoa(HWTOSW(tpte & PG_PFNUM));
566 			if (managed(pa))
567 				pvhead(pa)->pv_flags |= MR(tpte);
568 		}
569 		*pte++ = tpte & ~(PG_U|PG_M);
570 		va += NBPG;
571 	} while (--i > 0);
572 
573 	/* update segment tables */
574 	simple_lock(&pm->pm_lock); /* what if other cpu takes mmuentry ?? */
575 	if (pm->pm_ctx)
576 		setsegmap(VSTOVA(me->me_vseg), seginval);
577 	pm->pm_segmap[me->me_vseg] = seginval;
578 
579 	/* off old pmap chain */
580 	if ((*me->me_pmback = me->me_pmforw) != NULL) {
581 		me->me_pmforw->me_pmback = me->me_pmback;
582 		me->me_pmforw = NULL;
583 	} else
584 		pm->pm_mmuback = me->me_pmback;
585 	simple_unlock(&pm->pm_lock);
586 	setcontext(ctx);	/* done with old context */
587 
588 	/* onto new pmap chain; new pmap is already locked, if needed */
589 	/* me->me_pmforw = NULL; */	/* done earlier */
590 	me->me_pmback = newpm->pm_mmuback;
591 	*newpm->pm_mmuback = me;
592 	newpm->pm_mmuback = &me->me_pmforw;
593 
594 	/* into new segment table, with backpointers */
595 	newpm->pm_segmap[newvseg] = me->me_pmeg;
596 	me->me_pmap = newpm;
597 	me->me_vseg = newvseg;
598 
599 	return (me);
600 }
601 
602 /*
603  * Free an MMU entry.
604  *
605  * Assumes the corresponding pmap is already locked.
606  * Does NOT flush cache, but does record ref and mod bits.
607  * The rest of each PTE is discarded.
608  * CALLER MUST SET CONTEXT to pm->pm_ctxnum (if pmap has
609  * a context) or to 0 (if not).  Caller must also update
610  * pm->pm_segmap and (possibly) the hardware.
611  */
612 void
613 me_free(pm, pmeg)
614 	register struct pmap *pm;
615 	register u_int pmeg;
616 {
617 	register struct mmuentry *me = &mmuentry[pmeg];
618 	register int i, va, pa, tpte;
619 
620 #ifdef DEBUG
621 	if (pmapdebug & PDB_MMU_ALLOC)
622 		printf("me_free: freeing pmeg %x from pmap %x\n",
623 		    me->me_pmeg, pm);
624 	if (me->me_pmeg != pmeg)
625 		panic("me_free: wrong mmuentry");
626 	if (pm != me->me_pmap)
627 		panic("me_free: pm != me_pmap");
628 #endif
629 
630 	/* just like me_alloc, but no cache flush, and context already set */
631 	if (pm->pm_ctx)
632 		va = VSTOVA(me->me_vseg);
633 	else {
634 		setsegmap(0, me->me_pmeg);
635 		va = 0;
636 	}
637 	i = NPTESG;
638 	do {
639 		tpte = getpte(va);
640 		if (tpte & PG_V) {
641 			pa = ptoa(HWTOSW(tpte & PG_PFNUM));
642 			if (managed(pa))
643 				pvhead(pa)->pv_flags |= MR(tpte);
644 		}
645 		va += NBPG;
646 	} while (--i > 0);
647 
648 	/* take mmu entry off pmap chain */
649 	*me->me_pmback = me->me_pmforw;
650 	if (me->me_pmforw != NULL)
651 		me->me_pmforw->me_pmback = me->me_pmback;
652 	else
653 		pm->pm_mmuback = me->me_pmback;
654 	/* ... and remove from segment map */
655 	pm->pm_segmap[me->me_vseg] = seginval;
656 
657 	/* off LRU or lock chain */
658 	remque(me);
659 
660 	/* no associated pmap; on free list */
661 	me->me_pmap = NULL;
662 	me->me_next = me_freelist;
663 	me_freelist = me;
664 }
665 
666 /*
667  * `Page in' (load or inspect) an MMU entry; called on page faults.
668  * Returns 1 if we reloaded the segment, -1 if the segment was
669  * already loaded and the page was marked valid (in which case the
670  * fault must be a bus error or something), or 0 (segment loaded but
671  * PTE not valid, or segment not loaded at all).
672  */
673 int
674 mmu_pagein(pm, va, bits)
675 	register struct pmap *pm;
676 	register int va, bits;
677 {
678 	register int *pte;
679 	register struct mmuentry *me;
680 	register int vseg = VA_VSEG(va), pmeg, i, s;
681 
682 	/* return 0 if we have no PTEs to load */
683 	if ((pte = pm->pm_pte[vseg]) == NULL)
684 		return (0);
685 	/* return -1 if the fault is `hard', 0 if not */
686 	if (pm->pm_segmap[vseg] != seginval)
687 		return (bits && (getpte(va) & bits) == bits ? -1 : 0);
688 
689 	/* reload segment: write PTEs into a new LRU entry */
690 	va = VA_ROUNDDOWNTOSEG(va);
691 	s = splpmap();		/* paranoid */
692 	pmeg = me_alloc(&me_lru, pm, vseg)->me_pmeg;
693 	setsegmap(va, pmeg);
694 	i = NPTESG;
695 	do {
696 		setpte(va, *pte++);
697 		va += NBPG;
698 	} while (--i > 0);
699 	splx(s);
700 	return (1);
701 }
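
/*
 * Sketch of how a caller can interpret the result (the real caller is
 * the memory fault handler in the trap code, not shown here):
 */
#if 0	/* illustration only */
	switch (mmu_pagein(pm, va, bits)) {
	case 1:
		return;		/* segment reloaded: just retry the access */
	case -1:
		/* segment loaded and PTE valid: treat as a real fault */
		break;
	case 0:
		/* let the VM system handle it (vm_fault) */
		break;
	}
#endif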
702 
703 /*
704  * Allocate a context.  If necessary, steal one from someone else.
705  * Changes hardware context number and loads segment map.
706  *
707  * This routine is only ever called from locore.s just after it has
708  * saved away the previous process, so there are no active user windows.
709  */
710 void
711 ctx_alloc(pm)
712 	register struct pmap *pm;
713 {
714 	register union ctxinfo *c;
715 	register int cnum, i, va;
716 	register pmeg_t *segp;
717 
718 #ifdef DEBUG
719 	if (pm->pm_ctx)
720 		panic("ctx_alloc pm_ctx");
721 	if (pmapdebug & PDB_CTX_ALLOC)
722 		printf("ctx_alloc(%x)\n", pm);
723 #endif
724 	if ((c = ctx_freelist) != NULL) {
725 		ctx_freelist = c->c_nextfree;
726 		cnum = c - ctxinfo;
727 		setcontext(cnum);
728 	} else {
729 		if ((ctx_kick += ctx_kickdir) >= ncontext) {
730 			ctx_kick = ncontext - 1;
731 			ctx_kickdir = -1;
732 		} else if (ctx_kick < 1) {
733 			ctx_kick = 1;
734 			ctx_kickdir = 1;
735 		}
736 		c = &ctxinfo[cnum = ctx_kick];
737 #ifdef DEBUG
738 		if (c->c_pmap == NULL)
739 			panic("ctx_alloc c_pmap");
740 		if (pmapdebug & (PDB_CTX_ALLOC | PDB_CTX_STEAL))
741 			printf("ctx_alloc: steal context %x from %x\n",
742 			    cnum, c->c_pmap);
743 #endif
744 		c->c_pmap->pm_ctx = NULL;
745 		setcontext(cnum);
746 #ifdef notdef
747 		if (vactype != VAC_NONE)
748 #endif
749 			cache_flush_context();
750 	}
751 	c->c_pmap = pm;
752 	pm->pm_ctx = c;
753 	pm->pm_ctxnum = cnum;
754 
755 	/*
756 	 * XXX	loop below makes 3584 iterations ... could reduce
757 	 *	by remembering valid ranges per context: two ranges
758 	 *	should suffice (for text/data/bss and for stack).
759 	 */
760 	segp = pm->pm_rsegmap;
761 	for (va = 0, i = NUSEG; --i >= 0; va += NBPSG)
762 		setsegmap(va, *segp++);
763 }
764 
765 /*
766  * Give away a context.  Flushes cache and sets current context to 0.
767  */
768 void
769 ctx_free(pm)
770 	struct pmap *pm;
771 {
772 	register union ctxinfo *c;
773 	register int newc, oldc;
774 
775 	if ((c = pm->pm_ctx) == NULL)
776 		panic("ctx_free");
777 	pm->pm_ctx = NULL;
778 	oldc = getcontext();
779 	if (vactype != VAC_NONE) {
780 		newc = pm->pm_ctxnum;
781 		CHANGE_CONTEXTS(oldc, newc);
782 		cache_flush_context();
783 		setcontext(0);
784 	} else {
785 		CHANGE_CONTEXTS(oldc, 0);
786 	}
787 	c->c_nextfree = ctx_freelist;
788 	ctx_freelist = c;
789 }
790 
791 
792 /*----------------------------------------------------------------*/
793 
794 /*
795  * pvlist functions.
796  */
797 
798 /*
799  * Walk the given pv list, and for each PTE, set or clear some bits
800  * (e.g., PG_W or PG_NC).
801  *
802  * As a special case, this never clears PG_W on `pager' pages.
803  * These, being kernel addresses, are always in hardware and have
804  * a context.
805  *
806  * This routine flushes the cache for any page whose PTE changes,
807  * as long as the process has a context; this is overly conservative.
808  * It also copies ref and mod bits to the pvlist, on the theory that
809  * this might save work later.  (XXX should test this theory)
810  */
811 void
812 pv_changepte(pv0, bis, bic)
813 	register struct pvlist *pv0;
814 	register int bis, bic;
815 {
816 	register int *pte;
817 	register struct pvlist *pv;
818 	register struct pmap *pm;
819 	register int va, vseg, pmeg, i, flags;
820 	int ctx, s;
821 
822 	write_user_windows();		/* paranoid? */
823 
824 	s = splpmap();			/* paranoid? */
825 	if (pv0->pv_pmap == NULL) {
826 		splx(s);
827 		return;
828 	}
829 	ctx = getcontext();
830 	flags = pv0->pv_flags;
831 	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
832 		pm = pv->pv_pmap;
833 		if (pm == NULL) panic("pv_changepte 1");
834 		va = pv->pv_va;
835 		vseg = VA_VSEG(va);
836 		pte = pm->pm_pte[vseg];
837 		if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
838 			register int tpte;
839 
840 			/* in hardware: fix hardware copy */
841 			if (pm->pm_ctx) {
842 				extern vm_offset_t pager_sva, pager_eva;
843 
844 				if (bic == PG_W &&
845 				    va >= pager_sva && va < pager_eva)
846 					continue;
847 				setcontext(pm->pm_ctxnum);
848 				/* XXX should flush only when necessary */
849 #ifdef notdef
850 				if (vactype != VAC_NONE)
851 #endif
852 					cache_flush_page(va);
853 			} else {
854 				/* XXX per-cpu va? */
855 				setcontext(0);
856 				setsegmap(0, pmeg);
857 				va = VA_VPG(va) * NBPG;
858 			}
859 			tpte = getpte(va);
860 			if (tpte & PG_V)
861 				flags |= (tpte >> PG_M_SHIFT) &
862 				    (PV_MOD|PV_REF);
863 			tpte = (tpte | bis) & ~bic;
864 			setpte(va, tpte);
865 			if (pte != NULL)	/* update software copy */
866 				pte[VA_VPG(va)] = tpte;
867 		} else {
868 			/* not in hardware: just fix software copy */
869 			if (pte == NULL)
870 				panic("pv_changepte 2");
871 			pte += VA_VPG(va);
872 			*pte = (*pte | bis) & ~bic;
873 		}
874 	}
875 	pv0->pv_flags = flags;
876 	setcontext(ctx);
877 	splx(s);
878 }
879 
880 /*
881  * Sync ref and mod bits in pvlist (turns off same in hardware PTEs).
882  * Returns the new flags.
883  *
884  * This is just like pv_changepte, but we never add or remove bits,
885  * hence never need to adjust software copies.
886  */
887 int
888 pv_syncflags(pv0)
889 	register struct pvlist *pv0;
890 {
891 	register struct pvlist *pv;
892 	register struct pmap *pm;
893 	register int tpte, va, vseg, pmeg, i, flags;
894 	int ctx, s;
895 
896 	write_user_windows();		/* paranoid? */
897 
898 	s = splpmap();			/* paranoid? */
899 	if (pv0->pv_pmap == NULL) {	/* paranoid */
900 		splx(s);
901 		return (0);
902 	}
903 	ctx = getcontext();
904 	flags = pv0->pv_flags;
905 	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
906 		pm = pv->pv_pmap;
907 		va = pv->pv_va;
908 		vseg = VA_VSEG(va);
909 		if ((pmeg = pm->pm_segmap[vseg]) == seginval)
910 			continue;
911 		if (pm->pm_ctx) {
912 			setcontext(pm->pm_ctxnum);
913 			/* XXX should flush only when necessary */
914 #ifdef notdef
915 			if (vactype != VAC_NONE)
916 #endif
917 				cache_flush_page(va);
918 		} else {
919 			/* XXX per-cpu va? */
920 			setcontext(0);
921 			setsegmap(0, pmeg);
922 			va = VA_VPG(va) * NBPG;
923 		}
924 		tpte = getpte(va);
925 		if (tpte & (PG_M|PG_U) && tpte & PG_V) {
926 			flags |= (tpte >> PG_M_SHIFT) &
927 			    (PV_MOD|PV_REF);
928 			tpte &= ~(PG_M|PG_U);
929 			setpte(va, tpte);
930 		}
931 	}
932 	pv0->pv_flags = flags;
933 	setcontext(ctx);
934 	splx(s);
935 	return (flags);
936 }
937 
938 /*
939  * pv_unlink is a helper function for pmap_remove.
940  * It takes a pointer to the pv_table head for some physical address
941  * and removes the appropriate (pmap, va) entry.
942  *
943  * Once the entry is removed, if the pv_table head has the cache
944  * inhibit bit set, see if we can turn that off; if so, walk the
945  * pvlist and turn off PG_NC in each PTE.  (The pvlist is by
946  * definition nonempty, since it must have at least two elements
947  * in it to have PV_NC set, and we only remove one here.)
948  */
949 static void
950 pv_unlink(pv, pm, va)
951 	register struct pvlist *pv;
952 	register struct pmap *pm;
953 	register vm_offset_t va;
954 {
955 	register struct pvlist *npv;
956 
957 	/*
958 	 * First entry is special (sigh).
959 	 */
960 	npv = pv->pv_next;
961 	if (pv->pv_pmap == pm && pv->pv_va == va) {
962 		pmap_stats.ps_unlink_pvfirst++;
963 		if (npv != NULL) {
964 			pv->pv_next = npv->pv_next;
965 			pv->pv_pmap = npv->pv_pmap;
966 			pv->pv_va = npv->pv_va;
967 			free((caddr_t)npv, M_VMPVENT);
968 		} else
969 			pv->pv_pmap = NULL;
970 	} else {
971 		register struct pvlist *prev;
972 
973 		for (prev = pv;; prev = npv, npv = npv->pv_next) {
974 			pmap_stats.ps_unlink_pvsearch++;
975 			if (npv == NULL)
976 				panic("pv_unlink");
977 			if (npv->pv_pmap == pm && npv->pv_va == va)
978 				break;
979 		}
980 		prev->pv_next = npv->pv_next;
981 		free((caddr_t)npv, M_VMPVENT);
982 	}
983 	if (pv->pv_flags & PV_NC) {
984 		/*
985 		 * Not cached: check to see if we can fix that now.
986 		 */
987 		va = pv->pv_va;
988 		for (npv = pv->pv_next; npv != NULL; npv = npv->pv_next)
989 			if (BADALIAS(va, npv->pv_va))
990 				return;
991 		pv->pv_flags &= ~PV_NC;
992 		pv_changepte(pv, 0, PG_NC);
993 	}
994 }
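
/*
 * BADALIAS (from cache.h) is what decides `cannot be cached' here and
 * in pv_link below: two mappings of the same physical page conflict
 * when their virtual addresses differ in the bits that index the
 * virtually-addressed cache, since the same data could then sit in
 * two cache lines at once.  For example, assuming a 64 KB
 * direct-mapped VAC and 4 KB pages, VAs 0x10000 and 0x30000 do not
 * alias badly (same index bits), while 0x10000 and 0x24000 do.
 */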
995 
996 /*
997  * pv_link is the inverse of pv_unlink, and is used in pmap_enter.
998  * It returns PG_NC if the (new) pvlist says that the address cannot
999  * be cached.
1000  */
1001 static int
1002 pv_link(pv, pm, va)
1003 	register struct pvlist *pv;
1004 	register struct pmap *pm;
1005 	register vm_offset_t va;
1006 {
1007 	register struct pvlist *npv;
1008 	register int ret;
1009 
1010 	if (pv->pv_pmap == NULL) {
1011 		/* no pvlist entries yet */
1012 		pmap_stats.ps_enter_firstpv++;
1013 		pv->pv_next = NULL;
1014 		pv->pv_pmap = pm;
1015 		pv->pv_va = va;
1016 		return (0);
1017 	}
1018 	/*
1019 	 * Before entering the new mapping, see if
1020 	 * it will cause old mappings to become aliased
1021 	 * and thus need to be `discached'.
1022 	 */
1023 	ret = 0;
1024 	pmap_stats.ps_enter_secondpv++;
1025 	if (pv->pv_flags & PV_NC) {
1026 		/* already uncached, just stay that way */
1027 		ret = PG_NC;
1028 	} else {
1029 		/* MAY NEED TO DISCACHE ANYWAY IF va IS IN DVMA SPACE? */
1030 		for (npv = pv; npv != NULL; npv = npv->pv_next) {
1031 			if (BADALIAS(va, npv->pv_va)) {
1032 				pv->pv_flags |= PV_NC;
1033 				pv_changepte(pv, ret = PG_NC, 0);
1034 				break;
1035 			}
1036 		}
1037 	}
1038 	npv = (struct pvlist *)malloc(sizeof *npv, M_VMPVENT, M_WAITOK);
1039 	npv->pv_next = pv->pv_next;
1040 	npv->pv_pmap = pm;
1041 	npv->pv_va = va;
1042 	pv->pv_next = npv;
1043 	return (ret);
1044 }
1045 
1046 /*
1047  * Walk the given list and flush the cache for each (MI) page that is
1048  * potentially in the cache.
1049  */
1050 void pv_flushcache(pv)
1051 	register struct pvlist *pv;
1052 {
1053 	register struct pmap *pm;
1054 	register int i, s, ctx;
1055 
1056 	write_user_windows();	/* paranoia? */
1057 
1058 	s = splpmap();		/* XXX extreme paranoia */
1059 	if ((pm = pv->pv_pmap) != NULL) {
1060 		ctx = getcontext();
1061 		for (;;) {
1062 			if (pm->pm_ctx) {
1063 				setcontext(pm->pm_ctxnum);
1064 				cache_flush_page(pv->pv_va);
1065 			}
1066 			pv = pv->pv_next;
1067 			if (pv == NULL)
1068 				break;
1069 			pm = pv->pv_pmap;
1070 		}
1071 		setcontext(ctx);
1072 	}
1073 	splx(s);
1074 }
1075 
1076 /*----------------------------------------------------------------*/
1077 
1078 /*
1079  * At last, pmap code.
1080  */
1081 
1082 /*
1083  * Bootstrap the system enough to run with VM enabled.
1084  *
1085  * nmmu is the number of mmu entries (``PMEGs'');
1086  * nctx is the number of contexts.
1087  */
1088 void
1089 pmap_bootstrap(nmmu, nctx)
1090 	int nmmu, nctx;
1091 {
1092 	register union ctxinfo *ci;
1093 	register struct mmuentry *me;
1094 	register int i, j, n, z, vs;
1095 	register caddr_t p;
1096 	register void (*rom_setmap)(int ctx, caddr_t va, int pmeg);
1097 	int lastpage;
1098 	extern char end[];
1099 	extern caddr_t reserve_dumppages(caddr_t);
1100 
1101 	ncontext = nctx;
1102 
1103 	/*
1104 	 * Last segment is the `invalid' one (one PMEG of pte's with !pg_v).
1105 	 * It will never be used for anything else.
1106 	 */
1107 	seginval = --nmmu;
1108 
1109 	/*
1110 	 * Preserve the monitor ROM's reserved VM region, so that
1111 	 * we can use L1-A or the monitor's debugger.  As a side
1112 	 * effect we map the ROM's reserved VM into all contexts
1113 	 * (otherwise L1-A crashes the machine!).
1114 	 */
1115 	nmmu = mmu_reservemon(nmmu);
1116 
1117 	/*
1118 	 * Allocate and clear mmu entry and context structures.
1119 	 */
1120 	p = end;
1121 	mmuentry = me = (struct mmuentry *)p;
1122 	p += nmmu * sizeof *me;
1123 	ctxinfo = ci = (union ctxinfo *)p;
1124 	p += nctx * sizeof *ci;
1125 	bzero(end, p - end);
1126 
1127 	/*
1128 	 * Set up the `constants' for the call to vm_init()
1129 	 * in main().  All pages beginning at p (rounded up to
1130 	 * the next whole page) and continuing through the number
1131 	 * of available pages are free, but they start at a higher
1132 	 * virtual address.  This gives us two mappable MD pages
1133 	 * for pmap_zero_page and pmap_copy_page, and one MI page
1134 	 * for /dev/mem, all with no associated physical memory.
1135 	 */
1136 	p = (caddr_t)(((u_int)p + NBPG - 1) & ~PGOFSET);
1137 	avail_start = (int)p - KERNBASE;
1138 	avail_end = init_translations() << PGSHIFT;
1139 	i = (int)p;
1140 	vpage[0] = p, p += NBPG;
1141 	vpage[1] = p, p += NBPG;
1142 	vmempage = p, p += NBPG;
1143 	p = reserve_dumppages(p);
1144 	virtual_avail = (vm_offset_t)p;
1145 	virtual_end = VM_MAX_KERNEL_ADDRESS;
1146 
1147 	p = (caddr_t)i;			/* retract to first free phys */
1148 
1149 	/*
1150 	 * Initialize the kernel pmap.
1151 	 */
1152 	{
1153 		register struct kpmap *k = &kernel_pmap_store;
1154 
1155 /*		kernel_pmap = (struct pmap *)k; */
1156 		k->pm_ctx = ctxinfo;
1157 		/* k->pm_ctxnum = 0; */
1158 		simple_lock_init(&k->pm_lock);
1159 		k->pm_refcount = 1;
1160 		/* k->pm_mmuforw = 0; */
1161 		k->pm_mmuback = &k->pm_mmuforw;
1162 		k->pm_segmap = &k->pm_rsegmap[-NUSEG];
1163 		k->pm_pte = &k->pm_rpte[-NUSEG];
1164 		k->pm_npte = &k->pm_rnpte[-NUSEG];
1165 		for (i = NKSEG; --i >= 0;)
1166 			k->pm_rsegmap[i] = seginval;
1167 	}
1168 
1169 	/*
1170 	 * All contexts are free except the kernel's.
1171 	 *
1172 	 * XXX sun4c could use context 0 for users?
1173 	 */
1174 	ci->c_pmap = kernel_pmap;
1175 	ctx_freelist = ci + 1;
1176 	for (i = 1; i < ncontext; i++) {
1177 		ci++;
1178 		ci->c_nextfree = ci + 1;
1179 	}
1180 	ci->c_nextfree = NULL;
1181 	ctx_kick = 0;
1182 	ctx_kickdir = -1;
1183 
1184 	/* me_freelist = NULL; */	/* already NULL */
1185 
1186 	/*
1187 	 * Init mmu entries that map the kernel physical addresses.
1188 	 * If the page bits in p are 0, we filled the last segment
1189 	 * exactly (now how did that happen?); if not, it is
1190 	 * the last page filled in the last segment.
1191 	 *
1192 	 * All the other MMU entries are free.
1193 	 *
1194 	 * THIS ASSUMES SEGMENT i IS MAPPED BY MMU ENTRY i DURING THE
1195 	 * BOOT PROCESS
1196 	 */
1197 	z = ((((u_int)p + NBPSG - 1) & ~SGOFSET) - KERNBASE) >> SGSHIFT;
1198 	lastpage = VA_VPG(p);
1199 	if (lastpage == 0)
1200 		lastpage = NPTESG;
1201 	p = (caddr_t)KERNBASE;		/* first va */
1202 	vs = VA_VSEG(KERNBASE);		/* first virtual segment */
1203 	rom_setmap = promvec->pv_setctxt;
1204 	for (i = 0;;) {
1205 		/*
1206 		 * Distribute each kernel segment into all contexts.
1207 		 * This is done through the monitor ROM, rather than
1208 		 * directly here: if we do a setcontext we will fault,
1209 		 * as we are not (yet) mapped in any other context.
1210 		 */
1211 		for (j = 1; j < nctx; j++)
1212 			rom_setmap(j, p, i);
1213 
1214 		/* set up the mmu entry */
1215 		me->me_pmeg = i;
1216 		insque(me, me_locked.mh_prev);
1217 		/* me->me_pmforw = NULL; */
1218 		me->me_pmback = kernel_pmap->pm_mmuback;
1219 		*kernel_pmap->pm_mmuback = me;
1220 		kernel_pmap->pm_mmuback = &me->me_pmforw;
1221 		me->me_pmap = kernel_pmap;
1222 		me->me_vseg = vs;
1223 		kernel_pmap->pm_segmap[vs] = i;
1224 		n = ++i < z ? NPTESG : lastpage;
1225 		kernel_pmap->pm_npte[vs] = n;
1226 		me++;
1227 		vs++;
1228 		if (i < z) {
1229 			p += NBPSG;
1230 			continue;
1231 		}
1232 		/*
1233 		 * Unmap the pages, if any, that are not part of
1234 		 * the final segment.
1235 		 */
1236 		for (p += n * NBPG; j < NPTESG; j++, p += NBPG)
1237 			setpte(p, 0);
1238 		break;
1239 	}
1240 	for (; i < nmmu; i++, me++) {
1241 		me->me_pmeg = i;
1242 		me->me_next = me_freelist;
1243 		/* me->me_pmap = NULL; */
1244 		me_freelist = me;
1245 	}
1246 
1247 	/*
1248 	 * write protect & encache kernel text;
1249 	 * set red zone at kernel base; enable cache on message buffer.
1250 	 */
1251 	{
1252 		extern char etext[], trapbase[];
1253 #ifdef KGDB
1254 		register int mask = ~PG_NC;	/* XXX chgkprot is busted */
1255 #else
1256 		register int mask = ~(PG_W | PG_NC);
1257 #endif
1258 		for (p = trapbase; p < etext; p += NBPG)
1259 			setpte(p, getpte(p) & mask);
1260 		p = (caddr_t)KERNBASE;
1261 		setpte(p, 0);
1262 		p += NBPG;
1263 		setpte(p, getpte(p) & ~PG_NC);
1264 	}
1265 
1266 	/*
1267 	 * Grab physical memory list (for /dev/mem).
1268 	 */
1269 	npmemarr = makememarr(pmemarr, MA_SIZE, MEMARR_TOTALPHYS);
1270 }
1271 
1272 /*
1273  * Bootstrap memory allocator. This function allows for early dynamic
1274  * memory allocation until the virtual memory system has been bootstrapped.
1275  * After that point, either kmem_alloc or malloc should be used. This
1276  * function works by stealing pages from the (to be) managed page pool,
1277  * stealing virtual address space, then mapping the pages and zeroing them.
1278  *
1279  * It should be used from pmap_bootstrap till vm_page_startup, afterwards
1280  * it cannot be used, and will generate a panic if tried. Note that this
1281  * memory will never be freed, and in essence it is wired down.
1282  */
1283 void *
1284 pmap_bootstrap_alloc(size)
1285 	int size;
1286 {
1287 	register void *mem;
1288 	extern int vm_page_startup_initialized;
1289 
1290 	if (vm_page_startup_initialized)
1291 		panic("pmap_bootstrap_alloc: called after startup initialized");
1292 	size = round_page(size);
1293 	mem = (void *)virtual_avail;
1294 	virtual_avail = pmap_map(virtual_avail, avail_start,
1295 	    avail_start + size, VM_PROT_READ|VM_PROT_WRITE);
1296 	avail_start += size;
1297 	bzero((void *)mem, size);
1298 	return (mem);
1299 }
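
/*
 * Typical (hypothetical) use, some time between pmap_bootstrap and
 * vm_page_startup; struct foo and nfoo are placeholders:
 */
#if 0	/* illustration only */
	struct foo *footab;

	footab = (struct foo *)pmap_bootstrap_alloc(nfoo * sizeof(struct foo));
	/* footab is now mapped, zeroed, and effectively wired down */
#endif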
1300 
1301 /*
1302  * Initialize the pmap module.
1303  */
1304 void
1305 pmap_init(phys_start, phys_end)
1306 	register vm_offset_t phys_start, phys_end;
1307 {
1308 	register vm_size_t s;
1309 
1310 	if (PAGE_SIZE != NBPG)
1311 		panic("pmap_init: CLSIZE!=1");
1312 	/*
1313 	 * Allocate and clear memory for the pv_table.
1314 	 */
1315 	s = sizeof(struct pvlist) * atop(phys_end - phys_start);
1316 	s = round_page(s);
1317 	pv_table = (struct pvlist *)kmem_alloc(kernel_map, s);
1318 	bzero((caddr_t)pv_table, s);
1319 	vm_first_phys = phys_start;
1320 	vm_num_phys = phys_end - phys_start;
1321 }
1322 
1323 /*
1324  * Map physical addresses into kernel VM.
1325  */
1326 vm_offset_t
1327 pmap_map(va, pa, endpa, prot)
1328 	register vm_offset_t va, pa, endpa;
1329 	register int prot;
1330 {
1331 	register int pgsize = PAGE_SIZE;
1332 
1333 	while (pa < endpa) {
1334 		pmap_enter(kernel_pmap, va, pa, prot, 1);
1335 		va += pgsize;
1336 		pa += pgsize;
1337 	}
1338 	return (va);
1339 }
1340 
1341 /*
1342  * Create and return a physical map.
1343  *
1344  * If size is nonzero, the map is useless. (ick)
1345  */
1346 struct pmap *
1347 pmap_create(size)
1348 	vm_size_t size;
1349 {
1350 	register struct pmap *pm;
1351 
1352 	if (size)
1353 		return (NULL);
1354 	pm = (struct pmap *)malloc(sizeof *pm, M_VMPMAP, M_WAITOK);
1355 #ifdef DEBUG
1356 	if (pmapdebug & PDB_CREATE)
1357 		printf("pmap_create: created %x\n", pm);
1358 #endif
1359 	bzero((caddr_t)pm, sizeof *pm);
1360 	pmap_pinit(pm);
1361 	return (pm);
1362 }
1363 
1364 /*
1365  * Initialize a preallocated and zeroed pmap structure,
1366  * such as one in a vmspace structure.
1367  */
1368 void
1369 pmap_pinit(pm)
1370 	register struct pmap *pm;
1371 {
1372 	register int i;
1373 
1374 #ifdef DEBUG
1375 	if (pmapdebug & PDB_CREATE)
1376 		printf("pmap_pinit(%x)\n", pm);
1377 #endif
1378 	/* pm->pm_ctx = NULL; */
1379 	simple_lock_init(&pm->pm_lock);
1380 	pm->pm_refcount = 1;
1381 	/* pm->pm_mmuforw = NULL; */
1382 	pm->pm_mmuback = &pm->pm_mmuforw;
1383 	pm->pm_segmap = pm->pm_rsegmap;
1384 	pm->pm_pte = pm->pm_rpte;
1385 	pm->pm_npte = pm->pm_rnpte;
1386 	for (i = NUSEG; --i >= 0;)
1387 		pm->pm_rsegmap[i] = seginval;
1388 	/* bzero((caddr_t)pm->pm_rpte, sizeof pm->pm_rpte); */
1389 	/* bzero((caddr_t)pm->pm_rnpte, sizeof pm->pm_rnpte); */
1390 }
1391 
1392 /*
1393  * Retire the given pmap from service.
1394  * Should only be called if the map contains no valid mappings.
1395  */
1396 void
1397 pmap_destroy(pm)
1398 	register struct pmap *pm;
1399 {
1400 	int count;
1401 
1402 	if (pm == NULL)
1403 		return;
1404 #ifdef DEBUG
1405 	if (pmapdebug & PDB_DESTROY)
1406 		printf("pmap_destroy(%x)\n", pm);
1407 #endif
1408 	simple_lock(&pm->pm_lock);
1409 	count = --pm->pm_refcount;
1410 	simple_unlock(&pm->pm_lock);
1411 	if (count == 0) {
1412 		pmap_release(pm);
1413 		free((caddr_t)pm, M_VMPMAP);
1414 	}
1415 }
1416 
1417 /*
1418  * Release any resources held by the given physical map.
1419  * Called when a pmap initialized by pmap_pinit is being released.
1420  */
1421 void
1422 pmap_release(pm)
1423 	register struct pmap *pm;
1424 {
1425 	register union ctxinfo *c;
1426 	register int s = splpmap();	/* paranoia */
1427 
1428 #ifdef DEBUG
1429 	if (pmapdebug & PDB_DESTROY)
1430 		printf("pmap_release(%x)\n", pm);
1431 #endif
1432 	if (pm->pm_mmuforw)
1433 		panic("pmap_release mmuforw");
1434 	if ((c = pm->pm_ctx) != NULL) {
1435 		if (pm->pm_ctxnum == 0)
1436 			panic("pmap_release: releasing kernel");
1437 		ctx_free(pm);
1438 	}
1439 	splx(s);
1440 }
1441 
1442 /*
1443  * Add a reference to the given pmap.
1444  */
1445 void
1446 pmap_reference(pm)
1447 	struct pmap *pm;
1448 {
1449 
1450 	if (pm != NULL) {
1451 		simple_lock(&pm->pm_lock);
1452 		pm->pm_refcount++;
1453 		simple_unlock(&pm->pm_lock);
1454 	}
1455 }
1456 
1457 static int pmap_rmk(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
1458 static int pmap_rmu(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
1459 
1460 /*
1461  * Remove the given range of mapping entries.
1462  * The starting and ending addresses are already rounded to pages.
1463  * Sheer lunacy: pmap_remove is often asked to remove nonexistent
1464  * mappings.
1465  */
1466 void
1467 pmap_remove(pm, va, endva)
1468 	register struct pmap *pm;
1469 	register vm_offset_t va, endva;
1470 {
1471 	register vm_offset_t nva;
1472 	register int vseg, nleft, s, ctx;
1473 	register int (*rm)(struct pmap *, vm_offset_t, vm_offset_t,
1474 			    int, int, int);
1475 
1476 	if (pm == NULL)
1477 		return;
1478 #ifdef DEBUG
1479 	if (pmapdebug & PDB_REMOVE)
1480 		printf("pmap_remove(%x, %x, %x)\n", pm, va, endva);
1481 #endif
1482 
1483 	if (pm == kernel_pmap) {
1484 		/*
1485 		 * Removing from kernel address space.
1486 		 */
1487 		rm = pmap_rmk;
1488 	} else {
1489 		/*
1490 		 * Removing from user address space.
1491 		 */
1492 		write_user_windows();
1493 		rm = pmap_rmu;
1494 	}
1495 
1496 	ctx = getcontext();
1497 	s = splpmap();		/* XXX conservative */
1498 	simple_lock(&pm->pm_lock);
1499 	for (; va < endva; va = nva) {
1500 		/* do one virtual segment at a time */
1501 		vseg = VA_VSEG(va);
1502 		nva = VSTOVA(vseg + 1);
1503 		if (nva == 0 || nva > endva)
1504 			nva = endva;
1505 		if ((nleft = pm->pm_npte[vseg]) != 0)
1506 			pm->pm_npte[vseg] = (*rm)(pm, va, nva,
1507 			    vseg, nleft, pm->pm_segmap[vseg]);
1508 	}
1509 	simple_unlock(&pm->pm_lock);
1510 	splx(s);
1511 	setcontext(ctx);
1512 }
1513 
1514 /*
1515  * Remove a range contained within a single segment.
1516  * These are egregiously complicated routines.
1517  */
1518 
1519 /* remove from kernel, return new nleft */
1520 static int
1521 pmap_rmk(pm, va, endva, vseg, nleft, pmeg)
1522 	register struct pmap *pm;
1523 	register vm_offset_t va, endva;
1524 	register int vseg, nleft, pmeg;
1525 {
1526 	register int i, tpte;
1527 	register struct pvlist *pv;
1528 
1529 #ifdef DEBUG
1530 	if (pmeg == seginval)
1531 		panic("pmap_rmk: kernel seg not loaded");
1532 	if (pm->pm_ctx == NULL)
1533 		panic("pmap_rmk: kernel lost context");
1534 #endif
1535 
1536 	/* flush cache */
1537 	/* XXX better to flush per page? (takes more code) */
1538 	setcontext(0);
1539 #ifdef notdef
1540 	if (vactype != VAC_NONE)
1541 #endif
1542 		cache_flush_segment(vseg);
1543 	while (va < endva) {
1544 		tpte = getpte(va);
1545 		if ((tpte & PG_V) == 0) {
1546 			va += PAGE_SIZE;
1547 			continue;
1548 		}
1549 		pv = NULL;
1550 		if ((tpte & PG_TYPE) == PG_OBMEM) {
1551 			i = ptoa(HWTOSW(tpte & PG_PFNUM));
1552 			if (managed(i)) {
1553 				pv = pvhead(i);
1554 				pv->pv_flags |= MR(tpte);
1555 				pv_unlink(pv, pm, va);
1556 			}
1557 		}
1558 		nleft--;
1559 		setpte(va, 0);
1560 		va += NBPG;
1561 	}
1562 
1563 	/*
1564 	 * If the segment is all gone, remove it from everyone and
1565 	 * free the MMU entry.
1566 	 */
1567 	if (nleft == 0) {
1568 		va = VSTOVA(vseg);		/* retract */
1569 		setsegmap(va, seginval);
1570 		for (i = ncontext; --i > 0;) {
1571 			setcontext(i);
1572 			setsegmap(va, seginval);
1573 		}
1574 		me_free(pm, pmeg);
1575 	}
1576 	return (nleft);
1577 }
1578 
1579 /* remove from user */
1580 static int
1581 pmap_rmu(pm, va, endva, vseg, nleft, pmeg)
1582 	register struct pmap *pm;
1583 	register vm_offset_t va, endva;
1584 	register int vseg, nleft, pmeg;
1585 {
1586 	register int *pte0, i, pteva, tpte;
1587 	register struct pvlist *pv;
1588 
1589 	pte0 = pm->pm_pte[vseg];
1590 	if (pmeg == seginval) {
1591 		register int *pte = pte0 + VA_VPG(va);
1592 
1593 		/*
1594 		 * PTEs are not in MMU.  Just invalidate software copies.
1595 		 */
1596 		for (; va < endva; pte++, va += PAGE_SIZE) {
1597 			tpte = *pte;
1598 			if ((tpte & PG_V) == 0) {
1599 				/* nothing to remove (braindead VM layer) */
1600 				continue;
1601 			}
1602 			if ((tpte & PG_TYPE) == PG_OBMEM) {
1603 				i = ptoa(HWTOSW(tpte & PG_PFNUM));
1604 				if (managed(i))
1605 					pv_unlink(pvhead(i), pm, va);
1606 			}
1607 			nleft--;
1608 			*pte = 0;
1609 		}
1610 		if (nleft == 0) {
1611 			free((caddr_t)pte0, M_VMPMAP);
1612 			pm->pm_pte[vseg] = NULL;
1613 		}
1614 		return (nleft);
1615 	}
1616 
1617 	/*
1618 	 * PTEs are in MMU.  Invalidate in hardware, update ref &
1619 	 * mod bits, and flush cache if required.
1620 	 */
1621 	if (pm->pm_ctx) {
1622 		/* process has a context, must flush cache */
1623 		/* XXX better to flush per page? (takes more code) */
1624 		setcontext(pm->pm_ctxnum);
1625 #ifdef notdef
1626 		if (vactype != VAC_NONE)
1627 #endif
1628 			cache_flush_segment(vseg);
1629 		pteva = va;
1630 	} else {
1631 		/* no context, use context 0; cache flush unnecessary */
1632 		setcontext(0);
1633 		/* XXX use per-cpu pteva? */
1634 		setsegmap(0, pmeg);
1635 		pteva = VA_VPG(va) * NBPG;
1636 	}
1637 	for (; va < endva; pteva += PAGE_SIZE, va += PAGE_SIZE) {
1638 		tpte = getpte(pteva);
1639 		if ((tpte & PG_V) == 0)
1640 			continue;
1641 		pv = NULL;
1642 		if ((tpte & PG_TYPE) == PG_OBMEM) {
1643 			i = ptoa(HWTOSW(tpte & PG_PFNUM));
1644 			if (managed(i)) {
1645 				pv = pvhead(i);
1646 				pv->pv_flags |= MR(tpte);
1647 				pv_unlink(pv, pm, va);
1648 			}
1649 		}
1650 		nleft--;
1651 		setpte(pteva, 0);
1652 	}
1653 
1654 	/*
1655 	 * If the segment is all gone, and the context is loaded, give
1656 	 * the segment back.
1657 	 */
1658 	if (nleft == 0 && pm->pm_ctx != NULL) {
1659 		va = VSTOVA(vseg);		/* retract */
1660 		setsegmap(va, seginval);
1661 		free((caddr_t)pte0, M_VMPMAP);
1662 		pm->pm_pte[vseg] = NULL;
1663 		me_free(pm, pmeg);
1664 	}
1665 	return (nleft);
1666 }
1667 
1668 /*
1669  * Lower (make more strict) the protection on the specified
1670  * physical page.
1671  *
1672  * There are only two cases: either the protection is going to 0
1673  * (in which case we do the dirty work here), or it is going from
1674  * to read-only (in which case pv_changepte does the trick).
1675  */
1676 void
1677 pmap_page_protect(pa, prot)
1678 	vm_offset_t pa;
1679 	vm_prot_t prot;
1680 {
1681 	register struct pvlist *pv, *pv0, *npv;
1682 	register struct pmap *pm;
1683 	register int *pte;
1684 	register int va, vseg, pteva, tpte;
1685 	register int flags, nleft, i, pmeg, s, ctx, doflush;
1686 
1687 #ifdef DEBUG
1688 	if ((pmapdebug & PDB_CHANGEPROT) ||
1689 	    (pmapdebug & PDB_REMOVE && prot == VM_PROT_NONE))
1690 		printf("pmap_page_protect(%x, %x)\n", pa, prot);
1691 #endif
1692 	/*
1693 	 * Skip unmanaged pages, or operations that do not take
1694 	 * away write permission.
1695 	 */
1696 	if (!managed(pa) || prot & VM_PROT_WRITE)
1697 		return;
1698 	write_user_windows();	/* paranoia */
1699 	if (prot & VM_PROT_READ) {
1700 		pv_changepte(pvhead(pa), 0, PG_W);
1701 		return;
1702 	}
1703 
1704 	/*
1705 	 * Remove all access to all people talking to this page.
1706 	 * Walk down PV list, removing all mappings.
1707 	 * The logic is much like that for pmap_remove,
1708 	 * but we know we are removing exactly one page.
1709 	 */
1710 	pv = pvhead(pa);
1711 	s = splpmap();
1712 	if ((pm = pv->pv_pmap) == NULL) {
1713 		splx(s);
1714 		return;
1715 	}
1716 	ctx = getcontext();
1717 	pv0 = pv;
1718 	flags = pv->pv_flags & ~PV_NC;
1719 	for (;; pm = pv->pv_pmap) {
1720 		va = pv->pv_va;
1721 		vseg = VA_VSEG(va);
1722 		if ((nleft = pm->pm_npte[vseg]) == 0)
1723 			panic("pmap_page_protect: empty vseg");
1724 		nleft--;
1725 		pm->pm_npte[vseg] = nleft;
1726 		pmeg = pm->pm_segmap[vseg];
1727 		pte = pm->pm_pte[vseg];
1728 		if (pmeg == seginval) {
1729 			if (nleft) {
1730 				pte += VA_VPG(va);
1731 				*pte = 0;
1732 			} else {
1733 				free((caddr_t)pte, M_VMPMAP);
1734 				pm->pm_pte[vseg] = NULL;
1735 			}
1736 			goto nextpv;
1737 		}
1738 		if (pm->pm_ctx) {
1739 			setcontext(pm->pm_ctxnum);
1740 			pteva = va;
1741 #ifdef notdef
1742 			doflush = vactype != VAC_NONE;
1743 #else
1744 			doflush = 1;
1745 #endif
1746 		} else {
1747 			setcontext(0);
1748 			/* XXX use per-cpu pteva? */
1749 			setsegmap(0, pmeg);
1750 			pteva = VA_VPG(va) * NBPG;
1751 			doflush = 0;
1752 		}
1753 		if (nleft) {
1754 			if (doflush)
1755 				cache_flush_page(va);
1756 			tpte = getpte(pteva);
1757 			if ((tpte & PG_V) == 0)
1758 				panic("pmap_page_protect !PG_V 1");
1759 			flags |= MR(tpte);
1760 			setpte(pteva, 0);
1761 		} else {
1762 			if (doflush)
1763 				cache_flush_page(va);
1764 			tpte = getpte(pteva);
1765 			if ((tpte & PG_V) == 0)
1766 				panic("pmap_page_protect !PG_V 2");
1767 			flags |= MR(tpte);
1768 			if (pm->pm_ctx) {
1769 				setsegmap(va, seginval);
1770 				if (pm == kernel_pmap) {
1771 					for (i = ncontext; --i > 0;) {
1772 						setcontext(i);
1773 						setsegmap(va, seginval);
1774 					}
1775 					goto skipptefree;
1776 				}
1777 			}
1778 			free((caddr_t)pte, M_VMPMAP);
1779 			pm->pm_pte[vseg] = NULL;
1780 		skipptefree:
1781 			me_free(pm, pmeg);
1782 		}
1783 	nextpv:
1784 		npv = pv->pv_next;
1785 		if (pv != pv0)
1786 			free((caddr_t)pv, M_VMPVENT);
1787 		if ((pv = npv) == NULL)
1788 			break;
1789 	}
1790 	pv0->pv_pmap = NULL;
1791 	pv0->pv_flags = flags;
1792 	setcontext(ctx);
1793 	splx(s);
1794 }
1795 
1796 /*
1797  * Lower (make more strict) the protection on the specified
1798  * range of this pmap.
1799  *
1800  * There are only two cases: either the protection is going to 0
1801  * (in which case we call pmap_remove to do the dirty work), or
1802  * it is going from read/write to read-only.  The latter is
1803  * fairly easy.
1804  */
1805 void
1806 pmap_protect(pm, sva, eva, prot)
1807 	register struct pmap *pm;
1808 	vm_offset_t sva, eva;
1809 	vm_prot_t prot;
1810 {
1811 	register int va, nva, vseg, pteva, pmeg;
1812 	register int s, ctx;
1813 
1814 	if (pm == NULL || prot & VM_PROT_WRITE)
1815 		return;
1816 	if ((prot & VM_PROT_READ) == 0) {
1817 		pmap_remove(pm, sva, eva);
1818 		return;
1819 	}
1820 
1821 	write_user_windows();
1822 	ctx = getcontext();
1823 	s = splpmap();
1824 	simple_lock(&pm->pm_lock);
1825 
1826 	for (va = sva; va < eva;) {
1827 		vseg = VA_VSEG(va);
1828 		nva = VSTOVA(vseg + 1);
1829 		if (nva == 0) panic("pmap_protect: last segment");	/* cannot happen */
1830 		if (nva > eva)
1831 			nva = eva;
1832 		if (pm->pm_npte[vseg] == 0) {
1833 			va = nva;
1834 			continue;
1835 		}
1836 		pmeg = pm->pm_segmap[vseg];
1837 		if (pmeg == seginval) {
1838 			register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];
1839 
1840 			/* not in MMU; just clear PG_W from core copies */
1841 			for (; va < nva; va += NBPG)
1842 				*pte++ &= ~PG_W;
1843 		} else {
1844 			/* in MMU: take away write bits from MMU PTEs */
1845 			if (
1846 #ifdef notdef
1847 			    vactype != VAC_NONE &&
1848 #endif
1849 			    pm->pm_ctx) {
1850 				register int tpte;
1851 
1852 				/*
1853 				 * Flush cache so that any existing cache
1854 				 * tags are updated.  This is really only
1855 				 * needed for PTEs that lose PG_W.
1856 				 */
1857 				setcontext(pm->pm_ctxnum);
1858 				for (; va < nva; va += NBPG) {
1859 					tpte = getpte(va);
1860 					pmap_stats.ps_npg_prot_all++;
1861 					if (tpte & PG_W) {
1862 						pmap_stats.ps_npg_prot_actual++;
1863 						cache_flush_page(va);
1864 						setpte(va, tpte & ~PG_W);
1865 					}
1866 				}
1867 			} else {
1868 				register int pteva;
1869 
1870 				/*
1871 				 * No context, hence not cached;
1872 				 * just update PTEs.
1873 				 */
1874 				setcontext(0);
1875 				/* XXX use per-cpu pteva? */
1876 				setsegmap(0, pmeg);
1877 				pteva = VA_VPG(va) * NBPG;
1878 				for (; va < nva; pteva += NBPG, va += NBPG)
1879 					setpte(pteva, getpte(pteva) & ~PG_W);
1880 			}
1881 		}
1882 	}
1883 	simple_unlock(&pm->pm_lock);
1884 	splx(s);
1885 }
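
/*
 * Illustrative sketch (editor's addition, not part of the original
 * source and not compiled): the two cases described in the comment
 * above pmap_protect, as a hypothetical MI caller might exercise
 * them.  The pmap pointer and the one-page range are assumptions.
 */
#ifdef notdef
void
example_protect(pm, va)
	register struct pmap *pm;
	vm_offset_t va;
{

	/* read/write -> read-only: PG_W is cleared in MMU or soft PTEs */
	pmap_protect(pm, va, va + NBPG, VM_PROT_READ);

	/* no access at all: pmap_protect simply calls pmap_remove */
	pmap_protect(pm, va, va + NBPG, VM_PROT_NONE);
}
#endif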
1886 
1887 /*
1888  * Change the protection and/or wired status of the given (MI) virtual page.
1889  * XXX: should have separate function (or flag) telling whether only wiring
1890  * is changing.
1891  */
1892 void
1893 pmap_changeprot(pm, va, prot, wired)
1894 	register struct pmap *pm;
1895 	register vm_offset_t va;
1896 	vm_prot_t prot;
1897 	int wired;
1898 {
1899 	register int vseg, tpte, newprot, pmeg, ctx, i, s;
1900 
1901 #ifdef DEBUG
1902 	if (pmapdebug & PDB_CHANGEPROT)
1903 		printf("pmap_changeprot(%x, %x, %x, %x)\n",
1904 		    pm, va, prot, wired);
1905 #endif
1906 
1907 	write_user_windows();	/* paranoia */
1908 
1909 	if (pm == kernel_pmap)
1910 		newprot = prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S;
1911 	else
1912 		newprot = prot & VM_PROT_WRITE ? PG_W : 0;
1913 	vseg = VA_VSEG(va);
1914 	s = splpmap();		/* conservative */
1915 	pmap_stats.ps_changeprots++;
1916 
1917 	/* update PTEs in software or hardware */
1918 	if ((pmeg = pm->pm_segmap[vseg]) == seginval) {
1919 		register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];
1920 
1921 		/* update in software */
1922 		if ((*pte & PG_PROT) == newprot)
1923 			goto useless;
1924 		*pte = (*pte & ~PG_PROT) | newprot;
1925 	} else {
1926 		/* update in hardware */
1927 		ctx = getcontext();
1928 		if (pm->pm_ctx) {
1929 			/* use current context; flush writeback cache */
1930 			setcontext(pm->pm_ctxnum);
1931 			tpte = getpte(va);
1932 			if ((tpte & PG_PROT) == newprot)
1933 				goto useless;
1934 			if (vactype == VAC_WRITEBACK &&
1935 			    (newprot & PG_W) == 0 &&
1936 			    (tpte & (PG_W | PG_NC)) == PG_W)
1937 				cache_flush_page((int)va);
1938 		} else {
1939 			setcontext(0);
1940 			/* XXX use per-cpu va? */
1941 			setsegmap(0, pmeg);
1942 			va = VA_VPG(va);
1943 			tpte = getpte(va);
1944 			if ((tpte & PG_PROT) == newprot)
1945 				goto useless;
1946 		}
1947 		tpte = (tpte & ~PG_PROT) | newprot;
1948 		setpte(va, tpte);
1949 		setcontext(ctx);
1950 	}
1951 	splx(s);
1952 	return;
1953 
1954 useless:
1955 	/* only wiring changed, and we ignore wiring */
1956 	pmap_stats.ps_useless_changeprots++;
1957 	splx(s);
1958 }
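
/*
 * Illustrative sketch (editor's addition, not compiled): the
 * protection bits pmap_changeprot computes above, restated as a
 * hypothetical helper.  Kernel mappings always keep PG_S and gain
 * PG_W only when VM_PROT_WRITE is requested; user mappings get PG_W
 * alone, or no protection bits at all.
 */
#ifdef notdef
int
example_newprot(pm, prot)
	register struct pmap *pm;
	vm_prot_t prot;
{

	if (pm == kernel_pmap)
		return (prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S);
	return (prot & VM_PROT_WRITE ? PG_W : 0);
}
#endif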
1959 
1960 /*
1961  * Insert (MI) physical page pa at virtual address va in the given pmap.
1962  * NB: the pa parameter includes type bits PMAP_OBIO, PMAP_NC as necessary.
1963  *
1964  * If pa is not in the `managed' range it will not be `bank mapped'.
1965  * This works during bootstrap only because the first 4MB happens to
1966  * map one-to-one.
1967  *
1968  * There may already be something else there, or we might just be
1969  * changing protections and/or wiring on an existing mapping.
1970  *	XXX	should have different entry points for changing!
1971  */
1972 void
1973 pmap_enter(pm, va, pa, prot, wired)
1974 	register struct pmap *pm;
1975 	vm_offset_t va, pa;
1976 	vm_prot_t prot;
1977 	int wired;
1978 {
1979 	register struct pvlist *pv;
1980 	register int pteproto, ctx;
1981 
1982 	if (pm == NULL)
1983 		return;
1984 #ifdef DEBUG
1985 	if (pmapdebug & PDB_ENTER)
1986 		printf("pmap_enter(%x, %x, %x, %x, %x)\n",
1987 		    pm, va, pa, prot, wired);
1988 #endif
1989 
1990 	pteproto = PG_V | ((pa & PMAP_TNC) << PG_TNC_SHIFT);
1991 	pa &= ~PMAP_TNC;
1992 	/*
1993 	 * Set up prototype for new PTE.  Cannot set PG_NC from PV_NC yet
1994 	 * since the pvlist no-cache bit might change as a result of the
1995 	 * new mapping.
1996 	 */
1997 	if (managed(pa)) {
1998 		pteproto |= SWTOHW(atop(pa));
1999 		pv = pvhead(pa);
2000 	} else {
2001 		pteproto |= atop(pa) & PG_PFNUM;
2002 		pv = NULL;
2003 	}
2004 	if (prot & VM_PROT_WRITE)
2005 		pteproto |= PG_W;
2006 
2007 	ctx = getcontext();
2008 	if (pm == kernel_pmap)
2009 		pmap_enk(pm, va, prot, wired, pv, pteproto | PG_S);
2010 	else
2011 		pmap_enu(pm, va, prot, wired, pv, pteproto);
2012 	setcontext(ctx);
2013 }
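
/*
 * Illustrative sketch (editor's addition, not compiled): entering a
 * device mapping with the PMAP_OBIO and PMAP_NC type bits or'ed into
 * the physical address, as the comment above pmap_enter describes.
 * The virtual and physical addresses are hypothetical.
 */
#ifdef notdef
void
example_enter_device(va, pa)
	vm_offset_t va, pa;
{

	/* map an on-board I/O page, uncached, read/write, wired */
	pmap_enter(kernel_pmap, va, pa | PMAP_OBIO | PMAP_NC,
	    VM_PROT_READ | VM_PROT_WRITE, 1);
}
#endif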
2014 
2015 /* enter new (or change existing) kernel mapping */
2016 pmap_enk(pm, va, prot, wired, pv, pteproto)
2017 	register struct pmap *pm;
2018 	vm_offset_t va;
2019 	vm_prot_t prot;
2020 	int wired;
2021 	register struct pvlist *pv;
2022 	register int pteproto;
2023 {
2024 	register int vseg, tpte, pmeg, i, s;
2025 
2026 	vseg = VA_VSEG(va);
2027 	s = splpmap();		/* XXX way too conservative */
2028 	if (pm->pm_segmap[vseg] != seginval &&
2029 	    (tpte = getpte(va)) & PG_V) {
2030 		register int addr = tpte & PG_PFNUM;
2031 
2032 		/* old mapping exists */
2033 		if (addr == (pteproto & PG_PFNUM)) {
2034 			/* just changing protection and/or wiring */
2035 			splx(s);
2036 			pmap_changeprot(pm, va, prot, wired);
2037 			return;
2038 		}
2039 
2040 /*printf("pmap_enk: changing existing va=>pa entry\n");*/
2041 		/*
2042 		 * Switcheroo: changing pa for this va.
2043 		 * If old pa was managed, remove from pvlist.
2044 		 * If old page was cached, flush cache.
2045 		 */
2046 		addr = ptoa(HWTOSW(addr));
2047 		if (managed(addr))
2048 			pv_unlink(pvhead(addr), pm, va);
2049 		if (
2050 #ifdef notdef
2051 		    vactype != VAC_NONE &&
2052 #endif
2053 		    (tpte & PG_NC) == 0) {
2054 			setcontext(0);	/* ??? */
2055 			cache_flush_page((int)va);
2056 		}
2057 	} else {
2058 		/* adding new entry */
2059 		pm->pm_npte[vseg]++;
2060 	}
2061 
2062 	/*
2063 	 * If the new mapping is for a managed PA, enter into pvlist.
2064 	 * Note that the mapping for a malloc page will always be
2065 	 * unique (hence will never cause a second call to malloc).
2066 	 */
2067 	if (pv != NULL)
2068 		pteproto |= pv_link(pv, pm, va);
2069 
2070 	pmeg = pm->pm_segmap[vseg];
2071 	if (pmeg == seginval) {
2072 		register int tva;
2073 
2074 		/*
2075 		 * Allocate an MMU entry now (on locked list),
2076 		 * and map it into every context.  Set all its
2077 		 * PTEs invalid (we will then overwrite one, but
2078 		 * this is more efficient than looping twice).
2079 		 */
2080 #ifdef DEBUG
2081 		if (pm->pm_ctx == NULL || pm->pm_ctxnum != 0)
2082 			panic("pmap_enk: kern seg but no kern ctx");
2083 #endif
2084 		pmeg = me_alloc(&me_locked, pm, vseg)->me_pmeg;
2085 		pm->pm_segmap[vseg] = pmeg;
2086 		i = ncontext - 1;
2087 		do {
2088 			setcontext(i);
2089 			setsegmap(va, pmeg);
2090 		} while (--i >= 0);
2091 
2092 		/* set all PTEs to invalid, then overwrite one PTE below */
2093 		tva = VA_ROUNDDOWNTOSEG(va);
2094 		i = NPTESG;
2095 		do {
2096 			setpte(tva, 0);
2097 			tva += NBPG;
2098 		} while (--i > 0);
2099 	}
2100 
2101 	/* ptes kept in hardware only */
2102 	setpte(va, pteproto);
2103 	splx(s);
2104 }
2105 
2106 /* enter new (or change existing) user mapping */
2107 pmap_enu(pm, va, prot, wired, pv, pteproto)
2108 	register struct pmap *pm;
2109 	vm_offset_t va;
2110 	vm_prot_t prot;
2111 	int wired;
2112 	register struct pvlist *pv;
2113 	register int pteproto;
2114 {
2115 	register int vseg, *pte, tpte, pmeg, i, s, doflush;
2116 
2117 	write_user_windows();		/* XXX conservative */
2118 	vseg = VA_VSEG(va);
2119 	s = splpmap();			/* XXX conservative */
2120 
2121 	/*
2122 	 * If there is no space in which the PTEs can be written
2123 	 * while they are not in the hardware, this must be a new
2124 	 * virtual segment.  Get PTE space and count the segment.
2125 	 *
2126 	 * TO SPEED UP CTX ALLOC, PUT SEGMENT BOUNDS STUFF HERE
2127 	 * AND IN pmap_rmu()
2128 	 */
2129 retry:
2130 	pte = pm->pm_pte[vseg];
2131 	if (pte == NULL) {
2132 		/* definitely a new mapping */
2133 		register int size = NPTESG * sizeof *pte;
2134 
2135 		pte = (int *)malloc((u_long)size, M_VMPMAP, M_WAITOK);
2136 		if (pm->pm_pte[vseg] != NULL) {
2137 printf("pmap_enter: pte filled during sleep\n");	/* can this happen? */
2138 			free((caddr_t)pte, M_VMPMAP);
2139 			goto retry;
2140 		}
2141 #ifdef DEBUG
2142 		if (pm->pm_segmap[vseg] != seginval)
2143 			panic("pmap_enter: new ptes, but not seginval");
2144 #endif
2145 		bzero((caddr_t)pte, size);
2146 		pm->pm_pte[vseg] = pte;
2147 		pm->pm_npte[vseg] = 1;
2148 	} else {
2149 		/* might be a change: fetch old pte */
2150 		doflush = 0;
2151 		if ((pmeg = pm->pm_segmap[vseg]) == seginval)
2152 			tpte = pte[VA_VPG(va)];	/* software pte */
2153 		else {
2154 			if (pm->pm_ctx) {	/* hardware pte */
2155 				setcontext(pm->pm_ctxnum);
2156 				tpte = getpte(va);
2157 				doflush = 1;
2158 			} else {
2159 				setcontext(0);
2160 				/* XXX use per-cpu pteva? */
2161 				setsegmap(0, pmeg);
2162 				tpte = getpte(VA_VPG(va) * NBPG);
2163 			}
2164 		}
2165 		if (tpte & PG_V) {
2166 			register int addr = tpte & PG_PFNUM;
2167 
2168 			/* old mapping exists */
2169 			if (addr == (pteproto & PG_PFNUM)) {
2170 				/* just changing prot and/or wiring */
2171 				splx(s);
2172 				/* caller should call this directly: */
2173 				pmap_changeprot(pm, va, prot, wired);
2174 				return;
2175 			}
2176 			/*
2177 			 * Switcheroo: changing pa for this va.
2178 			 * If old pa was managed, remove from pvlist.
2179 			 * If old page was cached, flush cache.
2180 			 */
2181 /*printf("%s[%d]: pmap_enu: changing existing va(%x)=>pa entry\n",
2182 curproc->p_comm, curproc->p_pid, va);*/
2183 			addr = ptoa(HWTOSW(addr));
2184 			if (managed(addr))
2185 				pv_unlink(pvhead(addr), pm, va);
2186 			if (
2187 #ifdef notdef
2188 			    vactype != VAC_NONE &&
2189 #endif
2190 			    doflush && (tpte & PG_NC) == 0)
2191 				cache_flush_page((int)va);
2192 		} else {
2193 			/* adding new entry */
2194 			pm->pm_npte[vseg]++;
2195 		}
2196 	}
2197 
2198 	if (pv != NULL)
2199 		pteproto |= pv_link(pv, pm, va);
2200 
2201 	/*
2202 	 * Update hardware or software PTEs (whichever are active).
2203 	 */
2204 	if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
2205 		/* ptes are in hardware */
2206 		if (pm->pm_ctx)
2207 			setcontext(pm->pm_ctxnum);
2208 		else {
2209 			setcontext(0);
2210 			/* XXX use per-cpu pteva? */
2211 			setsegmap(0, pmeg);
2212 			va = VA_VPG(va) * NBPG;
2213 		}
2214 		setpte(va, pteproto);
2215 	}
2216 	/* update software copy */
2217 	pte += VA_VPG(va);
2218 	*pte = pteproto;
2219 
2220 	splx(s);
2221 }
2222 
2223 /*
2224  * Change the wiring attribute for a map/virtual-address pair.
2225  */
2226 /* ARGSUSED */
2227 void
2228 pmap_change_wiring(pm, va, wired)
2229 	struct pmap *pm;
2230 	vm_offset_t va;
2231 	int wired;
2232 {
2233 
2234 	pmap_stats.ps_useless_changewire++;
2235 }
2236 
2237 /*
2238  * Extract the physical page address associated
2239  * with the given map/virtual_address pair.
2240  * GRR, the vm code knows; we should not have to do this!
2241  */
2242 vm_offset_t
2243 pmap_extract(pm, va)
2244 	register struct pmap *pm;
2245 	vm_offset_t va;
2246 {
2247 	register int tpte;
2248 	register int vseg;
2249 
2250 	if (pm == NULL) {
2251 		printf("pmap_extract: null pmap\n");
2252 		return (0);
2253 	}
2254 	vseg = VA_VSEG(va);
2255 	if (pm->pm_segmap[vseg] != seginval) {
2256 		register int ctx = getcontext();
2257 
2258 		if (pm->pm_ctx) {
2259 			setcontext(pm->pm_ctxnum);
2260 			tpte = getpte(va);
2261 		} else {
2262 			setcontext(0);
2263 			tpte = getpte(VA_VPG(va) * NBPG);
2264 		}
2265 		setcontext(ctx);
2266 	} else {
2267 		register int *pte = pm->pm_pte[vseg];
2268 
2269 		if (pte == NULL) {
2270 			printf("pmap_extract: invalid vseg\n");
2271 			return (0);
2272 		}
2273 		tpte = pte[VA_VPG(va)];
2274 	}
2275 	if ((tpte & PG_V) == 0) {
2276 		printf("pmap_extract: invalid pte\n");
2277 		return (0);
2278 	}
2279 	tpte &= PG_PFNUM;
2280 	tpte = HWTOSW(tpte);
2281 	return ((tpte << PGSHIFT) | (va & PGOFSET));
2282 }
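
/*
 * Illustrative sketch (editor's addition, not compiled): pmap_extract
 * undoes pmap_enter, returning the physical page plus the offset
 * within the page.  It assumes a page-aligned, managed physical
 * address and an otherwise unused kernel virtual address.
 */
#ifdef notdef
void
example_extract(va, pa)
	vm_offset_t va, pa;
{

	pmap_enter(kernel_pmap, va, pa, VM_PROT_READ, 1);
	if (pmap_extract(kernel_pmap, va + 0x10) != pa + 0x10)
		panic("example_extract: mismatch");
}
#endif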
2283 
2284 /*
2285  * Copy the range specified by src_addr/len
2286  * from the source map to the range dst_addr/len
2287  * in the destination map.
2288  *
2289  * This routine is only advisory and need not do anything.
2290  */
2291 /* ARGSUSED */
2292 void
2293 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2294 	struct pmap *dst_pmap, *src_pmap;
2295 	vm_offset_t dst_addr;
2296 	vm_size_t len;
2297 	vm_offset_t src_addr;
2298 {
2299 }
2300 
2301 /*
2302  * Require that all active physical maps contain no
2303  * incorrect entries NOW.  [This update includes
2304  * forcing updates of any address map caching.]
2305  */
2306 void
2307 pmap_update()
2308 {
2309 }
2310 
2311 /*
2312  * Garbage collects the physical map system for
2313  * pages which are no longer used.
2314  * Success need not be guaranteed -- that is, there
2315  * may well be pages which are not referenced, but
2316  * others may be collected.
2317  * Called by the pageout daemon when pages are scarce.
2318  */
2319 /* ARGSUSED */
2320 void
2321 pmap_collect(pm)
2322 	struct pmap *pm;
2323 {
2324 }
2325 
2326 /*
2327  * Clear the modify bit for the given physical page.
2328  */
2329 void
2330 pmap_clear_modify(pa)
2331 	register vm_offset_t pa;
2332 {
2333 	register struct pvlist *pv;
2334 
2335 	if (managed(pa)) {
2336 		pv = pvhead(pa);
2337 		(void) pv_syncflags(pv);
2338 		pv->pv_flags &= ~PV_MOD;
2339 	}
2340 }
2341 
2342 /*
2343  * Tell whether the given physical page has been modified.
2344  */
2345 int
2346 pmap_is_modified(pa)
2347 	register vm_offset_t pa;
2348 {
2349 	register struct pvlist *pv;
2350 
2351 	if (managed(pa)) {
2352 		pv = pvhead(pa);
2353 		if (pv->pv_flags & PV_MOD || pv_syncflags(pv) & PV_MOD)
2354 			return (1);
2355 	}
2356 	return (0);
2357 }
2358 
2359 /*
2360  * Clear the reference bit for the given physical page.
2361  */
2362 void
2363 pmap_clear_reference(pa)
2364 	vm_offset_t pa;
2365 {
2366 	register struct pvlist *pv;
2367 
2368 	if (managed(pa)) {
2369 		pv = pvhead(pa);
2370 		(void) pv_syncflags(pv);
2371 		pv->pv_flags &= ~PV_REF;
2372 	}
2373 }
2374 
2375 /*
2376  * Tell whether the given physical page has been referenced.
2377  */
2378 int
2379 pmap_is_referenced(pa)
2380 	vm_offset_t pa;
2381 {
2382 	register struct pvlist *pv;
2383 
2384 	if (managed(pa)) {
2385 		pv = pvhead(pa);
2386 		if (pv->pv_flags & PV_REF || pv_syncflags(pv) & PV_REF)
2387 			return (1);
2388 	}
2389 	return (0);
2390 }
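
/*
 * Illustrative sketch (editor's addition, not compiled): the usual
 * pageout-style use of the reference and modify bits kept in the
 * pvlist and synchronized from the hardware PTEs by pv_syncflags().
 * The physical address and the policy are hypothetical.
 */
#ifdef notdef
int
example_page_idle(pa)
	register vm_offset_t pa;
{

	if (pmap_is_referenced(pa)) {
		/* recently used: clear the bit and give it another pass */
		pmap_clear_reference(pa);
		return (0);
	}
	/* unreferenced; a dirty page must be cleaned before reuse */
	return (pmap_is_modified(pa) ? 0 : 1);
}
#endif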
2391 
2392 /*
2393  * Make the specified pages (by pmap, offset) pageable (or not) as requested.
2394  *
2395  * A page which is not pageable may not take a fault; therefore, its page
2396  * table entry must remain valid for the duration (or at least, the trap
2397  * handler must not call vm_fault).
2398  *
2399  * This routine is merely advisory; pmap_enter will specify that these pages
2400  * are to be wired down (or not) as appropriate.
2401  */
2402 /* ARGSUSED */
2403 void
2404 pmap_pageable(pm, start, end, pageable)
2405 	struct pmap *pm;
2406 	vm_offset_t start, end;
2407 	int pageable;
2408 {
2409 }
2410 
2411 /*
2412  * Fill the given MI physical page with zero bytes.
2413  *
2414  * We avoid stomping on the cache.
2415  * XXX	might be faster to use destination's context and allow cache to fill?
2416  */
2417 void
2418 pmap_zero_page(pa)
2419 	register vm_offset_t pa;
2420 {
2421 	register caddr_t va;
2422 	register int pte;
2423 
2424 	if (managed(pa)) {
2425 		/*
2426 		 * The following might not be necessary since the page
2427 		 * is being cleared because it is about to be allocated,
2428 		 * i.e., is in use by no one.
2429 		 */
2430 #if 1
2431 #ifdef notdef
2432 		if (vactype != VAC_NONE)
2433 #endif
2434 			pv_flushcache(pvhead(pa));
2435 #endif
2436 		pte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(pa));
2437 	} else
2438 		pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM);
2439 
2440 	va = vpage[0];
2441 	setpte(va, pte);
2442 	qzero(va, NBPG);
2443 	setpte(va, 0);
2444 }
2445 
2446 /*
2447  * Copy the given MI physical source page to its destination.
2448  *
2449  * We avoid stomping on the cache as above (with same `XXX' note).
2450  * We must first flush any write-back cache for the source page.
2451  * We go ahead and stomp on the kernel's virtual cache for the
2452  * source page, since the cache can fetch from memory MUCH faster
2453  * than the processor can on its own.
2454  */
2455 void
2456 pmap_copy_page(src, dst)
2457 	vm_offset_t src, dst;
2458 {
2459 	register caddr_t sva, dva;
2460 	register int spte, dpte;
2461 
2462 	if (managed(src)) {
2463 		if (vactype == VAC_WRITEBACK)
2464 			pv_flushcache(pvhead(src));
2465 		spte = PG_V | PG_S | SWTOHW(atop(src));
2466 	} else
2467 		spte = PG_V | PG_S | (atop(src) & PG_PFNUM);
2468 
2469 	if (managed(dst)) {
2470 		/* similar `might not be necessary' comment applies */
2471 #if 1
2472 #ifdef notdef
2473 		if (vactype != VAC_NONE)
2474 #endif
2475 			pv_flushcache(pvhead(dst));
2476 #endif
2477 		dpte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(dst));
2478 	} else
2479 		dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM);
2480 
2481 	sva = vpage[0];
2482 	dva = vpage[1];
2483 	setpte(sva, spte);
2484 	setpte(dva, dpte);
2485 	qcopy(sva, dva, NBPG);	/* loads cache, so we must ... */
2486 	cache_flush_page((int)sva);
2487 	setpte(sva, 0);
2488 	setpte(dva, 0);
2489 }
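
/*
 * Illustrative sketch (editor's addition, not compiled): how the MI VM
 * code typically uses the two routines above, zeroing a freshly
 * allocated page and copying one page frame to another.  The vm_page
 * pointers are hypothetical and VM_PAGE_TO_PHYS is assumed to be the
 * usual vm_page.h accessor.
 */
#ifdef notdef
void
example_zero_and_copy(srcpg, dstpg)
	register vm_page_t srcpg, dstpg;
{

	/* scrub the destination frame through a temporary uncached window */
	pmap_zero_page(VM_PAGE_TO_PHYS(dstpg));

	/* copy src to dst, flushing any write-back lines for the source */
	pmap_copy_page(VM_PAGE_TO_PHYS(srcpg), VM_PAGE_TO_PHYS(dstpg));
}
#endif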
2490 
2491 /*
2492  * Turn a cdevsw d_mmap value into a byte address for pmap_enter.
2493  * XXX	this should almost certainly be done differently, and
2494  *	elsewhere, or even not at all
2495  */
2496 vm_offset_t
2497 pmap_phys_address(x)
2498 	int x;
2499 {
2500 
2501 	return (x);
2502 }
2503 
2504 /*
2505  * Turn off cache for a given (va, number of pages).
2506  *
2507  * We just assert PG_NC for each PTE; the addresses must reside
2508  * in locked kernel space.  A cache flush is also done.
2509  */
2510 kvm_uncache(va, npages)
2511 	register caddr_t va;
2512 	register int npages;
2513 {
2514 	register int pte;
2515 
2516 	for (; --npages >= 0; va += NBPG) {
2517 		pte = getpte(va);
2518 		if ((pte & PG_V) == 0)
2519 			panic("kvm_uncache !pg_v");
2520 		pte |= PG_NC;
2521 		setpte(va, pte);
2522 		cache_flush_page((int)va);
2523 	}
2524 }
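
/*
 * Illustrative sketch (editor's addition, not compiled): marking a
 * wired, page-aligned kernel buffer uncacheable before sharing it
 * with a device, rounding the length up to whole pages.  The buffer
 * and its length are hypothetical.
 */
#ifdef notdef
void
example_uncache(buf, len)
	register caddr_t buf;
	register int len;
{

	kvm_uncache(buf, (len + NBPG - 1) / NBPG);
}
#endif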
2525 
2526 /*
2527  * For /dev/mem.
2528  */
2529 int
2530 pmap_enter_hw(pm, va, pa, prot, wired)
2531 	register struct pmap *pm;
2532 	vm_offset_t va, pa;
2533 	vm_prot_t prot;
2534 	int wired;
2535 {
2536 	register struct memarr *ma;
2537 	register int n;
2538 	register u_int t;
2539 
2540 	if (pa >= MAXMEM)				/* ??? */
2541 		return (EFAULT);
2542 	for (ma = pmemarr, n = npmemarr; --n >= 0; ma++) {
2543 		t = (u_int)pa - ma->addr;
2544 		if (t < ma->len)
2545 			goto ok;
2546 	}
2547 	return (EFAULT);
2548 ok:
2549 	pa = (HWTOSW(atop(pa)) << PGSHIFT) | (pa & PGOFSET);
2550 	if (pa >= vm_first_phys + vm_num_phys)		/* ??? */
2551 		return (EFAULT);
2552 
2553 	pmap_enter(pm, va, pa, prot, wired);
2554 	return (0);
2555 }
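
/*
 * Illustrative sketch (editor's addition, not compiled): pmap_enter_hw
 * is for callers, such as a /dev/mem mmap path, that hold a raw
 * hardware physical address.  It validates the address against
 * pmemarr, converts it to the software page numbering with HWTOSW,
 * and then falls through to pmap_enter.  The arguments here are
 * hypothetical.
 */
#ifdef notdef
int
example_map_physmem(va, hwpa)
	vm_offset_t va, hwpa;
{

	return (pmap_enter_hw(kernel_pmap, va, hwpa,
	    VM_PROT_READ | VM_PROT_WRITE, 1));
}
#endif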
2556