1 /*
2  * Copyright (c) 1992, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * This software was developed by the Computer Systems Engineering group
6  * at Lawrence Berkeley Laboratory under DARPA contract BG 91-66 and
7  * contributed to Berkeley.
8  *
9  * All advertising materials mentioning features or use of this software
10  * must display the following acknowledgement:
11  *	This product includes software developed by the University of
12  *	California, Lawrence Berkeley Laboratory.
13  *
14  * %sccs.include.redist.c%
15  *
16  *	@(#)pmap.c	8.4 (Berkeley) 02/05/94
17  *
18  * from: $Header: pmap.c,v 1.43 93/10/31 05:34:56 torek Exp $
19  */
20 
21 /*
22  * SPARC physical map management code.
23  * Does not function on multiprocessors (yet).
24  */
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/device.h>
29 #include <sys/proc.h>
30 #include <sys/malloc.h>
31 
32 #include <vm/vm.h>
33 #include <vm/vm_kern.h>
34 #include <vm/vm_prot.h>
35 #include <vm/vm_page.h>
36 
37 #include <machine/autoconf.h>
38 #include <machine/bsd_openprom.h>
39 #include <machine/cpu.h>
40 #include <machine/ctlreg.h>
41 
42 #include <sparc/sparc/asm.h>
43 #include <sparc/sparc/cache.h>
44 #include <sparc/sparc/vaddrs.h>
45 
46 #ifdef DEBUG
47 #define PTE_BITS "\20\40V\37W\36S\35NC\33IO\32U\31M"
48 #endif
49 
50 extern struct promvec *promvec;
51 
52 /*
53  * The SPARCstation offers us the following challenges:
54  *
55  *   1. A virtual address cache.  This is, strictly speaking, not
56  *	part of the architecture, but the code below assumes one.
57  *	This is a write-through cache on the 4c and a write-back cache
58  *	on others.
59  *
60  *   2. An MMU that acts like a cache.  There is not enough space
61  *	in the MMU to map everything all the time.  Instead, we need
62  *	to load MMU with the `working set' of translations for each
63  *	process.
64  *
65  *   3.	Segmented virtual and physical spaces.  The upper 12 bits of
66  *	a virtual address (the virtual segment) index a segment table,
67  *	giving a physical segment.  The physical segment selects a
68  *	`Page Map Entry Group' (PMEG) and the virtual page number---the
69  *	next 5 or 6 bits of the virtual address---select the particular
70  *	`Page Map Entry' for the page.  We call the latter a PTE and
71  *	call each Page Map Entry Group a pmeg (for want of a better name).
72  *
73  *	Since there are no valid bits in the segment table, the only way
74  *	to have an invalid segment is to make one full pmeg of invalid PTEs.
75  *	We use the last one (since the ROM does as well).
76  *
77  *   4. Discontiguous physical pages.  The Mach VM expects physical pages
78  *	to be in one sequential lump.
79  *
80  *   5. The MMU is always on: it is not possible to disable it.  This is
81  *	mainly a startup hassle.
82  */
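/*
 * Illustrative sketch (not part of the build): how a virtual address
 * is carved up by the VA_VSEG/VA_VPG macros used throughout this file.
 * On the 4c that is a 12-bit virtual segment number and a 6-bit
 * page-within-segment number above the page offset.
 */
#ifdef notdef
static void
va_split_example(va)
	register u_int va;
{

	printf("va %x: vseg %x vpg %x\n", va, VA_VSEG(va), VA_VPG(va));
}
#endif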
83 
84 struct pmap_stats {
85 	int	ps_unlink_pvfirst;	/* # of pv_unlinks on head */
86 	int	ps_unlink_pvsearch;	/* # of pv_unlink searches */
87 	int	ps_changeprots;		/* # of calls to changeprot */
88 	int	ps_useless_changeprots;	/* # of changeprots for wiring */
89 	int	ps_enter_firstpv;	/* pv heads entered */
90 	int	ps_enter_secondpv;	/* pv nonheads entered */
91 	int	ps_useless_changewire;	/* useless wiring changes */
92 	int	ps_npg_prot_all;	/* # of active pages protected */
93 	int	ps_npg_prot_actual;	/* # pages actually affected */
94 } pmap_stats;
95 
96 #ifdef DEBUG
97 #define	PDB_CREATE	0x0001
98 #define	PDB_DESTROY	0x0002
99 #define	PDB_REMOVE	0x0004
100 #define	PDB_CHANGEPROT	0x0008
101 #define	PDB_ENTER	0x0010
102 
103 #define	PDB_MMU_ALLOC	0x0100
104 #define	PDB_MMU_STEAL	0x0200
105 #define	PDB_CTX_ALLOC	0x0400
106 #define	PDB_CTX_STEAL	0x0800
107 int	pmapdebug = 0x0;
108 #endif
109 
110 #define	splpmap() splimp()
111 
112 /*
113  * First and last managed physical addresses.
114  */
115 #if 0
116 vm_offset_t	vm_first_phys, vm_last_phys;
117 #define	managed(pa)	((pa) >= vm_first_phys && (pa) < vm_last_phys)
118 #else
119 vm_offset_t	vm_first_phys, vm_num_phys;
120 #define	managed(pa)	((unsigned)((pa) - vm_first_phys) < vm_num_phys)
121 #endif
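/*
 * Sketch (not compiled): the single unsigned comparison in managed()
 * covers both bounds at once, because a pa below vm_first_phys wraps
 * to a huge unsigned value.  An equivalent two-test form:
 */
#ifdef notdef
static int
managed_example(pa)
	register vm_offset_t pa;
{

	return (pa >= vm_first_phys && pa - vm_first_phys < vm_num_phys);
}
#endif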
122 
123 /*
124  * For each managed physical page, there is a list of all currently
125  * valid virtual mappings of that page.  Since there is usually one
126  * (or zero) mapping per page, the table begins with an initial entry,
127  * rather than a pointer; this head entry is empty iff its pv_pmap
128  * field is NULL.
129  *
130  * Note that these are kept per machine-independent page (so there may
131  * be only one entry for every two hardware pages).  Since the virtual
132  * address is aligned on a page boundary, the low order bits are free
133  * for storing flags.  Only the head of each list has flags.
134  *
135  * THIS SHOULD BE PART OF THE CORE MAP
136  */
137 struct pvlist {
138 	struct	pvlist *pv_next;	/* next pvlist, if any */
139 	struct	pmap *pv_pmap;		/* pmap of this va */
140 	int	pv_va;			/* virtual address */
141 	int	pv_flags;		/* flags (below) */
142 };
143 
144 /*
145  * Flags in pv_flags.  Note that PV_MOD must be 1 and PV_REF must be 2
146  * since they must line up with the bits in the hardware PTEs (see pte.h).
147  */
148 #define PV_MOD	1		/* page modified */
149 #define PV_REF	2		/* page referenced */
150 #define PV_NC	4		/* page cannot be cached */
151 /* #define PV_ALLF	7		   all of the above (unused) */
152 
153 struct pvlist *pv_table;	/* array of entries, one per physical page */
154 
155 #define pvhead(pa)	(&pv_table[atop((pa) - vm_first_phys)])
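/*
 * Sketch (not compiled): the usual pairing of managed() and pvhead(),
 * as used below in me_alloc()/me_free() to record ref/mod bits.
 */
#ifdef notdef
static void
pvhead_example(pa)
	register vm_offset_t pa;
{

	if (managed(pa))
		pvhead(pa)->pv_flags |= PV_REF;
}
#endif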
156 
157 /*
158  * Each virtual segment within each pmap is either valid or invalid.
159  * It is valid if pm_npte[VA_VSEG(va)] is not 0.  This does not mean
160  * it is in the MMU, however; that is true iff pm_segmap[VA_VSEG(va)]
161  * does not point to the invalid PMEG.
162  *
163  * If a virtual segment is valid and loaded, the correct PTEs appear
164  * in the MMU only.  If it is valid and unloaded, the correct PTEs appear
165  * in the pm_pte[VA_VSEG(va)] only.  However, some effort is made to keep
166  * the software copies consistent enough with the MMU so that libkvm can
167  * do user address translations.  In particular, pv_changepte() and
168  * pmap_enu() maintain consistency, while less critical changes are
169  * not maintained.  pm_pte[VA_VSEG(va)] always points to space for those
170  * PTEs, unless this is the kernel pmap, in which case pm_pte[x] is not
171  * used (sigh).
172  *
173  * Each PMEG in the MMU is either free or contains PTEs corresponding to
174  * some pmap and virtual segment.  If it contains some PTEs, it also contains
175  * reference and modify bits that belong in the pv_table.  If we need
176  * to steal a PMEG from some process (if we need one and none are free)
177  * we must copy the ref and mod bits, and update pm_segmap in the other
178  * pmap to show that its virtual segment is no longer in the MMU.
179  *
180  * There are 128 PMEGs in a small Sun-4, of which only a few dozen are
181  * tied down permanently, leaving `about' 100 to be spread among
182  * running processes.  These are managed as an LRU cache.  Before
183  * calling the VM paging code for a user page fault, the fault handler
184  * calls mmu_load(pmap, va) to try to get a set of PTEs put into the
185  * MMU.  mmu_load will check the validity of the segment and tell whether
186  * it did something.
187  *
188  * Since I hate the name PMEG I call this data structure an `mmu entry'.
189  * Each mmuentry is on exactly one of three `usage' lists: free, LRU,
190  * or locked.  The LRU list is for user processes; the locked list is
191  * for kernel entries; both are doubly linked queues headed by `mmuhd's.
192  * The free list is a simple list, headed by a free list pointer.
193  */
194 struct mmuhd {
195 	struct	mmuentry *mh_next;
196 	struct	mmuentry *mh_prev;
197 };
198 struct mmuentry {
199 	struct	mmuentry *me_next;	/* queue (MUST BE FIRST) or next free */
200 	struct	mmuentry *me_prev;	/* queue (MUST BE FIRST) */
201 	struct	pmap *me_pmap;		/* pmap, if in use */
202 	struct	mmuentry *me_pmforw;	/* pmap pmeg chain */
203 	struct	mmuentry **me_pmback;	/* pmap pmeg chain */
204 	u_short	me_vseg;		/* virtual segment number in pmap */
205 	pmeg_t	me_pmeg;		/* hardware PMEG number */
206 };
207 struct mmuentry *mmuentry;	/* allocated in pmap_bootstrap */
208 
209 struct mmuentry *me_freelist;	/* free list (not a queue) */
210 struct mmuhd me_lru = {		/* LRU (user) entries */
211 	(struct mmuentry *)&me_lru, (struct mmuentry *)&me_lru
212 };
213 struct mmuhd me_locked = {	/* locked (kernel) entries */
214 	(struct mmuentry *)&me_locked, (struct mmuentry *)&me_locked
215 };
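/*
 * Sketch (not compiled): the list heads above are circular, so the
 * head itself marks the end of the queue; me_alloc() below relies on
 * this when it takes the head of the LRU list.
 */
#ifdef notdef
static int
me_count_lru()
{
	register struct mmuentry *me;
	register int n = 0;

	for (me = me_lru.mh_next; me != (struct mmuentry *)&me_lru;
	    me = me->me_next)
		n++;
	return (n);
}
#endif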
216 
217 int	seginval;		/* the invalid segment number */
218 
219 /*
220  * A context is simply a small number that dictates which set of 4096
221  * segment map entries the MMU uses.  The Sun 4c has eight such sets.
222  *	These are allotted in an `almost MRU' fashion.
223  *
224  * Each context is either free or attached to a pmap.
225  *
226  * Since the virtual address cache is tagged by context, when we steal
227  * a context we have to flush (that part of) the cache.
228  */
229 union ctxinfo {
230 	union	ctxinfo *c_nextfree;	/* free list (if free) */
231 	struct	pmap *c_pmap;		/* pmap (if busy) */
232 };
233 union ctxinfo *ctxinfo;		/* allocated in pmap_bootstrap */
234 int	ncontext;
235 
236 union	ctxinfo *ctx_freelist;	/* context free list */
237 int	ctx_kick;		/* allocation rover when none free */
238 int	ctx_kickdir;		/* ctx_kick roves both directions */
239 
240 /* XXX need per-cpu vpage[]s (and vmempage, unless we lock in /dev/mem) */
241 caddr_t	vpage[2];		/* two reserved MD virtual pages */
242 caddr_t	vmempage;		/* one reserved MI vpage for /dev/mem */
243 caddr_t vdumppages;		/* 32KB worth of reserved dump pages */
244 
245 struct kpmap kernel_pmap_store;	/* the kernel's pmap */
246 
247 /*
248  * We need to know real physical memory ranges (for /dev/mem).
249  */
250 #define	MA_SIZE	32		/* size of memory descriptor arrays */
251 struct	memarr pmemarr[MA_SIZE];/* physical memory regions */
252 int	npmemarr;		/* number of entries in pmemarr */
253 
254 /*
255  * The following four global variables are set in pmap_bootstrap
256  * for the vm code to find.  This is Wrong.
257  */
258 vm_offset_t	avail_start;	/* first free physical page number */
259 vm_offset_t	avail_end;	/* last free physical page number */
260 vm_offset_t	virtual_avail;	/* first free virtual page number */
261 vm_offset_t	virtual_end;	/* last free virtual page number */
262 
263 /*
264  * Pseudo-functions for mnemonic value.
265  *
266  * NB: setsegmap should be stba for the 4c, but stha works and makes
267  * the code right for the Sun-4 as well (hence the `notyet' versions
268  * below).
269  */
270 #define	getcontext()		lduba(AC_CONTEXT, ASI_CONTROL)
271 #define	setcontext(c)		stba(AC_CONTEXT, ASI_CONTROL, c)
272 #ifdef notyet
273 #define	getsegmap(va)		lduha(va, ASI_SEGMAP)
274 #define	setsegmap(va, pmeg)	stha(va, ASI_SEGMAP, pmeg)
275 #else
276 #define	getsegmap(va)		lduba(va, ASI_SEGMAP)
277 #define	setsegmap(va, pmeg)	stba(va, ASI_SEGMAP, pmeg)
278 #endif
279 
280 #define	getpte(va)		lda(va, ASI_PTE)
281 #define	setpte(va, pte)		sta(va, ASI_PTE, pte)
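/*
 * Sketch (not compiled): PTEs are accessed through alternate-space
 * loads/stores, so a read-modify-write looks like the following
 * (this exact pattern appears in mmu_reservemon() below).
 */
#ifdef notdef
static void
pte_rmw_example(va)
	register u_int va;
{

	setpte(va, getpte(va) | PG_S);	/* make the page supervisor-only */
}
#endif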
282 
283 /*----------------------------------------------------------------*/
284 
285 #ifdef	sun4c
286 /*
287  * Translations from dense (contiguous) pseudo physical addresses
288  * (fed to the VM code, to keep it happy) to sparse (real, hardware)
289  * physical addresses.  We call the former `software' page frame
290  * numbers and the latter `hardware' page frame numbers.  The
291  * translation is done on a `per bank' basis.
292  *
293  * The HWTOSW and SWTOHW macros handle the actual translation.
294  * They are defined as no-ops on Sun-4s.
295  *
296  * SHOULD DO atop AND ptoa DIRECTLY IN THESE MACROS SINCE ALL CALLERS
297  * ALWAYS NEED THAT ANYWAY ... CAN JUST PRECOOK THE TABLES	(TODO)
298  *
299  * Since we cannot use the memory allocated to the ROM monitor, and
300  * this happens to be just under 64K, I have chosen a bank size of
301  * 64K.  This is necessary since all banks must be completely full.
302  * I have also chosen a physical memory limit of 128 MB.  The 4c is
303  * architecturally limited to 256 MB, but 128 MB is more than will
304  * fit on present hardware.
305  *
306  * XXX	FIX THIS: just make all of each bank available and then
307  *	take out the pages reserved to the monitor!!
308  */
309 #define MAXMEM 	(128 * 1024 * 1024)	/* no more than 128 MB phys mem */
310 #define NPGBANK	16			/* 2^4 pages per bank (64K / bank) */
311 #define	BSHIFT	4			/* log2(NPGBANK) */
312 #define BOFFSET	(NPGBANK - 1)
313 #define BTSIZE 	(MAXMEM / NBPG / NPGBANK)
314 
315 int	pmap_dtos[BTSIZE];		/* dense to sparse */
316 int	pmap_stod[BTSIZE];		/* sparse to dense */
317 
318 #define	HWTOSW(pg) (pmap_stod[(pg) >> BSHIFT] | ((pg) & BOFFSET))
319 #define	SWTOHW(pg) (pmap_dtos[(pg) >> BSHIFT] | ((pg) & BOFFSET))
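/*
 * Worked example (sketch, not compiled): with 4K pages and 16-page
 * banks, hardware page 0x123 lies in bank 0x12 at offset 3, so
 * HWTOSW(0x123) is pmap_stod[0x12] | 3.  Spelled out:
 */
#ifdef notdef
static int
hwtosw_example(hwpg)
	register int hwpg;
{

	return (pmap_stod[hwpg >> BSHIFT] | (hwpg & BOFFSET));
}
#endif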
320 
321 /*
322  * Sort a memory array by address.
323  */
324 static void
325 sortm(mp, n)
326 	register struct memarr *mp;
327 	register int n;
328 {
329 	register struct memarr *mpj;
330 	register int i, j;
331 	register u_int addr, len;
332 
333 	/* Insertion sort.  This is O(n^2), but so what? */
334 	for (i = 1; i < n; i++) {
335 		/* save i'th entry */
336 		addr = mp[i].addr;
337 		len = mp[i].len;
338 		/* find j such that i'th entry goes before j'th */
339 		for (j = 0, mpj = mp; j < i; j++, mpj++)
340 			if (addr < mpj->addr)
341 				break;
342 		/* slide up any additional entries */
343 		ovbcopy(mpj, mpj + 1, (i - j) * sizeof(*mp));
344 		mpj->addr = addr;
345 		mpj->len = len;
346 	}
347 }
348 
349 #ifdef DEBUG
350 struct	memarr pmap_ama[MA_SIZE];
351 int	pmap_nama;
352 #define ama pmap_ama
353 #endif
354 
355 /*
356  * init_translations sets up pmap_dtos[] and pmap_stod[], and
357  * returns the number of usable physical pages.
358  */
359 int
360 init_translations()
361 {
362 	register struct memarr *mp;
363 	register int n, nmem;
364 	register u_int vbank = 0, pbank, v, a;
365 	register u_int pages = 0, lost = 0;
366 #ifndef DEBUG
367 	struct memarr ama[MA_SIZE];	/* available memory array */
368 #endif
369 
370 	nmem = makememarr(ama, MA_SIZE, MEMARR_AVAILPHYS);
371 
372 	/*
373 	 * Open Boot supposedly guarantees at least 3 MB free mem at 0;
374 	 * this is where the kernel has been loaded (we certainly hope the
375 	 * kernel is <= 3 MB).  We need the memory array to be sorted, and
376 	 * to start at 0, so that `software page 0' and `hardware page 0'
377 	 * are the same (otherwise the VM reserves the wrong pages for the
378 	 * kernel).
379 	 */
380 	sortm(ama, nmem);
381 	if (ama[0].addr != 0) {
382 		/* cannot panic here; there's no real kernel yet. */
383 		printf("init_translations: no kernel memory?!\n");
384 		callrom();
385 	}
386 #ifdef DEBUG
387 	pmap_nama = nmem;
388 #endif
389 	for (mp = ama; --nmem >= 0; mp++) {
390 		a = mp->addr >> PGSHIFT;
391 		v = mp->len >> PGSHIFT;
392 		if ((n = a & BOFFSET) != 0) {
393 			/* round up to next bank */
394 			n = NPGBANK - n;
395 			if (v < n) {	/* not a whole bank: skip it */
396 				lost += v;
397 				continue;
398 			}
399 			lost += n;	/* lose n pages from front */
400 			a += n;
401 			v -= n;
402 		}
403 		n = v >> BSHIFT;	/* calculate number of banks */
404 		pbank = a >> BSHIFT;	/* and the bank itself */
405 		if (pbank + n >= BTSIZE)
406 			n = BTSIZE - pbank;
407 		pages += n;		/* off by a factor of 2^BSHIFT */
408 		lost += v - (n << BSHIFT);
409 		while (--n >= 0) {
410 			pmap_dtos[vbank] = pbank << BSHIFT;
411 			pmap_stod[pbank] = vbank << BSHIFT;
412 			pbank++;
413 			vbank++;
414 		}
415 	}
416 	/* adjust page count */
417 	pages <<= BSHIFT;
418 #ifdef DEBUG
419 	printf("note: lost %d pages in translation\n", lost);
420 #endif
421 	return (pages);
422 }
423 
424 #else /* sun4c */
425 
426 /*
427  * Pages are physically contiguous, and hardware PFN == software PFN.
428  *
429  * XXX assumes PAGE_SIZE == NBPG (???)
430  */
431 #define	HWTOSW(pg)	(pg)
432 #define	SWTOHW(pg)	(pg)
433 
434 #endif /* sun4c */
435 
436 /* update pv_flags given a valid pte */
437 #define	MR(pte) (((pte) >> PG_M_SHIFT) & (PV_MOD | PV_REF))
438 
439 /*----------------------------------------------------------------*/
440 
441 /*
442  * Agree with the monitor ROM as to how many MMU entries are
443  * to be reserved, and map all of its segments into all contexts.
444  *
445  * Unfortunately, while the Version 0 PROM had a nice linked list of
446  * taken virtual memory, the Version 2 PROM provides instead a convoluted
447  * description of *free* virtual memory.  Rather than invert this, we
448  * resort to two magic constants from the PROM vector description file.
449  */
450 int
451 mmu_reservemon(nmmu)
452 	register int nmmu;
453 {
454 	register u_int va, eva;
455 	register int mmuseg, i;
456 
457 	va = OPENPROM_STARTVADDR;
458 	eva = OPENPROM_ENDVADDR;
459 	while (va < eva) {
460 		mmuseg = getsegmap(va);
461 		if (mmuseg < nmmu)
462 			nmmu = mmuseg;
463 		for (i = ncontext; --i > 0;)
464 			(*promvec->pv_setctxt)(i, (caddr_t)va, mmuseg);
465 		if (mmuseg == seginval) {
466 			va += NBPSG;
467 			continue;
468 		}
469 		/* PROM maps its memory user-accessible: fix it. */
470 		for (i = NPTESG; --i >= 0; va += NBPG)
471 			setpte(va, getpte(va) | PG_S);
472 	}
473 	return (nmmu);
474 }
475 
476 /*
477  * TODO: agree with the ROM on physical pages by taking them away
478  * from the page list, rather than having a dinky BTSIZE above.
479  */
480 
481 /*----------------------------------------------------------------*/
482 
483 /*
484  * MMU management.
485  */
486 
487 /*
488  * Change contexts.  We need the old context number as well as the new
489  * one.  If the context is changing, we must write all user windows
490  * first, lest an interrupt cause them to be written to the (other)
491  * user whose context we set here.
492  */
493 #define	CHANGE_CONTEXTS(old, new) \
494 	if ((old) != (new)) { \
495 		write_user_windows(); \
496 		setcontext(new); \
497 	}
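/*
 * Sketch (not compiled) of the typical pattern: save the current
 * context, switch to the pmap's context, do the work, then restore.
 * me_alloc() and ctx_free() below follow this shape.
 */
#ifdef notdef
static void
change_contexts_example(pm)
	register struct pmap *pm;
{
	register int octx = getcontext();

	CHANGE_CONTEXTS(octx, pm->pm_ctxnum);
	/* ... operate on pm's mappings here ... */
	setcontext(octx);
}
#endif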
498 
499 /*
500  * Allocate an MMU entry (i.e., a PMEG).
501  * If necessary, steal one from someone else.
502  * Put it on the tail of the given queue
503  * (which is either the LRU list or the locked list).
504  * The locked list is not actually ordered, but this is easiest.
505  * Also put it on the given (new) pmap's chain,
506  * enter its pmeg number into that pmap's segmap,
507  * and store the pmeg's new virtual segment number (me->me_vseg).
508  *
509  * This routine is large and complicated, but it must be fast
510  * since it implements the dynamic allocation of MMU entries.
511  */
512 struct mmuentry *
513 me_alloc(mh, newpm, newvseg)
514 	register struct mmuhd *mh;
515 	register struct pmap *newpm;
516 	register int newvseg;
517 {
518 	register struct mmuentry *me;
519 	register struct pmap *pm;
520 	register int i, va, pa, *pte, tpte;
521 	int ctx;
522 
523 	/* try free list first */
524 	if ((me = me_freelist) != NULL) {
525 		me_freelist = me->me_next;
526 #ifdef DEBUG
527 		if (me->me_pmap != NULL)
528 			panic("me_alloc: freelist entry has pmap");
529 		if (pmapdebug & PDB_MMU_ALLOC)
530 			printf("me_alloc: got pmeg %x\n", me->me_pmeg);
531 #endif
532 		insque(me, mh->mh_prev);	/* onto end of queue */
533 
534 		/* onto new pmap chain; pmap is already locked, if needed */
535 		me->me_pmforw = NULL;
536 		me->me_pmback = newpm->pm_mmuback;
537 		*newpm->pm_mmuback = me;
538 		newpm->pm_mmuback = &me->me_pmforw;
539 
540 		/* into pmap segment table, with backpointers */
541 		newpm->pm_segmap[newvseg] = me->me_pmeg;
542 		me->me_pmap = newpm;
543 		me->me_vseg = newvseg;
544 
545 		return (me);
546 	}
547 
548 	/* no luck, take head of LRU list */
549 	if ((me = me_lru.mh_next) == (struct mmuentry *)&me_lru)
550 		panic("me_alloc: all pmegs gone");
551 	pm = me->me_pmap;
552 #ifdef DEBUG
553 	if (pm == NULL)
554 		panic("me_alloc: LRU entry has no pmap");
555 	if (pm == kernel_pmap)
556 		panic("me_alloc: stealing from kernel");
557 	pte = pm->pm_pte[me->me_vseg];
558 	if (pte == NULL)
559 		panic("me_alloc: LRU entry's pmap has no ptes");
560 	if (pmapdebug & (PDB_MMU_ALLOC | PDB_MMU_STEAL))
561 		printf("me_alloc: stealing pmeg %x from pmap %x\n",
562 		    me->me_pmeg, pm);
563 #endif
564 	/*
565 	 * Remove from LRU list, and insert at end of new list
566 	 * (probably the LRU list again, but so what?).
567 	 */
568 	remque(me);
569 	insque(me, mh->mh_prev);
570 
571 	/*
572 	 * The PMEG must be mapped into some context so that we can
573 	 * read its PTEs.  Use its current context if it has one;
574 	 * if not, and since context 0 is reserved for the kernel,
575 	 * the simplest method is to switch to 0 and map the PMEG
576 	 * to virtual address 0---which, being a user space address,
577 	 * is by definition not in use.
578 	 *
579 	 * XXX for ncpus>1 must use per-cpu VA?
580 	 * XXX do not have to flush cache immediately
581 	 */
582 	ctx = getcontext();
583 	if (pm->pm_ctx) {
584 		CHANGE_CONTEXTS(ctx, pm->pm_ctxnum);
585 #ifdef notdef
586 		if (vactype != VAC_NONE)
587 #endif
588 			cache_flush_segment(me->me_vseg);
589 		va = VSTOVA(me->me_vseg);
590 	} else {
591 		CHANGE_CONTEXTS(ctx, 0);
592 		setsegmap(0, me->me_pmeg);
593 		/*
594 		 * No cache flush needed: it happened earlier when
595 		 * the old context was taken.
596 		 */
597 		va = 0;
598 	}
599 
600 	/*
601 	 * Record reference and modify bits for each page,
602 	 * and copy PTEs into kernel memory so that they can
603 	 * be reloaded later.
604 	 */
605 	i = NPTESG;
606 	do {
607 		tpte = getpte(va);
608 		if (tpte & PG_V) {
609 			pa = ptoa(HWTOSW(tpte & PG_PFNUM));
610 			if (managed(pa))
611 				pvhead(pa)->pv_flags |= MR(tpte);
612 		}
613 		*pte++ = tpte & ~(PG_U|PG_M);
614 		va += NBPG;
615 	} while (--i > 0);
616 
617 	/* update segment tables */
618 	simple_lock(&pm->pm_lock); /* what if other cpu takes mmuentry ?? */
619 	if (pm->pm_ctx)
620 		setsegmap(VSTOVA(me->me_vseg), seginval);
621 	pm->pm_segmap[me->me_vseg] = seginval;
622 
623 	/* off old pmap chain */
624 	if ((*me->me_pmback = me->me_pmforw) != NULL) {
625 		me->me_pmforw->me_pmback = me->me_pmback;
626 		me->me_pmforw = NULL;
627 	} else
628 		pm->pm_mmuback = me->me_pmback;
629 	simple_unlock(&pm->pm_lock);
630 	setcontext(ctx);	/* done with old context */
631 
632 	/* onto new pmap chain; new pmap is already locked, if needed */
633 	/* me->me_pmforw = NULL; */	/* done earlier */
634 	me->me_pmback = newpm->pm_mmuback;
635 	*newpm->pm_mmuback = me;
636 	newpm->pm_mmuback = &me->me_pmforw;
637 
638 	/* into new segment table, with backpointers */
639 	newpm->pm_segmap[newvseg] = me->me_pmeg;
640 	me->me_pmap = newpm;
641 	me->me_vseg = newvseg;
642 
643 	return (me);
644 }
645 
646 /*
647  * Free an MMU entry.
648  *
649  * Assumes the corresponding pmap is already locked.
650  * Does NOT flush cache, but does record ref and mod bits.
651  * The rest of each PTE is discarded.
652  * CALLER MUST SET CONTEXT to pm->pm_ctxnum (if pmap has
653  * a context) or to 0 (if not).  Caller must also update
654  * pm->pm_segmap and (possibly) the hardware.
655  */
656 void
657 me_free(pm, pmeg)
658 	register struct pmap *pm;
659 	register u_int pmeg;
660 {
661 	register struct mmuentry *me = &mmuentry[pmeg];
662 	register int i, va, pa, tpte;
663 
664 #ifdef DEBUG
665 	if (pmapdebug & PDB_MMU_ALLOC)
666 		printf("me_free: freeing pmeg %x from pmap %x\n",
667 		    me->me_pmeg, pm);
668 	if (me->me_pmeg != pmeg)
669 		panic("me_free: wrong mmuentry");
670 	if (pm != me->me_pmap)
671 		panic("me_free: pm != me_pmap");
672 #endif
673 
674 	/* just like me_alloc, but no cache flush, and context already set */
675 	if (pm->pm_ctx)
676 		va = VSTOVA(me->me_vseg);
677 	else {
678 		setsegmap(0, me->me_pmeg);
679 		va = 0;
680 	}
681 	i = NPTESG;
682 	do {
683 		tpte = getpte(va);
684 		if (tpte & PG_V) {
685 			pa = ptoa(HWTOSW(tpte & PG_PFNUM));
686 			if (managed(pa))
687 				pvhead(pa)->pv_flags |= MR(tpte);
688 		}
689 		va += NBPG;
690 	} while (--i > 0);
691 
692 	/* take mmu entry off pmap chain */
693 	if ((*me->me_pmback = me->me_pmforw) != NULL)
695 		me->me_pmforw->me_pmback = me->me_pmback;
696 	else
697 		pm->pm_mmuback = me->me_pmback;
698 	/* ... and remove from segment map */
699 	pm->pm_segmap[me->me_vseg] = seginval;
700 
701 	/* off LRU or lock chain */
702 	remque(me);
703 
704 	/* no associated pmap; on free list */
705 	me->me_pmap = NULL;
706 	me->me_next = me_freelist;
707 	me_freelist = me;
708 }
709 
710 /*
711  * `Page in' (load or inspect) an MMU entry; called on page faults.
712  * Returns 1 if we reloaded the segment, -1 if the segment was
713  * already loaded and the page was marked valid (in which case the
714  * fault must be a bus error or something), or 0 (segment loaded but
715  * PTE not valid, or segment not loaded at all).
716  */
717 int
718 mmu_pagein(pm, va, bits)
719 	register struct pmap *pm;
720 	register int va, bits;
721 {
722 	register int *pte;
723 	register struct mmuentry *me;
724 	register int vseg = VA_VSEG(va), pmeg, i, s;
725 
726 	/* return 0 if we have no PTEs to load */
727 	if ((pte = pm->pm_pte[vseg]) == NULL)
728 		return (0);
729 	/* return -1 if the fault is `hard', 0 if not */
730 	if (pm->pm_segmap[vseg] != seginval)
731 		return (bits && (getpte(va) & bits) == bits ? -1 : 0);
732 
733 	/* reload segment: write PTEs into a new LRU entry */
734 	va = VA_ROUNDDOWNTOSEG(va);
735 	s = splpmap();		/* paranoid */
736 	pmeg = me_alloc(&me_lru, pm, vseg)->me_pmeg;
737 	setsegmap(va, pmeg);
738 	i = NPTESG;
739 	do {
740 		setpte(va, *pte++);
741 		va += NBPG;
742 	} while (--i > 0);
743 	splx(s);
744 	return (1);
745 }
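/*
 * Sketch of a hypothetical caller of mmu_pagein() (it is called from
 * the page fault path; see the comment above).  A return of 1 means
 * the segment was reloaded and the access can simply be retried; -1
 * means the fault is genuine; 0 means fall through to the VM fault
 * handler.
 */
#ifdef notdef
static int
pagein_example(pm, va, bits)
	register struct pmap *pm;
	register int va, bits;
{

	switch (mmu_pagein(pm, va, bits)) {
	case 1:
		return (1);	/* just retry the access */
	case -1:
		return (0);	/* hard fault; signal the process */
	default:
		return (0);	/* let vm_fault() handle it */
	}
}
#endif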
746 
747 /*
748  * Allocate a context.  If necessary, steal one from someone else.
749  * Changes hardware context number and loads segment map.
750  *
751  * This routine is only ever called from locore.s just after it has
752  * saved away the previous process, so there are no active user windows.
753  */
754 void
755 ctx_alloc(pm)
756 	register struct pmap *pm;
757 {
758 	register union ctxinfo *c;
759 	register int cnum, i, va;
760 	register pmeg_t *segp;
761 
762 #ifdef DEBUG
763 	if (pm->pm_ctx)
764 		panic("ctx_alloc pm_ctx");
765 	if (pmapdebug & PDB_CTX_ALLOC)
766 		printf("ctx_alloc(%x)\n", pm);
767 #endif
768 	if ((c = ctx_freelist) != NULL) {
769 		ctx_freelist = c->c_nextfree;
770 		cnum = c - ctxinfo;
771 		setcontext(cnum);
772 	} else {
773 		if ((ctx_kick += ctx_kickdir) >= ncontext) {
774 			ctx_kick = ncontext - 1;
775 			ctx_kickdir = -1;
776 		} else if (ctx_kick < 1) {
777 			ctx_kick = 1;
778 			ctx_kickdir = 1;
779 		}
780 		c = &ctxinfo[cnum = ctx_kick];
781 #ifdef DEBUG
782 		if (c->c_pmap == NULL)
783 			panic("ctx_alloc cu_pmap");
784 		if (pmapdebug & (PDB_CTX_ALLOC | PDB_CTX_STEAL))
785 			printf("ctx_alloc: steal context %x from %x\n",
786 			    cnum, c->c_pmap);
787 #endif
788 		c->c_pmap->pm_ctx = NULL;
789 		setcontext(cnum);
790 #ifdef notdef
791 		if (vactype != VAC_NONE)
792 #endif
793 			cache_flush_context();
794 	}
795 	c->c_pmap = pm;
796 	pm->pm_ctx = c;
797 	pm->pm_ctxnum = cnum;
798 
799 	/*
800 	 * XXX	loop below makes 3584 iterations ... could reduce
801 	 *	by remembering valid ranges per context: two ranges
802 	 *	should suffice (for text/data/bss and for stack).
803 	 */
804 	segp = pm->pm_rsegmap;
805 	for (va = 0, i = NUSEG; --i >= 0; va += NBPSG)
806 		setsegmap(va, *segp++);
807 }
808 
809 /*
810  * Give away a context.  Flushes cache and sets current context to 0.
811  */
812 void
813 ctx_free(pm)
814 	struct pmap *pm;
815 {
816 	register union ctxinfo *c;
817 	register int newc, oldc;
818 
819 	if ((c = pm->pm_ctx) == NULL)
820 		panic("ctx_free");
821 	pm->pm_ctx = NULL;
822 	oldc = getcontext();
823 	if (vactype != VAC_NONE) {
824 		newc = pm->pm_ctxnum;
825 		CHANGE_CONTEXTS(oldc, newc);
826 		cache_flush_context();
827 		setcontext(0);
828 	} else {
829 		CHANGE_CONTEXTS(oldc, 0);
830 	}
831 	c->c_nextfree = ctx_freelist;
832 	ctx_freelist = c;
833 }
834 
835 
836 /*----------------------------------------------------------------*/
837 
838 /*
839  * pvlist functions.
840  */
841 
842 /*
843  * Walk the given pv list, and for each PTE, set or clear some bits
844  * (e.g., PG_W or PG_NC).
845  *
846  * As a special case, this never clears PG_W on `pager' pages.
847  * These, being kernel addresses, are always in hardware and have
848  * a context.
849  *
850  * This routine flushes the cache for any page whose PTE changes,
851  * as long as the process has a context; this is overly conservative.
852  * It also copies ref and mod bits to the pvlist, on the theory that
853  * this might save work later.  (XXX should test this theory)
854  */
855 void
856 pv_changepte(pv0, bis, bic)
857 	register struct pvlist *pv0;
858 	register int bis, bic;
859 {
860 	register int *pte;
861 	register struct pvlist *pv;
862 	register struct pmap *pm;
863 	register int va, vseg, pmeg, i, flags;
864 	int ctx, s;
865 
866 	write_user_windows();		/* paranoid? */
867 
868 	s = splpmap();			/* paranoid? */
869 	if (pv0->pv_pmap == NULL) {
870 		splx(s);
871 		return;
872 	}
873 	ctx = getcontext();
874 	flags = pv0->pv_flags;
875 	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
876 		pm = pv->pv_pmap;
877 		if (pm == NULL) panic("pv_changepte 1");
878 		va = pv->pv_va;
879 		vseg = VA_VSEG(va);
880 		pte = pm->pm_pte[vseg];
881 		if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
882 			register int tpte;
883 
884 			/* in hardware: fix hardware copy */
885 			if (pm->pm_ctx) {
886 				extern vm_offset_t pager_sva, pager_eva;
887 
888 				/*
889 				 * Bizarreness:  we never clear PG_W on
890 				 * pager pages, nor PG_NC on DVMA pages.
891 				 */
892 				if (bic == PG_W &&
893 				    va >= pager_sva && va < pager_eva)
894 					continue;
895 				if (bic == PG_NC &&
896 				    va >= DVMA_BASE && va < DVMA_END)
897 					continue;
898 				setcontext(pm->pm_ctxnum);
899 				/* XXX should flush only when necessary */
900 #ifdef notdef
901 				if (vactype != VAC_NONE)
902 #endif
903 					cache_flush_page(va);
904 			} else {
905 				/* XXX per-cpu va? */
906 				setcontext(0);
907 				setsegmap(0, pmeg);
908 				va = VA_VPG(va) * NBPG;
909 			}
910 			tpte = getpte(va);
911 			if (tpte & PG_V)
912 				flags |= (tpte >> PG_M_SHIFT) &
913 				    (PV_MOD|PV_REF);
914 			tpte = (tpte | bis) & ~bic;
915 			setpte(va, tpte);
916 			if (pte != NULL)	/* update software copy */
917 				pte[VA_VPG(va)] = tpte;
918 		} else {
919 			/* not in hardware: just fix software copy */
920 			if (pte == NULL)
921 				panic("pv_changepte 2");
922 			pte += VA_VPG(va);
923 			*pte = (*pte | bis) & ~bic;
924 		}
925 	}
926 	pv0->pv_flags = flags;
927 	setcontext(ctx);
928 	splx(s);
929 }
930 
931 /*
932  * Sync ref and mod bits in pvlist (turns off same in hardware PTEs).
933  * Returns the new flags.
934  *
935  * This is just like pv_changepte, but we never add or remove bits,
936  * hence never need to adjust software copies.
937  */
938 int
939 pv_syncflags(pv0)
940 	register struct pvlist *pv0;
941 {
942 	register struct pvlist *pv;
943 	register struct pmap *pm;
944 	register int tpte, va, vseg, pmeg, i, flags;
945 	int ctx, s;
946 
947 	write_user_windows();		/* paranoid? */
948 
949 	s = splpmap();			/* paranoid? */
950 	if (pv0->pv_pmap == NULL) {	/* paranoid */
951 		splx(s);
952 		return (0);
953 	}
954 	ctx = getcontext();
955 	flags = pv0->pv_flags;
956 	for (pv = pv0; pv != NULL; pv = pv->pv_next) {
957 		pm = pv->pv_pmap;
958 		va = pv->pv_va;
959 		vseg = VA_VSEG(va);
960 		if ((pmeg = pm->pm_segmap[vseg]) == seginval)
961 			continue;
962 		if (pm->pm_ctx) {
963 			setcontext(pm->pm_ctxnum);
964 			/* XXX should flush only when necessary */
965 #ifdef notdef
966 			if (vactype != VAC_NONE)
967 #endif
968 				cache_flush_page(va);
969 		} else {
970 			/* XXX per-cpu va? */
971 			setcontext(0);
972 			setsegmap(0, pmeg);
973 			va = VA_VPG(va) * NBPG;
974 		}
975 		tpte = getpte(va);
976 		if (tpte & (PG_M|PG_U) && tpte & PG_V) {
977 			flags |= (tpte >> PG_M_SHIFT) &
978 			    (PV_MOD|PV_REF);
979 			tpte &= ~(PG_M|PG_U);
980 			setpte(va, tpte);
981 		}
982 	}
983 	pv0->pv_flags = flags;
984 	setcontext(ctx);
985 	splx(s);
986 	return (flags);
987 }
988 
989 /*
990  * pv_unlink is a helper function for pmap_remove.
991  * It takes a pointer to the pv_table head for some physical address
992  * and removes the appropriate (pmap, va) entry.
993  *
994  * Once the entry is removed, if the pv_table head has the cache
995  * inhibit bit set, see if we can turn that off; if so, walk the
996  * pvlist and turn off PG_NC in each PTE.  (The pvlist is by
997  * definition nonempty, since it must have at least two elements
998  * in it to have PV_NC set, and we only remove one here.)
999  */
1000 static void
1001 pv_unlink(pv, pm, va)
1002 	register struct pvlist *pv;
1003 	register struct pmap *pm;
1004 	register vm_offset_t va;
1005 {
1006 	register struct pvlist *npv;
1007 
1008 	/*
1009 	 * First entry is special (sigh).
1010 	 */
1011 	npv = pv->pv_next;
1012 	if (pv->pv_pmap == pm && pv->pv_va == va) {
1013 		pmap_stats.ps_unlink_pvfirst++;
1014 		if (npv != NULL) {
1015 			pv->pv_next = npv->pv_next;
1016 			pv->pv_pmap = npv->pv_pmap;
1017 			pv->pv_va = npv->pv_va;
1018 			free((caddr_t)npv, M_VMPVENT);
1019 		} else
1020 			pv->pv_pmap = NULL;
1021 	} else {
1022 		register struct pvlist *prev;
1023 
1024 		for (prev = pv;; prev = npv, npv = npv->pv_next) {
1025 			pmap_stats.ps_unlink_pvsearch++;
1026 			if (npv == NULL)
1027 				panic("pv_unlink");
1028 			if (npv->pv_pmap == pm && npv->pv_va == va)
1029 				break;
1030 		}
1031 		prev->pv_next = npv->pv_next;
1032 		free((caddr_t)npv, M_VMPVENT);
1033 	}
1034 	if (pv->pv_flags & PV_NC) {
1035 		/*
1036 		 * Not cached: check to see if we can fix that now.
1037 		 */
1038 		va = pv->pv_va;
1039 		for (npv = pv->pv_next; npv != NULL; npv = npv->pv_next)
1040 			if (BADALIAS(va, npv->pv_va))
1041 				return;
1042 		pv->pv_flags &= ~PV_NC;
1043 		pv_changepte(pv, 0, PG_NC);
1044 	}
1045 }
1046 
1047 /*
1048  * pv_link is the inverse of pv_unlink, and is used in pmap_enter.
1049  * It returns PG_NC if the (new) pvlist says that the address cannot
1050  * be cached.
1051  */
1052 static int
1053 pv_link(pv, pm, va)
1054 	register struct pvlist *pv;
1055 	register struct pmap *pm;
1056 	register vm_offset_t va;
1057 {
1058 	register struct pvlist *npv;
1059 	register int ret;
1060 
1061 	if (pv->pv_pmap == NULL) {
1062 		/* no pvlist entries yet */
1063 		pmap_stats.ps_enter_firstpv++;
1064 		pv->pv_next = NULL;
1065 		pv->pv_pmap = pm;
1066 		pv->pv_va = va;
1067 		return (0);
1068 	}
1069 	/*
1070 	 * Before entering the new mapping, see if
1071 	 * it will cause old mappings to become aliased
1072 	 * and thus need to be `discached'.
1073 	 */
1074 	ret = 0;
1075 	pmap_stats.ps_enter_secondpv++;
1076 	if (pv->pv_flags & PV_NC) {
1077 		/* already uncached, just stay that way */
1078 		ret = PG_NC;
1079 	} else {
1080 		/* MAY NEED TO DISCACHE ANYWAY IF va IS IN DVMA SPACE? */
1081 		for (npv = pv; npv != NULL; npv = npv->pv_next) {
1082 			if (BADALIAS(va, npv->pv_va)) {
1083 				pv->pv_flags |= PV_NC;
1084 				pv_changepte(pv, ret = PG_NC, 0);
1085 				break;
1086 			}
1087 		}
1088 	}
1089 	npv = (struct pvlist *)malloc(sizeof *npv, M_VMPVENT, M_WAITOK);
1090 	npv->pv_next = pv->pv_next;
1091 	npv->pv_pmap = pm;
1092 	npv->pv_va = va;
1093 	pv->pv_next = npv;
1094 	return (ret);
1095 }
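/*
 * Sketch (not compiled): what BADALIAS is about.  On a virtually
 * indexed cache, two mappings of one physical page land in different
 * cache lines if their virtual addresses differ in the cache index
 * bits; pv_link()/pv_unlink() set or clear PG_NC to avoid that.
 * Assuming a 64K direct-mapped VAC and 4K pages, the check would be:
 */
#ifdef notdef
static int
badalias_example(va1, va2)
	register int va1, va2;
{

	return (((va1 ^ va2) & 0xf000) != 0);	/* index bits differ? */
}
#endif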
1096 
1097 /*
1098  * Walk the given list and flush the cache for each (MI) page that is
1099  * potentially in the cache.
1100  */
1101 pv_flushcache(pv)
1102 	register struct pvlist *pv;
1103 {
1104 	register struct pmap *pm;
1105 	register int i, s, ctx;
1106 
1107 	write_user_windows();	/* paranoia? */
1108 
1109 	s = splpmap();		/* XXX extreme paranoia */
1110 	if ((pm = pv->pv_pmap) != NULL) {
1111 		ctx = getcontext();
1112 		for (;;) {
1113 			if (pm->pm_ctx) {
1114 				setcontext(pm->pm_ctxnum);
1115 				cache_flush_page(pv->pv_va);
1116 			}
1117 			pv = pv->pv_next;
1118 			if (pv == NULL)
1119 				break;
1120 			pm = pv->pv_pmap;
1121 		}
1122 		setcontext(ctx);
1123 	}
1124 	splx(s);
1125 }
1126 
1127 /*----------------------------------------------------------------*/
1128 
1129 /*
1130  * At last, pmap code.
1131  */
1132 
1133 /*
1134  * Bootstrap the system enough to run with VM enabled.
1135  *
1136  * nmmu is the number of mmu entries (``PMEGs'');
1137  * nctx is the number of contexts.
1138  */
1139 void
1140 pmap_bootstrap(nmmu, nctx)
1141 	int nmmu, nctx;
1142 {
1143 	register union ctxinfo *ci;
1144 	register struct mmuentry *me;
1145 	register int i, j, n, z, vs;
1146 	register caddr_t p;
1147 	register void (*rom_setmap)(int ctx, caddr_t va, int pmeg);
1148 	int lastpage;
1149 	extern char end[];
1150 	extern caddr_t reserve_dumppages(caddr_t);
1151 
1152 	ncontext = nctx;
1153 
1154 	/*
1155 	 * Last segment is the `invalid' one (one PMEG of pte's with !pg_v).
1156 	 * It will never be used for anything else.
1157 	 */
1158 	seginval = --nmmu;
1159 
1160 	/*
1161 	 * Preserve the monitor ROM's reserved VM region, so that
1162 	 * we can use L1-A or the monitor's debugger.  As a side
1163 	 * effect we map the ROM's reserved VM into all contexts
1164 	 * (otherwise L1-A crashes the machine!).
1165 	 */
1166 	nmmu = mmu_reservemon(nmmu);
1167 
1168 	/*
1169 	 * Allocate and clear mmu entry and context structures.
1170 	 */
1171 	p = end;
1172 	mmuentry = me = (struct mmuentry *)p;
1173 	p += nmmu * sizeof *me;
1174 	ctxinfo = ci = (union ctxinfo *)p;
1175 	p += nctx * sizeof *ci;
1176 	bzero(end, p - end);
1177 
1178 	/*
1179 	 * Set up the `constants' for the call to vm_init()
1180 	 * in main().  All pages beginning at p (rounded up to
1181 	 * the next whole page) and continuing through the number
1182 	 * of available pages are free, but they start at a higher
1183 	 * virtual address.  This gives us two mappable MD pages
1184 	 * for pmap_zero_page and pmap_copy_page, and one MI page
1185 	 * for /dev/mem, all with no associated physical memory.
1186 	 */
1187 	p = (caddr_t)(((u_int)p + NBPG - 1) & ~PGOFSET);
1188 	avail_start = (int)p - KERNBASE;
1189 	avail_end = init_translations() << PGSHIFT;
1190 	i = (int)p;
1191 	vpage[0] = p, p += NBPG;
1192 	vpage[1] = p, p += NBPG;
1193 	vmempage = p, p += NBPG;
1194 	p = reserve_dumppages(p);
1195 	virtual_avail = (vm_offset_t)p;
1196 	virtual_end = VM_MAX_KERNEL_ADDRESS;
1197 
1198 	p = (caddr_t)i;			/* retract to first free phys */
1199 
1200 	/*
1201 	 * Initialize the kernel pmap.
1202 	 */
1203 	{
1204 		register struct kpmap *k = &kernel_pmap_store;
1205 
1206 /*		kernel_pmap = (struct pmap *)k; */
1207 		k->pm_ctx = ctxinfo;
1208 		/* k->pm_ctxnum = 0; */
1209 		simple_lock_init(&k->pm_lock);
1210 		k->pm_refcount = 1;
1211 		/* k->pm_mmuforw = 0; */
1212 		k->pm_mmuback = &k->pm_mmuforw;
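		/*
		 * Kernel virtual segments number NUSEG and up, so bias
		 * the per-segment arrays by -NUSEG; they can then be
		 * indexed directly with VA_VSEG(va) for kernel addresses.
		 */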
1213 		k->pm_segmap = &k->pm_rsegmap[-NUSEG];
1214 		k->pm_pte = &k->pm_rpte[-NUSEG];
1215 		k->pm_npte = &k->pm_rnpte[-NUSEG];
1216 		for (i = NKSEG; --i >= 0;)
1217 			k->pm_rsegmap[i] = seginval;
1218 	}
1219 
1220 	/*
1221 	 * All contexts are free except the kernel's.
1222 	 *
1223 	 * XXX sun4c could use context 0 for users?
1224 	 */
1225 	ci->c_pmap = kernel_pmap;
1226 	ctx_freelist = ci + 1;
1227 	for (i = 1; i < ncontext; i++) {
1228 		ci++;
1229 		ci->c_nextfree = ci + 1;
1230 	}
1231 	ci->c_nextfree = NULL;
1232 	ctx_kick = 0;
1233 	ctx_kickdir = -1;
1234 
1235 	/* me_freelist = NULL; */	/* already NULL */
1236 
1237 	/*
1238 	 * Init mmu entries that map the kernel physical addresses.
1239 	 * If the page bits in p are 0, we filled the last segment
1240 	 * exactly (now how did that happen?); if not, it gives
1241 	 * the number of pages mapped in the last segment.
1242 	 *
1243 	 * All the other MMU entries are free.
1244 	 *
1245 	 * THIS ASSUMES SEGMENT i IS MAPPED BY MMU ENTRY i DURING THE
1246 	 * BOOT PROCESS
1247 	 */
1248 	z = ((((u_int)p + NBPSG - 1) & ~SGOFSET) - KERNBASE) >> SGSHIFT;
1249 	lastpage = VA_VPG(p);
1250 	if (lastpage == 0)
1251 		lastpage = NPTESG;
1252 	p = (caddr_t)KERNBASE;		/* first va */
1253 	vs = VA_VSEG(KERNBASE);		/* first virtual segment */
1254 	rom_setmap = promvec->pv_setctxt;
1255 	for (i = 0;;) {
1256 		/*
1257 		 * Distribute each kernel segment into all contexts.
1258 		 * This is done through the monitor ROM, rather than
1259 		 * directly here: if we do a setcontext we will fault,
1260 		 * as we are not (yet) mapped in any other context.
1261 		 */
1262 		for (j = 1; j < nctx; j++)
1263 			rom_setmap(j, p, i);
1264 
1265 		/* set up the mmu entry */
1266 		me->me_pmeg = i;
1267 		insque(me, me_locked.mh_prev);
1268 		/* me->me_pmforw = NULL; */
1269 		me->me_pmback = kernel_pmap->pm_mmuback;
1270 		*kernel_pmap->pm_mmuback = me;
1271 		kernel_pmap->pm_mmuback = &me->me_pmforw;
1272 		me->me_pmap = kernel_pmap;
1273 		me->me_vseg = vs;
1274 		kernel_pmap->pm_segmap[vs] = i;
1275 		n = ++i < z ? NPTESG : lastpage;
1276 		kernel_pmap->pm_npte[vs] = n;
1277 		me++;
1278 		vs++;
1279 		if (i < z) {
1280 			p += NBPSG;
1281 			continue;
1282 		}
1283 		/*
1284 		 * Unmap the pages, if any, that are not part of
1285 		 * the final segment.
1286 		 */
1287 		for (p += n * NBPG, j = n; j < NPTESG; j++, p += NBPG)
1288 			setpte(p, 0);
1289 		break;
1290 	}
1291 	for (; i < nmmu; i++, me++) {
1292 		me->me_pmeg = i;
1293 		me->me_next = me_freelist;
1294 		/* me->me_pmap = NULL; */
1295 		me_freelist = me;
1296 	}
1297 
1298 	/*
1299 	 * write protect & encache kernel text;
1300 	 * set red zone at kernel base; enable cache on message buffer.
1301 	 */
1302 	{
1303 		extern char etext[], trapbase[];
1304 #ifdef KGDB
1305 		register int mask = ~PG_NC;	/* XXX chgkprot is busted */
1306 #else
1307 		register int mask = ~(PG_W | PG_NC);
1308 #endif
1309 		for (p = trapbase; p < etext; p += NBPG)
1310 			setpte(p, getpte(p) & mask);
1311 		p = (caddr_t)KERNBASE;
1312 		setpte(p, 0);
1313 		p += NBPG;
1314 		setpte(p, getpte(p) & ~PG_NC);
1315 	}
1316 
1317 	/*
1318 	 * Grab physical memory list (for /dev/mem).
1319 	 */
1320 	npmemarr = makememarr(pmemarr, MA_SIZE, MEMARR_TOTALPHYS);
1321 }
1322 
1323 /*
1324  * Bootstrap memory allocator. This function allows for early dynamic
1325  * memory allocation until the virtual memory system has been bootstrapped.
1326  * After that point, either kmem_alloc or malloc should be used. This
1327  * function works by stealing pages from the (to be) managed page pool,
1328  * stealing virtual address space, then mapping the pages and zeroing them.
1329  *
1330  * It should be used from pmap_bootstrap till vm_page_startup; afterwards
1331  * it cannot be used, and will generate a panic if tried.  Note that this
1332  * memory will never be freed, and in essence it is wired down.
1333  */
1334 void *
1335 pmap_bootstrap_alloc(size)
1336 	int size;
1337 {
1338 	register void *mem;
1339 	extern int vm_page_startup_initialized;
1340 
1341 	if (vm_page_startup_initialized)
1342 		panic("pmap_bootstrap_alloc: called after startup initialized");
1343 	size = round_page(size);
1344 	mem = (void *)virtual_avail;
1345 	virtual_avail = pmap_map(virtual_avail, avail_start,
1346 	    avail_start + size, VM_PROT_READ|VM_PROT_WRITE);
1347 	avail_start += size;
1348 	bzero((void *)mem, size);
1349 	return (mem);
1350 }
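/*
 * Hypothetical use (sketch, not compiled): grabbing early, wired,
 * zeroed memory for a boot-time table.  After vm_page_startup() has
 * run, kmem_alloc() or malloc() must be used instead.
 */
#ifdef notdef
static struct pvlist *
bootstrap_alloc_example(n)
	register int n;
{

	return ((struct pvlist *)pmap_bootstrap_alloc(n * sizeof(struct pvlist)));
}
#endif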
1351 
1352 /*
1353  * Initialize the pmap module.
1354  */
1355 void
1356 pmap_init(phys_start, phys_end)
1357 	register vm_offset_t phys_start, phys_end;
1358 {
1359 	register vm_size_t s;
1360 
1361 	if (PAGE_SIZE != NBPG)
1362 		panic("pmap_init: CLSIZE!=1");
1363 	/*
1364 	 * Allocate and clear memory for the pv_table.
1365 	 */
1366 	s = sizeof(struct pvlist) * atop(phys_end - phys_start);
1367 	s = round_page(s);
1368 	pv_table = (struct pvlist *)kmem_alloc(kernel_map, s);
1369 	bzero((caddr_t)pv_table, s);
1370 	vm_first_phys = phys_start;
1371 	vm_num_phys = phys_end - phys_start;
1372 }
1373 
1374 /*
1375  * Map physical addresses into kernel VM.
1376  */
1377 vm_offset_t
1378 pmap_map(va, pa, endpa, prot)
1379 	register vm_offset_t va, pa, endpa;
1380 	register int prot;
1381 {
1382 	register int pgsize = PAGE_SIZE;
1383 
1384 	while (pa < endpa) {
1385 		pmap_enter(kernel_pmap, va, pa, prot, 1);
1386 		va += pgsize;
1387 		pa += pgsize;
1388 	}
1389 	return (va);
1390 }
1391 
1392 /*
1393  * Create and return a physical map.
1394  *
1395  * If size is nonzero, the map is useless. (ick)
1396  */
1397 struct pmap *
1398 pmap_create(size)
1399 	vm_size_t size;
1400 {
1401 	register struct pmap *pm;
1402 
1403 	if (size)
1404 		return (NULL);
1405 	pm = (struct pmap *)malloc(sizeof *pm, M_VMPMAP, M_WAITOK);
1406 #ifdef DEBUG
1407 	if (pmapdebug & PDB_CREATE)
1408 		printf("pmap_create: created %x\n", pm);
1409 #endif
1410 	bzero((caddr_t)pm, sizeof *pm);
1411 	pmap_pinit(pm);
1412 	return (pm);
1413 }
1414 
1415 /*
1416  * Initialize a preallocated and zeroed pmap structure,
1417  * such as one in a vmspace structure.
1418  */
1419 void
1420 pmap_pinit(pm)
1421 	register struct pmap *pm;
1422 {
1423 	register int i;
1424 
1425 #ifdef DEBUG
1426 	if (pmapdebug & PDB_CREATE)
1427 		printf("pmap_pinit(%x)\n", pm);
1428 #endif
1429 	/* pm->pm_ctx = NULL; */
1430 	simple_lock_init(&pm->pm_lock);
1431 	pm->pm_refcount = 1;
1432 	/* pm->pm_mmuforw = NULL; */
1433 	pm->pm_mmuback = &pm->pm_mmuforw;
1434 	pm->pm_segmap = pm->pm_rsegmap;
1435 	pm->pm_pte = pm->pm_rpte;
1436 	pm->pm_npte = pm->pm_rnpte;
1437 	for (i = NUSEG; --i >= 0;)
1438 		pm->pm_rsegmap[i] = seginval;
1439 	/* bzero((caddr_t)pm->pm_rpte, sizeof pm->pm_rpte); */
1440 	/* bzero((caddr_t)pm->pm_rnpte, sizeof pm->pm_rnpte); */
1441 }
1442 
1443 /*
1444  * Retire the given pmap from service.
1445  * Should only be called if the map contains no valid mappings.
1446  */
1447 void
1448 pmap_destroy(pm)
1449 	register struct pmap *pm;
1450 {
1451 	int count;
1452 
1453 	if (pm == NULL)
1454 		return;
1455 #ifdef DEBUG
1456 	if (pmapdebug & PDB_DESTROY)
1457 		printf("pmap_destroy(%x)\n", pm);
1458 #endif
1459 	simple_lock(&pm->pm_lock);
1460 	count = --pm->pm_refcount;
1461 	simple_unlock(&pm->pm_lock);
1462 	if (count == 0) {
1463 		pmap_release(pm);
1464 		free((caddr_t)pm, M_VMPMAP);
1465 	}
1466 }
1467 
1468 /*
1469  * Release any resources held by the given physical map.
1470  * Called when a pmap initialized by pmap_pinit is being released.
1471  */
1472 void
1473 pmap_release(pm)
1474 	register struct pmap *pm;
1475 {
1476 	register union ctxinfo *c;
1477 	register int s = splpmap();	/* paranoia */
1478 
1479 #ifdef DEBUG
1480 	if (pmapdebug & PDB_DESTROY)
1481 		printf("pmap_release(%x)\n", pm);
1482 #endif
1483 	if (pm->pm_mmuforw)
1484 		panic("pmap_release mmuforw");
1485 	if ((c = pm->pm_ctx) != NULL) {
1486 		if (pm->pm_ctxnum == 0)
1487 			panic("pmap_release: releasing kernel");
1488 		ctx_free(pm);
1489 	}
1490 	splx(s);
1491 }
1492 
1493 /*
1494  * Add a reference to the given pmap.
1495  */
1496 void
1497 pmap_reference(pm)
1498 	struct pmap *pm;
1499 {
1500 
1501 	if (pm != NULL) {
1502 		simple_lock(&pm->pm_lock);
1503 		pm->pm_refcount++;
1504 		simple_unlock(&pm->pm_lock);
1505 	}
1506 }
1507 
1508 static int pmap_rmk(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
1509 static int pmap_rmu(struct pmap *, vm_offset_t, vm_offset_t, int, int, int);
1510 
1511 /*
1512  * Remove the given range of mapping entries.
1513  * The starting and ending addresses are already rounded to pages.
1514  * Sheer lunacy: pmap_remove is often asked to remove nonexistent
1515  * mappings.
1516  */
1517 void
1518 pmap_remove(pm, va, endva)
1519 	register struct pmap *pm;
1520 	register vm_offset_t va, endva;
1521 {
1522 	register vm_offset_t nva;
1523 	register int vseg, nleft, s, ctx;
1524 	register int (*rm)(struct pmap *, vm_offset_t, vm_offset_t,
1525 			    int, int, int);
1526 
1527 	if (pm == NULL)
1528 		return;
1529 #ifdef DEBUG
1530 	if (pmapdebug & PDB_REMOVE)
1531 		printf("pmap_remove(%x, %x, %x)\n", pm, va, endva);
1532 #endif
1533 
1534 	if (pm == kernel_pmap) {
1535 		/*
1536 		 * Removing from kernel address space.
1537 		 */
1538 		rm = pmap_rmk;
1539 	} else {
1540 		/*
1541 		 * Removing from user address space.
1542 		 */
1543 		write_user_windows();
1544 		rm = pmap_rmu;
1545 	}
1546 
1547 	ctx = getcontext();
1548 	s = splpmap();		/* XXX conservative */
1549 	simple_lock(&pm->pm_lock);
1550 	for (; va < endva; va = nva) {
1551 		/* do one virtual segment at a time */
1552 		vseg = VA_VSEG(va);
1553 		nva = VSTOVA(vseg + 1);
1554 		if (nva == 0 || nva > endva)
1555 			nva = endva;
1556 		if ((nleft = pm->pm_npte[vseg]) != 0)
1557 			pm->pm_npte[vseg] = (*rm)(pm, va, nva,
1558 			    vseg, nleft, pm->pm_segmap[vseg]);
1559 	}
1560 	simple_unlock(&pm->pm_lock);
1561 	splx(s);
1562 	setcontext(ctx);
1563 }
1564 
1565 #define perftest
1566 #ifdef perftest
1567 /* counters, one per possible length */
1568 int	rmk_vlen[NPTESG+1];	/* virtual length per rmk() call */
1569 int	rmk_npg[NPTESG+1];	/* n valid pages per rmk() call */
1570 int	rmk_vlendiff;		/* # times npg != vlen */
1571 #endif
1572 
1573 /*
1574  * The following magic number was chosen because:
1575  *	1. It is the same amount of work to cache_flush_page 4 pages
1576  *	   as to cache_flush_segment 1 segment (so at 4 the cost of
1577  *	   flush is the same).
1578  *	2. Flushing extra pages is bad (causes cache not to work).
1579  *	3. The current code, which malloc()s 5 pages for each process
1580  *	   for a user vmspace/pmap, almost never touches all 5 of those
1581  *	   pages.
1582  */
1583 #define	PMAP_RMK_MAGIC	5	/* if > magic, use cache_flush_segment */
1584 
1585 /*
1586  * Remove a range contained within a single segment.
1587  * These are egregiously complicated routines.
1588  */
1589 
1590 /* remove from kernel, return new nleft */
1591 static int
1592 pmap_rmk(pm, va, endva, vseg, nleft, pmeg)
1593 	register struct pmap *pm;
1594 	register vm_offset_t va, endva;
1595 	register int vseg, nleft, pmeg;
1596 {
1597 	register int i, tpte, perpage, npg;
1598 	register struct pvlist *pv;
1599 #ifdef perftest
1600 	register int nvalid;
1601 #endif
1602 
1603 #ifdef DEBUG
1604 	if (pmeg == seginval)
1605 		panic("pmap_rmk: not loaded");
1606 	if (pm->pm_ctx == NULL)
1607 		panic("pmap_rmk: lost context");
1608 #endif
1609 
1610 	setcontext(0);
1611 	/* decide how to flush cache */
1612 	npg = (endva - va) >> PGSHIFT;
1613 	if (npg > PMAP_RMK_MAGIC) {
1614 		/* flush the whole segment */
1615 		perpage = 0;
1616 #ifdef notdef
1617 		if (vactype != VAC_NONE)
1618 #endif
1619 			cache_flush_segment(vseg);
1620 	} else {
1621 		/* flush each page individually; some never need flushing */
1622 		perpage = 1;
1623 	}
1624 #ifdef perftest
1625 	nvalid = 0;
1626 #endif
1627 	while (va < endva) {
1628 		tpte = getpte(va);
1629 		if ((tpte & PG_V) == 0) {
1630 			va += PAGE_SIZE;
1631 			continue;
1632 		}
1633 		pv = NULL;
1634 		/* if cacheable, flush page as needed */
1635 		if ((tpte & PG_NC) == 0) {
1636 #ifdef perftest
1637 			nvalid++;
1638 #endif
1639 			if (perpage)
1640 				cache_flush_page(va);
1641 		}
1642 		if ((tpte & PG_TYPE) == PG_OBMEM) {
1643 			i = ptoa(HWTOSW(tpte & PG_PFNUM));
1644 			if (managed(i)) {
1645 				pv = pvhead(i);
1646 				pv->pv_flags |= MR(tpte);
1647 				pv_unlink(pv, pm, va);
1648 			}
1649 		}
1650 		nleft--;
1651 		setpte(va, 0);
1652 		va += NBPG;
1653 	}
1654 #ifdef perftest
1655 	rmk_vlen[npg]++;
1656 	rmk_npg[nvalid]++;
1657 	if (npg != nvalid)
1658 		rmk_vlendiff++;
1659 #endif
1660 
1661 	/*
1662 	 * If the segment is all gone, remove it from everyone and
1663 	 * free the MMU entry.
1664 	 */
1665 	if (nleft == 0) {
1666 		va = VSTOVA(vseg);		/* retract */
1667 		setsegmap(va, seginval);
1668 		for (i = ncontext; --i > 0;) {
1669 			setcontext(i);
1670 			setsegmap(va, seginval);
1671 		}
1672 		me_free(pm, pmeg);
1673 	}
1674 	return (nleft);
1675 }
1676 
1677 #ifdef perftest
1678 /* as before but for pmap_rmu */
1679 int	rmu_vlen[NPTESG+1];	/* virtual length per rmu() call */
1680 int	rmu_npg[NPTESG+1];	/* n valid pages per rmu() call */
1681 int	rmu_vlendiff;		/* # times npg != vlen */
1682 int	rmu_noflush;		/* # times rmu does not need to flush at all */
1683 #endif
1684 
1685 /*
1686  * Just like PMAP_RMK_MAGIC, but with a different threshold.
1687  * Note that this may well deserve further tuning work.
1688  */
1689 #define	PMAP_RMU_MAGIC	4	/* if > magic, use cache_flush_segment */
1690 
1691 /* remove from user */
1692 static int
1693 pmap_rmu(pm, va, endva, vseg, nleft, pmeg)
1694 	register struct pmap *pm;
1695 	register vm_offset_t va, endva;
1696 	register int vseg, nleft, pmeg;
1697 {
1698 	register int *pte0, i, pteva, tpte, perpage, npg;
1699 	register struct pvlist *pv;
1700 #ifdef perftest
1701 	register int doflush, nvalid;
1702 #endif
1703 
1704 	pte0 = pm->pm_pte[vseg];
1705 	if (pmeg == seginval) {
1706 		register int *pte = pte0 + VA_VPG(va);
1707 
1708 		/*
1709 		 * PTEs are not in MMU.  Just invalidate software copies.
1710 		 */
1711 		for (; va < endva; pte++, va += PAGE_SIZE) {
1712 			tpte = *pte;
1713 			if ((tpte & PG_V) == 0) {
1714 				/* nothing to remove (braindead VM layer) */
1715 				continue;
1716 			}
1717 			if ((tpte & PG_TYPE) == PG_OBMEM) {
1718 				i = ptoa(HWTOSW(tpte & PG_PFNUM));
1719 				if (managed(i))
1720 					pv_unlink(pvhead(i), pm, va);
1721 			}
1722 			nleft--;
1723 			*pte = 0;
1724 		}
1725 		if (nleft == 0) {
1726 			free((caddr_t)pte0, M_VMPMAP);
1727 			pm->pm_pte[vseg] = NULL;
1728 		}
1729 		return (nleft);
1730 	}
1731 
1732 	/*
1733 	 * PTEs are in MMU.  Invalidate in hardware, update ref &
1734 	 * mod bits, and flush cache if required.
1735 	 */
1736 	if (pm->pm_ctx) {
1737 		/* process has a context, must flush cache */
1738 		npg = (endva - va) >> PGSHIFT;
1739 #ifdef perftest
1740 		doflush = 1;
1741 		nvalid = 0;
1742 #endif
1743 		setcontext(pm->pm_ctxnum);
1744 		if (npg > PMAP_RMU_MAGIC) {
1745 			perpage = 0; /* flush the whole segment */
1746 #ifdef notdef
1747 			if (vactype != VAC_NONE)
1748 #endif
1749 				cache_flush_segment(vseg);
1750 		} else
1751 			perpage = 1;
1752 		pteva = va;
1753 	} else {
1754 		/* no context, use context 0; cache flush unnecessary */
1755 		setcontext(0);
1756 		/* XXX use per-cpu pteva? */
1757 		setsegmap(0, pmeg);
1758 		pteva = VA_VPG(va) * NBPG;
1759 		perpage = 0;
1760 #ifdef perftest
1761 		npg = 0;
1762 		doflush = 0;
1763 		nvalid = 0;
1764 		rmu_noflush++;
1765 #endif
1766 	}
1767 	for (; va < endva; pteva += PAGE_SIZE, va += PAGE_SIZE) {
1768 		tpte = getpte(pteva);
1769 		if ((tpte & PG_V) == 0)
1770 			continue;
1771 		pv = NULL;
1772 		/* if cacheable, flush page as needed */
1773 		if (doflush && (tpte & PG_NC) == 0) {
1774 #ifdef perftest
1775 			nvalid++;
1776 #endif
1777 			if (perpage)
1778 				cache_flush_page(va);
1779 		}
1780 		if ((tpte & PG_TYPE) == PG_OBMEM) {
1781 			i = ptoa(HWTOSW(tpte & PG_PFNUM));
1782 			if (managed(i)) {
1783 				pv = pvhead(i);
1784 				pv->pv_flags |= MR(tpte);
1785 				pv_unlink(pv, pm, va);
1786 			}
1787 		}
1788 		nleft--;
1789 		setpte(pteva, 0);
1790 	}
1791 #ifdef perftest
1792 	if (doflush) {
1793 		rmu_vlen[npg]++;
1794 		rmu_npg[nvalid]++;
1795 		if (npg != nvalid)
1796 			rmu_vlendiff++;
1797 	}
1798 #endif
1799 
1800 	/*
1801 	 * If the segment is all gone, and the context is loaded, give
1802 	 * the segment back.
1803 	 */
1804 	if (nleft == 0 && pm->pm_ctx != NULL) {
1805 		va = VSTOVA(vseg);		/* retract */
1806 		setsegmap(va, seginval);
1807 		free((caddr_t)pte0, M_VMPMAP);
1808 		pm->pm_pte[vseg] = NULL;
1809 		me_free(pm, pmeg);
1810 	}
1811 	return (nleft);
1812 }
1813 
1814 /*
1815  * Lower (make more strict) the protection on the specified
1816  * physical page.
1817  *
1818  * There are only two cases: either the protection is going to 0
1819  * (in which case we do the dirty work here), or it is going
1820  * to read-only (in which case pv_changepte does the trick).
1821  */
1822 void
1823 pmap_page_protect(pa, prot)
1824 	vm_offset_t pa;
1825 	vm_prot_t prot;
1826 {
1827 	register struct pvlist *pv, *pv0, *npv;
1828 	register struct pmap *pm;
1829 	register int *pte;
1830 	register int va, vseg, pteva, tpte;
1831 	register int flags, nleft, i, pmeg, s, ctx, doflush;
1832 
1833 #ifdef DEBUG
1834 	if ((pmapdebug & PDB_CHANGEPROT) ||
1835 	    (pmapdebug & PDB_REMOVE && prot == VM_PROT_NONE))
1836 		printf("pmap_page_protect(%x, %x)\n", pa, prot);
1837 #endif
1838 	/*
1839 	 * Skip unmanaged pages, or operations that do not take
1840 	 * away write permission.
1841 	 */
1842 	if (!managed(pa) || prot & VM_PROT_WRITE)
1843 		return;
1844 	write_user_windows();	/* paranoia */
1845 	if (prot & VM_PROT_READ) {
1846 		pv_changepte(pvhead(pa), 0, PG_W);
1847 		return;
1848 	}
1849 
1850 	/*
1851 	 * Remove all access to all people talking to this page.
1852 	 * Walk down PV list, removing all mappings.
1853 	 * The logic is much like that for pmap_remove,
1854 	 * but we know we are removing exactly one page.
1855 	 */
1856 	pv = pvhead(pa);
1857 	s = splpmap();
1858 	if ((pm = pv->pv_pmap) == NULL) {
1859 		splx(s);
1860 		return;
1861 	}
1862 	ctx = getcontext();
1863 	pv0 = pv;
1864 	flags = pv->pv_flags & ~PV_NC;
1865 	for (;; pm = pv->pv_pmap) {
1866 		va = pv->pv_va;
1867 		vseg = VA_VSEG(va);
1868 		if ((nleft = pm->pm_npte[vseg]) == 0)
1869 			panic("pmap_page_protect: empty vseg");
1870 		nleft--;
1871 		pm->pm_npte[vseg] = nleft;
1872 		pmeg = pm->pm_segmap[vseg];
1873 		pte = pm->pm_pte[vseg];
1874 		if (pmeg == seginval) {
1875 			if (nleft) {
1876 				pte += VA_VPG(va);
1877 				*pte = 0;
1878 			} else {
1879 				free((caddr_t)pte, M_VMPMAP);
1880 				pm->pm_pte[vseg] = NULL;
1881 			}
1882 			goto nextpv;
1883 		}
1884 		if (pm->pm_ctx) {
1885 			setcontext(pm->pm_ctxnum);
1886 			pteva = va;
1887 #ifdef notdef
1888 			doflush = vactype != VAC_NONE;
1889 #else
1890 			doflush = 1;
1891 #endif
1892 		} else {
1893 			setcontext(0);
1894 			/* XXX use per-cpu pteva? */
1895 			setsegmap(0, pmeg);
1896 			pteva = VA_VPG(va) * NBPG;
1897 			doflush = 0;
1898 		}
1899 		if (nleft) {
1900 			if (doflush)
1901 				cache_flush_page(va);
1902 			tpte = getpte(pteva);
1903 			if ((tpte & PG_V) == 0)
1904 				panic("pmap_page_protect !PG_V 1");
1905 			flags |= MR(tpte);
1906 			setpte(pteva, 0);
1907 		} else {
1908 			if (doflush)
1909 				cache_flush_page(va);
1910 			tpte = getpte(pteva);
1911 			if ((tpte & PG_V) == 0)
1912 				panic("pmap_page_protect !PG_V 2");
1913 			flags |= MR(tpte);
1914 			if (pm->pm_ctx) {
1915 				setsegmap(va, seginval);
1916 				if (pm == kernel_pmap) {
1917 					for (i = ncontext; --i > 0;) {
1918 						setcontext(i);
1919 						setsegmap(va, seginval);
1920 					}
1921 					goto skipptefree;
1922 				}
1923 			}
1924 			free((caddr_t)pte, M_VMPMAP);
1925 			pm->pm_pte[vseg] = NULL;
1926 		skipptefree:
1927 			me_free(pm, pmeg);
1928 		}
1929 	nextpv:
1930 		npv = pv->pv_next;
1931 		if (pv != pv0)
1932 			free((caddr_t)pv, M_VMPVENT);
1933 		if ((pv = npv) == NULL)
1934 			break;
1935 	}
1936 	pv0->pv_pmap = NULL;
1937 	pv0->pv_flags = flags;
1938 	setcontext(ctx);
1939 	splx(s);
1940 }
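
/*
 * Illustrative sketch, not compiled: the two calls the MI VM layer is
 * expected to make into pmap_page_protect().  `pa' stands for any
 * managed physical page address and is hypothetical.
 */
#ifdef notdef
	pmap_page_protect(pa, VM_PROT_READ);	/* strip PG_W via pv_changepte */
	pmap_page_protect(pa, VM_PROT_NONE);	/* tear down every mapping of pa */
#endif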
1941 
1942 /*
1943  * Lower (make more strict) the protection on the specified
1944  * range of this pmap.
1945  *
1946  * There are only two cases: either the protection is going to 0
1947  * (in which case we call pmap_remove to do the dirty work), or
1948  * it is going from read/write to read-only.  The latter is
1949  * fairly easy.
1950  */
1951 void
1952 pmap_protect(pm, sva, eva, prot)
1953 	register struct pmap *pm;
1954 	vm_offset_t sva, eva;
1955 	vm_prot_t prot;
1956 {
1957 	register int va, nva, vseg, pteva, pmeg;
1958 	register int s, ctx;
1959 
1960 	if (pm == NULL || prot & VM_PROT_WRITE)
1961 		return;
1962 	if ((prot & VM_PROT_READ) == 0) {
1963 		pmap_remove(pm, sva, eva);
1964 		return;
1965 	}
1966 
1967 	write_user_windows();
1968 	ctx = getcontext();
1969 	s = splpmap();
1970 	simple_lock(&pm->pm_lock);
1971 
1972 	for (va = sva; va < eva;) {
1973 		vseg = VA_VSEG(va);
1974 		nva = VSTOVA(vseg + 1);
1975 if (nva == 0) panic("pmap_protect: last segment");	/* cannot happen */
1976 		if (nva > eva)
1977 			nva = eva;
1978 		if (pm->pm_npte[vseg] == 0) {
1979 			va = nva;
1980 			continue;
1981 		}
1982 		pmeg = pm->pm_segmap[vseg];
1983 		if (pmeg == seginval) {
1984 			register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];
1985 
1986 			/* not in MMU; just clear PG_W from core copies */
1987 			for (; va < nva; va += NBPG)
1988 				*pte++ &= ~PG_W;
1989 		} else {
1990 			/* in MMU: take away write bits from MMU PTEs */
1991 			if (
1992 #ifdef notdef
1993 			    vactype != VAC_NONE &&
1994 #endif
1995 			    pm->pm_ctx) {
1996 				register int tpte;
1997 
1998 				/*
1999 				 * Flush cache so that any existing cache
2000 				 * tags are updated.  This is really only
2001 				 * needed for PTEs that lose PG_W.
2002 				 */
2003 				setcontext(pm->pm_ctxnum);
2004 				for (; va < nva; va += NBPG) {
2005 					tpte = getpte(va);
2006 					pmap_stats.ps_npg_prot_all++;
2007 					if (tpte & PG_W) {
2008 						pmap_stats.ps_npg_prot_actual++;
2009 						cache_flush_page(va);
2010 						setpte(va, tpte & ~PG_W);
2011 					}
2012 				}
2013 			} else {
2014 				register int pteva;
2015 
2016 				/*
2017 				 * No context, hence not cached;
2018 				 * just update PTEs.
2019 				 */
2020 				setcontext(0);
2021 				/* XXX use per-cpu pteva? */
2022 				setsegmap(0, pmeg);
2023 				pteva = VA_VPG(va) * NBPG;
2024 				for (; va < nva; pteva += NBPG, va += NBPG)
2025 					setpte(pteva, getpte(pteva) & ~PG_W);
2026 			}
2027 		}
2028 	}
2029 	simple_unlock(&pm->pm_lock);
2030 	splx(s);
2031 }
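
/*
 * Illustrative sketch, not compiled: the two cases described above, as
 * a caller would exercise them.  `pm', `sva' and `eva' are hypothetical.
 */
#ifdef notdef
	pmap_protect(pm, sva, eva, VM_PROT_READ);	/* clears PG_W in place */
	pmap_protect(pm, sva, eva, VM_PROT_NONE);	/* degenerates to pmap_remove */
#endif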
2032 
2033 /*
2034  * Change the protection and/or wired status of the given (MI) virtual page.
2035  * XXX: should have separate function (or flag) telling whether only wiring
2036  * is changing.
2037  */
2038 void
2039 pmap_changeprot(pm, va, prot, wired)
2040 	register struct pmap *pm;
2041 	register vm_offset_t va;
2042 	vm_prot_t prot;
2043 	int wired;
2044 {
2045 	register int vseg, tpte, newprot, pmeg, ctx, i, s;
2046 
2047 #ifdef DEBUG
2048 	if (pmapdebug & PDB_CHANGEPROT)
2049 		printf("pmap_changeprot(%x, %x, %x, %x)\n",
2050 		    pm, va, prot, wired);
2051 #endif
2052 
2053 	write_user_windows();	/* paranoia */
2054 
2055 	if (pm == kernel_pmap)
2056 		newprot = prot & VM_PROT_WRITE ? PG_S|PG_W : PG_S;
2057 	else
2058 		newprot = prot & VM_PROT_WRITE ? PG_W : 0;
2059 	vseg = VA_VSEG(va);
2060 	s = splpmap();		/* conservative */
2061 	pmap_stats.ps_changeprots++;
2062 
2063 	/* update PTEs in software or hardware */
2064 	if ((pmeg = pm->pm_segmap[vseg]) == seginval) {
2065 		register int *pte = &pm->pm_pte[vseg][VA_VPG(va)];
2066 
2067 		/* update in software */
2068 		if ((*pte & PG_PROT) == newprot)
2069 			goto useless;
2070 		*pte = (*pte & ~PG_PROT) | newprot;
2071 	} else {
2072 		/* update in hardware */
2073 		ctx = getcontext();
2074 		if (pm->pm_ctx) {
2075 			/* use current context; flush writeback cache */
2076 			setcontext(pm->pm_ctxnum);
2077 			tpte = getpte(va);
2078 			if ((tpte & PG_PROT) == newprot)
2079 				goto useless;
2080 			if (vactype == VAC_WRITEBACK &&
2081 			    (newprot & PG_W) == 0 &&
2082 			    (tpte & (PG_W | PG_NC)) == PG_W)
2083 				cache_flush_page((int)va);
2084 		} else {
2085 			setcontext(0);
2086 			/* XXX use per-cpu va? */
2087 			setsegmap(0, pmeg);
2088 			va = VA_VPG(va) * NBPG;
2089 			tpte = getpte(va);
2090 			if ((tpte & PG_PROT) == newprot)
2091 				goto useless;
2092 		}
2093 		tpte = (tpte & ~PG_PROT) | newprot;
2094 		setpte(va, tpte);
2095 		setcontext(ctx);
2096 	}
2097 	splx(s);
2098 	return;
2099 
2100 useless:
2101 	/* only wiring changed, and we ignore wiring */
2102 	pmap_stats.ps_useless_changeprots++;
2103 	splx(s);
2104 }
2105 
2106 /*
2107  * Insert (MI) physical page pa at virtual address va in the given pmap.
2108  * NB: the pa parameter includes type bits PMAP_OBIO, PMAP_NC as necessary.
2109  *
2110  * If pa is not in the `managed' range it will not be `bank mapped'.
2111  * This works during bootstrap only because the first 4MB happens to
2112  * map one-to-one.
2113  *
2114  * There may already be something else there, or we might just be
2115  * changing protections and/or wiring on an existing mapping.
2116  *	XXX	should have different entry points for changing!
2117  */
2118 void
2119 pmap_enter(pm, va, pa, prot, wired)
2120 	register struct pmap *pm;
2121 	vm_offset_t va, pa;
2122 	vm_prot_t prot;
2123 	int wired;
2124 {
2125 	register struct pvlist *pv;
2126 	register int pteproto, ctx;
2127 
2128 	if (pm == NULL)
2129 		return;
2130 #ifdef DEBUG
2131 	if (pmapdebug & PDB_ENTER)
2132 		printf("pmap_enter(%x, %x, %x, %x, %x)\n",
2133 		    pm, va, pa, prot, wired);
2134 #endif
2135 
2136 	pteproto = PG_V | ((pa & PMAP_TNC) << PG_TNC_SHIFT);
2137 	pa &= ~PMAP_TNC;
2138 	/*
2139 	 * Set up prototype for new PTE.  Cannot set PG_NC from PV_NC yet
2140 	 * since the pvlist no-cache bit might change as a result of the
2141 	 * new mapping.
2142 	 */
2143 	if (managed(pa)) {
2144 		pteproto |= SWTOHW(atop(pa));
2145 		pv = pvhead(pa);
2146 	} else {
2147 		pteproto |= atop(pa) & PG_PFNUM;
2148 		pv = NULL;
2149 	}
2150 	if (prot & VM_PROT_WRITE)
2151 		pteproto |= PG_W;
2152 
2153 	ctx = getcontext();
2154 	if (pm == kernel_pmap)
2155 		pmap_enk(pm, va, prot, wired, pv, pteproto | PG_S);
2156 	else
2157 		pmap_enu(pm, va, prot, wired, pv, pteproto);
2158 	setcontext(ctx);
2159 }
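
/*
 * Illustrative sketch, not compiled: mapping an OBIO device page
 * uncached by folding the type bits into pa, as described above
 * pmap_enter().  `va' and `pa' here are hypothetical.
 */
#ifdef notdef
	pmap_enter(kernel_pmap, va, pa | PMAP_OBIO | PMAP_NC,
	    VM_PROT_READ | VM_PROT_WRITE, 1);		/* wired */
#endif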
2160 
2161 /* enter new (or change existing) kernel mapping */
2162 pmap_enk(pm, va, prot, wired, pv, pteproto)
2163 	register struct pmap *pm;
2164 	vm_offset_t va;
2165 	vm_prot_t prot;
2166 	int wired;
2167 	register struct pvlist *pv;
2168 	register int pteproto;
2169 {
2170 	register int vseg, tpte, pmeg, i, s;
2171 
2172 	vseg = VA_VSEG(va);
2173 	s = splpmap();		/* XXX way too conservative */
2174 	if (pm->pm_segmap[vseg] != seginval &&
2175 	    (tpte = getpte(va)) & PG_V) {
2176 		register int addr = tpte & PG_PFNUM;
2177 
2178 		/* old mapping exists */
2179 		if (addr == (pteproto & PG_PFNUM)) {
2180 			/* just changing protection and/or wiring */
2181 			splx(s);
2182 			pmap_changeprot(pm, va, prot, wired);
2183 			return;
2184 		}
2185 
2186 /*printf("pmap_enk: changing existing va=>pa entry\n");*/
2187 		/*
2188 		 * Switcheroo: changing pa for this va.
2189 		 * If old pa was managed, remove from pvlist.
2190 		 * If old page was cached, flush cache.
2191 		 */
2192 		addr = ptoa(HWTOSW(addr));
2193 		if (managed(addr))
2194 			pv_unlink(pvhead(addr), pm, va);
2195 		if (
2196 #ifdef notdef
2197 		    vactype != VAC_NONE &&
2198 #endif
2199 		    (tpte & PG_NC) == 0) {
2200 			setcontext(0);	/* ??? */
2201 			cache_flush_page((int)va);
2202 		}
2203 	} else {
2204 		/* adding new entry */
2205 		pm->pm_npte[vseg]++;
2206 	}
2207 
2208 	/*
2209 	 * If the new mapping is for a managed PA, enter into pvlist.
2210 	 * Note that the mapping for a malloc page will always be
2211 	 * unique (hence will never cause a second call to malloc).
2212 	 */
2213 	if (pv != NULL)
2214 		pteproto |= pv_link(pv, pm, va);
2215 
2216 	pmeg = pm->pm_segmap[vseg];
2217 	if (pmeg == seginval) {
2218 		register int tva;
2219 
2220 		/*
2221 		 * Allocate an MMU entry now (on locked list),
2222 		 * and map it into every context.  Set all its
2223 		 * PTEs invalid (we will then overwrite one, but
2224 		 * this is more efficient than looping twice).
2225 		 */
2226 #ifdef DEBUG
2227 		if (pm->pm_ctx == NULL || pm->pm_ctxnum != 0)
2228 			panic("pmap_enk: kern seg but no kern ctx");
2229 #endif
2230 		pmeg = me_alloc(&me_locked, pm, vseg)->me_pmeg;
2231 		pm->pm_segmap[vseg] = pmeg;
2232 		i = ncontext - 1;
2233 		do {
2234 			setcontext(i);
2235 			setsegmap(va, pmeg);
2236 		} while (--i >= 0);
2237 
2238 		/* set all PTEs to invalid, then overwrite one PTE below */
2239 		tva = VA_ROUNDDOWNTOSEG(va);
2240 		i = NPTESG;
2241 		do {
2242 			setpte(tva, 0);
2243 			tva += NBPG;
2244 		} while (--i > 0);
2245 	}
2246 
2247 	/* ptes kept in hardware only */
2248 	setpte(va, pteproto);
2249 	splx(s);
2250 }
2251 
2252 /* enter new (or change existing) user mapping */
2253 pmap_enu(pm, va, prot, wired, pv, pteproto)
2254 	register struct pmap *pm;
2255 	vm_offset_t va;
2256 	vm_prot_t prot;
2257 	int wired;
2258 	register struct pvlist *pv;
2259 	register int pteproto;
2260 {
2261 	register int vseg, *pte, tpte, pmeg, i, s, doflush;
2262 
2263 	write_user_windows();		/* XXX conservative */
2264 	vseg = VA_VSEG(va);
2265 	s = splpmap();			/* XXX conservative */
2266 
2267 	/*
2268 	 * If there is no software copy in which to hold the PTEs while
2269 	 * they are not loaded in the hardware, this must be a new
2270 	 * virtual segment.  Get PTE space and count the segment.
2271 	 *
2272 	 * TO SPEED UP CTX ALLOC, PUT SEGMENT BOUNDS STUFF HERE
2273 	 * AND IN pmap_rmu()
2274 	 */
2275 retry:
2276 	pte = pm->pm_pte[vseg];
2277 	if (pte == NULL) {
2278 		/* definitely a new mapping */
2279 		register int size = NPTESG * sizeof *pte;
2280 
2281 		pte = (int *)malloc((u_long)size, M_VMPMAP, M_WAITOK);
2282 		if (pm->pm_pte[vseg] != NULL) {
2283 printf("pmap_enter: pte filled during sleep\n");	/* can this happen? */
2284 			free((caddr_t)pte, M_VMPMAP);
2285 			goto retry;
2286 		}
2287 #ifdef DEBUG
2288 		if (pm->pm_segmap[vseg] != seginval)
2289 			panic("pmap_enter: new ptes, but not seginval");
2290 #endif
2291 		bzero((caddr_t)pte, size);
2292 		pm->pm_pte[vseg] = pte;
2293 		pm->pm_npte[vseg] = 1;
2294 	} else {
2295 		/* might be a change: fetch old pte */
2296 		doflush = 0;
2297 		if ((pmeg = pm->pm_segmap[vseg]) == seginval)
2298 			tpte = pte[VA_VPG(va)];	/* software pte */
2299 		else {
2300 			if (pm->pm_ctx) {	/* hardware pte */
2301 				setcontext(pm->pm_ctxnum);
2302 				tpte = getpte(va);
2303 				doflush = 1;
2304 			} else {
2305 				setcontext(0);
2306 				/* XXX use per-cpu pteva? */
2307 				setsegmap(0, pmeg);
2308 				tpte = getpte(VA_VPG(va) * NBPG);
2309 			}
2310 		}
2311 		if (tpte & PG_V) {
2312 			register int addr = tpte & PG_PFNUM;
2313 
2314 			/* old mapping exists */
2315 			if (addr == (pteproto & PG_PFNUM)) {
2316 				/* just changing prot and/or wiring */
2317 				splx(s);
2318 				/* caller should call this directly: */
2319 				pmap_changeprot(pm, va, prot, wired);
2320 				return;
2321 			}
2322 			/*
2323 			 * Switcheroo: changing pa for this va.
2324 			 * If old pa was managed, remove from pvlist.
2325 			 * If old page was cached, flush cache.
2326 			 */
2327 /*printf("%s[%d]: pmap_enu: changing existing va(%x)=>pa entry\n",
2328 curproc->p_comm, curproc->p_pid, va);*/
2329 			addr = ptoa(HWTOSW(addr));
2330 			if (managed(addr))
2331 				pv_unlink(pvhead(addr), pm, va);
2332 			if (
2333 #ifdef notdef
2334 			    vactype != VAC_NONE &&
2335 #endif
2336 			    doflush && (tpte & PG_NC) == 0)
2337 				cache_flush_page((int)va);
2338 		} else {
2339 			/* adding new entry */
2340 			pm->pm_npte[vseg]++;
2341 		}
2342 	}
2343 
2344 	if (pv != NULL)
2345 		pteproto |= pv_link(pv, pm, va);
2346 
2347 	/*
2348 	 * Update hardware or software PTEs (whichever are active).
2349 	 */
2350 	if ((pmeg = pm->pm_segmap[vseg]) != seginval) {
2351 		/* ptes are in hardware */
2352 		if (pm->pm_ctx)
2353 			setcontext(pm->pm_ctxnum);
2354 		else {
2355 			setcontext(0);
2356 			/* XXX use per-cpu pteva? */
2357 			setsegmap(0, pmeg);
2358 			va = VA_VPG(va) * NBPG;
2359 		}
2360 		setpte(va, pteproto);
2361 	}
2362 	/* update software copy */
2363 	pte += VA_VPG(va);
2364 	*pte = pteproto;
2365 
2366 	splx(s);
2367 }
2368 
2369 /*
2370  * Change the wiring attribute for a map/virtual-address pair.
2371  */
2372 /* ARGSUSED */
2373 void
2374 pmap_change_wiring(pm, va, wired)
2375 	struct pmap *pm;
2376 	vm_offset_t va;
2377 	int wired;
2378 {
2379 
2380 	pmap_stats.ps_useless_changewire++;
2381 }
2382 
2383 /*
2384  * Extract the physical page address associated
2385  * with the given map/virtual_address pair.
2386  * GRR, the vm code knows; we should not have to do this!
2387  */
2388 vm_offset_t
2389 pmap_extract(pm, va)
2390 	register struct pmap *pm;
2391 	vm_offset_t va;
2392 {
2393 	register int tpte;
2394 	register int vseg;
2395 
2396 	if (pm == NULL) {
2397 		printf("pmap_extract: null pmap\n");
2398 		return (0);
2399 	}
2400 	vseg = VA_VSEG(va);
2401 	if (pm->pm_segmap[vseg] != seginval) {
2402 		register int ctx = getcontext();
2403 
2404 		if (pm->pm_ctx) {
2405 			setcontext(pm->pm_ctxnum);
2406 			tpte = getpte(va);
2407 		} else {
2408 			setcontext(0);
2409 			tpte = getpte(VA_VPG(va) * NBPG);
2410 		}
2411 		setcontext(ctx);
2412 	} else {
2413 		register int *pte = pm->pm_pte[vseg];
2414 
2415 		if (pte == NULL) {
2416 			printf("pmap_extract: invalid vseg\n");
2417 			return (0);
2418 		}
2419 		tpte = pte[VA_VPG(va)];
2420 	}
2421 	if ((tpte & PG_V) == 0) {
2422 		printf("pmap_extract: invalid pte\n");
2423 		return (0);
2424 	}
2425 	tpte &= PG_PFNUM;
2426 	tpte = HWTOSW(tpte);
2427 	return ((tpte << PGSHIFT) | (va & PGOFSET));
2428 }
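
/*
 * Illustrative sketch, not compiled: recovering the physical address
 * behind a wired kernel mapping, e.g. before programming a DMA engine.
 * `buf' and `pa' are hypothetical.
 */
#ifdef notdef
	pa = pmap_extract(kernel_pmap, (vm_offset_t)buf);
	if (pa == 0)
		panic("lost mapping for DMA buffer");
#endif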
2429 
2430 /*
2431  * Copy the range specified by src_addr/len
2432  * from the source map to the range dst_addr/len
2433  * in the destination map.
2434  *
2435  * This routine is only advisory and need not do anything.
2436  */
2437 /* ARGSUSED */
2438 void
2439 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
2440 	struct pmap *dst_pmap, *src_pmap;
2441 	vm_offset_t dst_addr;
2442 	vm_size_t len;
2443 	vm_offset_t src_addr;
2444 {
2445 }
2446 
2447 /*
2448  * Require that all active physical maps contain no
2449  * incorrect entries NOW.  [This update includes
2450  * forcing updates of any address map caching.]
2451  */
2452 void
2453 pmap_update()
2454 {
2455 }
2456 
2457 /*
2458  * Garbage collects the physical map system for
2459  * pages which are no longer used.
2460  * Success need not be guaranteed -- that is, there
2461  * may well be pages which are not referenced, but
2462  * others may be collected.
2463  * Called by the pageout daemon when pages are scarce.
2464  */
2465 /* ARGSUSED */
2466 void
2467 pmap_collect(pm)
2468 	struct pmap *pm;
2469 {
2470 }
2471 
2472 /*
2473  * Clear the modify bit for the given physical page.
2474  */
2475 void
2476 pmap_clear_modify(pa)
2477 	register vm_offset_t pa;
2478 {
2479 	register struct pvlist *pv;
2480 
2481 	if (managed(pa)) {
2482 		pv = pvhead(pa);
2483 		(void) pv_syncflags(pv);
2484 		pv->pv_flags &= ~PV_MOD;
2485 	}
2486 }
2487 
2488 /*
2489  * Tell whether the given physical page has been modified.
2490  */
2491 int
2492 pmap_is_modified(pa)
2493 	register vm_offset_t pa;
2494 {
2495 	register struct pvlist *pv;
2496 
2497 	if (managed(pa)) {
2498 		pv = pvhead(pa);
2499 		if (pv->pv_flags & PV_MOD || pv_syncflags(pv) & PV_MOD)
2500 			return (1);
2501 	}
2502 	return (0);
2503 }
2504 
2505 /*
2506  * Clear the reference bit for the given physical page.
2507  */
2508 void
2509 pmap_clear_reference(pa)
2510 	vm_offset_t pa;
2511 {
2512 	register struct pvlist *pv;
2513 
2514 	if (managed(pa)) {
2515 		pv = pvhead(pa);
2516 		(void) pv_syncflags(pv);
2517 		pv->pv_flags &= ~PV_REF;
2518 	}
2519 }
2520 
2521 /*
2522  * Tell whether the given physical page has been referenced.
2523  */
2524 int
2525 pmap_is_referenced(pa)
2526 	vm_offset_t pa;
2527 {
2528 	register struct pvlist *pv;
2529 
2530 	if (managed(pa)) {
2531 		pv = pvhead(pa);
2532 		if (pv->pv_flags & PV_REF || pv_syncflags(pv) & PV_REF)
2533 			return (1);
2534 	}
2535 	return (0);
2536 }
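
/*
 * Illustrative sketch, not compiled: the reference/modify protocol as a
 * pageout loop might use it.  `pa' is hypothetical.
 */
#ifdef notdef
	if (pmap_is_referenced(pa)) {
		pmap_clear_reference(pa);	/* recently used: skip this pass */
	} else if (pmap_is_modified(pa)) {
		/* dirty: would have to be cleaned before being freed */
		pmap_clear_modify(pa);
	}
#endif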
2537 
2538 /*
2539  * Make the specified pages (by pmap, offset) pageable (or not) as requested.
2540  *
2541  * A page which is not pageable may not take a fault; therefore, its page
2542  * table entry must remain valid for the duration (or at least, the trap
2543  * handler must not call vm_fault).
2544  *
2545  * This routine is merely advisory; pmap_enter will specify that these pages
2546  * are to be wired down (or not) as appropriate.
2547  */
2548 /* ARGSUSED */
2549 void
2550 pmap_pageable(pm, start, end, pageable)
2551 	struct pmap *pm;
2552 	vm_offset_t start, end;
2553 	int pageable;
2554 {
2555 }
2556 
2557 /*
2558  * Fill the given MI physical page with zero bytes.
2559  *
2560  * We avoid stomping on the cache.
2561  * XXX	might be faster to use destination's context and allow cache to fill?
2562  */
2563 void
2564 pmap_zero_page(pa)
2565 	register vm_offset_t pa;
2566 {
2567 	register caddr_t va;
2568 	register int pte;
2569 
2570 	if (managed(pa)) {
2571 		/*
2572 		 * The following might not be necessary since the page
2573 		 * is being cleared because it is about to be allocated,
2574 		 * i.e., is in use by no one.
2575 		 */
2576 #if 1
2577 #ifdef notdef
2578 		if (vactype != VAC_NONE)
2579 #endif
2580 			pv_flushcache(pvhead(pa));
2581 #endif
2582 		pte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(pa));
2583 	} else
2584 		pte = PG_V | PG_S | PG_W | PG_NC | (atop(pa) & PG_PFNUM);
2585 
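	/*
	 * vpage[0] and vpage[1] are kernel virtual pages set aside for
	 * short-lived mappings such as this one, so the target page can
	 * be touched without building a permanent mapping.
	 */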
2586 	va = vpage[0];
2587 	setpte(va, pte);
2588 	qzero(va, NBPG);
2589 	setpte(va, 0);
2590 }
2591 
2592 /*
2593  * Copy the given MI physical source page to its destination.
2594  *
2595  * We avoid stomping on the cache as above (with same `XXX' note).
2596  * We must first flush any write-back cache for the source page.
2597  * We go ahead and stomp on the kernel's virtual cache for the
2598  * source page, since cache line fills read memory MUCH faster than
2599  * uncached processor loads.
2600  */
2601 void
2602 pmap_copy_page(src, dst)
2603 	vm_offset_t src, dst;
2604 {
2605 	register caddr_t sva, dva;
2606 	register int spte, dpte;
2607 
2608 	if (managed(src)) {
2609 		if (vactype == VAC_WRITEBACK)
2610 			pv_flushcache(pvhead(src));
2611 		spte = PG_V | PG_S | SWTOHW(atop(src));
2612 	} else
2613 		spte = PG_V | PG_S | (atop(src) & PG_PFNUM);
2614 
2615 	if (managed(dst)) {
2616 		/* similar `might not be necessary' comment applies */
2617 #if 1
2618 #ifdef notdef
2619 		if (vactype != VAC_NONE)
2620 #endif
2621 			pv_flushcache(pvhead(dst));
2622 #endif
2623 		dpte = PG_V | PG_S | PG_W | PG_NC | SWTOHW(atop(dst));
2624 	} else
2625 		dpte = PG_V | PG_S | PG_W | PG_NC | (atop(dst) & PG_PFNUM);
2626 
2627 	sva = vpage[0];
2628 	dva = vpage[1];
2629 	setpte(sva, spte);
2630 	setpte(dva, dpte);
2631 	qcopy(sva, dva, NBPG);	/* loads cache, so we must ... */
2632 	cache_flush_page((int)sva);
2633 	setpte(sva, 0);
2634 	setpte(dva, 0);
2635 }
2636 
2637 /*
2638  * Turn a cdevsw d_mmap value into a byte address for pmap_enter.
2639  * XXX	this should almost certainly be done differently, and
2640  *	elsewhere, or even not at all
2641  */
2642 vm_offset_t
2643 pmap_phys_address(x)
2644 	int x;
2645 {
2646 
2647 	return (x);
2648 }
2649 
2650 /*
2651  * Turn off cache for a given (va, number of pages).
2652  *
2653  * We just assert PG_NC for each PTE; the addresses must reside
2654  * in locked kernel space.  A cache flush is also done.
2655  */
2656 kvm_uncache(va, npages)
2657 	register caddr_t va;
2658 	register int npages;
2659 {
2660 	register int pte;
2661 
2662 	for (; --npages >= 0; va += NBPG) {
2663 		pte = getpte(va);
2664 		if ((pte & PG_V) == 0)
2665 			panic("kvm_uncache !pg_v");
2666 		pte |= PG_NC;
2667 		setpte(va, pte);
2668 		cache_flush_page((int)va);
2669 	}
2670 }
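
/*
 * Illustrative sketch, not compiled: marking a locked kernel buffer
 * uncacheable before handing it to a device.  `dvmabuf' and `len' are
 * hypothetical.
 */
#ifdef notdef
	kvm_uncache(dvmabuf, (len + NBPG - 1) / NBPG);
#endif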
2671 
2672 /*
2673  * For /dev/mem.
2674  */
2675 int
2676 pmap_enter_hw(pm, va, pa, prot, wired)
2677 	register struct pmap *pm;
2678 	vm_offset_t va, pa;
2679 	vm_prot_t prot;
2680 	int wired;
2681 {
2682 	register struct memarr *ma;
2683 	register int n;
2684 	register u_int t;
2685 
2686 	if (pa >= MAXMEM)				/* ??? */
2687 		return (EFAULT);
2688 	for (ma = pmemarr, n = npmemarr; --n >= 0; ma++) {
2689 		t = (u_int)pa - ma->addr;
2690 		if (t < ma->len)
2691 			goto ok;
2692 	}
2693 	return (EFAULT);
2694 ok:
2695 	pa = (HWTOSW(atop(pa)) << PGSHIFT) | (pa & PGOFSET);
2696 	if (pa >= vm_first_phys + vm_num_phys)		/* ??? */
2697 		return (EFAULT);
2698 
2699 	pmap_enter(pm, va, pa, prot, wired);
2700 	return (0);
2701 }
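
/*
 * Illustrative sketch, not compiled: a /dev/mem style caller using the
 * EFAULT return to reject physical addresses outside every pmemarr
 * bank.  `pm', `va', `pa' and `prot' are hypothetical.
 */
#ifdef notdef
	if (pmap_enter_hw(pm, va, pa, prot, 1) != 0)
		return (EFAULT);	/* pa not in installed memory */
#endif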
2702