1 /*	$OpenBSD: pmap.c,v 1.10 2002/01/25 15:43:59 art Exp $	*/
2 /*	$NetBSD: pmap.c,v 1.107 2001/08/31 16:47:41 eeh Exp $	*/
3 #undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
4 #define	HWREF
5 /*
6  *
7  * Copyright (C) 1996-1999 Eduardo Horvath.
8  * All rights reserved.
9  *
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  *
29  */
30 
31 #include <sys/param.h>
32 #include <sys/malloc.h>
33 #include <sys/queue.h>
34 #include <sys/systm.h>
35 #include <sys/msgbuf.h>
36 #include <sys/lock.h>
37 #include <sys/pool.h>
38 #include <sys/exec.h>
39 #include <sys/core.h>
40 #include <sys/kcore.h>
41 
42 #include <uvm/uvm.h>
43 
44 #include <machine/pcb.h>
45 #include <machine/sparc64.h>
46 #include <machine/ctlreg.h>
47 #include <machine/openfirm.h>
48 #include <machine/kcore.h>
49 
50 #include "cache.h"
51 
52 #ifdef DDB
53 #include <machine/db_machdep.h>
54 #include <ddb/db_command.h>
55 #include <ddb/db_sym.h>
56 #include <ddb/db_variables.h>
57 #include <ddb/db_extern.h>
58 #include <ddb/db_access.h>
59 #include <ddb/db_output.h>
60 #else
61 #define Debugger()
62 #define db_printf	printf
63 #endif
64 
65 #define	MEG		(1<<20) /* 1MB */
66 #define	KB		(1<<10)	/* 1KB */
67 
68 paddr_t cpu0paddr;/* XXXXXXXXXXXXXXXX */
69 
70 extern int64_t asmptechk __P((int64_t *pseg[], int addr)); /* DEBUG XXXXX */
71 
72 #define IS_VM_PHYSADDR(PA) (vm_physseg_find(atop(PA), NULL) != -1)
73 
74 #if 0
75 static int pseg_check __P((struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare));
76 static int
77 pseg_check(struct pmap *pm, vaddr_t addr, int64_t tte, paddr_t spare)
78 {
79 	int i, k, s;
80 	paddr_t *pdir, *ptbl;
81 	extern int pseg_set __P((struct pmap*, vaddr_t addr, int64_t tte,
82 		paddr_t spare));
83 
84 	if (!spare) return pseg_set(pm, addr, tte, spare);
85 
86 	s = splvm();
87 	if ((paddr_t)pm->pm_segs == spare) panic("pseg_check: pm_segs == %llx\n", spare);
88 	for (i=0; i<STSZ; i++) {
89 		if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
90 			if ((paddr_t)pdir == spare)
91 				panic("pseg_check: pdir %d == %llx\n", i,
92 					spare);
93 			for (k=0; k<PDSZ; k++) {
94 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
95 					if ((paddr_t)ptbl == spare)
96 						panic("pseg_check: ptbl %d:%d == %llx\n",
97 						    i, k, spare);
98 				}
99 			}
100 		}
101 	}
102 	splx(s);
103 	if (addr == -1) return 0;
104 	return pseg_set(pm, addr, tte, spare);
105 }
106 #define pseg_check(a, b, c, d)
107 #define cache_flush_phys(a, b, c)
108 /* #define pseg_set(a, b, c, d)	pseg_check(a, b, c, d) */
109 #endif
110 
111 /* These routines are in assembly to allow access thru physical mappings */
112 #if 1
113 extern int64_t pseg_get __P((struct pmap*, vaddr_t addr));
114 extern int pseg_set __P((struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare));
115 extern paddr_t pseg_find __P((struct pmap*, vaddr_t addr, paddr_t spare));
116 #else
117 static int64_t pseg_get __P((struct pmap*, vaddr_t addr));
118 static int pseg_set __P((struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare));
119 static paddr_t pseg_find __P((struct pmap*, vaddr_t addr, paddr_t spare));
120 
121 static int64_t pseg_get(struct pmap* pm, vaddr_t addr) {
122 	paddr_t *pdir, *ptbl;
123 
124 	if ((pdir = (paddr_t *)ldda(&pm->pm_segs[va_to_seg(addr)],
125 				    ASI_PHYS_CACHED)) &&
126 	    (ptbl = (paddr_t *)ldda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED)))
127 		return (ldda(&ptbl[va_to_pte(addr)], ASI_PHYS_CACHED));
128 	return (0);
129 }
130 
131 static int pseg_set(struct pmap* pm, vaddr_t addr, int64_t tte, paddr_t spare) {
132 	int i, j, k, s;
133 	paddr_t *pdir, *ptbl;
134 
135 	if (!(pdir = (paddr_t *)ldda(&pm->pm_segs[va_to_seg(addr)],
136 	    ASI_PHYS_CACHED))) {
137 		if (!spare) return (1);
138 		stda(&pm->pm_segs[va_to_seg(addr)], ASI_PHYS_CACHED, spare);
139 		pdir = spare;
140 		spare = NULL;
141 	}
142 	if (!(ptbl = (paddr_t *)ldda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED))) {
143 		if (!spare) return (1);
144 		stda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED, spare);
145 		ptbl = spare;
146 		spare = NULL;
147 	}
148 	stda(&ptbl[va_to_pte(addr)], ASI_PHYS_CACHED, tte);
149 	return (0);
150 }
151 
152 static paddr_t pseg_find(struct pmap* pm, vaddr_t addr, paddr_t spare) {
153 	int i, j, k, s;
154 	paddr_t *pdir, *ptbl;
155 
156 	if (!(pdir = (paddr_t *)ldda(&pm->pm_segs[va_to_seg(addr)],
157 	    ASI_PHYS_CACHED))) {
158 		if (!spare) return (1);
159 		stda(&pm->pm_segs[va_to_seg(addr)], ASI_PHYS_CACHED, spare);
160 		pdir = spare;
161 		spare = NULL;
162 	}
163 	if (!(ptbl = (paddr_t *)ldda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED))) {
164 		if (!spare) return (1);
165 		stda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED, spare);
166 		ptbl = spare;
167 		spare = NULL;
168 	}
169 	return (paddr_t)(&ptbl[va_to_pte(addr)]);
170 }
171 
172 
173 #endif
174 
175 extern struct vm_page *vm_page_alloc1 __P((void));
176 extern void vm_page_free1 __P((struct vm_page *));
177 
178 
179 #ifdef DEBUG
180 #ifdef __STDC__
181 #define	ASSERT(x)	\
182 	if (!(x)) panic("%s at line %d: assertion failed\n", #x, __LINE__);
183 #else
184 #define	ASSERT(x)	\
185 	if (!(x)) panic("%s at line %d: assertion failed\n", "x", __LINE__);
186 #endif
187 #else
188 #define ASSERT(x)
189 #endif
190 
191 /*
192  * For each struct vm_page, there is a list of all currently valid virtual
193  * mappings of that page.  An entry is a pv_entry_t, the list is pv_table.
194  * XXX really should do this as a part of the higher level code.
195  */
196 typedef struct pv_entry {
197 	struct pv_entry	*pv_next;	/* next pv_entry */
198 	struct pmap	*pv_pmap;	/* pmap where mapping lies */
199 	vaddr_t	pv_va;		/* virtual address for mapping */
200 } *pv_entry_t;
201 /* PV flags encoded in the low bits of the VA of the first pv_entry */
202 
203 /*
204  * Diatribe on ref/mod counting:
205  *
206  * First of all, ref/mod info must be non-volatile.  Hence we need to keep it
207  * in the pv_entry structure for each page.  (We could bypass this for the
208  * vm_page, but that's a long story....)
209  *
210  * This architecture has nice, fast traps with lots of space for software bits
211  * in the TTE.  To accelerate ref/mod counts we make use of these features.
212  *
213  * When we map a page initially, we place a TTE in the page table.  It's
214  * inserted with the TLB_W and TLB_ACCESS bits cleared.  If a page is really
215  * writeable we set the TLB_REAL_W bit for the trap handler.
216  *
217  * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
218  * bit in the appropriate TTE in the page table.  Whenever we take a protection
219  * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
220  * bits to enable writing and mark the page as modified.
221  *
222  * This means that we may have ref/mod information all over the place.  The
223  * pmap routines must traverse the page tables of all pmaps with a given page
224  * and collect/clear all the ref/mod information and copy it into the pv_entry.
225  */
226 
227 #ifdef	NO_VCACHE
228 #define	FORCE_ALIAS	1
229 #else
230 #define FORCE_ALIAS	0
231 #endif
232 
233 #define	PV_ALIAS	0x1LL
234 #define PV_REF		0x2LL
235 #define PV_MOD		0x4LL
236 #define PV_NVC		0x8LL
237 #define PV_NC		0x10LL
238 #define PV_WE		0x20LL		/* Debug -- track if this page was ever writable */
239 #define PV_MASK		(0x03fLL)
240 #define PV_VAMASK	(~(NBPG-1))
241 #define PV_MATCH(pv,va)	(!((((pv)->pv_va)^(va))&PV_VAMASK))
242 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va)&PV_VAMASK)|(((pv)->pv_va)&PV_MASK)))
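
/*
 * Example (a minimal sketch, compiled out and not referenced anywhere):
 * how ref/mod state harvested from a TTE is folded into the PV_* flag
 * bits that share pv_va with the page-aligned VA.  PV_SETVA() rewrites
 * only the VA portion and PV_MATCH() masks the flag bits off, so the
 * flags never disturb VA comparisons.  TLB_ACCESS/TLB_MODIFY are the
 * TTE software bits this file tests elsewhere (e.g. in pmap_release()).
 */
#if 0
static int
pv_flags_example(struct pv_entry *pv, vaddr_t va, int64_t tte)
{
	PV_SETVA(pv, va);		/* keep the flag bits, set the VA */
	if (tte & TLB_ACCESS)
		pv->pv_va |= PV_REF;	/* the MMU saw a reference */
	if (tte & TLB_MODIFY)
		pv->pv_va |= PV_MOD;	/* the MMU saw a write */
	return (PV_MATCH(pv, va));	/* flags do not affect the compare */
}
#endif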
243 
244 pv_entry_t	pv_table;	/* array of entries, one per page */
245 static struct pool pv_pool;
246 static struct pool pmap_pool;
247 extern void	pmap_remove_pv __P((struct pmap *pm, vaddr_t va, paddr_t pa));
248 extern void	pmap_enter_pv __P((struct pmap *pm, vaddr_t va, paddr_t pa));
249 extern void	pmap_page_cache __P((struct pmap *pm, paddr_t pa, int mode));
250 
251 void	pmap_pinit __P((struct pmap *));
252 void	pmap_release __P((struct pmap *));
253 
254 /*
255  * First and last managed physical addresses.  XXX only used for dumping the system.
256  */
257 paddr_t	vm_first_phys, vm_num_phys;
258 
259 u_int64_t first_phys_addr;
260 #define pa_index(pa)		atop((pa) - first_phys_addr)
261 #define	pa_to_pvh(pa)							\
262 ({									\
263 	int bank_, pg_;							\
264 									\
265 	bank_ = vm_physseg_find(atop((pa)), &pg_);			\
266 	(pv_entry_t)&vm_physmem[bank_].pmseg.pvent[pg_];		\
267 })
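
/*
 * Example (a minimal sketch, compiled out): pa_to_pvh() uses a GCC
 * statement expression to look a managed physical page up in
 * vm_physmem[] and return the head of its pv list; IS_VM_PHYSADDR()
 * is the matching "is this PA managed at all" test, so check it first.
 */
#if 0
static pv_entry_t
pa_to_pvh_example(paddr_t pa)
{
	if (!IS_VM_PHYSADDR(pa))
		return (NULL);		/* unmanaged page: no pv list */
	return (pa_to_pvh(pa));
}
#endif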
268 
269 
270 
271 /*
272  * Here's the CPU TSB stuff.  It's allocated in pmap_bootstrap.
273  */
274 pte_t *tsb;
275 int tsbsize;		/* tsbents = 512 * 2^tsbsize */
276 #define TSBENTS (512<<tsbsize)
277 #define	TSBSIZE	(TSBENTS * 16)
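
/*
 * Worked example (illustration only, compiled out): the TSB sizing
 * arithmetic in one place.  tsbsize 0/1/2 gives 512/1024/2048 entries,
 * and at 16 bytes per entry that is 8KB/16KB/32KB of TSB.
 */
#if 0
static size_t
tsb_bytes_example(int szcode)
{
	/* same value as TSBSIZE when tsbsize == szcode */
	return ((size_t)(512 << szcode) * 16);
}
#endif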
278 
279 struct pmap kernel_pmap_;
280 
281 int physmem;
282 /*
283  * Virtual and physical addresses of the start and end of kernel text
284  * and data segments.
285  */
286 vaddr_t ktext;
287 paddr_t ktextp;
288 vaddr_t ektext;
289 paddr_t ektextp;
290 vaddr_t kdata;
291 paddr_t kdatap;
292 vaddr_t ekdata;
293 paddr_t ekdatap;
294 
295 static int npgs;
296 static u_int nextavail;
297 static struct mem_region memlist[8]; /* Pick a random size here */
298 
299 vaddr_t	vmmap;			/* one reserved MI vpage for /dev/mem */
300 
301 struct mem_region *mem, *avail, *orig;
302 int memsize;
303 
304 static int memh = 0, vmemh = 0;	/* Handles to OBP devices */
305 
306 int avail_start, avail_end;	/* These are used by ps & family */
307 
308 static int ptelookup_va __P((vaddr_t va)); /* sun4u */
309 #if notyet
310 static void tsb_enter __P((int ctx, int64_t va, int64_t data));
311 #endif
312 
313 struct pmap_stats {
314 	int	ps_unlink_pvfirst;	/* # of pv_unlinks on head */
315 	int	ps_unlink_pvsearch;	/* # of pv_unlink searches */
316 	int	ps_changeprots;		/* # of calls to changeprot */
317 	int	ps_useless_changeprots;	/* # of changeprots for wiring */
318 	int	ps_enter_firstpv;	/* pv heads entered */
319 	int	ps_enter_secondpv;	/* pv nonheads entered */
320 	int	ps_useless_changewire;	/* useless wiring changes */
321 	int	ps_npg_prot_all;	/* # of active pages protected */
322 	int	ps_npg_prot_actual;	/* # pages actually affected */
323 } pmap_stats;
324 
325 struct prom_map *prom_map;
326 int prom_map_size;
327 
328 #ifdef DEBUG
329 struct {
330 	int kernel;	/* entering kernel mapping */
331 	int user;	/* entering user mapping */
332 	int ptpneeded;	/* needed to allocate a PT page */
333 	int pwchange;	/* no mapping change, just wiring or protection */
334 	int wchange;	/* no mapping change, just wiring */
335 	int mchange;	/* was mapped but mapping to different page */
336 	int managed;	/* a managed page */
337 	int firstpv;	/* first mapping for this PA */
338 	int secondpv;	/* second mapping for this PA */
339 	int ci;		/* cache inhibited */
340 	int unmanaged;	/* not a managed page */
341 	int flushes;	/* cache flushes */
342 	int cachehit;	/* new entry forced valid entry out */
343 } enter_stats;
344 struct {
345 	int calls;
346 	int removes;
347 	int flushes;
348 	int tflushes;	/* TLB flushes */
349 	int pidflushes;	/* HW pid stolen */
350 	int pvfirst;
351 	int pvsearch;
352 } remove_stats;
353 #define	PDB_CREATE	0x0001
354 #define	PDB_DESTROY	0x0002
355 #define	PDB_REMOVE	0x0004
356 #define	PDB_CHANGEPROT	0x0008
357 #define	PDB_ENTER	0x0010
358 #define PDB_DEMAP	0x0020
359 #define	PDB_REF		0x0040
360 #define PDB_COPY	0x0080
361 
362 #define	PDB_MMU_ALLOC	0x0100
363 #define	PDB_MMU_STEAL	0x0200
364 #define	PDB_CTX_ALLOC	0x0400
365 #define	PDB_CTX_STEAL	0x0800
366 #define	PDB_MMUREG_ALLOC	0x1000
367 #define	PDB_MMUREG_STEAL	0x2000
368 #define	PDB_CACHESTUFF	0x4000
369 #define	PDB_ALIAS	0x8000
370 #define PDB_EXTRACT	0x10000
371 #define	PDB_BOOT	0x20000
372 #define	PDB_BOOT1	0x40000
373 #define	PDB_GROW	0x80000
374 int	pmapdebug = 0;
375 /* Number of H/W pages stolen for page tables */
376 int	pmap_pages_stolen = 0;
377 
378 #define	BDPRINTF(n, f)	if (pmapdebug & (n)) prom_printf f
379 #define	DPRINTF(n, f)	if (pmapdebug & (n)) printf f
380 #else
381 #define	BDPRINTF(n, f)
382 #define	DPRINTF(n, f)
383 #endif
384 
385 #ifdef NOTDEF_DEBUG
386 void pv_check __P((void));
387 void
388 pv_check()
389 {
390 	int i, j, s;
391 
392 	s = splhigh();
393 	for (i = 0; i < physmem; i++) {
394 		struct pv_entry *pv;
395 		for (pv = &pv_table[i]; pv; pv = pv->pv_next) {
396 			if (pv->pv_pmap &&
397 			    !(pseg_get(pv->pv_pmap, pv->pv_va)&TLB_V)) {
398 		printf("pv_check(): unreferenced pv=%p pa=%p va=%p pm=%p\n",
399 		       i, ptoa(first_phys_addr+i), pv->pv_va, pv->pv_pmap);
400 				Debugger();
401 			}
402 		}
403 	}
404 	splx(s);
405 }
406 #else
407 #define pv_check()
408 #endif
409 
410 /*
411  *
412  * A context is simply a small number that differentiates multiple mappings
413  * of the same address.  Contexts on the spitfire are 13 bits, but could
414  * be as large as 17 bits.
415  *
416  * Each context is either free or attached to a pmap.
417  *
418  * The context table is an array of pointers to psegs.  Just dereference
419  * the right pointer and you get to the pmap segment tables.  These are
420  * physical addresses, of course.
421  *
422  */
423 paddr_t *ctxbusy;
424 int numctx;
425 #define CTXENTRY	(sizeof(paddr_t))
426 #define CTXSIZE		(numctx*CTXENTRY)
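
/*
 * Example (a minimal sketch, compiled out): ctxbusy[] maps a context
 * number straight to the physical address of that pmap's segment table,
 * which is what the TLB miss handlers chase.  Assumes the pmap already
 * holds a context in pm_ctx (see ctx_alloc()).
 */
#if 0
static paddr_t
ctx_to_segs_example(struct pmap *pm)
{
	return (ctxbusy[pm->pm_ctx]);
}
#endif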
427 
428 #define	pmap_get_page(p)	uvm_page_physget((p));
429 
430 
431 /*
432  * Support for big page sizes.  This maps the page size to the
433  * page bits.  That is: these are the bits between 8K pages and
434  * larger page sizes that cause aliasing.
435  */
436 struct page_size_map page_size_map[] = {
437 #ifdef DEBUG
438 	{ 0, PGSZ_8K&0  },	/* Disable large pages */
439 #endif
440 	{ (4*1024*1024-1) & ~(8*1024-1), PGSZ_4M },
441 	{ (512*1024-1) & ~(8*1024-1), PGSZ_512K  },
442 	{ (64*1024-1) & ~(8*1024-1), PGSZ_64K  },
443 	{ (8*1024-1) & ~(8*1024-1), PGSZ_8K  },
444 	{ 0, PGSZ_8K&0  }
445 };
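
/*
 * Example (a minimal sketch, compiled out) mirroring the scan done on
 * the PROM mappings later in pmap_bootstrap(): walk page_size_map[] and
 * return the TTE size code of the largest page whose alignment mask is
 * satisfied by both the VA and the TTE and that still fits within the
 * mapping.  The terminating entry, with mask 0, acts as the 8K fallback.
 */
#if 0
static u_int64_t
pick_pgsz_code_example(vaddr_t va, u_int64_t tte, u_int64_t size)
{
	int k;

	for (k = 0; page_size_map[k].mask; k++)
		if (((va | tte) & page_size_map[k].mask) == 0 &&
		    page_size_map[k].mask < size)
			break;
	return (page_size_map[k].code);
}
#endif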
446 
447 /*
448  * Calculate the largest page size that will map this.
449  *
450  * You really need to do this both on VA and PA.
451  */
452 #define	PMAP_PAGE_SIZE(va, pa, len, pgsz, pglen)			\
453 do {									\
454 	for ((pgsz) = PGSZ_4M; (pgsz); (pgsz)--) {			\
455 		(pglen) = PG_SZ(pgsz);					\
456 									\
457 		if (((len) >= (pgsz)) &&				\
458 			((pa) & ((pglen)-1) & ~PG_SZ(PGSZ_8K)) == 0 &&	\
459 			((va) & ((pglen)-1) & ~PG_SZ(PGSZ_8K)) == 0)	\
460 			break;						\
461 	}								\
462 	(pgsz) = 0;							\
463 	(pglen) = PG_SZ(pgsz);						\
464 } while (0)
465 
466 
467 /*
468  * Enter a TTE into the kernel pmap only.  Don't do anything else.
469  *
470  * Use only during bootstrapping since it does no locking and
471  * can lose ref/mod info!!!!
472  *
473  */
474 static void pmap_enter_kpage __P((vaddr_t, int64_t));
475 static void
476 pmap_enter_kpage(va, data)
477 	vaddr_t va;
478 	int64_t data;
479 {
480 	paddr_t newp;
481 
482 	newp = NULL;
483 	while (pseg_set(pmap_kernel(), va, data, newp) == 1) {
484 		newp = NULL;
485 		pmap_get_page(&newp);
486 		if (!newp) {
487 			prom_printf("pmap_enter_kpage: out of pages\n");
488 			panic("pmap_enter_kpage");
489 		}
490 		pmap_zero_page(newp);
491 #ifdef DEBUG
492 		enter_stats.ptpneeded ++;
493 #endif
494 		BDPRINTF(PDB_BOOT1,
495 			 ("pseg_set: pm=%p va=%p data=%lx newp %lx\r\n",
496 			  pmap_kernel(), va, (long)data, (long)newp));
497 #ifdef DEBUG
498 		if (pmapdebug & PDB_BOOT1)
499 		{int i; for (i=0; i<140000000; i++) ;}
500 #endif
501 	}
502 }
503 
504 /*
505  * Check bootargs to see if we need to enable bootdebug.
506  */
507 #ifdef DEBUG
508 void pmap_bootdebug __P((void));
509 void
510 pmap_bootdebug()
511 {
512 	int chosen;
513 	char *cp;
514 	char buf[128];
515 
516 	/*
517 	 * Grab boot args from PROM
518 	 */
519 	chosen = OF_finddevice("/chosen");
520 	/* Setup pointer to boot flags */
521 	OF_getprop(chosen, "bootargs", buf, sizeof(buf));
522 	cp = buf;
523 	while (*cp != '-')
524 		if (*cp++ == '\0')
525 			return;
526 	for (;;)
527 		switch (*++cp) {
528 		case '\0':
529 			return;
530 		case 'V':
531 			pmapdebug |= PDB_BOOT|PDB_BOOT1;
532 			break;
533 		case 'D':
534 			pmapdebug |= PDB_BOOT1;
535 			break;
536 		}
537 }
538 #endif
539 
540 #ifdef notyet
541 /*
542  * Calculate the correct number of page colors to use.  This should be the
543  * size of the E$/NBPG.  However, different CPUs can have different sized
544  * E$, so we use the largest number of colors required by any CPU.
545  */
546 static int pmap_calculate_colors __P((void));
547 static int
548 pmap_calculate_colors() {
549 	int node = 0;
550 	int size, assoc, color, maxcolor = 1;
551 	char buf[80];
552 
553 	while ((node = OF_peer(node))) {
554 		if ((OF_getprop(node, "device_type", buf, sizeof(buf)) > 0) &&
555 			strcmp("cpu", buf) == 0) {
556 			/* Found a CPU, get the E$ info. */
557 			if (OF_getprop(node,"ecache-size", &size,
558 				sizeof(size)) != sizeof(size)) {
559 				printf("pmap_calculate_colors: node %x has "
560 					"no ecache-size\n", node);
561 				/* If we can't get the E$ size, skip the node */
562 				continue;
563 			}
564 			if (OF_getprop(node, "ecache-associativity", &assoc,
565 				sizeof(assoc)) != sizeof(assoc))
566 				/* Fake associativity of 1 */
567 				assoc = 1;
568 			color = size/assoc/NBPG;
569 			if (color > maxcolor)
570 				maxcolor = color;
571 		}
572 	}
573 	return (maxcolor);
574 }
575 #endif
576 
577 /*
578  * This is called during bootstrap, before the system is really initialized.
579  *
580  * It's called with the start and end virtual addresses of the kernel.  We
581  * bootstrap the pmap allocator now.  We will allocate the basic structures we
582  * need to bootstrap the VM system here: the page frame tables, the TSB, and
583  * the free memory lists.
584  *
585  * Now all this is becoming a bit obsolete.  maxctx is still important, but by
586  * separating the kernel text and data segments we really would need to
587  * provide the start and end of each segment.  But we can't.  The rodata
588  * segment is attached to the end of the kernel segment and has nothing to
589  * delimit its end.  We could still pass in the beginning of the kernel and
590  * the beginning and end of the data segment but we could also just as easily
591  * calculate that all in here.
592  *
593  * To handle the kernel text, we need to do a reverse mapping of the start of
594  * the kernel, then traverse the free memory lists to find out how big it is.
595  */
596 
597 void
598 pmap_bootstrap(kernelstart, kernelend, maxctx)
599 	u_long kernelstart, kernelend;
600 	u_int maxctx;
601 {
602 	extern int data_start[], end[];	/* start of data segment */
603 	extern int msgbufmapped;
604 	struct mem_region *mp, *mp1;
605 	int msgbufsiz;
606 	int pcnt;
607 	size_t s, sz;
608 	int i, j;
609 	int64_t data;
610 	vaddr_t va;
611 	u_int64_t phys_msgbuf;
612 	paddr_t newkp;
613 	vaddr_t newkv, firstaddr, intstk;
614 	vsize_t kdsize, ktsize;
615 
616 #ifdef DEBUG
617 	pmap_bootdebug();
618 #endif
619 
620 	BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\r\n"));
621 	/*
622 	 * set machine page size
623 	 */
624 	uvmexp.pagesize = NBPG;
625 #ifdef notyet
626 	uvmexp.ncolors = pmap_calculate_colors();
627 #endif
628 	uvm_setpagesize();
629 
630 	/*
631 	 * Find out how big the kernel's virtual address
632 	 * space is.  The *$#@$ prom loses this info
633 	 */
634 	if ((vmemh = OF_finddevice("/virtual-memory")) == -1) {
635 		prom_printf("no virtual-memory?");
636 		OF_exit();
637 	}
638 	bzero((caddr_t)memlist, sizeof(memlist));
639 	if (OF_getprop(vmemh, "available", memlist, sizeof(memlist)) <= 0) {
640 		prom_printf("no vmemory avail?");
641 		OF_exit();
642 	}
643 
644 #ifdef DEBUG
645 	if (pmapdebug & PDB_BOOT) {
646 		/* print out mem list */
647 		prom_printf("Available virtual memory:\r\n");
648 		for (mp = memlist; mp->size; mp++) {
649 			prom_printf("memlist start %p size %lx\r\n",
650 				    (void *)(u_long)mp->start,
651 				    (u_long)mp->size);
652 		}
653 		prom_printf("End of available virtual memory\r\n");
654 	}
655 #endif
656 	/*
657 	 * Get hold of the message buffer.
658 	 */
659 	msgbufp = (struct msgbuf *)(vaddr_t)MSGBUF_VA;
660 /* XXXXX -- increase msgbufsiz for uvmhist printing */
661 	msgbufsiz = 4*NBPG /* round_page(sizeof(struct msgbuf)) */;
662 	BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\r\n",
663 			    (long)msgbufp, (long)msgbufsiz));
664 	if ((long)msgbufp !=
665 	    (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
666 		prom_printf(
667 		    "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\r\n",
668 		    (void *)msgbufp, (long)phys_msgbuf);
669 	phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
670 	BDPRINTF(PDB_BOOT,
671 		("We should have the memory at %lx, let's map it in\r\n",
672 			phys_msgbuf));
673 	if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
674 			  -1/* sunos does this */) == -1)
675 		prom_printf("Failed to map msgbuf\r\n");
676 	else
677 		BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\r\n",
678 			(void *)msgbufp));
679 	msgbufmapped = 1;	/* enable message buffer */
680 	initmsgbuf((caddr_t)msgbufp, msgbufsiz);
681 
682 	/*
683 	 * Record kernel mapping -- we will map these with a permanent 4MB
684 	 * TLB entry when we initialize the CPU later.
685 	 */
686 	BDPRINTF(PDB_BOOT, ("translating kernelstart %p\r\n",
687 		(void *)kernelstart));
688 	ktext = kernelstart;
689 	ktextp = prom_vtop(kernelstart);
690 
691 	kdata = (vaddr_t)data_start;
692 	kdatap = prom_vtop(kdata);
693 	ekdata = (vaddr_t)end;
694 
695 	/*
696 	 * Find the real size of the kernel.  Locate the smallest starting
697 	 * address above the end of the kernel data segment (ekdata).
698 	 */
699 	for (mp1 = mp = memlist; mp->size; mp++) {
700 		/*
701 		 * Check whether this region is at the end of the kernel.
702 		 */
703 		if (mp->start >= ekdata && (mp1->start < ekdata ||
704 						mp1->start > mp->start))
705 			mp1 = mp;
706 	}
707 	if (mp1->start < kdata)
708 		prom_printf("Kernel at end of vmem???\r\n");
709 
710 	BDPRINTF(PDB_BOOT1,
711 		("Kernel data is mapped at %lx, next free seg: %lx, %lx\r\n",
712 			(long)kdata, (u_long)mp1->start, (u_long)mp1->size));
713 
714 	/*
715 	 * We save where we can start allocating memory.
716 	 */
717 	firstaddr = (ekdata + 07) & ~ 07;	/* Longword align */
718 
719 	/*
720 	 * We reserve 100K to grow.
721 	 */
722 	ekdata += 100*KB;
723 
724 	/*
725 	 * And set the end of the data segment to the end of what our
726 	 * bootloader allocated for us, if we still fit in there.
727 	 */
728 	if (ekdata < mp1->start)
729 		ekdata = mp1->start;
730 
731 #if 1
732 #define	valloc(name, type, num) (name) = (type *)firstaddr; firstaddr += (num)
733 #else
734 #define	valloc(name, type, num) (name) = (type *)firstaddr; firstaddr = \
735 	(vaddr_t)((name)+(num))
736 #endif
737 
738 	/*
739 	 * Since we can't always give the loader the hint to align us on a 4MB
740 	 * boundary, we will need to do the alignment ourselves.  First
741 	 * allocate a new 4MB aligned segment for the kernel, then map it
742 	 * in, copy the kernel over, swap mappings, then finally, free the
743 	 * old kernel.  Then we can continue with this.
744 	 *
745 	 * We'll do the data segment up here since we know how big it is.
746 	 * We'll do the text segment after we've read in the PROM translations
747 	 * so we can figure out its size.
748 	 *
749 	 * The ctxbusy table takes about 64KB, the TSB up to 32KB, and the
750 	 * rest should be less than 1K, so 100KB extra should be plenty.
751 	 */
752 	kdsize = round_page(ekdata - kdata);
753 	BDPRINTF(PDB_BOOT1, ("Kernel data size is %lx\r\n", (long)kdsize));
754 
755 	if ((kdatap & (4*MEG-1)) == 0) {
756 		/* We were at a 4MB boundary -- claim the rest */
757 		psize_t szdiff = (4*MEG - kdsize) & (4*MEG - 1);
758 
759 		BDPRINTF(PDB_BOOT1, ("Need to extend dseg by %lx\r\n",
760 			(long)szdiff));
761 		if (szdiff) {
762 			/* Claim the rest of the physical page. */
763 			newkp = kdatap + kdsize;
764 			newkv = kdata + kdsize;
765 			if (newkp != prom_claim_phys(newkp, szdiff)) {
766 				prom_printf("pmap_bootstrap: could not claim "
767 					"physical dseg extension "
768 					"at %lx size %lx\r\n",
769 					newkp, szdiff);
770 				goto remap_data;
771 			}
772 
773 			/* And the rest of the virtual page. */
774 			if (prom_claim_virt(newkv, szdiff) != newkv)
775 				prom_printf("pmap_bootstrap: could not claim "
776 					"virtual dseg extension "
777 					"at %lx size %lx\r\n", newkv, szdiff);
778 
779 			/* Make sure all 4MB are mapped */
780 			prom_map_phys(newkp, szdiff, newkv, -1);
781 		}
782 	} else {
783 		psize_t sz;
784 remap_data:
785 		/*
786 		 * Either we're not at a 4MB boundary or we can't get the rest
787 		 * of the 4MB extension.  We need to move the data segment.
788 		 * Leave 1MB of extra fiddle space in the calculations.
789 		 */
790 
791 		sz = (kdsize + 4*MEG - 1) & ~(4*MEG-1);
792 		BDPRINTF(PDB_BOOT1,
793 			 ("Allocating new %lx kernel data at 4MB boundary\r\n",
794 			  (u_long)sz));
795 		if ((newkp = prom_alloc_phys(sz, 4*MEG)) == (paddr_t)-1 ) {
796 			prom_printf("Cannot allocate new kernel\r\n");
797 			OF_exit();
798 		}
799 		BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
800 				     (u_int64_t)newkp));
801 		if ((newkv = (vaddr_t)prom_alloc_virt(sz, 8)) ==
802 		    (vaddr_t)-1) {
803 			prom_printf("Cannot allocate new kernel va\r\n");
804 			OF_exit();
805 		}
806 		BDPRINTF(PDB_BOOT1, ("Mapping in buffer %llx at %llx\r\n",
807 		    (u_int64_t)newkp, (u_int64_t)newkv));
808 		prom_map_phys(newkp, sz, (vaddr_t)newkv, -1);
809 		BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel data...",
810 			kdsize));
811 		bzero((void *)newkv, sz);
812 		bcopy((void *)kdata, (void *)newkv, kdsize);
813 		BDPRINTF(PDB_BOOT1, ("done.  Swapping maps..unmap new\r\n"));
814 		prom_unmap_virt((vaddr_t)newkv, sz);
815 		BDPRINTF(PDB_BOOT, ("remap old "));
816 #if 0
817 		/*
818 		 * calling the prom will probably require reading part of the
819 		 * data segment so we can't do this.  */
820 		prom_unmap_virt((vaddr_t)kdatap, kdsize);
821 #endif
822 		prom_map_phys(newkp, sz, kdata, -1);
823 		/*
824 		 * we will map in 4MB, more than we allocated, to allow
825 		 * further allocation
826 		 */
827 		BDPRINTF(PDB_BOOT1, ("free old\r\n"));
828 		prom_free_phys(kdatap, kdsize);
829 		kdatap = newkp;
830 		BDPRINTF(PDB_BOOT1,
831 			 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
832 			  "avail for kernel\r\n", (u_long)firstaddr,
833 			  (u_long)prom_vtop(firstaddr)));
834 	}
835 
836 	/*
837 	 * Find out how much RAM we have installed.
838 	 */
839 	BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\r\n"));
840 	if ((memh = OF_finddevice("/memory")) == -1) {
841 		prom_printf("no memory?");
842 		OF_exit();
843 	}
844 	memsize = OF_getproplen(memh, "reg") + 2 * sizeof(struct mem_region);
845 	valloc(mem, struct mem_region, memsize);
846 	bzero((caddr_t)mem, memsize);
847 	if (OF_getprop(memh, "reg", mem, memsize) <= 0) {
848 		prom_printf("no memory installed?");
849 		OF_exit();
850 	}
851 
852 #ifdef DEBUG
853 	if (pmapdebug & PDB_BOOT1) {
854 		/* print out mem list */
855 		prom_printf("Installed physical memory:\r\n");
856 		for (mp = mem; mp->size; mp++) {
857 			prom_printf("memlist start %lx size %lx\r\n",
858 				    (u_long)mp->start, (u_long)mp->size);
859 		}
860 	}
861 #endif
862 	BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
863 
864 	for (mp = mem; mp->size; mp++)
865 		physmem += btoc(mp->size);
866 	BDPRINTF(PDB_BOOT1, (" result %x or %d pages\r\n",
867 			     (int)physmem, (int)physmem));
868 	/*
869 	 * Calculate approx TSB size.  This probably needs tweaking.
870 	 */
871 	if (physmem < 64 * 1024 * 1024)
872 		tsbsize = 0;
873 	else if (physmem < 512 * 1024 * 1024)
874 		tsbsize = 1;
875 	else
876 		tsbsize = 2;
877 
878 	/*
879 	 * Save the prom translations
880 	 */
881 	sz = OF_getproplen(vmemh, "translations");
882 	valloc(prom_map, struct prom_map, sz);
883 	if (OF_getprop(vmemh, "translations", (void*)prom_map, sz) <= 0) {
884 		prom_printf("no translations installed?");
885 		OF_exit();
886 	}
887 	prom_map_size = sz / sizeof(struct prom_map);
888 #ifdef DEBUG
889 	if (pmapdebug & PDB_BOOT) {
890 		/* print out mem list */
891 		prom_printf("Prom xlations:\r\n");
892 		for (i = 0; i < prom_map_size; i++) {
893 			prom_printf("start %016lx size %016lx tte %016lx\r\n",
894 				    (u_long)prom_map[i].vstart,
895 				    (u_long)prom_map[i].vsize,
896 				    (u_long)prom_map[i].tte);
897 		}
898 		prom_printf("End of prom xlations\r\n");
899 	}
900 #endif
901 	/*
902 	 * Hunt for the kernel text segment and figure out its size and
903 	 * alignment.
904 	 */
905 	for (i = 0; i < prom_map_size; i++)
906 		if (prom_map[i].vstart == ktext)
907 			break;
908 	if (i == prom_map_size)
909 		panic("No kernel text segment!\r\n");
910 	ktsize = prom_map[i].vsize;
911 	ektext = ktext + ktsize;
912 
913 	if (ktextp & (4*MEG-1)) {
914 		/* Kernel text is not 4MB aligned -- need to fix that */
915 		BDPRINTF(PDB_BOOT1,
916 			 ("Allocating new %lx kernel text at 4MB boundary\r\n",
917 			  (u_long)ktsize));
918 		if ((newkp = prom_alloc_phys(ktsize, 4*MEG)) == 0 ) {
919 			prom_printf("Cannot allocate new kernel text\r\n");
920 			OF_exit();
921 		}
922 		BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
923 				     (u_int64_t)newkp));
924 		if ((newkv = (vaddr_t)prom_alloc_virt(ktsize, 8)) ==
925 		    (vaddr_t)-1) {
926 			prom_printf("Cannot allocate new kernel text va\r\n");
927 			OF_exit();
928 		}
929 		BDPRINTF(PDB_BOOT1, ("Mapping in buffer %lx at %lx\r\n",
930 				     (u_long)newkp, (u_long)newkv));
931 		prom_map_phys(newkp, ktsize, (vaddr_t)newkv, -1);
932 		BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel text...",
933 			ktsize));
934 		bcopy((void *)ktext, (void *)newkv,
935 		    ktsize);
936 		BDPRINTF(PDB_BOOT1, ("done.  Swapping maps..unmap new\r\n"));
937 		prom_unmap_virt((vaddr_t)newkv, 4*MEG);
938 		BDPRINTF(PDB_BOOT, ("remap old "));
939 #if 0
940 		/*
941 		 * calling the prom will probably require reading part of the
942 		 * text segment so we can't do this.
943 		 */
944 		prom_unmap_virt((vaddr_t)ktextp, ktsize);
945 #endif
946 		prom_map_phys(newkp, ktsize, ktext, -1);
947 		/*
948 		 * we will map in 4MB, more than we allocated, to allow
949 		 * further allocation
950 		 */
951 		BDPRINTF(PDB_BOOT1, ("free old\r\n"));
952 		prom_free_phys(ktextp, ktsize);
953 		ktextp = newkp;
954 
955 		BDPRINTF(PDB_BOOT1,
956 			 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
957 			  "avail for kernel\r\n", (u_long)firstaddr,
958 			  (u_long)prom_vtop(firstaddr)));
959 
960 		/*
961 		 * Re-fetch translations -- they've certainly changed.
962 		 */
963 		if (OF_getprop(vmemh, "translations", (void*)prom_map, sz) <=
964 			0) {
965 			prom_printf("no translations installed?");
966 			OF_exit();
967 		}
968 #ifdef DEBUG
969 		if (pmapdebug & PDB_BOOT) {
970 			/* print out mem list */
971 			prom_printf("New prom xlations:\r\n");
972 			for (i = 0; i < prom_map_size; i++) {
973 				prom_printf("start %016lx size %016lx tte %016lx\r\n",
974 					    (u_long)prom_map[i].vstart,
975 					    (u_long)prom_map[i].vsize,
976 					    (u_long)prom_map[i].tte);
977 			}
978 			prom_printf("End of prom xlations\r\n");
979 		}
980 #endif
981 	}
982 	ektextp = ktextp + ktsize;
983 
984 	/*
985 	 * Here's a quick in-lined reverse bubble sort.  It gets rid of
986 	 * any translations inside the kernel text and data VA ranges.
987 	 */
988 	for(i = 0; i < prom_map_size; i++) {
989 		if (prom_map[i].vstart >= kdata &&
990 		    prom_map[i].vstart <= firstaddr) {
991 			prom_map[i].vstart = 0;
992 			prom_map[i].vsize = 0;
993 		}
994 		if (prom_map[i].vstart >= ktext &&
995 		    prom_map[i].vstart <= ektext) {
996 			prom_map[i].vstart = 0;
997 			prom_map[i].vsize = 0;
998 		}
999 		for(j = i; j < prom_map_size; j++) {
1000 			if (prom_map[j].vstart >= kdata &&
1001 			    prom_map[j].vstart <= firstaddr)
1002 				continue;	/* this is inside the kernel */
1003 			if (prom_map[j].vstart >= ktext &&
1004 			    prom_map[j].vstart <= ektext)
1005 				continue;	/* this is inside the kernel */
1006 			if (prom_map[j].vstart > prom_map[i].vstart) {
1007 				struct prom_map tmp;
1008 				tmp = prom_map[i];
1009 				prom_map[i] = prom_map[j];
1010 				prom_map[j] = tmp;
1011 			}
1012 		}
1013 	}
1014 #ifdef DEBUG
1015 	if (pmapdebug & PDB_BOOT) {
1016 		/* print out mem list */
1017 		prom_printf("Prom xlations:\r\n");
1018 		for (i = 0; i < prom_map_size; i++) {
1019 			prom_printf("start %016lx size %016lx tte %016lx\r\n",
1020 				    (u_long)prom_map[i].vstart,
1021 				    (u_long)prom_map[i].vsize,
1022 				    (u_long)prom_map[i].tte);
1023 		}
1024 		prom_printf("End of prom xlations\r\n");
1025 	}
1026 #endif
1027 
1028 	/*
1029 	 * Allocate eight pages (64KB) for the cpu_info structure now.
1030 	 */
1031 	if ((cpu0paddr = prom_alloc_phys(8*NBPG, 8*NBPG)) == 0 ) {
1032 		prom_printf("Cannot allocate new cpu_info\r\n");
1033 		OF_exit();
1034 	}
1035 
1036 
1037 	/*
1038 	 * Now the kernel text segment is in its final location we can try to
1039 	 * find out how much memory really is free.
1040 	 */
1041 	sz = OF_getproplen(memh, "available") + sizeof(struct mem_region);
1042 	valloc(orig, struct mem_region, sz);
1043 	bzero((caddr_t)orig, sz);
1044 	if (OF_getprop(memh, "available", orig, sz) <= 0) {
1045 		prom_printf("no available RAM?");
1046 		OF_exit();
1047 	}
1048 #ifdef DEBUG
1049 	if (pmapdebug & PDB_BOOT1) {
1050 		/* print out mem list */
1051 		prom_printf("Available physical memory:\r\n");
1052 		for (mp = orig; mp->size; mp++) {
1053 			prom_printf("memlist start %lx size %lx\r\n",
1054 				    (u_long)mp->start, (u_long)mp->size);
1055 		}
1056 		prom_printf("End of available physical memory\r\n");
1057 	}
1058 #endif
1059 	valloc(avail, struct mem_region, sz);
1060 	bzero((caddr_t)avail, sz);
1061 	for (pcnt = 0, mp = orig, mp1 = avail; (mp1->size = mp->size);
1062 	    mp++, mp1++) {
1063 		mp1->start = mp->start;
1064 		pcnt++;
1065 	}
1066 
1067 	/*
1068 	 * Allocate and initialize a context table
1069 	 */
1070 	numctx = maxctx;
1071 	valloc(ctxbusy, paddr_t, CTXSIZE);
1072 	bzero((caddr_t)ctxbusy, CTXSIZE);
1073 
1074 	/*
1075 	 * Allocate our TSB.
1076 	 *
1077 	 * We will use the left over space to flesh out the kernel pmap.
1078 	 */
1079 	BDPRINTF(PDB_BOOT1, ("firstaddr before TSB=%lx\r\n",
1080 		(u_long)firstaddr));
1081 	firstaddr = ((firstaddr + TSBSIZE - 1) & ~(TSBSIZE-1));
1082 #ifdef DEBUG
1083 	i = (firstaddr + (NBPG-1)) & ~(NBPG-1);	/* First, page align */
1084 	if ((int)firstaddr < i) {
1085 		prom_printf("TSB alloc fixup failed\r\n");
1086 		prom_printf("frobbed i, firstaddr before TSB=%x, %lx\r\n",
1087 		    (int)i, (u_long)firstaddr);
1088 		panic("TSB alloc\n");
1089 		OF_exit();
1090 	}
1091 #endif
1092 	BDPRINTF(PDB_BOOT, ("frobbed i, firstaddr before TSB=%x, %lx\r\n",
1093 			    (int)i, (u_long)firstaddr));
1094 	valloc(tsb, pte_t, TSBSIZE);
1095 	bzero(tsb, TSBSIZE);
1096 
1097 	BDPRINTF(PDB_BOOT1, ("firstaddr after TSB=%lx\r\n", (u_long)firstaddr));
1098 	BDPRINTF(PDB_BOOT1, ("TSB allocated at %p size %08x\r\n", (void*)tsb,
1099 	    (int)TSBSIZE));
1100 
1101 	first_phys_addr = mem->start;
1102 	BDPRINTF(PDB_BOOT1, ("firstaddr after pmap=%08lx\r\n",
1103 		(u_long)firstaddr));
1104 
1105 	/*
1106 	 * Page align all regions.
1107 	 * Non-page memory isn't very interesting to us.
1108 	 * Also, sort the entries for ascending addresses.
1109 	 *
1110 	 * And convert from virtual to physical addresses.
1111 	 */
1112 
1113 	BDPRINTF(PDB_BOOT, ("kernel virtual size %08lx - %08lx\r\n",
1114 			    (u_long)kernelstart, (u_long)firstaddr));
1115 	kdata = kdata & ~PGOFSET;
1116 	ekdata = firstaddr;
1117 	ekdata = (ekdata + PGOFSET) & ~PGOFSET;
1118 	BDPRINTF(PDB_BOOT1, ("kernel virtual size %08lx - %08lx\r\n",
1119 			     (u_long)kernelstart, (u_long)kernelend));
1120 	ekdatap = ekdata - kdata + kdatap;
1121 	/* Switch from vaddrs to paddrs */
1122 	if(ekdatap > (kdatap + 4*MEG)) {
1123 		prom_printf("Kernel size exceeds 4MB\r\n");
1124 	}
1125 
1126 #ifdef DEBUG
1127 	if (pmapdebug & PDB_BOOT1) {
1128 		/* print out mem list */
1129 		prom_printf("Available %lx physical memory before cleanup:\r\n",
1130 			    (u_long)avail);
1131 		for (mp = avail; mp->size; mp++) {
1132 			prom_printf("memlist start %lx size %lx\r\n",
1133 				    (u_long)mp->start,
1134 				    (u_long)mp->size);
1135 		}
1136 		prom_printf("End of available physical memory before cleanup\r\n");
1137 		prom_printf("kernel physical text size %08lx - %08lx\r\n",
1138 			    (u_long)ktextp, (u_long)ektextp);
1139 		prom_printf("kernel physical data size %08lx - %08lx\r\n",
1140 			    (u_long)kdatap, (u_long)ekdatap);
1141 	}
1142 #endif
1143 	/*
1144 	 * Here's a another quick in-lined bubble sort.
1145 	 * Here's another quick in-lined bubble sort.
1146 	for (i = 0; i < pcnt; i++) {
1147 		for (j = i; j < pcnt; j++) {
1148 			if (avail[j].start < avail[i].start) {
1149 				struct mem_region tmp;
1150 				tmp = avail[i];
1151 				avail[i] = avail[j];
1152 				avail[j] = tmp;
1153 			}
1154 		}
1155 	}
1156 
1157 	/* Throw away page zero if we have it. */
1158 	if (avail->start == 0) {
1159 		avail->start += NBPG;
1160 		avail->size -= NBPG;
1161 	}
1162 	/*
1163 	 * Now we need to remove the area we valloc'ed from the available
1164 	 * memory lists.  (NB: we may have already alloc'ed the entire space).
1165 	 */
1166 	npgs = 0;
1167 	for (mp = avail; mp->size; mp++) {
1168 		/*
1169 		 * Check whether this region holds all of the kernel.
1170 		 */
1171 		s = mp->start + mp->size;
1172 		if (mp->start < kdatap && s > roundup(ekdatap, 4*MEG)) {
1173 			avail[pcnt].start = roundup(ekdatap, 4*MEG);
1174 			avail[pcnt++].size = s - kdatap;
1175 			mp->size = kdatap - mp->start;
1176 		}
1177 		/*
1178 		 * Check whether this region starts within the kernel.
1179 		 */
1180 		if (mp->start >= kdatap &&
1181 			mp->start < roundup(ekdatap, 4*MEG)) {
1182 			s = ekdatap - mp->start;
1183 			if (mp->size > s)
1184 				mp->size -= s;
1185 			else
1186 				mp->size = 0;
1187 			mp->start = roundup(ekdatap, 4*MEG);
1188 		}
1189 		/*
1190 		 * Now look whether this region ends within the kernel.
1191 		 */
1192 		s = mp->start + mp->size;
1193 		if (s > kdatap && s < roundup(ekdatap, 4*MEG))
1194 			mp->size -= s - kdatap;
1195 		/*
1196 		 * Now page align the start of the region.
1197 		 */
1198 		s = mp->start % NBPG;
1199 		if (mp->size >= s) {
1200 			mp->size -= s;
1201 			mp->start += s;
1202 		}
1203 		/*
1204 		 * And now align the size of the region.
1205 		 */
1206 		mp->size -= mp->size % NBPG;
1207 		/*
1208 		 * Check whether some memory is left here.
1209 		 */
1210 		if (mp->size == 0) {
1211 			bcopy(mp + 1, mp,
1212 			      (pcnt - (mp - avail)) * sizeof *mp);
1213 			pcnt--;
1214 			mp--;
1215 			continue;
1216 		}
1217 		s = mp->start;
1218 		sz = mp->size;
1219 		npgs += btoc(sz);
1220 		for (mp1 = avail; mp1 < mp; mp1++)
1221 			if (s < mp1->start)
1222 				break;
1223 		if (mp1 < mp) {
1224 			bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1);
1225 			mp1->start = s;
1226 			mp1->size = sz;
1227 		}
1228 #ifdef DEBUG
1229 /* Clear all memory we give to the VM system.  I want to make sure
1230  * the PROM isn't using it for something, so this should break the PROM.
1231  */
1232 		{
1233 			paddr_t p;
1234 			for (p = mp->start; p < mp->start+mp->size; p += NBPG)
1235 				pmap_zero_page(p);
1236 		}
1237 #endif
1238 		/*
1239 		 * In future we should be able to specify both allocated
1240 		 * and free.
1241 		 */
1242 		uvm_page_physload(
1243 			atop(mp->start),
1244 			atop(mp->start+mp->size),
1245 			atop(mp->start),
1246 			atop(mp->start+mp->size),
1247 			VM_FREELIST_DEFAULT);
1248 	}
1249 
1250 #if 0
1251 	/* finally, free up any space that valloc did not use */
1252 	prom_unmap_virt((vaddr_t)ekdata, roundup(ekdata, 4*MEG) - ekdata);
1253 	if (ekdatap < roundup(kdatap, 4*MEG))) {
1254 		uvm_page_physload(atop(ekdatap),
1255 			atop(roundup(ekdatap, (4*MEG))),
1256 			atop(ekdatap),
1257 			atop(roundup(ekdatap, (4*MEG))),
1258 			VM_FREELIST_DEFAULT);
1259 	}
1260 #endif
1261 
1262 #ifdef DEBUG
1263 	if (pmapdebug & PDB_BOOT) {
1264 		/* print out mem list */
1265 		prom_printf("Available physical memory after cleanup:\r\n");
1266 		for (mp = avail; mp->size; mp++) {
1267 			prom_printf("avail start %lx size %lx\r\n",
1268 				    (long)mp->start, (long)mp->size);
1269 		}
1270 		prom_printf("End of available physical memory after cleanup\r\n");
1271 	}
1272 #endif
1273 	/*
1274 	 * Allocate and clear out pmap_kernel()->pm_segs[]
1275 	 */
1276 	pmap_pinit(pmap_kernel());
1277 	{
1278 		paddr_t newp;
1279 
1280 		do {
1281 			pmap_get_page(&newp);
1282 			pmap_zero_page(newp);
1283 		} while (!newp); /* Throw away page zero */
1284 		pmap_kernel()->pm_segs=(int64_t *)(u_long)newp;
1285 		pmap_kernel()->pm_physaddr = newp;
1286 		/* mark kernel context as busy */
1287 		((paddr_t*)ctxbusy)[0] = (int)pmap_kernel()->pm_physaddr;
1288 	}
1289 	/*
1290 	 * finish filling out kernel pmap.
1291 	 */
1292 
1293 	BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\r\n",
1294 	    (long)pmap_kernel()->pm_physaddr));
1295 	/*
1296 	 * Tell pmap about our msgbuf -- hope this works already
1297 	 */
1298 #ifdef DEBUG
1299 	BDPRINTF(PDB_BOOT1, ("Calling consinit()\r\n"));
1300 	if (pmapdebug & PDB_BOOT1) consinit();
1301 	BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\r\n"));
1302 #endif
1303 	/* it's not safe to call pmap_enter so we need to do this ourselves */
1304 	va = (vaddr_t)msgbufp;
1305 	prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1);
1306 	while (msgbufsiz) {
1307 		int pgsz;
1308 		psize_t psize;
1309 
1310 		PMAP_PAGE_SIZE(va, phys_msgbuf, msgbufsiz, pgsz, psize);
1311 		data = TSB_DATA(0 /* global */,
1312 			pgsz,
1313 			phys_msgbuf,
1314 			1 /* priv */,
1315 			1 /* Write */,
1316 			1 /* Cacheable */,
1317 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1318 			1 /* valid */,
1319 			0 /* IE */);
1320 		do {
1321 			pmap_enter_kpage(va, data);
1322 			va += NBPG;
1323 			msgbufsiz -= NBPG;
1324 			phys_msgbuf += NBPG;
1325 		} while (psize-=NBPG);
1326 	}
1327 	BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\r\n"));
1328 
1329 	BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\r\n"));
1330 	for (i = 0; i < prom_map_size; i++)
1331 		if (prom_map[i].vstart && ((prom_map[i].vstart>>32) == 0))
1332 			for (j = 0; j < prom_map[i].vsize; j += NBPG) {
1333 				int k;
1334 
1335 				for (k = 0; page_size_map[k].mask; k++) {
1336 					if (((prom_map[i].vstart |
1337 					      prom_map[i].tte) &
1338 					      page_size_map[k].mask) == 0 &&
1339 					      page_size_map[k].mask <
1340 					      prom_map[i].vsize)
1341 						break;
1342 				}
1343 #ifdef DEBUG
1344 				page_size_map[k].use++;
1345 #endif
1346 				/* Enter PROM map into pmap_kernel() */
1347 				pmap_enter_kpage(prom_map[i].vstart + j,
1348 					(prom_map[i].tte + j)|
1349 					page_size_map[k].code);
1350 			}
1351 	BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\r\n"));
1352 
1353 	/*
1354 	 * Fix up start of kernel heap.
1355 	 */
1356 	vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
1357 	/* Let's keep 1 page of redzone after the kernel */
1358 	vmmap += NBPG;
1359 	{
1360 		extern vaddr_t u0[2];
1361 		extern struct pcb* proc0paddr;
1362 		extern void main __P((void));
1363 		paddr_t pa;
1364 
1365 		/* Initialize all the pointers to u0 */
1366 		cpcb = (struct pcb *)vmmap;
1367 		proc0paddr = cpcb;
1368 		u0[0] = vmmap;
1369 		/* Allocate some VAs for u0 */
1370 		u0[1] = vmmap + 2*USPACE;
1371 
1372 		BDPRINTF(PDB_BOOT1,
1373 			("Inserting stack 0 into pmap_kernel() at %p\r\n",
1374 				vmmap));
1375 
1376 		while (vmmap < u0[1]) {
1377 			int64_t data;
1378 
1379 			pmap_get_page(&pa);
1380 			pmap_zero_page(pa);
1381 			prom_map_phys(pa, NBPG, vmmap, -1);
1382 			data = TSB_DATA(0 /* global */,
1383 				PGSZ_8K,
1384 				pa,
1385 				1 /* priv */,
1386 				1 /* Write */,
1387 				1 /* Cacheable */,
1388 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1389 				1 /* valid */,
1390 				0 /* IE */);
1391 			pmap_enter_kpage(vmmap, data);
1392 			vmmap += NBPG;
1393 		}
1394 		BDPRINTF(PDB_BOOT1,
1395 			 ("Done inserting stack 0 into pmap_kernel()\r\n"));
1396 
1397 		/* Now map in and initialize our cpu_info structure */
1398 #ifdef DIAGNOSTIC
1399 		vmmap += NBPG; /* redzone -- XXXX do we need one? */
1400 #endif
1401 		if ((vmmap ^ INTSTACK) & VA_ALIAS_MASK)
1402 			vmmap += NBPG; /* Matchup virtual color for D$ */
1403 		intstk = vmmap;
1404 		cpus = (struct cpu_info *)(intstk+CPUINFO_VA-INTSTACK);
1405 
1406 		BDPRINTF(PDB_BOOT1,
1407 			("Inserting cpu_info into pmap_kernel() at %p\r\n",
1408 				 cpus));
1409 		/* Now map in all 8 pages of cpu_info */
1410 		pa = cpu0paddr;
1411 		prom_map_phys(pa, 64*KB, vmmap, -1);
1412 		/*
1413 		 * Also map it in as the interrupt stack.
1414 		 * This lets the PROM see this if needed.
1415 		 *
1416 		 * XXXX locore.s does not flush these mappings
1417 		 * before installing the locked TTE.
1418 		 */
1419 		prom_map_phys(pa, 64*KB, CPUINFO_VA, -1);
1420 		for (i=0; i<8; i++) {
1421 			int64_t data;
1422 
1423 			data = TSB_DATA(0 /* global */,
1424 				PGSZ_8K,
1425 				pa,
1426 				1 /* priv */,
1427 				1 /* Write */,
1428 				1 /* Cacheable */,
1429 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1430 				1 /* valid */,
1431 				0 /* IE */);
1432 			pmap_enter_kpage(vmmap, data);
1433 			vmmap += NBPG;
1434 			pa += NBPG;
1435 		}
1436 		BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\r\n"));
1437 
1438 		/* Initialize our cpu_info structure */
1439 		bzero((void *)intstk, 8*NBPG);
1440 		cpus->ci_next = NULL; /* Redundant, I know. */
1441 		cpus->ci_curproc = &proc0;
1442 		cpus->ci_cpcb = (struct pcb *)u0[0]; /* Need better source */
1443 		cpus->ci_upaid = CPU_UPAID;
1444 		cpus->ci_number = cpus->ci_upaid; /* How do we figure this out? */
1445 		cpus->ci_fpproc = NULL;
1446 		cpus->ci_spinup = main; /* Call main when we're running. */
1447 		cpus->ci_initstack = (void *)u0[1];
1448 		cpus->ci_paddr = cpu0paddr;
1449 		/* The rest will be done at CPU attach time. */
1450 		BDPRINTF(PDB_BOOT1,
1451 			 ("Done inserting cpu_info into pmap_kernel()\r\n"));
1452 	}
1453 
1454 	vmmap = (vaddr_t)reserve_dumppages((caddr_t)(u_long)vmmap);
1455 	/*
1456 	 * Set up bounds of allocatable memory for vmstat et al.
1457 	 */
1458 	nextavail = avail->start;
1459 	avail_start = nextavail;
1460 	for (mp = avail; mp->size; mp++)
1461 		avail_end = mp->start+mp->size;
1462 	BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\r\n"));
1463 
1464 }
1465 
1466 /*
1467  * Initialize anything else for pmap handling.
1468  * Called during vm_init().
1469  */
1470 void
1471 pmap_init()
1472 {
1473 	struct vm_page *m;
1474 	paddr_t pa;
1475 	psize_t size;
1476 	vaddr_t va;
1477 	struct pglist mlist;
1478 	vsize_t		s;
1479 	int		bank;
1480 	struct pv_entry	*pvh;
1481 
1482 	BDPRINTF(PDB_BOOT1, ("pmap_init()\r\n"));
1483 	if (PAGE_SIZE != NBPG)
1484 		panic("pmap_init: CLSIZE!=1");
1485 
1486 	size = sizeof(struct pv_entry) * physmem;
1487 	TAILQ_INIT(&mlist);
1488 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
1489 		(paddr_t)NBPG, (paddr_t)0, &mlist, 1, 0) != 0)
1490 		panic("cpu_start: no memory");
1491 
1492 	va = uvm_km_valloc(kernel_map, size);
1493 	if (va == 0)
1494 		panic("cpu_start: no memory");
1495 
1496 	pv_table = (struct pv_entry *)va;
1497 	m = TAILQ_FIRST(&mlist);
1498 
1499 	/* Map the pages */
1500 	for (; m != NULL; m = TAILQ_NEXT(m,pageq)) {
1501 		u_int64_t data;
1502 
1503 		pa = VM_PAGE_TO_PHYS(m);
1504 		pmap_zero_page(pa);
1505 		data = TSB_DATA(0 /* global */,
1506 			PGSZ_8K,
1507 			pa,
1508 			1 /* priv */,
1509 			1 /* Write */,
1510 			1 /* Cacheable */,
1511 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1512 			1 /* valid */,
1513 			0 /* IE */);
1514 		pmap_enter_kpage(va, data);
1515 		va += NBPG;
1516 	}
1517 
1518 	/*
1519 	 * Memory for the pv heads has already been allocated.
1520 	 * Initialize the physical memory segments.
1521 	 */
1522 	pvh = pv_table;
1523 	for (bank = 0; bank < vm_nphysseg; bank++) {
1524 		s = vm_physmem[bank].end - vm_physmem[bank].start;
1525 		vm_physmem[bank].pmseg.pvent = pvh;
1526 		pvh += s;
1527 	}
1528 
1529 	/* Setup a pool for additional pvlist structures */
1530 	pool_init(&pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pv_entry", NULL);
1531 	pool_init(&pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
1532 	    &pool_allocator_nointr);
1533 
1534 	vm_first_phys = avail_start;
1535 	vm_num_phys = avail_end - avail_start;
1536 }
1537 
1538 /*
1539  * How much virtual space is available to the kernel?
1540  */
1541 static vaddr_t kbreak; /* End of kernel VA */
1542 void
1543 pmap_virtual_space(start, end)
1544 	vaddr_t *start, *end;
1545 {
1546 	/*
1547 	 * Reserve one segment for kernel virtual memory
1548 	 */
1549 	/* Reserve two pages for pmap_copy_page && /dev/mem */
1550 	*start = kbreak = (vaddr_t)(vmmap + 2*NBPG);
1551 	*end = VM_MAX_KERNEL_ADDRESS;
1552 	BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\r\n", *start, *end));
1553 }
1554 
1555 #ifdef PMAP_GROWKERNEL
1556 /*
1557  * Preallocate kernel page tables to a specified VA.
1558  * This simply loops through the first TTE for each
1559  * page table from the beginning of the kernel pmap,
1560  * reads the entry, and if the result is zero (either an invalid
1561  * entry or a missing page table) it writes a zero back, letting
1562  * pseg_set() allocate any missing page tables along the way.
1563  * This is not the most efficient technique, but I don't
1564  * expect it to be called that often.
1565  */
1566 vaddr_t
1567 pmap_growkernel(maxkvaddr)
1568         vaddr_t maxkvaddr;
1569 {
1570 	int s;
1571 	paddr_t pg;
1572 	struct pmap *pm = pmap_kernel();
1573 
1574 	if (maxkvaddr >= KERNEND) {
1575 		printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
1576 		       (void *)KERNEND, (void *)maxkvaddr);
1577 		return (kbreak);
1578 	}
1579 	s = splvm();
1580 	simple_lock(&pm->pm_lock);
1581 	DPRINTF(PDB_GROW,
1582 		("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr));
1583 	/* Align with the start of a page table */
1584 	for (kbreak &= (-1<<PDSHIFT); kbreak < maxkvaddr;
1585 	     kbreak += (1<<PDSHIFT)) {
1586 		if (pseg_get(pm, kbreak)) continue;
1587 
1588 		pg = 0;
1589 		while (pseg_set(pm, kbreak, 0, pg) == 1) {
1590 			DPRINTF(PDB_GROW,
1591 				("pmap_growkernel: extending %lx\n", kbreak));
1592 			pg = 0;
1593 			if (uvm.page_init_done || !uvm_page_physget(&pg)) {
1594 				struct vm_page *page;
1595 				DPRINTF(PDB_GROW,
1596 ("pmap_growkernel: need to alloc page\n"));
1597 				while ((page =
1598 					vm_page_alloc1()) == NULL) {
1599 					DPRINTF(PDB_GROW,
1600 ("pmap_growkernel: calling uvm_wait()\n"));
1601 					uvm_wait("pmap_growkernel");
1602 				}
1603 				pg = (paddr_t)VM_PAGE_TO_PHYS(page);
1604 			}
1605 			pmap_zero_page((paddr_t)pg);
1606 #ifdef DEBUG
1607 			enter_stats.ptpneeded ++;
1608 #endif
1609 		}
1610 
1611 	}
1612 	simple_unlock(&pm->pm_lock);
1613 	splx(s);
1614 	return (kbreak);
1615 }
1616 #endif
1617 
1618 /*
1619  * Create and return a physical map.
1620  */
1621 struct pmap *
1622 pmap_create()
1623 {
1624 	struct pmap *pm;
1625 
1626 	DPRINTF(PDB_CREATE, ("pmap_create()\n"));
1627 
1628 	pm = pool_get(&pmap_pool, PR_WAITOK);
1629 	bzero((caddr_t)pm, sizeof *pm);
1630 #ifdef DEBUG
1631 	if (pmapdebug & PDB_CREATE)
1632 		printf("pmap_create(): created %p\n", pm);
1633 #endif
1634 	pmap_pinit(pm);
1635 	return pm;
1636 }
1637 
1638 /*
1639  * Initialize a preallocated and zeroed pmap structure.
1640  */
1641 void
1642 pmap_pinit(pm)
1643 	struct pmap *pm;
1644 {
1645 
1646 	/*
1647 	 * Allocate some segment registers for this pmap.
1648 	 */
1649 	simple_lock_init(&pm->pm_lock);
1650 	simple_lock(&pm->pm_lock);
1651 	pm->pm_refs = 1;
1652 	if(pm != pmap_kernel()) {
1653 		struct vm_page *page;
1654 #ifdef NOTDEF_DEBUG
1655 		printf("pmap_pinit: need to alloc page\n");
1656 #endif
1657 		while ((page = vm_page_alloc1()) == NULL) {
1658 			/*
1659 			 * Let the pager run a bit--however this may deadlock
1660 			 */
1661 #ifdef NOTDEF_DEBUG
1662 			printf("pmap_pinit: calling uvm_wait()\n");
1663 #endif
1664 			uvm_wait("pmap_pinit");
1665 		}
1666 		pm->pm_physaddr = (paddr_t)VM_PAGE_TO_PHYS(page);
1667 		pmap_zero_page(pm->pm_physaddr);
1668 		pm->pm_segs = (int64_t *)(u_long)pm->pm_physaddr;
1669 		if (!pm->pm_physaddr) panic("pmap_pinit");
1670 #ifdef NOTDEF_DEBUG
1671 		printf("pmap_pinit: segs %p == %p\n", pm->pm_segs, (void*)page->phys_addr);
1672 #endif
1673 		ctx_alloc(pm);
1674 	}
1675 #ifdef DEBUG
1676 	if (pmapdebug & PDB_CREATE)
1677 		printf("pmap_pinit(%p): ctx %d\n", pm, pm->pm_ctx);
1678 #endif
1679 	simple_unlock(&pm->pm_lock);
1680 }
1681 
1682 /*
1683  * Add a reference to the given pmap.
1684  */
1685 void
1686 pmap_reference(pm)
1687 	struct pmap *pm;
1688 {
1689 	int s;
1690 
1691 	s = splvm();
1692 	simple_lock(&pm->pm_lock);
1693 	pm->pm_refs++;
1694 	simple_unlock(&pm->pm_lock);
1695 	splx(s);
1696 }
1697 
1698 /*
1699  * Retire the given pmap from service.
1700  * Should only be called if the map contains no valid mappings.
1701  */
1702 void
1703 pmap_destroy(pm)
1704 	struct pmap *pm;
1705 {
1706 	if (--pm->pm_refs == 0) {
1707 #ifdef DEBUG
1708 		if (pmapdebug & PDB_DESTROY)
1709 			printf("pmap_destroy: freeing pmap %p\n", pm);
1710 #endif
1711 		pmap_release(pm);
1712 		pool_put(&pmap_pool, pm);
1713 	}
1714 }
1715 
1716 /*
1717  * Release any resources held by the given physical map.
1718  * Called when a pmap initialized by pmap_pinit is being released.
1719  */
1720 void
1721 pmap_release(pm)
1722 	struct pmap *pm;
1723 {
1724 	int i, j, k, s;
1725 	paddr_t *pdir, *ptbl, tmp;
1726 
1727 #ifdef DIAGNOSTIC
1728 	if(pm == pmap_kernel())
1729 		panic("pmap_release: releasing pmap_kernel()");
1730 #endif
1731 
1732 	s=splvm();
1733 	simple_lock(&pm->pm_lock);
1734 	for(i=0; i<STSZ; i++) {
1735 		paddr_t psegentp = (paddr_t)(u_long)&pm->pm_segs[i];
1736 		if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)psegentp,
1737 			ASI_PHYS_CACHED))) {
1738 			for (k=0; k<PDSZ; k++) {
1739 				paddr_t pdirentp = (paddr_t)(u_long)&pdir[k];
1740 				if ((ptbl = (paddr_t *)(u_long)ldxa(
1741 					(vaddr_t)pdirentp, ASI_PHYS_CACHED))) {
1742 					for (j=0; j<PTSZ; j++) {
1743 						int64_t data;
1744 						data  = ldxa((vaddr_t)&ptbl[j],
1745 							ASI_PHYS_CACHED);
1746 						if (data&TLB_V &&
1747 						    IS_VM_PHYSADDR(data&TLB_PA_MASK)) {
1748 							paddr_t pa;
1749 							pv_entry_t pv;
1750 
1751 #ifdef DEBUG
1752 							printf("pmap_release: pm=%p page %llx still in use\n", pm,
1753 							       (unsigned long long)(((u_int64_t)i<<STSHIFT)|((u_int64_t)k<<PDSHIFT)|((u_int64_t)j<<PTSHIFT)));
1754 							Debugger();
1755 #endif
1756 							/* Save REF/MOD info */
1757 							pa = data&TLB_PA_MASK;
1758 							pv = pa_to_pvh(pa);
1759 							if (data & TLB_ACCESS)
1760 								pv->pv_va |=
1761 									PV_REF;
1762 							if (data & (TLB_MODIFY))
1763 								pv->pv_va |=
1764 									PV_MOD;
1765 
1766 							pmap_remove_pv(pm,
1767 								       (long)((u_int64_t)i<<STSHIFT)|((long)k<<PDSHIFT)|((long)j<<PTSHIFT),
1768 								       pa);
1769 						}
1770 					}
1771 					stxa(pdirentp, ASI_PHYS_CACHED, NULL);
1772 					vm_page_free1(PHYS_TO_VM_PAGE((paddr_t)(u_long)ptbl));
1773 				}
1774 			}
1775 			stxa(psegentp, ASI_PHYS_CACHED, NULL);
1776 			vm_page_free1(PHYS_TO_VM_PAGE((paddr_t)(u_long)pdir));
1777 		}
1778 	}
1779 	tmp = (paddr_t)(u_long)pm->pm_segs;
1780 	pm->pm_segs = NULL;
1781 	vm_page_free1(PHYS_TO_VM_PAGE(tmp));
1782 #ifdef NOTDEF_DEBUG
1783 	for (i=0; i<physmem; i++) {
1784 		struct pv_entry *pv;
1785 		for (pv = &pv_table[i]; pv; pv=pv->pv_next) {
1786 			if (pv->pv_pmap == pm) {
1787 				printf("pmap_release(): unreferenced pv=%p pa=%p va=%p pm=%p\n",
1788 				       pv, (void *)(u_long)ptoa(first_phys_addr+i), (void *)(u_long)pv->pv_va, pm);
1789 				Debugger();
1790 				pmap_remove_pv(pm, pv->pv_va, ptoa(first_phys_addr+i));
1791 				break;
1792 			}
1793 		}
1794 	}
1795 #endif
1796 	simple_unlock(&pm->pm_lock);
1797 	splx(s);
1798 	ctx_free(pm);
1799 }
1800 
1801 /*
1802  * Copy the range specified by src_addr/len
1803  * from the source map to the range dst_addr/len
1804  * in the destination map.
1805  *
1806  * This routine is only advisory and need not do anything.
1807  */
1808 void
1809 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
1810 	struct pmap *dst_pmap, *src_pmap;
1811 	vaddr_t dst_addr, src_addr;
1812 	vsize_t len;
1813 {
1814 #ifdef DEBUG
1815 	if (pmapdebug&PDB_CREATE)
1816 		printf("pmap_copy(%p, %p, %p, %lx, %p)\n",
1817 		       dst_pmap, src_pmap, (void *)(u_long)dst_addr,
1818 		       (u_long)len, (void *)(u_long)src_addr);
1819 #endif
1820 }
1821 
1822 /*
1823  * Garbage collects the physical map system for
1824  * pages which are no longer used.
1825  * Success need not be guaranteed -- that is, there
1826  * may well be pages which are not referenced, but
1827  * others may be collected.
1828  * Called by the pageout daemon when pages are scarce.
1829  */
1830 void
1831 pmap_collect(pm)
1832 	struct pmap *pm;
1833 {
1834 #if 1
1835 	int i, j, k, n, m, s;
1836 	paddr_t *pdir, *ptbl;
1837 	/* This is a good place to scan the pmaps for page tables with
1838 	 * no valid mappings in them and free them. */
1839 
1840 	/* NEVER GARBAGE COLLECT THE KERNEL PMAP */
1841 	if (pm == pmap_kernel()) return;
1842 
1843 	s = splvm();
1844 	simple_lock(&pm->pm_lock);
1845 	for (i=0; i<STSZ; i++) {
1846 		if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
1847 			m = 0;
1848 			for (k=0; k<PDSZ; k++) {
1849 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
1850 					m++;
1851 					n = 0;
1852 					for (j=0; j<PTSZ; j++) {
1853 						int64_t data = ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED);
1854 						if (data&TLB_V)
1855 							n++;
1856 					}
1857 					if (!n) {
1858 						/* Free the damn thing */
1859 						stxa((paddr_t)(u_long)&pdir[k], ASI_PHYS_CACHED, NULL);
1860 						vm_page_free1(PHYS_TO_VM_PAGE((paddr_t)(u_long)ptbl));
1861 					}
1862 				}
1863 			}
1864 			if (!m) {
1865 				/* Free the damn thing */
1866 				stxa((paddr_t)(u_long)&pm->pm_segs[i], ASI_PHYS_CACHED, NULL);
1867 				vm_page_free1(PHYS_TO_VM_PAGE((paddr_t)(u_long)pdir));
1868 			}
1869 		}
1870 	}
1871 	simple_unlock(&pm->pm_lock);
1872 	splx(s);
1873 #endif
1874 }
1875 
1876 #if 0
1877 /*
1878  * The two following routines are now in locore.s so I can code them in assembly
1879  * They can bypass the MMU or use VIS bcopy extensions for speed.
1880  */
1881 /*
1882  * Fill the given physical page with zeroes.
1883  */
1884 void
1885 pmap_zero_page(pa)
1886 	paddr_t pa;
1887 {
1888 	/*
1889 	 * We don't need to worry about flushing caches
1890 	 * since all our virtual caches are write-through.
1891 	 * All we need to do is map the page in somewhere, bzero it,
1892 	 * and unmap it.  However, we need to be sure we don't
1893 	 * map it in anywhere near the kernel or we may lose, badly.
1894 	 */
1895 	bzero((caddr_t)pa, NBPG);
1896 }
1897 
1898 /*
1899  * Copy the given physical source page to its destination.
1900  *
1901  * I will code this in assembly RSN.
1902  */
1903 void
1904 pmap_copy_page(src, dst)
1905 	paddr_t src, dst;
1906 {
1907 	bcopy((caddr_t)src, (caddr_t)dst, NBPG);
1908 }
1909 #endif
1910 
1911 /*
1912  * Activate the address space for the specified process.  If the
1913  * process is the current process, load the new MMU context.
1914  */
1915 void
1916 pmap_activate(p)
1917 	struct proc *p;
1918 {
1919 	pmap_t pmap = p->p_vmspace->vm_map.pmap;
1920 	int s;
1921 
1922 	/*
1923 	 * This is essentially the same thing that happens in cpu_switch()
1924 	 * when the newly selected process is about to run, except that we
1925 	 * have to make sure to clean the register windows before we set
1926 	 * the new context.
1927 	 */
1928 
1929 	s = splvm();
1930 	if (p == curproc) {
1931 		write_user_windows();
1932 		if (pmap->pm_ctx == NULL)
1933 			ctx_alloc(pmap);
1934 		stxa(CTX_SECONDARY, ASI_DMMU, pmap->pm_ctx);
1935 	}
1936 	splx(s);
1937 }
1938 
1939 /*
1940  * Deactivate the address space of the specified process.
1941  */
1942 void
1943 pmap_deactivate(p)
1944 	struct proc *p;
1945 {
1946 }
1947 
1948 /*
1949  * pmap_kenter_pa:		[ INTERFACE ]
1950  *
1951  *	Enter a va -> pa mapping into the kernel pmap without any
1952  *	physical->virtual tracking.
1953  *
1954  *	Note: no locking is necessary in this function.
1955  */
1956 void
1957 pmap_kenter_pa(va, pa, prot)
1958 	vaddr_t va;
1959 	paddr_t pa;
1960 	vm_prot_t prot;
1961 {
1962 	pte_t tte;
1963 	paddr_t pg;
1964 	struct pmap *pm = pmap_kernel();
1965 	int i, s;
1966 
1967 	ASSERT(va < INTSTACK || va > EINTSTACK);
1968 	ASSERT(va < kdata || va > ekdata);
1969 
1970 	/*
1971 	 * Construct the TTE.
1972 	 */
1973 	s = splvm();
1974 #if 0
1975 	/* Not needed -- all operations are atomic. */
1976 	simple_lock(&pm->pm_lock);
1977 #endif
1978 #ifdef DEBUG
1979 	enter_stats.unmanaged ++;
1980 #endif
1981 #ifdef DEBUG
1982 	if (pa & (PMAP_NVC|PMAP_NC))
1983 		enter_stats.ci ++;
1984 #endif
1985 	tte.tag = TSB_TAG(0,pm->pm_ctx,va);
1986 	tte.data = TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1987 				 (VM_PROT_WRITE & prot),
1988 				 (!(pa & PMAP_NC)), pa & (PMAP_NVC), 1, 0);
1989 	/* We don't track modification here. */
1990 	if (VM_PROT_WRITE & prot) tte.data |= TLB_REAL_W|TLB_W; /* HWREF -- XXXX */
1991 	tte.data |= TLB_TSB_LOCK;	/* wired */
1992 	ASSERT((tte.data & TLB_NFO) == 0);
1993 	pg = NULL;
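	/*
	 * pseg_set() appears to return 1 when it needs a physical page to
	 * grow the page table; hand it a zeroed spare page and retry until
	 * the mapping goes in (a return of 2 means the spare went unused).
	 */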
1994 	while ((i = pseg_set(pm, va, tte.data, pg)) == 1) {
1995 		pg = NULL;
1996 		if (uvm.page_init_done || !uvm_page_physget(&pg)) {
1997 			struct vm_page *page;
1998 #ifdef NOTDEF_DEBUG
1999 			printf("pmap_kenter_pa: need to alloc page\n");
2000 #endif
2001 			while ((page = vm_page_alloc1()) == NULL) {
2002 				/*
2003 				 * Let the pager run a bit--however this may deadlock
2004 				 */
2005 				panic("pmap_kenter_pa: no free pages");
2006 #ifdef NOTDEF_DEBUG
2007 				printf("pmap_kenter_pa: calling uvm_wait()\n");
2008 #endif
2009 				uvm_wait("pmap_kenter_pa");
2010 			}
2011 			pg = (paddr_t)VM_PAGE_TO_PHYS(page);
2012 		}
2013 		pmap_zero_page((paddr_t)pg);
2014 #ifdef DEBUG
2015 		enter_stats.ptpneeded ++;
2016 #endif
2017 	}
2018 	if (i == 2) {
2019 		/* We allocated a spare page but didn't use it.  Free it. */
2020 		printf("pmap_kenter_pa: freeing unused page %llx\n",
2021 		       (long long)pg);
2022 		vm_page_free1(PHYS_TO_VM_PAGE(pg));
2023 	}
2024 #ifdef DEBUG
2025 	i = ptelookup_va(va);
2026 	if( pmapdebug & PDB_ENTER )
2027 		prom_printf("pmap_kenter_pa: va=%08x tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n", va,
2028 			    (int)(tte.tag>>32), (int)tte.tag,
2029 			    (int)(tte.data>>32), (int)tte.data,
2030 			    i, &tsb[i]);
2031 	if( pmapdebug & PDB_MMU_STEAL && tsb[i].data ) {
2032 		prom_printf("pmap_kenter_pa: evicting entry tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n",
2033 			    (int)(tsb[i].tag>>32), (int)tsb[i].tag,
2034 			    (int)(tsb[i].data>>32), (int)tsb[i].data,
2035 			    i, &tsb[i]);
2036 		prom_printf("with va=%08x tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n", va,
2037 			    (int)(tte.tag>>32), (int)tte.tag,
2038 			    (int)(tte.data>>32), (int)tte.data,
2039 			    i, &tsb[i]);
2040 	}
2041 #endif
2042 #if 0
2043 /* Not needed -- all operations are atomic. */
2044 	simple_unlock(&pm->pm_lock);
2045 #endif
2046 	splx(s);
2047 	ASSERT((tsb[i].data & TLB_NFO) == 0);
2048 	/* this is correct */
2049 	dcache_flush_page(pa);
2050 }
2051 
2052 /*
2053  * pmap_kremove:		[ INTERFACE ]
2054  *
2055  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
2056  *	for size bytes (assumed to be page rounded).
2057  */
2058 #if 0
2059 void
2060 pmap_kremove(va, size)
2061 	vaddr_t va;
2062 	vsize_t size;
2063 {
2064 	return pmap_remove(pmap_kernel(), va, va+size);
2065 }
2066 #else
2067 void
2068 pmap_kremove(va, size)
2069 	vaddr_t va;
2070 	vsize_t size;
2071 {
2072 	struct pmap *pm = pmap_kernel();
2073 	int64_t data;
2074 	int i, s, flush = 0;
2075 
2076 	ASSERT(va < INTSTACK || va > EINTSTACK);
2077 	ASSERT(va < kdata || va > ekdata);
2078 
2079 	s = splvm();
2080 	simple_lock(&pm->pm_lock);
2081 #ifdef DEBUG
2082 	if (pmapdebug & PDB_DEMAP) {
2083 		printf("pmap_kremove: start %p size %lx\n",
2084 		    (void *)(u_long)va, size);
2085 	}
2086 #endif
2087 	while (size >= NBPG) {
2088 		/*
2089 		 * Is this part of the permanent 4MB mapping?
2090 		 */
2091 #ifdef DIAGNOSTIC
2092 		if (pm == pmap_kernel() &&
2093 			(va >= ktext && va < roundup(ekdata, 4*MEG)))
2094 			panic("pmap_kremove: va=%08x in locked TLB\r\n",
2095 				(u_int)va);
2096 #endif
2097 		/* Shouldn't need to do this if the entry's not valid. */
2098 		if ((data = pseg_get(pm, va))) {
2099 			paddr_t entry;
2100 
2101 			flush |= 1;
2102 			entry = (data&TLB_PA_MASK);
2103 			/* We need to flip the valid bit and clear the access statistics. */
2104 			if (pseg_set(pm, va, 0, 0)) {
2105 				printf("pmap_kremove: gotten pseg empty!\n");
2106 				Debugger();
2107 				/* panic? */
2108 			}
2109 #ifdef DEBUG
2110 			if (pmapdebug & PDB_DEMAP)
2111 				printf("pmap_kremove: clearing seg %x pdir %x pte %x\n",
2112 				       (int)va_to_seg(va), (int)va_to_dir(va),
2113 				       (int)va_to_pte(va));
2114 			remove_stats.removes ++;
2115 #endif
2116 
2117 			i = ptelookup_va(va);
2118 			if (tsb[i].tag > 0
2119 			    && tsb[i].tag == TSB_TAG(0,pm->pm_ctx,va))
2120 			{
2121 				/*
2122 				 * Invalidate the TSB
2123 				 *
2124 				 * While we can invalidate it by clearing the
2125 				 * valid bit:
2126 				 *
2127 				 * ptp->data_v = 0;
2128 				 *
2129 				 * it's faster to just store 1 doubleword.
2130 				 */
2131 #ifdef DEBUG
2132 				if (pmapdebug & PDB_DEMAP)
2133 					printf(" clearing TSB [%d]\n", i);
2134 #endif
2135 				tsb[i].data = 0LL;
2136 				ASSERT((tsb[i].data & TLB_NFO) == 0);
2137 				/* Flush the TLB */
2138 			}
2139 #ifdef DEBUG
2140 			remove_stats.tflushes ++;
2141 #endif
2142 			/* Here we assume nothing can get into the TLB unless it has a PTE */
2143 			tlb_flush_pte(va, pm->pm_ctx);
2144 		}
2145 		va += NBPG;
2146 		size -= NBPG;
2147 	}
2148 	if (flush) {
2149 #ifdef DEBUG
2150 		remove_stats.flushes ++;
2151 #endif
2152 	}
2153 	simple_unlock(&pm->pm_lock);
2154 	splx(s);
2155 }
2156 #endif
2157 
2158 /*
2159  * Insert physical page at pa into the given pmap at virtual address va.
2160  * Supports 64-bit pa so we can map I/O space.
2161  */
2162 int
2163 pmap_enter(pm, va, pa, prot, flags)
2164 	struct pmap *pm;
2165 	vaddr_t va;
2166 	paddr_t pa;
2167 	vm_prot_t prot;
2168 	int flags;
2169 {
2170 	pte_t tte;
2171 	paddr_t pg;
2172 	int i, s, aliased = 0;
2173 	pv_entry_t pv = NULL;
2174 	int size = 0; /* PMAP_SZ_TO_TTE(pa); */
2175 	boolean_t wired = (flags & PMAP_WIRED) != 0;
2176 
2177 	/*
2178 	 * Is this part of the permanent mappings?
2179 	 */
2180 	ASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
2181 	ASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
2182 
2183 #ifdef DEBUG
2184 	/* Trap mapping of page zero */
2185 	if (va == NULL) {
2186 		prom_printf("pmap_enter: NULL va=%08x pa=%x:%08x\r\n",
2187 			    va, (int)(pa>>32), (int)pa);
2188 		OF_enter();
2189 	}
2190 #endif
2191 	/*
2192 	 * XXXX If a mapping at this address already exists, remove it.
2193 	 */
2194 	s = splvm();
2195 	simple_lock(&pm->pm_lock);
2196 	if ((tte.data = pseg_get(pm, va))<0) {
2197 		simple_unlock(&pm->pm_lock);
2198 		pmap_remove(pm, va, va+NBPG-1);
2199 		simple_lock(&pm->pm_lock);
2200 		tte.data = pseg_get(pm, va);
2201 	}
2202 
2203 	/*
2204 	 * Construct the TTE.
2205 	 */
2206 	if (IS_VM_PHYSADDR(pa)) {
2207 		pv = pa_to_pvh(pa);
2208 		aliased = (pv->pv_va&(PV_ALIAS|PV_NVC));
2209 #ifdef DIAGNOSTIC
2210 		if ((flags & VM_PROT_ALL) & ~prot)
2211 			panic("pmap_enter: access_type exceeds prot");
2212 #endif
2213 		/* If we don't have the traphandler do it, set the ref/mod bits now */
2214 		if ((flags & VM_PROT_ALL) || (tte.data & TLB_ACCESS))
2215 			pv->pv_va |= PV_REF;
2216 		if (flags & VM_PROT_WRITE || (tte.data & (TLB_MODIFY)))
2217 			pv->pv_va |= PV_MOD;
2218 #ifdef DEBUG
2219 		enter_stats.managed ++;
2220 #endif
2221 	} else {
2222 #ifdef DEBUG
2223 		enter_stats.unmanaged ++;
2224 #endif
2225 		aliased = 0;
2226 	}
2227 	if (pa & PMAP_NVC) aliased = 1;
2228 #ifdef NO_VCACHE
2229 	aliased = 1; /* Disable D$ */
2230 #endif
2231 #ifdef DEBUG
2232 	enter_stats.ci ++;
2233 #endif
2234 	tte.data = TSB_DATA(0, size, pa, pm == pmap_kernel(),
2235 		(flags & VM_PROT_WRITE), (!(pa & PMAP_NC)),
2236 		aliased, 1, (pa & PMAP_LITTLE));
2237 #ifdef HWREF
2238 	if (prot & VM_PROT_WRITE) tte.data |= TLB_REAL_W;
2239 #else
2240 	/* If it needs ref accounting do nothing. */
2241 	if (!(flags&VM_PROT_READ)) {
2242 		simple_unlock(&pm->pm_lock);
2243 		splx(s);
2244 		if (wired) {
2245 			printf("pmap_enter: wired but not readable\n");
2246 			Debugger();
2247 		}
2248 		return 0;
2249 	}
2250 #endif
2251 	if (wired) tte.data |= TLB_TSB_LOCK;
2252 	ASSERT((tte.data & TLB_NFO) == 0);
2253 	pg = NULL;
2254 #ifdef NOTDEF_DEBUG
2255 	printf("pmap_enter: inserting %x:%x at %x\n",
2256 	       (int)(tte.data>>32), (int)tte.data, (int)va);
2257 #endif
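	/*
	 * As in pmap_kenter_pa(): pseg_set() appears to return 1 when it
	 * needs a spare physical page for a new page table level, so keep
	 * feeding it zeroed pages until the mapping is actually entered.
	 */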
2258 	while (pseg_set(pm, va, tte.data, pg) == 1) {
2259 		pg = NULL;
2260 		if (uvm.page_init_done || !uvm_page_physget(&pg)) {
2261 			struct vm_page *page;
2262 #ifdef NOTDEF_DEBUG
2263 			printf("pmap_enter: need to alloc page\n");
2264 #endif
2265 			while ((page = vm_page_alloc1()) == NULL) {
2266 				/*
2267 				 * Let the pager run a bit--however this may deadlock
2268 				 */
2269 				if (pm == pmap_kernel())
2270 					panic("pmap_enter: no free pages");
2271 #ifdef NOTDEF_DEBUG
2272 				printf("pmap_enter: calling uvm_wait()\n");
2273 #endif
2274 				uvm_wait("pmap_enter");
2275 			}
2276 			pg = (paddr_t)VM_PAGE_TO_PHYS(page);
2277 		}
2278 		pmap_zero_page((paddr_t)pg);
2279 #ifdef DEBUG
2280 		enter_stats.ptpneeded ++;
2281 #endif
2282 #ifdef NOTDEF_DEBUG
2283 	printf("pmap_enter: inserting %x:%x at %x with %x\n",
2284 	       (int)(tte.data>>32), (int)tte.data, (int)va, (int)pg);
2285 #endif
2286 	}
2287 
2288 	if (pv)
2289 		pmap_enter_pv(pm, va, pa);
2290 	simple_unlock(&pm->pm_lock);
2291 	splx(s);
2292 	i = ptelookup_va(va);
2293 #ifdef DEBUG
2294 	if( pmapdebug & PDB_ENTER )
2295 		prom_printf("pmap_enter: va=%08x tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n", va,
2296 			    (int)(tte.tag>>32), (int)tte.tag,
2297 			    (int)(tte.data>>32), (int)tte.data,
2298 			    i, &tsb[i]);
2299 	if( pmapdebug & PDB_MMU_STEAL && tsb[i].data ) {
2300 		prom_printf("pmap_enter: evicting entry tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n",
2301 			    (int)(tsb[i].tag>>32), (int)tsb[i].tag,
2302 			    (int)(tsb[i].data>>32), (int)tsb[i].data,
2303 			    i, &tsb[i]);
2304 		prom_printf("with va=%08x tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n", va,
2305 			    (int)(tte.tag>>32), (int)tte.tag,
2306 			    (int)(tte.data>>32), (int)tte.data,
2307 			    i, &tsb[i]);
2308 	}
2309 #endif
2310 	if (pm->pm_ctx || pm == pmap_kernel()) {
2311 		if (tsb[i].tag > 0 &&
2312 		    tsb[i].tag == TSB_TAG(0,pm->pm_ctx,va)) {
2313 			/*
2314 			 * Invalidate the TSB
2315 			 *
2316 			 * While we can invalidate it by clearing the
2317 			 * valid bit:
2318 			 *
2319 			 * ptp->data_v = 0;
2320 			 *
2321 			 * it's faster to just store 1 doubleword.
2322 			 */
2323 			tsb[i].data = 0LL;
2324 			ASSERT((tsb[i].data & TLB_NFO) == 0);
2325 		}
2326 		/* Force reload -- protections may be changed */
2327 		tlb_flush_pte(va, pm->pm_ctx);
2328 		ASSERT((tsb[i].data & TLB_NFO) == 0);
2329 	}
2330 	/* this is correct */
2331 	dcache_flush_page(pa);
2332 
2333 	/* We will let the fast mmu miss interrupt load the new translation */
2334 	pv_check();
2335 	return 0;
2336 }
2337 
2338 /*
2339  * Remove the given range of mapping entries.
2340  */
2341 void
2342 pmap_remove(pm, va, endva)
2343 	struct pmap *pm;
2344 	vaddr_t va, endva;
2345 {
2346 	int i, s, flush=0;
2347 	int64_t data;
2348 	vaddr_t flushva = va;
2349 
2350 	/*
2351 	 * In here we should check each pseg and if there are no more entries,
2352 	 * free it.  It's just that linear scans of 8K pages get expensive.
2353 	 */
2354 
2355 	ASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
2356 	ASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
2357 
2358 	s = splvm();
2359 	simple_lock(&pm->pm_lock);
2360 #ifdef DEBUG
2361 	if (pmapdebug & PDB_REMOVE)
2362 		printf("pmap_remove(pm=%p, va=%p, endva=%p):", pm,
2363 		    (void *)(u_long)va, (void *)(u_long)endva);
2364 	remove_stats.calls ++;
2365 #endif
2366 
2367 	/* Now do the real work */
2368 	while (va < endva) {
2369 		/*
2370 		 * Is this part of the permanent 4MB mapping?
2371 		 */
2372 #ifdef DIAGNOSTIC
2373 		if (pm == pmap_kernel() && va >= ktext &&
2374 			va < roundup(ekdata, 4*MEG))
2375 			panic("pmap_remove: va=%08x in locked TLB\r\n", (u_int)va);
2376 #endif
2377 		/* We don't really need to do this if the valid bit is not set... */
2378 		if ((data = pseg_get(pm, va))) {
2379 			paddr_t entry;
2380 
2381 			flush |= 1;
2382 			/* First remove it from the pv_table */
2383 			entry = (data&TLB_PA_MASK);
2384 			if (IS_VM_PHYSADDR(entry)) {
2385 				pv_entry_t pv;
2386 
2387 				/* Save REF/MOD info */
2388 				pv = pa_to_pvh(entry);
2389 				if (data & TLB_ACCESS) pv->pv_va |= PV_REF;
2390 				if (data & (TLB_MODIFY))  pv->pv_va |= PV_MOD;
2391 
2392 				pmap_remove_pv(pm, va, entry);
2393 			}
2394 			/* We need to flip the valid bit and clear the access statistics. */
2395 			if (pseg_set(pm, va, 0, 0)) {
2396 				printf("pmap_remove: gotten pseg empty!\n");
2397 				Debugger();
2398 				/* panic? */
2399 			}
2400 #ifdef DEBUG
2401 			if (pmapdebug & PDB_REMOVE)
2402 				printf(" clearing seg %x pte %x\n", (int)va_to_seg(va), (int)va_to_pte(va));
2403 			remove_stats.removes ++;
2404 #endif
2405 			if (!pm->pm_ctx && pm != pmap_kernel()) {
				/* No context, so no TSB/TLB entry to flush. */
				va += NBPG;
				continue;
			}
2406 			i = ptelookup_va(va);
2407 			if (tsb[i].tag > 0
2408 			    && tsb[i].tag == TSB_TAG(0,pm->pm_ctx,va))
2409 			{
2410 				/*
2411 				 * Invalidate the TSB
2412 				 *
2413 				 * While we can invalidate it by clearing the
2414 				 * valid bit:
2415 				 *
2416 				 * ptp->data_v = 0;
2417 				 *
2418 				 * it's faster to just store 1 doubleword.
2419 				 */
2420 #ifdef DEBUG
2421 				if (pmapdebug & PDB_REMOVE)
2422 					printf(" clearing TSB [%d]\n", i);
2423 #endif
2424 				tsb[i].data = 0LL;
2425 				ASSERT((tsb[i].data & TLB_NFO) == 0);
2426 				/* Flush the TLB */
2427 			}
2428 #ifdef NOTDEF_DEBUG
2429 			else if (pmapdebug & PDB_REMOVE) {
2430 				printf("TSB[%d] has ctx %d va %x: ",
2431 				       i,
2432 				       TSB_TAG_CTX(tsb[i].tag),
2433 				       (int)(TSB_TAG_VA(tsb[i].tag)|(i<<13)));
2434 				printf("%08x:%08x %08x:%08x\n",
2435 				       (int)(tsb[i].tag>>32), (int)tsb[i].tag,
2436 				       (int)(tsb[i].data>>32), (int)tsb[i].data);
2437 			}
2438 #endif
2439 #ifdef DEBUG
2440 			remove_stats.tflushes ++;
2441 #endif
2442 			/* Here we assume nothing can get into the TLB unless it has a PTE */
2443 			tlb_flush_pte(va, pm->pm_ctx);
2444 		}
2445 		va += NBPG;
2446 	}
2447 	simple_unlock(&pm->pm_lock);
2448 	splx(s);
2449 	if (flush) {
2450 #ifdef DEBUG
2451 		remove_stats.flushes ++;
2452 #endif
2453 		cache_flush_virt(flushva, endva - flushva);
2454 	}
2455 #ifdef DEBUG
2456 	if (pmapdebug & PDB_REMOVE)
2457 		printf("\n");
2458 #endif
2459 	pv_check();
2460 }
2461 
2462 /*
2463  * Change the protection on the specified range of this pmap.
2464  */
2465 void
2466 pmap_protect(pm, sva, eva, prot)
2467 	struct pmap *pm;
2468 	vaddr_t sva, eva;
2469 	vm_prot_t prot;
2470 {
2471 	int i, s;
2472 	paddr_t pa;
2473 	int64_t data;
2474 
2475 	ASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
2476 	ASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
2477 
2478 	if (prot & VM_PROT_WRITE)
2479 		return;
2480 
2481 	if (prot == VM_PROT_NONE) {
2482 		pmap_remove(pm, sva, eva);
2483 		return;
2484 	}
2485 
2486 	s = splvm();
2487 	simple_lock(&pm->pm_lock);
2488 	sva = sva & ~PGOFSET;
2489 	while (sva < eva) {
2490 		/*
2491 		 * Is this part of the permanent 4MB mapping?
2492 		 */
2493 		if (pm == pmap_kernel() && sva >= ktext &&
2494 			sva < roundup(ekdata, 4*MEG)) {
2495 			prom_printf("pmap_protect: va=%08x in locked TLB\r\n", sva);
2496 			OF_enter();
2497 			simple_unlock(&pm->pm_lock);
			splx(s);
			return;
2498 		}
2499 
2500 #ifdef DEBUG
2501 		if (pmapdebug & PDB_CHANGEPROT)
2502 			printf("pmap_protect: va %p\n", (void *)(u_long)sva);
2503 #endif
2504 		if (((data = pseg_get(pm, sva))&TLB_V) /*&& ((data&TLB_TSB_LOCK) == 0)*/) {
2505 			pa = data&TLB_PA_MASK;
2506 #ifdef DEBUG
2507 			if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2508 				printf("pmap_protect: va=%08x data=%x:%08x seg=%08x pte=%08x\r\n",
2509 					    (u_int)sva, (int)(pa>>32), (int)pa, (int)va_to_seg(sva), (int)va_to_pte(sva));
2510 /* Catch this before the assertion */
2511 			if (data & TLB_NFO) {
2512 				printf("pmap_protect: pm=%p  NFO mapping va=%x data=%x:%x\n",
2513 				       pm, (u_int)sva, (int)(data>>32), (int)data);
2514 				Debugger();
2515 			}
2516 #endif
2517 			if (IS_VM_PHYSADDR(pa)) {
2518 				pv_entry_t pv;
2519 
2520 				/* Save REF/MOD info */
2521 				pv = pa_to_pvh(pa);
2522 				if (data & TLB_ACCESS) pv->pv_va |= PV_REF;
2523 				if (data & (TLB_MODIFY))
2524 					pv->pv_va |= PV_MOD;
2525 			}
2526 			/* Just do the pmap and TSB, not the pv_list */
2527 			data &= ~(TLB_W|TLB_REAL_W);
2528 			ASSERT((data & TLB_NFO) == 0);
2529 			if (pseg_set(pm, sva, data, 0)) {
2530 				printf("pmap_protect: gotten pseg empty!\n");
2531 				Debugger();
2532 				/* panic? */
2533 			}
2534 
2535 			if (!pm->pm_ctx && pm != pmap_kernel()) {
				/* No context: nothing in the TSB/TLB to fix up. */
				sva += NBPG;
				continue;
			}
2536 			i = ptelookup_va(sva);
2537 			if (tsb[i].tag > 0
2538 			    && tsb[i].tag == TSB_TAG(0,pm->pm_ctx,sva)) {
2539 				tsb[i].data = data;
2540 				ASSERT((tsb[i].data & TLB_NFO) == 0);
2541 
2542 			}
2543 			tlb_flush_pte(sva, pm->pm_ctx);
2544 		}
2545 		sva += NBPG;
2546 	}
2547 	simple_unlock(&pm->pm_lock);
2548 	splx(s);
2549 	pv_check();
2550 }
2551 
2552 /*
2553  * Extract the physical page address associated
2554  * with the given map/virtual_address pair.
2555  */
2556 boolean_t
2557 pmap_extract(pm, va, pap)
2558 	register struct pmap *pm;
2559 	vaddr_t va;
2560 	paddr_t *pap;
2561 {
2562 	paddr_t pa;
2563 
2564 	if (pm == pmap_kernel() && va >= kdata &&
2565 		va < roundup(ekdata, 4*MEG)) {
2566 		/* Need to deal w/locked TLB entry specially. */
2567 		pa = (paddr_t) (kdatap - kdata + va);
2568 #ifdef DEBUG
2569 		if (pmapdebug & PDB_EXTRACT) {
2570 			printf("pmap_extract: va=%lx pa=%llx\n", (u_long)va, (unsigned long long)pa);
2571 		}
2572 #endif
2573 	} else if( pm == pmap_kernel() && va >= ktext && va < ektext ) {
2574 		/* Need to deal w/locked TLB entry specially. */
2575 		pa = (paddr_t) (ktextp - ktext + va);
2576 #ifdef DEBUG
2577 		if (pmapdebug & PDB_EXTRACT) {
2578 			printf("pmap_extract: va=%lx pa=%llx\n",
2579 			    (u_long)va, (unsigned long long)pa);
2580 		}
2581 #endif
2582 	} else {
2583 		int s;
2584 
2585 		s = splvm();
2586 		simple_lock(&pm->pm_lock);
2587 		pa = (pseg_get(pm, va)&TLB_PA_MASK)+(va&PGOFSET);
2588 #ifdef DEBUG
2589 		if (pmapdebug & PDB_EXTRACT) {
2590 			paddr_t npa = ldxa((vaddr_t)&pm->pm_segs[va_to_seg(va)], ASI_PHYS_CACHED);	/* scratch: don't clobber pa */
2591 			printf("pmap_extract: va=%p segs[%ld]=%llx", (void *)(u_long)va, (long)va_to_seg(va), (unsigned long long)npa);
2592 			if (npa) {
2593 				npa = (paddr_t)ldxa((vaddr_t)&((paddr_t*)(u_long)npa)[va_to_dir(va)], ASI_PHYS_CACHED);
2594 				printf(" segs[%ld][%ld]=%lx", (long)va_to_seg(va), (long)va_to_dir(va), (long)npa);
2595 			}
2596 			if (npa) {
2597 				npa = (paddr_t)ldxa((vaddr_t)&((paddr_t*)(u_long)npa)[va_to_pte(va)], ASI_PHYS_CACHED);
2598 				printf(" segs[%ld][%ld][%ld]=%lx", (long)va_to_seg(va),
2599 				       (long)va_to_dir(va), (long)va_to_pte(va), (long)npa);
2600 			}
2601 			printf(" pseg_get: %lx\n", (long)npa);
2602 		}
2603 #endif
2604 		simple_unlock(&pm->pm_lock);
2605 		splx(s);
2606 	}
2607 	if (pa == 0)
2608 		return (FALSE);
2609 	if (pap != NULL)
2610 		*pap = pa;
2611 	return (TRUE);
2612 }
2613 
2614 /*
2615  * Return the number of bytes that pmap_dumpmmu() will dump.
2616  */
2617 int
2618 pmap_dumpsize()
2619 {
2620 	int	sz;
2621 
2622 	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
2623 	sz += memsize * sizeof(phys_ram_seg_t);
2624 
2625 	return btodb(sz + DEV_BSIZE - 1);
2626 }
2627 
2628 /*
2629  * Write the mmu contents to the dump device.
2630  * This gets appended to the end of a crash dump since
2631  * there is no in-core copy of kernel memory mappings.
2632  *
2633  * Write the core dump headers and MD data to the dump device.
2634  * We dump the following items:
2635  *
2636  *	kcore_seg_t		 (MI header defined in <sys/kcore.h>)
2637  *	cpu_kcore_hdr_t		 (MD header defined in <machine/kcore.h>)
2638  *	phys_ram_seg_t[memsize]  physical memory segments
2639  */
2640 int
2641 pmap_dumpmmu(dump, blkno)
2642 	register daddr_t blkno;
2643 	register int (*dump)	__P((dev_t, daddr_t, caddr_t, size_t));
2644 {
2645 	kcore_seg_t	*kseg;
2646 	cpu_kcore_hdr_t	*kcpu;
2647 	phys_ram_seg_t	memseg;
2648 	register int	error = 0;
2649 	register int	i, memsegoffset;
2650 	int		buffer[dbtob(1) / sizeof(int)];
2651 	int		*bp, *ep;
2652 
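/*
 * EXPEDITE() streams `n' bytes (in 4-byte chunks) through buffer[] and
 * writes a full disk block out via (*dump)() each time the buffer fills.
 */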
2653 #define EXPEDITE(p,n) do {						\
2654 	int *sp = (int *)(p);						\
2655 	int sz = (n);							\
2656 	while (sz > 0) {						\
2657 		*bp++ = *sp++;						\
2658 		if (bp >= ep) {						\
2659 			error = (*dump)(dumpdev, blkno,			\
2660 					(caddr_t)buffer, dbtob(1));	\
2661 			if (error != 0)					\
2662 				return (error);				\
2663 			++blkno;					\
2664 			bp = buffer;					\
2665 		}							\
2666 		sz -= 4;						\
2667 	}								\
2668 } while (0)
2669 
2670 	/* Setup bookkeeping pointers */
2671 	bp = buffer;
2672 	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
2673 
2674 	/* Fill in MI segment header */
2675 	kseg = (kcore_seg_t *)bp;
2676 	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2677 	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
2678 
2679 	/* Fill in MD segment header (interpreted by MD part of libkvm) */
2680 	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
2681 	kcpu->cputype = CPU_SUN4U;
2682 	kcpu->kernbase = (u_int64_t)KERNBASE;
2683 	kcpu->cpubase = (u_int64_t)CPUINFO_VA;
2684 
2685 	/* Describe the locked text segment */
2686 	kcpu->ktextbase = (u_int64_t)ktext;
2687 	kcpu->ktextp = (u_int64_t)ktextp;
2688 	kcpu->ktextsz = (u_int64_t)ektextp - ktextp;
2689 
2690 	/* Describe locked data segment */
2691 	kcpu->kdatabase = (u_int64_t)kdata;
2692 	kcpu->kdatap = (u_int64_t)kdatap;
2693 	kcpu->kdatasz = (u_int64_t)ekdatap - kdatap;
2694 
2695 	/* Now the memsegs */
2696 	kcpu->nmemseg = memsize;
2697 	kcpu->memsegoffset = memsegoffset = ALIGN(sizeof(cpu_kcore_hdr_t));
2698 
2699 	/* Now we need to point this at our kernel pmap. */
2700 	kcpu->nsegmap = STSZ;
2701 	kcpu->segmapoffset = (u_int64_t)pmap_kernel()->pm_physaddr;
2702 
2703 	/* Note: we have assumed everything fits in buffer[] so far... */
2704 	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
2705 
2706 	for (i = 0; i < memsize; i++) {
2707 		memseg.start = mem[i].start;
2708 		memseg.size = mem[i].size;
2709 		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
2710 	}
2711 
2712 	if (bp != buffer)
2713 		error = (*dump)(dumpdev, blkno++, (caddr_t)buffer, dbtob(1));
2714 
2715 	return (error);
2716 }
2717 
2718 /*
2719  * Determine (non)existence of physical page
2720  */
2721 int pmap_pa_exists(pa)
2722 	paddr_t pa;
2723 {
2724 	register struct mem_region *mp;
2725 
2726 	/* Just go through physical memory list & see if we're there */
2727 	for (mp = mem; mp->size && mp->start <= pa; mp++)
2728 		if( mp->start <= pa && mp->start + mp->size >= pa )
2729 			return 1;
2730 	return 0;
2731 }
2732 
2733 /*
2734  * Lookup the appropriate TSB entry.
2735  *
2736  * Here is the full official pseudo code:
2737  *
2738  */
2739 
2740 #ifdef NOTYET
2741 int64 GenerateTSBPointer(
2742  	int64 va,		/* Missing VA			*/
2743  	PointerType type,	/* 8K_POINTER or 16K_POINTER	*/
2744  	int64 TSBBase,		/* TSB Register[63:13] << 13	*/
2745  	Boolean split,		/* TSB Register[12]		*/
2746  	int TSBSize)		/* TSB Register[2:0]		*/
2747 {
2748  	int64 vaPortion;
2749  	int64 TSBBaseMask;
2750  	int64 splitMask;
2751 
2752 	/* TSBBaseMask marks the bits from TSB Base Reg		*/
2753 	TSBBaseMask = 0xffffffffffffe000 <<
2754 		(split? (TSBsize + 1) : TSBsize);
2755 
2756 	/* Shift va towards lsb appropriately and		*/
2757 	/* zero out the original va page offset			*/
2758 	vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
2759 		0xfffffffffffffff0;
2760 
2761 	if (split) {
2762 		/* There's only one bit in question for split	*/
2763 		splitMask = 1 << (13 + TSBsize);
2764 		if (type == 8K_POINTER)
2765 			/* Make sure we're in the lower half	*/
2766 			vaPortion &= ~splitMask;
2767 		else
2768 			/* Make sure we're in the upper half	*/
2769 			vaPortion |= splitMask;
2770 	}
2771 	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
2772 }
2773 #endif
2774 /*
2775  * Of course, since we are not using a split TSB or variable page sizes,
2776  * we can optimize this a bit.
2777  *
2778  * The following only works for a unified 8K TSB.  It will find the slot
2779  * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
2780  */
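/*
 * For example, with tsbsize 0 (an 8KB TSB of 512 16-byte pte_t entries)
 * this presumably reduces to using VA bits <21:13> as the slot index,
 * i.e. ptelookup_va(va) == (va >> 13) & 511.
 */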
2781 int
2782 ptelookup_va(va)
2783 	vaddr_t va;
2784 {
2785 	long tsbptr;
2786 #define TSBBASEMASK	(0xffffffffffffe000LL<<tsbsize)
2787 
2788 	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK );
2789 	return (tsbptr/sizeof(pte_t));
2790 }
2791 
2792 #if notyet
2793 void
2794 tsb_enter(ctx, va, data)
2795 	int ctx;
2796 	int64_t va;
2797 	int64_t data;
2798 {
2799 	int i, s;
2800 	int64_t pa;
2801 
2802 	i = ptelookup_va(va);
2803 	s = splvm();
2804 	pa = tsb[i].data&TLB_PA_MASK;
2805 	/*
2806 	 * If we use fast DMMU access fault handlers to track
2807 	 * referenced and modified bits, we should save the
2808 	 * TSB entry's state here.  Since we don't, we don't.
2809 	 */
2810 	/* Do not use global entries */
2811 	tsb[i].tag = TSB_TAG(0,ctx,va);
2812 	tsb[i].data = data;
2813 	tlb_flush_pte(va, ctx);	/* Force reload -- protections may be changed */
2814 	splx(s);
2815 }
2816 #endif
2817 
2818 /*
2819  * Do whatever is needed to sync the MOD/REF flags
2820  */
2821 
2822 boolean_t
2823 pmap_clear_modify(pg)
2824 	struct vm_page *pg;
2825 {
2826 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2827 	int changed = 0;
2828 #ifdef DEBUG
2829 	int modified = 0;
2830 #endif
2831 	int i, s;
2832 	register pv_entry_t pv;
2833 
2834 #ifdef DEBUG
2835 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2836 		printf("pmap_clear_modify(%llx)\n", (unsigned long long)pa);
2837 #endif
2838 
2839 #if defined(DEBUG)
2840 	modified = pmap_is_modified(pg);
2841 #endif
2842 	/* Clear all mappings */
2843 	s = splvm();
2844 	pv = pa_to_pvh(pa);
2845 #ifdef DEBUG
2846 	if (pv->pv_va & PV_MOD)
2847 		pv->pv_va |= PV_WE;	/* Remember this was modified */
2848 #endif
2849 	if (pv->pv_va & PV_MOD)
2850 		changed |= 1;
2851 	pv->pv_va &= ~(PV_MOD);
2852 #ifdef DEBUG
2853 	if (pv->pv_next && !pv->pv_pmap) {
2854 		printf("pmap_clear_modify: npv but no pmap for pv %p\n", pv);
2855 		Debugger();
2856 	}
2857 #endif
2858 	if (pv->pv_pmap != NULL)
2859 		for (; pv; pv = pv->pv_next) {
2860 			int64_t data;
2861 
2862 
2863 			simple_lock(&pv->pv_pmap->pm_lock);
2864 			/* First clear the mod bit in the PTE and make it R/O */
2865 			data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
2866 			/* Need to both clear the modify and write bits */
2867 			if (data & (TLB_MODIFY))
2868 				changed |= 1;
2869 #ifdef HWREF
2870 			data &= ~(TLB_MODIFY|TLB_W);
2871 #else
2872 			data &= ~(TLB_MODIFY|TLB_W|TLB_REAL_W);
2873 #endif
2874 			ASSERT((data & TLB_NFO) == 0);
2875 			if (pseg_set(pv->pv_pmap, pv->pv_va&PV_VAMASK, data, 0)) {
2876 				printf("pmap_clear_modify: gotten pseg empty!\n");
2877 				Debugger();
2878 				/* panic? */
2879 			}
2880 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2881 				i = ptelookup_va(pv->pv_va&PV_VAMASK);
2882 				if (tsb[i].tag == TSB_TAG(0, pv->pv_pmap->pm_ctx, pv->pv_va&PV_VAMASK))
2883 					tsb[i].data = /* data */ 0;
2884 				tlb_flush_pte(pv->pv_va&PV_VAMASK,
2885 					pv->pv_pmap->pm_ctx);
2886 			}
2887 			/* Then clear the mod bit in the pv */
2888 			if (pv->pv_va & PV_MOD)
2889 				changed |= 1;
2890 			pv->pv_va &= ~(PV_MOD);
2891 			simple_unlock(&pv->pv_pmap->pm_lock);
2892 			dcache_flush_page(pa);
2893 		}
2894 	splx(s);
2895 	pv_check();
2896 #ifdef DEBUG
2897 	if (pmap_is_modified(pg)) {
2898 		printf("pmap_clear_modify(): %p still modified!\n", pg);
2899 		Debugger();
2900 	}
2901 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2902 		printf("pmap_clear_modify: page %lx %s\n", (long)pa,
2903 		       (changed?"was modified":"was not modified"));
2904 	if (modified != changed) {
2905 		printf("pmap_clear_modify: modified %d changed %d\n", modified, changed);
2906 		Debugger();
2907 	} else return (modified);
2908 #endif
2909 	return (changed);
2910 }
2911 
2912 boolean_t
2913 pmap_clear_reference(pg)
2914 	struct vm_page* pg;
2915 {
2916 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2917 	int changed = 0;
2918 #ifdef DEBUG
2919 	int referenced = 0;
2920 #endif
2921 	int i, s;
2922 	register pv_entry_t pv;
2923 
2924 #ifdef DEBUG
2925 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2926 		printf("pmap_clear_reference(%llx)\n", (unsigned long long)pa);
2927 	referenced = pmap_is_referenced(pg);
2928 #endif
2929 	/* Clear all references */
2930 	s = splvm();
2931 	pv = pa_to_pvh(pa);
2932 #ifdef NOT_DEBUG
2933 	if (pv->pv_va & PV_MOD)
2934 		printf("pmap_clear_reference(): pv %p still modified\n", (long)pa);
2935 #endif
2936 	if (pv->pv_va & PV_REF)
2937 		changed |= 1;
2938 	pv->pv_va &= ~(PV_REF);
2939 #ifdef DEBUG
2940 	if (pv->pv_next && !pv->pv_pmap) {
2941 		printf("pmap_clear_reference: npv but no pmap for pv %p\n", pv);
2942 		Debugger();
2943 	}
2944 #endif
2945 	if (pv->pv_pmap != NULL) {
2946 		for (; pv; pv = pv->pv_next) {
2947 			int64_t data;
2948 
2949 			simple_lock(&pv->pv_pmap->pm_lock);
2950 			data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
2951 #ifdef DEBUG
2952 			if (pmapdebug & PDB_CHANGEPROT)
2953 				printf("clearing ref pm:%p va:%p ctx:%lx data:%x:%x\n", pv->pv_pmap,
2954 				       (void *)(u_long)pv->pv_va, (u_long)pv->pv_pmap->pm_ctx, (int)(data>>32), (int)data);
2955 #endif
2956 #ifdef HWREF
2957 			if (data & TLB_ACCESS)
2958 				changed |= 1;
2959 			data &= ~TLB_ACCESS;
2960 #else
2961 			if (data < 0)
2962 				changed |= 1;
2963 			data = 0;
2964 #endif
2965 			ASSERT((data & TLB_NFO) == 0);
2966 			if (pseg_set(pv->pv_pmap, pv->pv_va, data, 0)) {
2967 				printf("pmap_clear_reference: gotten pseg empty!\n");
2968 				Debugger();
2969 				/* panic? */
2970 			}
2971 			if (pv->pv_pmap->pm_ctx ||
2972 				pv->pv_pmap == pmap_kernel()) {
2973 				i = ptelookup_va(pv->pv_va&PV_VAMASK);
2974 				/* Invalidate our TSB entry since ref info is in the PTE */
2975 				if (tsb[i].tag ==
2976 					TSB_TAG(0,pv->pv_pmap->pm_ctx,pv->pv_va&
2977 						PV_VAMASK))
2978 					tsb[i].data = 0;
2979 /*
2980 				tlb_flush_pte(pv->pv_va&PV_VAMASK,
2981 					pv->pv_pmap->pm_ctx);
2982 */
2983 			}
2984 			if (pv->pv_va & PV_REF)
2985 				changed |= 1;
2986 			pv->pv_va &= ~(PV_REF);
2987 			simple_unlock(&pv->pv_pmap->pm_lock);
2988 		}
2989 	}
2990 	/* Stupid here will take a cache hit even on unmapped pages 8^( */
2991 	dcache_flush_page(pa);
2992 	splx(s);
2993 	pv_check();
2994 #ifdef DEBUG
2995 	if (pmap_is_referenced(pg)) {
2996 		printf("pmap_clear_reference(): %p still referenced!\n", pg);
2997 		Debugger();
2998 	}
2999 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
3000 		printf("pmap_clear_reference: page %lx %s\n", (long)pa,
3001 		       (changed?"was referenced":"was not referenced"));
3002 	if (referenced != changed) {
3003 		printf("pmap_clear_reference: referenced %d changed %d\n", referenced, changed);
3004 		Debugger();
3005 	} else return (referenced);
3006 #endif
3007 	return (changed);
3008 }
3009 
3010 boolean_t
3011 pmap_is_modified(pg)
3012 	struct vm_page* pg;
3013 {
3014 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
3015 	int i=0, s;
3016 	register pv_entry_t pv, npv;
3017 
3018 	/* Check if any mapping has been modified */
3019 	s = splvm();
3020 	pv = pa_to_pvh(pa);
3021 	if (pv->pv_va&PV_MOD) i = 1;
3022 #ifdef HWREF
3023 #ifdef DEBUG
3024 	if (pv->pv_next && !pv->pv_pmap) {
3025 		printf("pmap_is_modified: npv but no pmap for pv %p\n", pv);
3026 		Debugger();
3027 	}
3028 #endif
3029 	if (!i && (pv->pv_pmap != NULL))
3030 		for (npv = pv; i == 0 && npv && npv->pv_pmap; npv = npv->pv_next) {
3031 			int64_t data;
3032 
3033 			data = pseg_get(npv->pv_pmap, npv->pv_va&PV_VAMASK);
3034 			if (data & (TLB_MODIFY)) i = 1;
3035 			/* Migrate modify info to head pv */
3036 			if (npv->pv_va & PV_MOD) i = 1;
3037 			npv->pv_va &= ~PV_MOD;
3038 		}
3039 	/* Save modify info */
3040 	if (i) pv->pv_va |= PV_MOD;
3041 #ifdef DEBUG
3042 	if (i) pv->pv_va |= PV_WE;
3043 #endif
3044 #endif
3045 	splx(s);
3046 
3047 #ifdef DEBUG
3048 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF)) {
3049 		printf("pmap_is_modified(%llx) = %d\n", (unsigned long long)pa, i);
3050 		/* if (i) Debugger(); */
3051 	}
3052 #endif
3053 	pv_check();
3054 	return (i);
3055 }
3056 
3057 boolean_t
3058 pmap_is_referenced(pg)
3059 	struct vm_page* pg;
3060 {
3061 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
3062 	int i=0, s;
3063 	register pv_entry_t pv, npv;
3064 
3065 	/* Check if any mapping has been referenced */
3066 	s = splvm();
3067 	pv = pa_to_pvh(pa);
3068 	if (pv->pv_va&PV_REF) i = 1;
3069 #ifdef HWREF
3070 #ifdef DEBUG
3071 	if (pv->pv_next && !pv->pv_pmap) {
3072 		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
3073 		Debugger();
3074 	}
3075 #endif
3076 	if (!i && (pv->pv_pmap != NULL))
3077 		for (npv = pv; npv; npv = npv->pv_next) {
3078 			int64_t data;
3079 
3080 			data = pseg_get(npv->pv_pmap, npv->pv_va&PV_VAMASK);
3081 			if (data & TLB_ACCESS) i = 1;
3082 			/* Migrate modify info to head pv */
3083 			if (npv->pv_va & PV_REF) i = 1;
3084 			npv->pv_va &= ~PV_REF;
3085 		}
3086 	/* Save ref info */
3087 	if (i) pv->pv_va |= PV_REF;
3088 #endif
3089 	splx(s);
3090 
3091 #ifdef DEBUG
3092 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF)) {
3093 		printf("pmap_is_referenced(%llx) = %d\n", (unsigned long long)pa, i);
3094 		/* if (i) Debugger(); */
3095 	}
3096 #endif
3097 	pv_check();
3098 	return i;
3099 }
3100 
3101 
3102 
3103 /*
3104  *	Routine:	pmap_unwire
3105  *	Function:	Clear the wired attribute for a map/virtual-address
3106  *			pair.
3107  *	In/out conditions:
3108  *			The mapping must already exist in the pmap.
3109  */
3110 void
3111 pmap_unwire(pmap, va)
3112 	register pmap_t	pmap;
3113 	vaddr_t va;
3114 {
3115 	int64_t data;
3116 	int s;
3117 
3118 #ifdef DEBUG
3119 	if (pmapdebug & (PDB_MMU_STEAL)) /* XXXX Need another flag for this */
3120 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
3121 #endif
3122 	if (pmap == NULL) {
3123 		pv_check();
3124 		return;
3125 	}
3126 
3127 	/*
3128 	 * Is this part of the permanent 4MB mapping?
3129 	 */
3130 	if (pmap == pmap_kernel() && va >= ktext &&
3131 		va < roundup(ekdata, 4*MEG)) {
3132 		prom_printf("pmap_unwire: va=%08x in locked TLB\r\n", va);
3133 		OF_enter();
3134 		return;
3135 	}
3136 	s = splvm();
3137 	simple_lock(&pmap->pm_lock);
3138 	data = pseg_get(pmap, va&PV_VAMASK);
3139 
3140 	data &= ~TLB_TSB_LOCK;
3141 
3142 	if (pseg_set(pmap, va&PV_VAMASK, data, 0)) {
3143 		printf("pmap_unwire: gotten pseg empty!\n");
3144 		Debugger();
3145 		/* panic? */
3146 	}
3147 	simple_unlock(&pmap->pm_lock);
3148 	splx(s);
3149 	pv_check();
3150 }
3151 
3152 /*
3153  * Lower the protection on the specified physical page.
3154  *
3155  * Never enable writing as it will break COW
3156  */
3157 
3158 void
3159 pmap_page_protect(pg, prot)
3160 	struct vm_page* pg;
3161 	vm_prot_t prot;
3162 {
3163 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
3164 	register pv_entry_t pv;
3165 	register int i, s;
3166 	long long clear, set;
3167 	int64_t data = 0LL;
3168 
3169 #ifdef DEBUG
3170 	if (pmapdebug & PDB_CHANGEPROT)
3171 		printf("pmap_page_protect: pa %llx prot %x\n",
3172 			(unsigned long long)pa, prot);
3173 #endif
3174 
3175 	if (prot & VM_PROT_WRITE) {
3176 		pv_check();
3177 		return;
3178 	}
3179 
3180 	if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
3181 		/* copy_on_write */
3182 
3183 		set = TLB_V;
3184 		clear = TLB_REAL_W|TLB_W;
3185 		if (VM_PROT_EXECUTE & prot)
3186 			set |= TLB_EXEC;
3187 		else
3188 			clear |= TLB_EXEC;
3189 		if (VM_PROT_EXECUTE == prot)
3190 			set |= TLB_EXEC_ONLY;
3191 
3192 		pv = pa_to_pvh(pa);
3193 		s = splvm();
3194 #ifdef DEBUG
3195 		if (pv->pv_next && !pv->pv_pmap) {
3196 			printf("pmap_page_protect: npv but no pmap for pv %p\n", pv);
3197 			Debugger();
3198 		}
3199 #endif
3200 		if (pv->pv_pmap != NULL) {
3201 			for (; pv; pv = pv->pv_next) {
3202 				simple_lock(&pv->pv_pmap->pm_lock);
3203 #ifdef DEBUG
3204 				if (pmapdebug & (PDB_CHANGEPROT|PDB_REF)) {
3205 					printf("pmap_page_protect: RO va %p of pa %p...\n",
3206 					    (void *)(u_long)pv->pv_va, (void *)(u_long)pa);
3207 				}
3208 #if 0
3209 				if (!pv->pv_pmap->pm_segs[va_to_seg(pv->pv_va&PV_VAMASK)]) {
3210 					printf("pmap_page_protect(%x:%x,%x): pv %x va %x not in pmap %x\n",
3211 					       (int)(pa>>32), (int)pa, prot, pv, pv->pv_va, pv->pv_pmap);
3212 					Debugger();
3213 					continue;
3214 				}
3215 #endif
3216 #endif
3217 				data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
3218 
3219 				/* Save REF/MOD info */
3220 				if (data & TLB_ACCESS) pv->pv_va |= PV_REF;
3221 				if (data & (TLB_MODIFY))
3222 					pv->pv_va |= PV_MOD;
3223 
3224 				data &= ~(clear);
3225 				data |= (set);
3226 				ASSERT((data & TLB_NFO) == 0);
3227 				if (pseg_set(pv->pv_pmap, pv->pv_va&PV_VAMASK, data, 0)) {
3228 					printf("pmap_page_protect: gotten pseg empty!\n");
3229 					Debugger();
3230 					/* panic? */
3231 				}
3232 				if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
3233 					i = ptelookup_va(pv->pv_va&PV_VAMASK);
3234 					/* since we already know the va for each mapping we don't need to scan the entire TSB */
3235 					if (tsb[i].tag == TSB_TAG(0, pv->pv_pmap->pm_ctx, pv->pv_va&PV_VAMASK))
3236 						tsb[i].data = /* data */ 0;
3237 					tlb_flush_pte(pv->pv_va&PV_VAMASK, pv->pv_pmap->pm_ctx);
3238 				}
3239 				simple_unlock(&pv->pv_pmap->pm_lock);
3240 			}
3241 		}
3242 		splx(s);
3243 	} else {
3244 		pv_entry_t npv, firstpv;
3245 		/* remove mappings */
3246 
3247 #ifdef DEBUG
3248 		if (pmapdebug & PDB_REMOVE)
3249 			printf("pmap_page_protect: demapping pa %lx\n", (long)pa);
3250 #endif
3251 
3252 		firstpv = pv = pa_to_pvh(pa);
3253 		s = splvm();
3254 
3255 		/* First remove the entire list of continuation pv's*/
3256 		for (npv = pv->pv_next; npv; npv = pv->pv_next) {
3257 			/* We're removing npv from pv->pv_next */
3258 			simple_lock(&npv->pv_pmap->pm_lock);
3259 #ifdef DEBUG
3260 			if (pmapdebug & (PDB_CHANGEPROT|PDB_REF|PDB_REMOVE)) {
3261 				printf("pmap_page_protect: demap va %p of pa %p in pmap %p...\n",
3262 				       (void *)(u_long)npv->pv_va, (void *)(u_long)pa, npv->pv_pmap);
3263 			}
3264 #if 0
3265 			if (!npv->pv_pmap->pm_segs[va_to_seg(npv->pv_va&PV_VAMASK)]) {
3266 				printf("pmap_page_protect(%x:%x,%x): pv %x va %x not in pmap %x\n",
3267 				       (int)(pa>>32), (int)pa, prot, npv, npv->pv_va, npv->pv_pmap);
3268 				Debugger();
3269 				continue;
3270 			}
3271 #endif
3272 #endif
3273 			/* clear the entry in the page table */
3274 			data = pseg_get(npv->pv_pmap, npv->pv_va&PV_VAMASK);
3275 
3276 			/* Save ref/mod info */
3277 			if (data & TLB_ACCESS)
3278 				firstpv->pv_va |= PV_REF;
3279 			if (data & (TLB_MODIFY))
3280 				firstpv->pv_va |= PV_MOD;
3281 			if (data & TLB_TSB_LOCK) {
3282 #ifdef DIAGNOSTIC
3283 				printf("pmap_page_protect: wired page pm %p va %p not removed\n",
3284 				       npv->pv_pmap, (void *)(u_long)npv->pv_va);
3285 				printf("vm wire count %d\n",
3286 					PHYS_TO_VM_PAGE(pa)->wire_count);
3287 				pv = npv;
3288 				continue;
3289 #endif
3290 			}
3291 			/* Clear mapping */
3292 			if (pseg_set(npv->pv_pmap, npv->pv_va&PV_VAMASK, 0, 0)) {
3293 				printf("pmap_page_protect: gotten pseg empty!\n");
3294 				Debugger();
3295 				/* panic? */
3296 			}
3297 			if (npv->pv_pmap->pm_ctx || npv->pv_pmap == pmap_kernel()) {
3298 				/* clear the entry in the TSB */
3299 				i = ptelookup_va(npv->pv_va&PV_VAMASK);
3300 				/* since we already know the va for each mapping we don't need to scan the entire TSB */
3301 				if (tsb[i].tag == TSB_TAG(0, npv->pv_pmap->pm_ctx, npv->pv_va&PV_VAMASK))
3302 					tsb[i].data = 0LL;
3303 				tlb_flush_pte(npv->pv_va&PV_VAMASK, npv->pv_pmap->pm_ctx);
3304 			}
3305 			simple_unlock(&npv->pv_pmap->pm_lock);
3306 
3307 			/* free the pv */
3308 			pv->pv_next = npv->pv_next;
3309 			pool_put(&pv_pool, npv);
3310 		}
3311 
3312 		pv = firstpv;
3313 
3314 		/* Then remove the primary pv */
3315 #ifdef DEBUG
3316 		if (pv->pv_next && !pv->pv_pmap) {
3317 			printf("pmap_page_protect: npv but no pmap for pv %p\n", pv);
3318 			Debugger();
3319 		}
3320 #endif
3321 		if (pv->pv_pmap != NULL) {
3322 			simple_lock(&pv->pv_pmap->pm_lock);
3323 #ifdef DEBUG
3324 			if (pmapdebug & (PDB_CHANGEPROT|PDB_REF|PDB_REMOVE)) {
3325 				printf("pmap_page_protect: demap va %p of pa %lx from pm %p...\n",
3326 				       (void *)(u_long)pv->pv_va, (long)pa, pv->pv_pmap);
3327 			}
3328 #endif
3329 			data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
3330 			/* Save ref/mod info */
3331 			if (data & TLB_ACCESS)
3332 				pv->pv_va |= PV_REF;
3333 			if (data & (TLB_MODIFY))
3334 				pv->pv_va |= PV_MOD;
3335 			if (data & TLB_TSB_LOCK) {
3336 #ifdef DIAGNOSTIC
3337 				printf("pmap_page_protect: Removing wired page pm %p va %p\n",
3338 				       (void *)(u_long)pv->pv_pmap, (void *)(u_long)pv->pv_va);
3339 #endif
3340 			}
3341 			if (pseg_set(pv->pv_pmap, pv->pv_va&PV_VAMASK, 0, 0)) {
3342 				printf("pmap_page_protect: gotten pseg empty!\n");
3343 				Debugger();
3344 				/* panic? */
3345 			}
3346 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
3347 				i = ptelookup_va(pv->pv_va&PV_VAMASK);
3348 				/* since we already know the va for each mapping we don't need to scan the entire TSB */
3349 				if (tsb[i].tag == TSB_TAG(0, pv->pv_pmap->pm_ctx, pv->pv_va&PV_VAMASK))
3350 					tsb[i].data = 0LL;
3351 				tlb_flush_pte(pv->pv_va&PV_VAMASK, pv->pv_pmap->pm_ctx);
3352 			}
3353 			simple_unlock(&pv->pv_pmap->pm_lock);
3354 			npv = pv->pv_next;
3355 			/* dump the first pv */
3356 			if (npv) {
3357 				/* First save mod/ref bits */
3358 				pv->pv_va |= (npv->pv_va&PV_MASK);
3359 				pv->pv_next = npv->pv_next;
3360 				pv->pv_pmap = npv->pv_pmap;
3361 				pool_put(&pv_pool, npv);
3362 			} else {
3363 				pv->pv_pmap = NULL;
3364 				pv->pv_next = NULL;
3365 			}
3366 		}
3367 		dcache_flush_page(pa);
3368 		splx(s);
3369 	}
3370 	/* We should really only flush the pages we demapped. */
3371 	pv_check();
3372 }
3373 
3374 /*
3375  * count pages in pmap -- this can be slow.
3376  */
3377 int
3378 pmap_count_res(pm)
3379 	pmap_t pm;
3380 {
3381 	int i, j, k, n, s;
3382 	paddr_t *pdir, *ptbl;
3383 	/* Almost the same as pmap_collect() */
3384 
3385 	/* Don't want one of these pages reused while we're reading it. */
3386 	s = splvm();
3387 	simple_lock(&pm->pm_lock);
3388 	n = 0;
3389 	for (i=0; i<STSZ; i++) {
3390 		if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
3391 			for (k=0; k<PDSZ; k++) {
3392 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
3393 					for (j=0; j<PTSZ; j++) {
3394 						int64_t data = (int64_t)ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED);
3395 						if (data&TLB_V)
3396 							n++;
3397 					}
3398 				}
3399 			}
3400 		}
3401 	}
3402 	simple_unlock(&pm->pm_lock);
3403 	splx(s);
3404 	return n;
3405 }
3406 
3407 /*
3408  * Allocate a context.  If necessary, steal one from someone else.
3409  * Changes hardware context number and loads segment map.
3410  *
3411  * This routine is called (e.g. from pmap_activate() and pmap_pinit())
3412  * only after the user windows have been saved, so none are active here.
3413  *
3414  * The new context is flushed from the TLB before returning.
3415  */
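/*
 * Context 0 is the kernel context and is never handed out or stolen
 * here; user pmaps get contexts 1..numctx-1, scanned round-robin
 * starting from `next'.
 */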
3416 int
3417 ctx_alloc(pm)
3418 	struct pmap* pm;
3419 {
3420 	register int s, cnum;
3421 	static int next = 0;
3422 
3423 	if (pm == pmap_kernel()) {
3424 #ifdef DIAGNOSTIC
3425 		printf("ctx_alloc: kernel pmap!\n");
3426 #endif
3427 		return (0);
3428 	}
3429 	s = splvm();
3430 	cnum = next;
3431 	do {
3432 		if (cnum >= numctx-1)
3433 			cnum = 0;
3434 	} while (ctxbusy[++cnum] != NULL && cnum != next);
3435 	if (cnum==0) cnum++; /* Never steal ctx 0 */
3436 	if (ctxbusy[cnum]) {
3437 		int i;
3438 #ifdef DEBUG
3439 		/* We should identify this pmap and clear it */
3440 		printf("Warning: stealing context %d\n", cnum);
3441 		remove_stats.pidflushes ++;
3442 #endif
3443 		/* We gotta steal this context */
3444 		for (i = 0; i < TSBENTS; i++) {
3445 			if (TSB_TAG_CTX(tsb[i].tag) == cnum)
3446 				tsb[i].data = 0LL;
3447 		}
3448 		tlb_flush_ctx(cnum);
3449 	}
3450 	ctxbusy[cnum] = pm->pm_physaddr;
3451 	next = cnum;
3452 	splx(s);
3453 	pm->pm_ctx = cnum;
3454 #ifdef DEBUG
3455 	if (pmapdebug & PDB_CTX_ALLOC)
3456 		printf("ctx_alloc: allocated ctx %d\n", cnum);
3457 #endif
3458 	return cnum;
3459 }
3460 
3461 /*
3462  * Give away a context.
3463  */
3464 void
3465 ctx_free(pm)
3466 	struct pmap* pm;
3467 {
3468 	int oldctx;
3469 
3470 	oldctx = pm->pm_ctx;
3471 
3472 	if (oldctx == 0)
3473 		panic("ctx_free: freeing kernel context");
3474 #ifdef DIAGNOSTIC
3475 	if (ctxbusy[oldctx] == 0)
3476 		printf("ctx_free: freeing free context %d\n", oldctx);
3477 	if (ctxbusy[oldctx] != pm->pm_physaddr) {
3478 		printf("ctx_free: freeing someone else's context\n "
3479 		       "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
3480 		       oldctx, (void *)(u_long)ctxbusy[oldctx], pm,
3481 		       (void *)(u_long)pm->pm_physaddr);
3482 		Debugger();
3483 	}
3484 #endif
3485 	/* We should verify it has not been stolen and reallocated... */
3486 #ifdef DEBUG
3487 	if (pmapdebug & PDB_CTX_ALLOC) {
3488 		printf("ctx_free: freeing ctx %d\n", oldctx);
3489 		Debugger();
3490 	}
3491 #endif
3492 	ctxbusy[oldctx] = NULL;
3493 }
3494 
3495 /*
3496  * Enter the pmap and virtual address into the
3497  * physical to virtual map table.
3498  *
3499  * We enter here with the pmap locked.
3500  */
3501 void
3502 pmap_enter_pv(pmap, va, pa)
3503 	pmap_t pmap;
3504 	vaddr_t va;
3505 	paddr_t pa;
3506 {
3507 	pv_entry_t pv, npv;
3508 	int s;
3509 
3510 	pv = pa_to_pvh(pa);
3511 	s = splvm();
3512 #ifdef DEBUG
3513 	if (pmapdebug & PDB_ENTER)
3514 		printf("pmap_enter: pv %p: was %lx/%p/%p\n",
3515 		       pv, pv->pv_va, pv->pv_pmap, pv->pv_next);
3516 #endif
3517 	if (pv->pv_pmap == NULL) {
3518 		/*
3519 		 * No entries yet, use header as the first entry
3520 		 */
3521 #ifdef DEBUG
3522 		if (pmapdebug & PDB_ENTER)
3523 			printf("pmap_enter: first pv: pmap %p va %lx\n",
3524 				pmap, va);
3525 		enter_stats.firstpv++;
3526 #endif
3527 		PV_SETVA(pv, va);
3528 		pv->pv_pmap = pmap;
3529 		pv->pv_next = NULL;
3530 	} else {
3531 		if (!(pv->pv_va & PV_ALIAS)) {
3532 			/*
3533 			 * There is at least one other VA mapping this page.
3534 			 * Check if they are cache index compatible. If not
3535 			 * remove all mappings, flush the cache and set page
3536 			 * to be mapped uncached. Caching will be restored
3537 			 * when pages are mapped compatible again.
3538 			 * XXX - caching is not currently being restored, but
3539 			 * XXX - I haven't seen the pages uncached since
3540 			 * XXX - using pmap_prefer().	mhitch
3541 			 */
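			/*
			 * VA_ALIAS_MASK presumably covers the virtually
			 * indexed D$ index bits above the page offset, so a
			 * difference there means a cache index conflict.
			 */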
3542 			if ((pv->pv_va^va)&VA_ALIAS_MASK) {
3543 				pv->pv_va |= PV_ALIAS;
3544 				pmap_page_cache(pmap, pa, 0);
3545 #ifdef DEBUG
3546 				enter_stats.ci++;
3547 #endif
3548 			}
3549 		}
3550 		/*
3551 		 * There is at least one other VA mapping this page.
3552 		 * Place this entry after the header.
3553 		 *
3554 		 * Note: the entry may already be in the table if
3555 		 * we are only changing the protection bits.
3556 		 */
3557 		for (npv = pv; npv; npv = npv->pv_next) {
3558 			if (pmap == npv->pv_pmap && PV_MATCH(npv, va)) {
3559 #ifdef PARANOIADIAG
3560 				int64_t data;
3561 
3562 				data = pseg_get(pmap, va);
3563 				if (data >= 0 ||
3564 				    (data&TLB_PA_MASK) != pa)
3565 					printf(
3566 		"pmap_enter: found va %lx pa %lx in pv_table but != %lx\n",
3567 						va, pa, (long)data);
3568 #endif
3569 				goto fnd;
3570 			}
3571 		}
3572 #ifdef DEBUG
3573 		if (pmapdebug & PDB_ENTER)
3574 			printf("pmap_enter: new pv: pmap %p va %lx\n",
3575 				pmap, va);
3576 #endif
3577 		/*
3578 		 * XXXXX can this cause us to recurse forever?
3579 		 *
3580 		 * We need to drop the lock on the kernel_pmap
3581 		 * to do memory allocation.  But that should not
3582 		 * cause any real problems unless someone tries to
3583 		 * touch the particular mapping we're adding.
3584 		 */
3585 		npv = pool_get(&pv_pool, PR_NOWAIT);
3586 		if (npv == NULL)
3587 			panic("pmap_enter: new pv pool_get() failed");
3588 		npv->pv_va = va&PV_VAMASK;
3589 		npv->pv_pmap = pmap;
3590 		npv->pv_next = pv->pv_next;
3591 		pv->pv_next = npv;
3592 #ifdef DEBUG
3593 		if (!npv->pv_next)
3594 			enter_stats.secondpv++;
3595 #endif
3596 	fnd:
3597 		;
3598 	}
3599 	splx(s);
3600 }
3601 
3602 /*
3603  * Remove a physical to virtual address translation.
3604  */
3605 
3606 void
3607 pmap_remove_pv(pmap, va, pa)
3608 	pmap_t pmap;
3609 	vaddr_t va;
3610 	paddr_t pa;
3611 {
3612 	register pv_entry_t pv, npv, opv;
3613 	int64_t data = 0LL;
3614 	int s;
3615 
3616 #ifdef DEBUG
3617 	if (pmapdebug & (PDB_REMOVE))
3618 		printf("pmap_remove_pv(pm=%p, va=%p, pa=%llx)\n", pmap,
3619 		    (void *)(u_long)va, (unsigned long long)pa);
3620 #endif
3621 	/*
3622 	 * Remove page from the PV table (raise IPL since we
3623 	 * may be called at interrupt time).
3624 	 */
3625 	pv_check();
3626 	opv = pv = pa_to_pvh(pa);
3627 	s = splvm();
3628 	/*
3629 	 * If it is the first entry on the list, it is actually
3630 	 * in the header and we must copy the following entry up
3631 	 * to the header.  Otherwise we must search the list for
3632 	 * the entry.  In either case we free the now unused entry.
3633 	 */
3634 	if (pmap == pv->pv_pmap && PV_MATCH(pv,va)) {
3635 		/* Save modified/ref bits */
3636 		data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
3637 		npv = pv->pv_next;
3638 		if (npv) {
3639 			/* First save mod/ref bits */
3640 			pv->pv_va = (pv->pv_va&PV_MASK) | npv->pv_va;
3641 			pv->pv_next = npv->pv_next;
3642 			pv->pv_pmap = npv->pv_pmap;
3643 			pool_put(&pv_pool, npv);
3644 		} else {
3645 			pv->pv_pmap = NULL;
3646 			pv->pv_next = NULL;
3647 			pv->pv_va &= (PV_REF|PV_MOD); /* Only save ref/mod bits */
3648 		}
3649 #ifdef DEBUG
3650 		remove_stats.pvfirst++;
3651 #endif
3652 	} else {
3653 		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
3654 #ifdef DEBUG
3655 			remove_stats.pvsearch++;
3656 #endif
3657 			if (pmap == npv->pv_pmap && PV_MATCH(npv,va))
3658 				goto fnd;
3659 		}
3660 
3661 		/*
3662 		 * Sometimes UVM gets confused and calls pmap_remove() instead
3663 		 * of pmap_kremove()
3664 		 */
3665 		splx(s);
		return;
3666 #ifdef DIAGNOSTIC
3667 		printf("pmap_remove_pv(%lx, %x, %x) not found\n", (u_long)pmap, (u_int)va, (u_int)pa);
3668 
3669 		Debugger();
3670 		splx(s);
3671 		return;
3672 #endif
3673 	fnd:
3674 		pv->pv_next = npv->pv_next;
3675 		/*
3676 		 * move any referenced/modified info to the base pv
3677 		 */
3678 		data = pseg_get(npv->pv_pmap, npv->pv_va&PV_VAMASK);
3679 		/*
3680 		 * Here, if this page was aliased, we should try clear out any
3681 		 * alias that may have occurred.  However, that's a complicated
3682 		 * operation involving multiple scans of the pv list.
3683 		 */
3684 		pool_put(&pv_pool, npv);
3685 	}
3686 
3687 	/* Save ref/mod info */
3688 	if (data & TLB_ACCESS)
3689 		opv->pv_va |= PV_REF;
3690 	if (data & (TLB_MODIFY))
3691 		opv->pv_va |= PV_MOD;
3692 
3693 	/* Check to see if the alias went away */
3694 	if (opv->pv_va & PV_ALIAS) {
3695 		opv->pv_va &= ~PV_ALIAS;
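		/*
		 * Note (assumption): two mappings are considered
		 * cache-aliased when their virtual addresses differ in the
		 * bits covered by VA_ALIAS_MASK; the loop below re-sets
		 * PV_ALIAS if any surviving mapping still conflicts with
		 * the head entry.
		 */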
3696 		for (npv = opv; npv; npv = npv->pv_next) {
3697 			if ((npv->pv_va^opv->pv_va)&VA_ALIAS_MASK) {
3698 				opv->pv_va |= PV_ALIAS;
3699 			}
3700 		}
3701 		if (!(opv->pv_va & PV_ALIAS))
3702 			pmap_page_cache(pmap, pa, 1);
3703 	}
3704 	splx(s);
3705 	pv_check();
3706 }
3707 
3708 /*
3709  *	pmap_page_cache:
3710  *
3711  *	Change all mappings of a page to cached/uncached.
3712  */
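/*
 * Summary of the loop below (editor's reading, not from the original
 * author): each mapping on the page's pv list is handled one of three
 * ways -- I/O (PV_NC) mappings stay uncached, TLB_CV is set when mode is
 * non-zero and the mapping is not marked PV_NVC, and TLB_CV is cleared
 * otherwise -- then the matching TSB and TLB entries are invalidated so
 * the new bits take effect.
 */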
3713 void
3714 pmap_page_cache(pm, pa, mode)
3715 	struct pmap *pm;
3716 	paddr_t pa;
3717 	int mode;
3718 {
3719 	pv_entry_t pv;
3720 	int i, s;
3721 
3722 #ifdef DEBUG
3723 	if (pmapdebug & (PDB_ENTER))
3724 		printf("pmap_page_cache(%llx)\n", (unsigned long long)pa);
3725 #endif
3726 	if (!IS_VM_PHYSADDR(pa))
3727 		return;
3728 
3729 	pv = pa_to_pvh(pa);
3730 	s = splvm();
3731 
3732 	while (pv) {
3733 		vaddr_t va;
3734 
3735 		va = (pv->pv_va & PV_VAMASK);
3736 		if (pv->pv_pmap != pm)
3737 			simple_lock(&pv->pv_pmap->pm_lock);
3738 		if (pv->pv_va & PV_NC) {
3739 			/* Non-cached -- I/O mapping */
3740 			if (pseg_set(pv->pv_pmap, va,
3741 				     pseg_get(pv->pv_pmap, va) & ~(TLB_CV|TLB_CP),
3742 				     0)) {
3743 				printf("pmap_page_cache: aliased pseg empty!\n");
3744 				Debugger();
3745 				/* panic? */
3746 			}
3747 		} else if (mode && (!(pv->pv_va & PV_NVC))) {
3748 			/* Enable caching */
3749 			if (pseg_set(pv->pv_pmap, va,
3750 				     pseg_get(pv->pv_pmap, va) | TLB_CV, 0)) {
3751 				printf("pmap_page_cache: aliased pseg empty!\n");
3752 				Debugger();
3753 				/* panic? */
3754 			}
3755 		} else {
3756 			/* Disable caching */
3757 			if (pseg_set(pv->pv_pmap, va,
3758 				     pseg_get(pv->pv_pmap, va) & ~TLB_CV, 0)) {
3759 				printf("pmap_page_cache: aliased pseg empty!\n");
3760 				Debugger();
3761 				/* panic? */
3762 			}
3763 		}
3764 		if (pv->pv_pmap != pm)
3765 			simple_unlock(&pv->pv_pmap->pm_lock);
3766 		if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
3767 			i = ptelookup_va(va);
3768 			if (tsb[i].tag > 0 && tsb[i].tag ==
3769 			    TSB_TAG(0, pv->pv_pmap->pm_ctx, va)) {
3770 				/*
3771 				 * Invalidate the TSB
3772 				 *
3773 				 * While we can invalidate it by clearing the
3774 				 * valid bit:
3775 				 *
3776 				 * ptp->data_v = 0;
3777 				 *
3778 				 * it is faster simply to store one zeroed doubleword.
3779 				 */
3780 				tsb[i].data = 0LL;
3781 				ASSERT((tsb[i].data & TLB_NFO) == 0);
3782 			}
3783 			/* Force reload -- protections may be changed */
3784 			tlb_flush_pte(va, pv->pv_pmap->pm_ctx);
3785 		}
3786 
3787 		pv = pv->pv_next;
3788 	}
3789 
3790 	splx(s);
3791 }
3792 
3793 /*
3794  *	vm_page_alloc1:
3795  *
3796  *	Allocate and return a memory cell with no associated object.
3797  */
3798 struct vm_page *
3799 vm_page_alloc1()
3800 {
3801 	struct vm_page *pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_USERESERVE);
3802 	if (pg) {
3803 		pg->wire_count = 1;	/* no mappings yet */
3804 		pg->flags &= ~PG_BUSY;	/* never busy */
3805 	}
3806 	return pg;
3807 }
3808 
3809 /*
3810  *	vm_page_free1:
3811  *
3812  *	Returns the given page to the free list,
3813  *	disassociating it with any VM object.
3814  *
3815  *	Object and page must be locked prior to entry.
3816  */
3817 void
3818 vm_page_free1(mem)
3819 	struct vm_page *mem;
3820 {
3821 	if (mem->flags != (PG_CLEAN|PG_FAKE)) {
3822 		printf("Freeing invalid page %p\n", mem);
3823 		printf("pa = %llx\n", (unsigned long long)VM_PAGE_TO_PHYS(mem));
3824 		Debugger();
3825 		return;
3826 	}
3827 	mem->flags |= PG_BUSY;
3828 	mem->wire_count = 0;
3829 	uvm_pagefree(mem);
3830 }
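/*
 * Minimal usage sketch for the two helpers above (assumption, mirroring
 * what pmap_testout() does below):
 *
 *	struct vm_page *pg = vm_page_alloc1();
 *	paddr_t pa = VM_PAGE_TO_PHYS(pg);
 *	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
 *	pmap_update(pmap_kernel());
 *	...
 *	pmap_remove(pmap_kernel(), va, va + 1);
 *	pmap_update(pmap_kernel());
 *	vm_page_free1(pg);
 */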
3831 
3832 #ifdef DDB
3833 
3834 void db_dump_pv __P((db_expr_t, int, db_expr_t, char *));
3835 void
3836 db_dump_pv(addr, have_addr, count, modif)
3837 	db_expr_t addr;
3838 	int have_addr;
3839 	db_expr_t count;
3840 	char *modif;
3841 {
3842 	struct pv_entry *pv;
3843 
3844 	if (!have_addr) {
3845 		db_printf("Need addr for pv\n");
3846 		return;
3847 	}
3848 
3849 	for (pv = pa_to_pvh(addr); pv; pv = pv->pv_next)
3850 		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
3851 			  pv, pv->pv_next, pv->pv_pmap,
3852 			  (unsigned long long)pv->pv_va);
3853 
3854 }
3855 
3856 #endif
3857 
3858 #ifdef DEBUG
3859 /*
3860  * Test ref/modify handling.
3861  */
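/*
 * Outline of the routine below: map a freshly allocated page into the
 * kernel, then alternately read, write, pmap_protect(),
 * pmap_page_protect() and pmap_remove() it, clearing and printing the
 * referenced/modified state at each step so the ref/mod bookkeeping can
 * be checked by eye.
 */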
3862 void pmap_testout __P((void));
3863 void
3864 pmap_testout()
3865 {
3866 	vaddr_t va;
3867 	volatile int *loc;
3868 	int val = 0;
3869 	paddr_t pa;
3870 	struct vm_page *pg;
3871 	int ref, mod;
3872 
3873 	/* Allocate a page */
3874 	va = (vaddr_t)(vmmap - NBPG);
3875 	ASSERT(va != 0);
3876 	loc = (int*)va;
3877 
3878 	pg = vm_page_alloc1();
3879 	pa = (paddr_t)VM_PAGE_TO_PHYS(pg);
3880 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3881 	pmap_update(pmap_kernel());
3882 
3883 	/* Now clear reference and modify */
3884 	ref = pmap_clear_reference(pg);
3885 	mod = pmap_clear_modify(pg);
3886 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3887 	       (void *)(u_long)va, (long)pa,
3888 	       ref, mod);
3889 
3890 	/* Check it's properly cleared */
3891 	ref = pmap_is_referenced(pg);
3892 	mod = pmap_is_modified(pg);
3893 	printf("Checking cleared page: ref %d, mod %d\n",
3894 	       ref, mod);
3895 
3896 	/* Reference page */
3897 	val = *loc;
3898 
3899 	ref = pmap_is_referenced(pg);
3900 	mod = pmap_is_modified(pg);
3901 	printf("Referenced page: ref %d, mod %d val %x\n",
3902 	       ref, mod, val);
3903 
3904 	/* Now clear reference and modify */
3905 	ref = pmap_clear_reference(pg);
3906 	mod = pmap_clear_modify(pg);
3907 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3908 	       (void *)(u_long)va, (long)pa,
3909 	       ref, mod);
3910 
3911 	/* Modify page */
3912 	*loc = 1;
3913 
3914 	ref = pmap_is_referenced(pg);
3915 	mod = pmap_is_modified(pg);
3916 	printf("Modified page: ref %d, mod %d\n",
3917 	       ref, mod);
3918 
3919 	/* Now clear reference and modify */
3920 	ref = pmap_clear_reference(pg);
3921 	mod = pmap_clear_modify(pg);
3922 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3923 	       (void *)(u_long)va, (long)pa,
3924 	       ref, mod);
3925 
3926 	/* Check it's properly cleared */
3927 	ref = pmap_is_referenced(pg);
3928 	mod = pmap_is_modified(pg);
3929 	printf("Checking cleared page: ref %d, mod %d\n",
3930 	       ref, mod);
3931 
3932 	/* Modify page */
3933 	*loc = 1;
3934 
3935 	ref = pmap_is_referenced(pg);
3936 	mod = pmap_is_modified(pg);
3937 	printf("Modified page: ref %d, mod %d\n",
3938 	       ref, mod);
3939 
3940 	/* Check pmap_protect() */
3941 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_READ);
3942 	pmap_update(pmap_kernel());
3943 	ref = pmap_is_referenced(pg);
3944 	mod = pmap_is_modified(pg);
3945 	printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n",
3946 	       ref, mod);
3947 
3948 	/* Now clear reference and modify */
3949 	ref = pmap_clear_reference(pg);
3950 	mod = pmap_clear_modify(pg);
3951 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3952 	       (void *)(u_long)va, (long)pa,
3953 	       ref, mod);
3954 
3955 	/* Modify page */
3956 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3957 	pmap_update(pmap_kernel());
3958 	*loc = 1;
3959 
3960 	ref = pmap_is_referenced(pg);
3961 	mod = pmap_is_modified(pg);
3962 	printf("Modified page: ref %d, mod %d\n",
3963 	       ref, mod);
3964 
3965 	/* Check pmap_protect() */
3966 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_NONE);
3967 	pmap_update(pmap_kernel());
3968 	ref = pmap_is_referenced(pg);
3969 	mod = pmap_is_modified(pg);
3970 	printf("pmap_protect(VM_PROT_NONE): ref %d, mod %d\n",
3971 	       ref, mod);
3972 
3973 	/* Now clear reference and modify */
3974 	ref = pmap_clear_reference(pg);
3975 	mod = pmap_clear_modify(pg);
3976 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3977 	       (void *)(u_long)va, (long)pa,
3978 	       ref, mod);
3979 
3980 	/* Modify page */
3981 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3982 	pmap_update(pmap_kernel());
3983 	*loc = 1;
3984 
3985 	ref = pmap_is_referenced(pg);
3986 	mod = pmap_is_modified(pg);
3987 	printf("Modified page: ref %d, mod %d\n",
3988 	       ref, mod);
3989 
3990 	/* Check pmap_page_protect() */
3991 	pmap_page_protect(pg, VM_PROT_READ);
3992 	ref = pmap_is_referenced(pg);
3993 	mod = pmap_is_modified(pg);
3994 	printf("pmap_page_protect(VM_PROT_READ): ref %d, mod %d\n",
3995 	       ref, mod);
3996 
3997 	/* Now clear reference and modify */
3998 	ref = pmap_clear_reference(pg);
3999 	mod = pmap_clear_modify(pg);
4000 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
4001 	       (void *)(u_long)va, (long)pa,
4002 	       ref, mod);
4003 
4004 
4005 	/* Modify page */
4006 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
4007 	pmap_update(pmap_kernel());
4008 	*loc = 1;
4009 
4010 	ref = pmap_is_referenced(pg);
4011 	mod = pmap_is_modified(pg);
4012 	printf("Modified page: ref %d, mod %d\n",
4013 	       ref, mod);
4014 
4015 	/* Check pmap_page_protect() */
4016 	pmap_page_protect(pg, VM_PROT_NONE);
4017 	ref = pmap_is_referenced(pg);
4018 	mod = pmap_is_modified(pg);
4019 	printf("pmap_page_protect(VM_PROT_NONE): ref %d, mod %d\n",
4020 	       ref, mod);
4021 
4022 	/* Now clear reference and modify */
4023 	ref = pmap_clear_reference(pg);
4024 	mod = pmap_clear_modify(pg);
4025 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
4026 	       (void *)(u_long)va, (long)pa,
4027 	       ref, mod);
4028 
4029 	/* Unmap page */
4030 	pmap_remove(pmap_kernel(), va, va+1);
4031 	pmap_update(pmap_kernel());
4032 	ref = pmap_is_referenced(pg);
4033 	mod = pmap_is_modified(pg);
4034 	printf("Unmapped page: ref %d, mod %d\n", ref, mod);
4035 
4036 	/* Now clear reference and modify */
4037 	ref = pmap_clear_reference(pg);
4038 	mod = pmap_clear_modify(pg);
4039 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
4040 	       (void *)(u_long)va, (long)pa, ref, mod);
4041 
4042 	/* Check it's properly cleared */
4043 	ref = pmap_is_referenced(pg);
4044 	mod = pmap_is_modified(pg);
4045 	printf("Checking cleared page: ref %d, mod %d\n",
4046 	       ref, mod);
4047 
4048 	pmap_remove(pmap_kernel(), va, va+1);
4049 	pmap_update(pmap_kernel());
4050 	vm_page_free1(pg);
4051 }
4052 #endif
4053