xref: /netbsd/sys/arch/sparc64/sparc64/pmap.c (revision bf9ec67e)
1 /*	$NetBSD: pmap.c,v 1.120 2002/05/18 00:51:15 eeh Exp $	*/
2 #undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
3 #define	HWREF
4 /*
5  *
6  * Copyright (C) 1996-1999 Eduardo Horvath.
7  * All rights reserved.
8  *
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include "opt_ddb.h"
31 
32 #include <sys/param.h>
33 #include <sys/malloc.h>
34 #include <sys/queue.h>
35 #include <sys/systm.h>
36 #include <sys/msgbuf.h>
37 #include <sys/lock.h>
38 #include <sys/pool.h>
39 #include <sys/exec.h>
40 #include <sys/core.h>
41 #include <sys/kcore.h>
42 #include <sys/proc.h>
43 
44 #include <uvm/uvm.h>
45 
46 #include <machine/pcb.h>
47 #include <machine/sparc64.h>
48 #include <machine/ctlreg.h>
49 #include <machine/openfirm.h>
50 #include <machine/kcore.h>
51 
52 #include "cache.h"
53 
54 #ifdef DDB
55 #include <machine/db_machdep.h>
56 #include <ddb/db_command.h>
57 #include <ddb/db_sym.h>
58 #include <ddb/db_variables.h>
59 #include <ddb/db_extern.h>
60 #include <ddb/db_access.h>
61 #include <ddb/db_output.h>
62 #else
63 #define Debugger()
64 #define db_printf	printf
65 #endif
66 
67 #define	MEG		(1<<20) /* 1MB */
68 #define	KB		(1<<10)	/* 1KB */
69 
70 paddr_t cpu0paddr;/* XXXXXXXXXXXXXXXX */
71 
72 extern int64_t asmptechk __P((int64_t *pseg[], int addr)); /* DEBUG XXXXX */
73 
74 #if 0
75 static int pseg_check __P((struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare));
76 static int
77 pseg_check(struct pmap *pm, vaddr_t addr, int64_t tte, paddr_t spare)
78 {
79 	int i, k, s;
80 	paddr_t *pdir, *ptbl;
81 	extern int pseg_set __P((struct pmap*, vaddr_t addr, int64_t tte,
82 		paddr_t spare));
83 
84 	if (!spare) return pseg_set(pm, addr, tte, spare);
85 
86 	s = splvm();
87 	if ((paddr_t)pm->pm_segs == spare) panic("pseg_check: pm_segs == %llx\n", spare);
88 	for (i=0; i<STSZ; i++) {
89 		if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
90 			if ((paddr_t)pdir == spare)
91 				panic("pseg_check: pdir %d == %llx\n", i,
92 					spare);
93 			for (k=0; k<PDSZ; k++) {
94 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
95 					if ((paddr_t)ptbl == spare)
96 						panic("pseg_check: ptbl %d:%d == %llx\n",
97 							i, k, spare);
98 				}
99 			}
100 		}
101 	}
102 	splx(s);
103 	if (addr == -1) return 0;
104 	return pseg_set(pm, addr, tte, spare);
105 }
106 #define pseg_check(a, b, c, d)
107 #define cache_flush_phys(a, b, c)
108 /* #define pseg_set(a, b, c, d)	pseg_check(a, b, c, d) */
109 #endif
110 
111 /* These routines are in assembly to allow access thru physical mappings */
112 #if 1
113 extern int64_t pseg_get __P((struct pmap*, vaddr_t addr));
114 extern int pseg_set __P((struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare));
115 extern paddr_t pseg_find __P((struct pmap*, vaddr_t addr, paddr_t spare));
116 #else
117 static int64_t pseg_get __P((struct pmap*, vaddr_t addr));
118 static int pseg_set __P((struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare));
119 static paddr_t pseg_find __P((struct pmap*, vaddr_t addr, paddr_t spare));
120 
121 static int64_t pseg_get(struct pmap* pm, vaddr_t addr) {
122 	paddr_t *pdir, *ptbl;
123 
124 	if ((pdir = (paddr_t *)ldda(&pm->pm_segs[va_to_seg(addr)],
125 				    ASI_PHYS_CACHED)) &&
126 	    (ptbl = (paddr_t *)ldda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED)))
127 		return (ldda(&ptbl[va_to_pte(addr)], ASI_PHYS_CACHED));
128 	return (0);
129 }
130 
131 static int pseg_set(struct pmap* pm, vaddr_t addr, int64_t tte, paddr_t spare) {
132 	int i, j, k, s;
133 	paddr_t *pdir, *ptbl;
134 
135 	if (!(pdir = (paddr_t *)ldda(&pm->pm_segs[va_to_seg(addr)],
136 	    ASI_PHYS_CACHED))) {
137 		if (!spare) return (1);
138 		stda(&pm->pm_segs[va_to_seg(addr)], ASI_PHYS_CACHED, spare);
139 		pdir = spare;
140 		spare = NULL;
141 	}
142 	if (!(ptbl = (paddr_t *)ldda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED))) {
143 		if (!spare) return (1);
144 		stda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED, spare);
145 		ptbl = spare;
146 		spare = NULL;
147 	}
148 	stda(&ptbl[va_to_pte(addr)], ASI_PHYS_CACHED, tte);
149 	return (0);
150 }
151 
152 static paddr_t pseg_find(struct pmap* pm, vaddr_t addr, paddr_t spare) {
153 	int i, j, k, s;
154 	paddr_t *pdir, *ptbl;
155 
156 	if (!(pdir = (paddr_t *)ldda(&pm->pm_segs[va_to_seg(addr)],
157 	    ASI_PHYS_CACHED))) {
158 		if (!spare) return (1);
159 		stda(&pm->pm_segs[va_to_seg(addr)], ASI_PHYS_CACHED, spare);
160 		pdir = spare;
161 		spare = NULL;
162 	}
163 	if (!(ptbl = (paddr_t *)ldda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED))) {
164 		if (!spare) return (1);
165 		stda(&pdir[va_to_dir(addr)], ASI_PHYS_CACHED, spare);
166 		ptbl = spare;
167 		spare = NULL;
168 	}
169 	return (paddr_t)(&ptbl[va_to_pte(addr)]);
170 }
171 
172 
173 #endif
174 
175 
176 #ifdef DEBUG
177 #ifdef __STDC__
178 #define	ASSERT(x)	\
179 	if (!(x)) panic("%s at line %d: assertion failed\n", #x, __LINE__);
180 #else
181 #define	ASSERT(x)	\
182 	if (!(x)) panic("%s at line %d: assertion failed\n", "x", __LINE__);
183 #endif
184 #else
185 #define ASSERT(x)
186 #endif
187 
188 /*
189  * For each struct vm_page, there is a list of all currently valid virtual
190  * mappings of that page.  An entry is a pv_entry_t, the list is pv_table.
191  * XXX really should do this as a part of the higher level code.
192  */
193 typedef struct pv_entry {
194 	struct pv_entry	*pv_next;	/* next pv_entry */
195 	struct pmap	*pv_pmap;	/* pmap where mapping lies */
196 	vaddr_t	pv_va;		/* virtual address for mapping */
197 } *pv_entry_t;
198 /* PV flags encoded in the low bits of the VA of the first pv_entry */
199 
200 /*
201  * Diatribe on ref/mod counting:
202  *
203  * First of all, ref/mod info must be non-volatile.  Hence we need to keep it
204  * in the pv_entry structure for each page.  (We could bypass this for the
205  * vm_page, but that's a long story....)
206  *
207  * This architecture has nice, fast traps with lots of space for software bits
208  * in the TTE.  To accelerate ref/mod counts we make use of these features.
209  *
210  * When we map a page initially, we place a TTE in the page table.  It's
211  * inserted with the TLB_W and TLB_ACCESS bits cleared.  If a page is really
212  * writeable we set the TLB_REAL_W bit for the trap handler.
213  *
214  * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
215  * bit in the appropriate TTE in the page table.  Whenever we take a protection
216  * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
217  * bits to enable writing and mark the page as modified.
218  *
219  * This means that we may have ref/mod information all over the place.  The
220  * pmap routines must traverse the page tables of all pmaps with a given page
221  * and collect/clear all the ref/mod information and copy it into the pv_entry.
222  */
223 
224 #ifdef	NO_VCACHE
225 #define	FORCE_ALIAS	1
226 #else
227 #define FORCE_ALIAS	0
228 #endif
229 
230 #define	PV_ALIAS	0x1LL
231 #define PV_REF		0x2LL
232 #define PV_MOD		0x4LL
233 #define PV_NVC		0x8LL
234 #define PV_NC		0x10LL
235 #define PV_WE		0x20LL		/* Debug -- track if this page was ever writable */
236 #define PV_MASK		(0x03fLL)
237 #define PV_VAMASK	(~(NBPG-1))
238 #define PV_MATCH(pv,va)	(!((((pv)->pv_va)^(va))&PV_VAMASK))
239 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va)&PV_VAMASK)|(((pv)->pv_va)&PV_MASK)))
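/*
 * Example of the encoding: the low bits of the head pv_va hold the flags
 * (PV_MASK == 0x3f) and the bits above NBPG-1 hold the page-aligned VA,
 * so a pv_va of 0x2000006 means VA 0x2000000 with PV_REF|PV_MOD set.
 * Folding ref/mod bits out of a TTE into the pv_entry (as pmap_release()
 * does below) looks like:
 *
 *	if (data & TLB_ACCESS)
 *		pv->pv_va |= PV_REF;
 *	if (data & TLB_MODIFY)
 *		pv->pv_va |= PV_MOD;
 */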
240 
241 pv_entry_t	pv_table;	/* array of entries, one per page */
242 static struct pool pv_pool;
243 extern void	pmap_remove_pv __P((struct pmap *pm, vaddr_t va, paddr_t pa));
244 extern void	pmap_enter_pv __P((struct pmap *pm, vaddr_t va, paddr_t pa));
245 extern void	pmap_page_cache __P((struct pmap *pm, paddr_t pa, int mode));
246 
247 /*
248  * First and last managed physical addresses.  XXX only used for dumping the system.
249  */
250 paddr_t	vm_first_phys, vm_num_phys;
251 
252 u_int64_t first_phys_addr;
253 #define pa_index(pa)		atop((pa) - first_phys_addr)
254 #define	pa_to_pvh(pa)							\
255 ({									\
256 	int bank_, pg_;							\
257 									\
258 	bank_ = vm_physseg_find(atop((pa)), &pg_);			\
259 	(pv_entry_t)&vm_physmem[bank_].pmseg.pvent[pg_];		\
260 })
261 
262 
263 
264 /*
265  * Here's the CPU TSB stuff.  It's allocated in pmap_bootstrap.
266  */
267 pte_t *tsb;
268 int tsbsize;		/* tsbents = 512 * 2^tsbsize */
269 #define TSBENTS (512<<tsbsize)
270 #define	TSBSIZE	(TSBENTS * 16)
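/*
 * Worked example: each TSB entry is 16 bytes, so tsbsize == 0 gives
 * 512 entries in an 8KB TSB, while tsbsize == 2 (chosen for machines
 * with 512MB or more of RAM, see pmap_bootstrap) gives 2048 entries
 * in a 32KB TSB.
 */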
271 
272 struct pmap kernel_pmap_;
273 
274 int physmem;
275 /*
276  * Virtual and physical addresses of the start and end of kernel text
277  * and data segments.
278  */
279 vaddr_t ktext;
280 paddr_t ktextp;
281 vaddr_t ektext;
282 paddr_t ektextp;
283 vaddr_t kdata;
284 paddr_t kdatap;
285 vaddr_t ekdata;
286 paddr_t ekdatap;
287 
288 static int npgs;
289 static u_int nextavail;
290 static struct mem_region memlist[8]; /* Pick a random size here */
291 
292 vaddr_t	vmmap;			/* one reserved MI vpage for /dev/mem */
293 
294 struct mem_region *mem, *avail, *orig;
295 int memsize;
296 
297 static int memh = 0, vmemh = 0;	/* Handles to OBP devices */
298 
299 int avail_start, avail_end;	/* These are used by ps & family */
300 
301 static int ptelookup_va __P((vaddr_t va)); /* sun4u */
302 #if notyet
303 static void tsb_enter __P((int ctx, int64_t va, int64_t data));
304 #endif
305 static void pmap_pinit __P((struct pmap *));
306 static void pmap_release __P((pmap_t));
307 
308 struct pmap_stats {
309 	int	ps_unlink_pvfirst;	/* # of pv_unlinks on head */
310 	int	ps_unlink_pvsearch;	/* # of pv_unlink searches */
311 	int	ps_changeprots;		/* # of calls to changeprot */
312 	int	ps_useless_changeprots;	/* # of changeprots for wiring */
313 	int	ps_enter_firstpv;	/* pv heads entered */
314 	int	ps_enter_secondpv;	/* pv nonheads entered */
315 	int	ps_useless_changewire;	/* useless wiring changes */
316 	int	ps_npg_prot_all;	/* # of active pages protected */
317 	int	ps_npg_prot_actual;	/* # pages actually affected */
318 } pmap_stats;
319 
320 struct prom_map *prom_map;
321 int prom_map_size;
322 
323 #ifdef DEBUG
324 struct {
325 	int kernel;	/* entering kernel mapping */
326 	int user;	/* entering user mapping */
327 	int ptpneeded;	/* needed to allocate a PT page */
328 	int pwchange;	/* no mapping change, just wiring or protection */
329 	int wchange;	/* no mapping change, just wiring */
330 	int mchange;	/* was mapped but mapping to different page */
331 	int managed;	/* a managed page */
332 	int firstpv;	/* first mapping for this PA */
333 	int secondpv;	/* second mapping for this PA */
334 	int ci;		/* cache inhibited */
335 	int unmanaged;	/* not a managed page */
336 	int flushes;	/* cache flushes */
337 	int cachehit;	/* new entry forced valid entry out */
338 } enter_stats;
339 struct {
340 	int calls;
341 	int removes;
342 	int flushes;
343 	int tflushes;	/* TLB flushes */
344 	int pidflushes;	/* HW pid stolen */
345 	int pvfirst;
346 	int pvsearch;
347 } remove_stats;
348 #define	PDB_CREATE	0x0001
349 #define	PDB_DESTROY	0x0002
350 #define	PDB_REMOVE	0x0004
351 #define	PDB_CHANGEPROT	0x0008
352 #define	PDB_ENTER	0x0010
353 #define PDB_DEMAP	0x0020
354 #define	PDB_REF		0x0040
355 #define PDB_COPY	0x0080
356 
357 #define	PDB_MMU_ALLOC	0x0100
358 #define	PDB_MMU_STEAL	0x0200
359 #define	PDB_CTX_ALLOC	0x0400
360 #define	PDB_CTX_STEAL	0x0800
361 #define	PDB_MMUREG_ALLOC	0x1000
362 #define	PDB_MMUREG_STEAL	0x2000
363 #define	PDB_CACHESTUFF	0x4000
364 #define	PDB_ALIAS	0x8000
365 #define PDB_EXTRACT	0x10000
366 #define	PDB_BOOT	0x20000
367 #define	PDB_BOOT1	0x40000
368 #define	PDB_GROW	0x80000
369 int	pmapdebug = 0;
370 /* Number of H/W pages stolen for page tables */
371 int	pmap_pages_stolen = 0;
372 
373 #define	BDPRINTF(n, f)	if (pmapdebug & (n)) prom_printf f
374 #define	DPRINTF(n, f)	if (pmapdebug & (n)) printf f
375 #else
376 #define	BDPRINTF(n, f)
377 #define	DPRINTF(n, f)
378 #endif
379 
380 #ifdef NOTDEF_DEBUG
381 void pv_check __P((void));
382 void
383 pv_check()
384 {
385 	int i, j, s;
386 
387 	s = splhigh();
388 	for (i = 0; i < physmem; i++) {
389 		struct pv_entry *pv;
390 		for (pv = &pv_table[i]; pv; pv = pv->pv_next) {
391 			if (pv->pv_pmap &&
392 			    !(pseg_get(pv->pv_pmap, pv->pv_va)&TLB_V)) {
393 		printf("pv_check(): unreferenced pv#%d pa=%lx va=%lx pm=%p\n",
394 		       i, (u_long)ptoa(first_phys_addr+i), (u_long)pv->pv_va, pv->pv_pmap);
395 				Debugger();
396 			}
397 		}
398 	}
399 	splx(s);
400 }
401 #else
402 #define pv_check()
403 #endif
404 
405 /*
406  *
407  * A context is simply a small number that differentiates multiple mappings
408  * of the same address.  Contexts on the spitfire are 13 bits, but could
409  * be as large as 17 bits.
410  *
411  * Each context is either free or attached to a pmap.
412  *
413  * The context table is an array of pointers to psegs.  Just dereference
414  * the right pointer and you get to the pmap segment tables.  These are
415  * physical addresses, of course.
416  *
417  */
418 paddr_t *ctxbusy;
419 int numctx;
420 #define CTXENTRY	(sizeof(paddr_t))
421 #define CTXSIZE		(numctx*CTXENTRY)
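/*
 * For example, with the 13-bit spitfire contexts mentioned above,
 * numctx == 8192 and CTXSIZE == 8192 * sizeof(paddr_t) == 64KB, which
 * is where the "ctxbusy table takes about 64KB" estimate in
 * pmap_bootstrap comes from.
 */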
422 
423 static int pmap_get_page(paddr_t *p, char *wait);
424 static void pmap_free_page(paddr_t pa);
425 
426 
427 /*
428  * Support for big page sizes.  This maps the page size to the
429  * page bits.  That is: these are the bits between 8K pages and
430  * larger page sizes that cause aliasing.
431  */
432 struct page_size_map page_size_map[] = {
433 #ifdef DEBUG
434 	{ 0, PGSZ_8K&0  },	/* Disable large pages */
435 #endif
436 	{ (4*1024*1024-1) & ~(8*1024-1), PGSZ_4M },
437 	{ (512*1024-1) & ~(8*1024-1), PGSZ_512K  },
438 	{ (64*1024-1) & ~(8*1024-1), PGSZ_64K  },
439 	{ (8*1024-1) & ~(8*1024-1), PGSZ_8K  },
440 	{ 0, PGSZ_8K&0  }
441 };
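/*
 * The masks above are (pagesize - 1) with the 8K page offset bits cleared,
 * i.e. exactly the VA/PA bits that must match for the larger page size to
 * be usable without aliasing:
 *
 *	4MB	0x3fe000	512KB	0x07e000
 *	64KB	0x00e000	8KB	0x000000
 */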
442 
443 /*
444  * Calculate the largest page size that will map this.
445  *
446  * You really need to do this both on VA and PA.
447  */
448 #define	PMAP_PAGE_SIZE(va, pa, len, pgsz, pglen)			\
449 do {									\
450 	for ((pgsz) = PGSZ_4M; (pgsz); (pgsz)--) {			\
451 		(pglen) = PG_SZ(pgsz);					\
452 									\
453 		if (((len) >= (pgsz)) &&				\
454 			((pa) & ((pglen)-1) & ~PG_SZ(PGSZ_8K)) == 0 &&	\
455 			((va) & ((pglen)-1) & ~PG_SZ(PGSZ_8K)) == 0)	\
456 			break;						\
457 	}								\
458 	(pgsz) = 0;							\
459 	(pglen) = PG_SZ(pgsz);						\
460 } while (0)
461 
462 
463 /*
464  * Enter a TTE into the kernel pmap only.  Don't do anything else.
465  *
466  * Use only during bootstrapping since it does no locking and
467  * can lose ref/mod info!!!!
468  *
469  */
470 static void pmap_enter_kpage __P((vaddr_t, int64_t));
471 static void
472 pmap_enter_kpage(va, data)
473 	vaddr_t va;
474 	int64_t data;
475 {
476 	paddr_t newp;
477 
478 	newp = NULL;
479 	while (pseg_set(pmap_kernel(), va, data, newp) == 1) {
480 		newp = NULL;
481 		pmap_get_page(&newp, NULL);
482 		if (!newp) {
483 			prom_printf("pmap_enter_kpage: out of pages\n");
484 			panic("pmap_enter_kpage");
485 		}
486 #ifdef DEBUG
487 		enter_stats.ptpneeded ++;
488 #endif
489 		BDPRINTF(PDB_BOOT1,
490 			 ("pseg_set: pm=%p va=%p data=%lx newp %lx\r\n",
491 			  pmap_kernel(), va, (long)data, (long)newp));
492 #ifdef DEBUG
493 		if (pmapdebug & PDB_BOOT1)
494 		{int i; for (i=0; i<140000000; i++) ;}
495 #endif
496 	}
497 }
498 
499 /*
500  * Check the bootargs to see if we need to enable bootdebug.
501  */
502 #ifdef DEBUG
503 void pmap_bootdebug __P((void));
504 void
505 pmap_bootdebug()
506 {
507 	int chosen;
508 	char *cp;
509 	char buf[128];
510 
511 	/*
512 	 * Grab boot args from PROM
513 	 */
514 	chosen = OF_finddevice("/chosen");
515 	/* Setup pointer to boot flags */
516 	OF_getprop(chosen, "bootargs", buf, sizeof(buf));
517 	cp = buf;
518 	while (*cp != '-')
519 		if (*cp++ == '\0')
520 			return;
521 	for (;;)
522 		switch (*++cp) {
523 		case '\0':
524 			return;
525 		case 'V':
526 			pmapdebug |= PDB_BOOT|PDB_BOOT1;
527 			break;
528 		case 'D':
529 			pmapdebug |= PDB_BOOT1;
530 			break;
531 		}
532 }
533 #endif
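/*
 * For example (assuming the usual OBP boot syntax), boot arguments ending
 * in "-D" turn on PDB_BOOT1 and "-V" turns on PDB_BOOT|PDB_BOOT1, making
 * the BDPRINTF() bootstrap messages below appear on the PROM console.
 */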
534 
535 
536 /*
537  * Calculate the correct number of page colors to use.  This should be the
538  * direct-mapped size of the E$ (size/associativity) divided by NBPG.  Since
539  * different CPUs can have different sized E$s, we take the largest value.
540  */
541 static int pmap_calculate_colors __P((void));
542 static int
543 pmap_calculate_colors() {
544 	int node = 0;
545 	int size, assoc, color, maxcolor = 1;
546 	char buf[80];
547 
548 	while ((node = OF_peer(node))) {
549 		if ((OF_getprop(node, "device_type", buf, sizeof(buf)) > 0) &&
550 			strcmp("cpu", buf) == 0) {
551 			/* Found a CPU, get the E$ info. */
552 			if (OF_getprop(node,"ecache-size", &size,
553 				sizeof(size)) != sizeof(size)) {
554 				printf("pmap_calculate_colors: node %x has "
555 					"no ecache-size\n", node);
556 				/* If we can't get the E$ size, skip the node */
557 				continue;
558 			}
559 			if (OF_getprop(node, "ecache-associativity", &assoc,
560 				sizeof(assoc)) != sizeof(assoc))
561 				/* Fake associativity of 1 */
562 				assoc = 1;
563 			color = size/assoc/NBPG;
564 			if (color > maxcolor)
565 				maxcolor = color;
566 		}
567 	}
568 	return (maxcolor);
569 }
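/*
 * Example: a 1MB 2-way E$ gives 1MB / 2 / 8KB == 64 colors, while a 4MB
 * direct-mapped E$ gives 4MB / 1 / 8KB == 512 colors; the largest value
 * found on any CPU node is what ends up in uvmexp.ncolors.
 */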
570 
571 /*
572  * This is called during bootstrap, before the system is really initialized.
573  *
574  * It's called with the start and end virtual addresses of the kernel.  We
575  * bootstrap the pmap allocator now.  We will allocate the basic structures we
576  * need to bootstrap the VM system here: the page frame tables, the TSB, and
577  * the free memory lists.
578  *
579  * Now all this is becoming a bit obsolete.  maxctx is still important, but by
580  * separating the kernel text and data segments we really would need to
581  * provide the start and end of each segment.  But we can't.  The rodata
582  * segment is attached to the end of the kernel segment and has nothing to
583  * delimit its end.  We could still pass in the beginning of the kernel and
584  * the beginning and end of the data segment but we could also just as easily
585  * calculate that all in here.
586  *
587  * To handle the kernel text, we need to do a reverse mapping of the start of
588  * the kernel, then traverse the free memory lists to find out how big it is.
589  */
590 
591 void
592 pmap_bootstrap(kernelstart, kernelend, maxctx)
593 	u_long kernelstart, kernelend;
594 	u_int maxctx;
595 {
596 	extern int data_start[], end[];	/* start of data segment */
597 	extern int msgbufmapped;
598 	struct mem_region *mp, *mp1;
599 	int msgbufsiz;
600 	int pcnt;
601 	size_t s, sz;
602 	int i, j;
603 	int64_t data;
604 	vaddr_t va;
605 	u_int64_t phys_msgbuf;
606 	paddr_t newkp;
607 	vaddr_t newkv, firstaddr, intstk;
608 	vsize_t kdsize, ktsize;
609 
610 #ifdef DEBUG
611 	pmap_bootdebug();
612 #endif
613 
614 	BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\r\n"));
615 	/*
616 	 * set machine page size
617 	 */
618 	uvmexp.pagesize = NBPG;
619 	uvmexp.ncolors = pmap_calculate_colors();
620 	uvm_setpagesize();
621 
622 	/*
623 	 * Find out how big the kernel's virtual address
624 	 * space is.  The *$#@$ prom loses this info
625 	 */
626 	if ((vmemh = OF_finddevice("/virtual-memory")) == -1) {
627 		prom_printf("no virtual-memory?");
628 		OF_exit();
629 	}
630 	bzero((caddr_t)memlist, sizeof(memlist));
631 	if (OF_getprop(vmemh, "available", memlist, sizeof(memlist)) <= 0) {
632 		prom_printf("no vmemory avail?");
633 		OF_exit();
634 	}
635 
636 #ifdef DEBUG
637 	if (pmapdebug & PDB_BOOT) {
638 		/* print out mem list */
639 		prom_printf("Available virtual memory:\r\n");
640 		for (mp = memlist; mp->size; mp++) {
641 			prom_printf("memlist start %p size %lx\r\n",
642 				    (void *)(u_long)mp->start,
643 				    (u_long)mp->size);
644 		}
645 		prom_printf("End of available virtual memory\r\n");
646 	}
647 #endif
648 	/*
649 	 * Get hold of the message buffer.
650 	 */
651 	msgbufp = (struct kern_msgbuf *)(vaddr_t)MSGBUF_VA;
652 /* XXXXX -- increase msgbufsiz for uvmhist printing */
653 	msgbufsiz = 4*NBPG /* round_page(sizeof(struct msgbuf)) */;
654 	BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\r\n",
655 			    (long)msgbufp, (long)msgbufsiz));
656 	if ((long)msgbufp !=
657 	    (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
658 		prom_printf(
659 		    "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\r\n",
660 		    (void *)msgbufp, (long)phys_msgbuf);
661 	phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
662 	BDPRINTF(PDB_BOOT,
663 		("We should have the memory at %lx, let's map it in\r\n",
664 			phys_msgbuf));
665 	if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
666 			  -1/* sunos does this */) == -1)
667 		prom_printf("Failed to map msgbuf\r\n");
668 	else
669 		BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\r\n",
670 			(void *)msgbufp));
671 	msgbufmapped = 1;	/* enable message buffer */
672 	initmsgbuf((caddr_t)msgbufp, msgbufsiz);
673 
674 	/*
675 	 * Record kernel mapping -- we will map these with a permanent 4MB
676 	 * TLB entry when we initialize the CPU later.
677 	 */
678 	BDPRINTF(PDB_BOOT, ("translating kernelstart %p\r\n",
679 		(void *)kernelstart));
680 	ktext = kernelstart;
681 	ktextp = prom_vtop(kernelstart);
682 
683 	kdata = (vaddr_t)data_start;
684 	kdatap = prom_vtop(kdata);
685 	ekdata = (vaddr_t)end;
686 
687 	/*
688 	 * Find the real size of the kernel.  Locate the smallest available
689 	 * region starting at or above the end of the kernel data (ekdata).
690 	 */
691 	for (mp1 = mp = memlist; mp->size; mp++) {
692 		/*
693 		 * Check whether this region is at the end of the kernel.
694 		 */
695 		if (mp->start >= ekdata && (mp1->start < ekdata ||
696 						mp1->start > mp->start))
697 			mp1 = mp;
698 	}
699 	if (mp1->start < kdata)
700 		prom_printf("Kernel at end of vmem???\r\n");
701 
702 	BDPRINTF(PDB_BOOT1,
703 		("Kernel data is mapped at %lx, next free seg: %lx, %lx\r\n",
704 			(long)kdata, (u_long)mp1->start, (u_long)mp1->size));
705 
706 	/*
707 	 * We save where we can start allocating memory.
708 	 */
709 	firstaddr = (ekdata + 07) & ~ 07;	/* Longword align */
710 
711 	/*
712 	 * We reserve 100K to grow.
713 	 */
714 	ekdata += 100*KB;
715 
716 	/*
717 	 * And set the end of the data segment to the end of what our
718 	 * bootloader allocated for us, if we still fit in there.
719 	 */
720 	if (ekdata < mp1->start)
721 		ekdata = mp1->start;
722 
723 #if 1
724 #define	valloc(name, type, num) (name) = (type *)firstaddr; firstaddr += (num)
725 #else
726 #define	valloc(name, type, num) (name) = (type *)firstaddr; firstaddr = \
727 	(vaddr_t)((name)+(num))
728 #endif
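/*
 * Note that with the first definition, valloc()'s "num" argument is a byte
 * count (the "type" is only used for the cast), e.g.
 *
 *	valloc(mem, struct mem_region, memsize);
 *
 * carves memsize bytes out of firstaddr, whereas the disabled alternative
 * would advance firstaddr by num * sizeof(type).
 */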
729 
730 	/*
731 	 * Since we can't always give the loader the hint to align us on a 4MB
732 	 * boundary, we will need to do the alignment ourselves.  First
733 	 * allocate a new 4MB aligned segment for the kernel, then map it
734 	 * in, copy the kernel over, swap mappings, then finally, free the
735 	 * old kernel.  Then we can continue with this.
736 	 *
737 	 * We'll do the data segment up here since we know how big it is.
738 	 * We'll do the text segment after we've read in the PROM translations
739 	 * so we can figure out its size.
740 	 *
741 	 * The ctxbusy table takes about 64KB, the TSB up to 32KB, and the
742 	 * rest should be less than 1K, so 100KB extra should be plenty.
743 	 */
744 	kdsize = round_page(ekdata - kdata);
745 	BDPRINTF(PDB_BOOT1, ("Kernel data size is %lx\r\n", (long)kdsize));
746 
747 	if ((kdatap & (4*MEG-1)) == 0) {
748 		/* We were at a 4MB boundary -- claim the rest */
749 		psize_t szdiff = (4*MEG - kdsize) & (4*MEG - 1);
750 
751 		BDPRINTF(PDB_BOOT1, ("Need to extend dseg by %lx\r\n",
752 			(long)szdiff));
753 		if (szdiff) {
754 			/* Claim the rest of the physical page. */
755 			newkp = kdatap + kdsize;
756 			newkv = kdata + kdsize;
757 			if (newkp != prom_claim_phys(newkp, szdiff)) {
758 				prom_printf("pmap_bootstrap: could not claim "
759 					"physical dseg extension "
760 					"at %lx size %lx\r\n",
761 					newkp, szdiff);
762 				goto remap_data;
763 			}
764 
765 			/* And the rest of the virtual page. */
766 			if (prom_claim_virt(newkv, szdiff) != newkv)
767 				prom_printf("pmap_bootstrap: could not claim "
768 					"virtual dseg extension "
769 					"at %lx size %lx\r\n", newkv, szdiff);
770 
771 			/* Make sure all 4MB are mapped */
772 			prom_map_phys(newkp, szdiff, newkv, -1);
773 		}
774 	} else {
775 		psize_t sz;
776 remap_data:
777 		/*
778 		 * Either we're not at a 4MB boundary or we can't get the rest
779 		 * of the 4MB extension.  We need to move the data segment.
780 		 * Leave 1MB of extra fiddle space in the calculations.
781 		 */
782 
783 		sz = (kdsize + 4*MEG - 1) & ~(4*MEG-1);
784 		BDPRINTF(PDB_BOOT1,
785 			 ("Allocating new %lx kernel data at 4MB boundary\r\n",
786 			  (u_long)sz));
787 		if ((newkp = prom_alloc_phys(sz, 4*MEG)) == (paddr_t)-1 ) {
788 			prom_printf("Cannot allocate new kernel\r\n");
789 			OF_exit();
790 		}
791 		BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
792 				     (u_int64_t)newkp));
793 		if ((newkv = (vaddr_t)prom_alloc_virt(sz, 8)) ==
794 		    (vaddr_t)-1) {
795 			prom_printf("Cannot allocate new kernel va\r\n");
796 			OF_exit();
797 		}
798 		BDPRINTF(PDB_BOOT1, ("Mapping in buffer %llx at %llx\r\n",
799 		    (u_int64_t)newkp, (u_int64_t)newkv));
800 		prom_map_phys(newkp, sz, (vaddr_t)newkv, -1);
801 		BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel data...",
802 			kdsize));
803 		bzero((void *)newkv, sz);
804 		bcopy((void *)kdata, (void *)newkv, kdsize);
805 		BDPRINTF(PDB_BOOT1, ("done.  Swapping maps..unmap new\r\n"));
806 		prom_unmap_virt((vaddr_t)newkv, sz);
807 		BDPRINTF(PDB_BOOT, ("remap old "));
808 #if 0
809 		/*
810 		 * calling the prom will probably require reading part of the
811 		 * data segment so we can't do this.  */
812 		prom_unmap_virt((vaddr_t)kdatap, kdsize);
813 #endif
814 		prom_map_phys(newkp, sz, kdata, -1);
815 		/*
816 		 * we will map in 4MB, more than we allocated, to allow
817 		 * further allocation
818 		 */
819 		BDPRINTF(PDB_BOOT1, ("free old\r\n"));
820 		prom_free_phys(kdatap, kdsize);
821 		kdatap = newkp;
822 		BDPRINTF(PDB_BOOT1,
823 			 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
824 			  "avail for kernel\r\n", (u_long)firstaddr,
825 			  (u_long)prom_vtop(firstaddr)));
826 	}
827 
828 	/*
829 	 * Find out how much RAM we have installed.
830 	 */
831 	BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\r\n"));
832 	if ((memh = OF_finddevice("/memory")) == -1) {
833 		prom_printf("no memory?");
834 		OF_exit();
835 	}
836 	memsize = OF_getproplen(memh, "reg") + 2 * sizeof(struct mem_region);
837 	valloc(mem, struct mem_region, memsize);
838 	bzero((caddr_t)mem, memsize);
839 	if (OF_getprop(memh, "reg", mem, memsize) <= 0) {
840 		prom_printf("no memory installed?");
841 		OF_exit();
842 	}
843 
844 #ifdef DEBUG
845 	if (pmapdebug & PDB_BOOT1) {
846 		/* print out mem list */
847 		prom_printf("Installed physical memory:\r\n");
848 		for (mp = mem; mp->size; mp++) {
849 			prom_printf("memlist start %lx size %lx\r\n",
850 				    (u_long)mp->start, (u_long)mp->size);
851 		}
852 	}
853 #endif
854 	BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
855 
856 	for (mp = mem; mp->size; mp++)
857 		physmem += btoc(mp->size);
858 	BDPRINTF(PDB_BOOT1, (" result %x or %d pages\r\n",
859 			     (int)physmem, (int)physmem));
860 	/*
861 	 * Calculate approx TSB size.  This probably needs tweaking.
862 	 */
863 	if (physmem < 64 * 1024 * 1024)
864 		tsbsize = 0;
865 	else if (physmem < 512 * 1024 * 1024)
866 		tsbsize = 1;
867 	else
868 		tsbsize = 2;
869 
870 	/*
871 	 * Save the prom translations
872 	 */
873 	sz = OF_getproplen(vmemh, "translations");
874 	valloc(prom_map, struct prom_map, sz);
875 	if (OF_getprop(vmemh, "translations", (void*)prom_map, sz) <= 0) {
876 		prom_printf("no translations installed?");
877 		OF_exit();
878 	}
879 	prom_map_size = sz / sizeof(struct prom_map);
880 #ifdef DEBUG
881 	if (pmapdebug & PDB_BOOT) {
882 		/* print out mem list */
883 		prom_printf("Prom xlations:\r\n");
884 		for (i = 0; i < prom_map_size; i++) {
885 			prom_printf("start %016lx size %016lx tte %016lx\r\n",
886 				    (u_long)prom_map[i].vstart,
887 				    (u_long)prom_map[i].vsize,
888 				    (u_long)prom_map[i].tte);
889 		}
890 		prom_printf("End of prom xlations\r\n");
891 	}
892 #endif
893 	/*
894  * Hunt for the kernel text segment and figure out its size and
895 	 * alignment.
896 	 */
897 	for (i = 0; i < prom_map_size; i++)
898 		if (prom_map[i].vstart == ktext)
899 			break;
900 	if (i == prom_map_size)
901 		panic("No kernel text segment!\r\n");
902 	ktsize = prom_map[i].vsize;
903 	ektext = ktext + ktsize;
904 
905 	if (ktextp & (4*MEG-1)) {
906 		/* Kernel text is not 4MB aligned -- need to fix that */
907 		BDPRINTF(PDB_BOOT1,
908 			 ("Allocating new %lx kernel text at 4MB boundary\r\n",
909 			  (u_long)ktsize));
910 		if ((newkp = prom_alloc_phys(ktsize, 4*MEG)) == 0 ) {
911 			prom_printf("Cannot allocate new kernel text\r\n");
912 			OF_exit();
913 		}
914 		BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
915 				     (u_int64_t)newkp));
916 		if ((newkv = (vaddr_t)prom_alloc_virt(ktsize, 8)) ==
917 		    (vaddr_t)-1) {
918 			prom_printf("Cannot allocate new kernel text va\r\n");
919 			OF_exit();
920 		}
921 		BDPRINTF(PDB_BOOT1, ("Mapping in buffer %lx at %lx\r\n",
922 				     (u_long)newkp, (u_long)newkv));
923 		prom_map_phys(newkp, ktsize, (vaddr_t)newkv, -1);
924 		BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel text...",
925 			ktsize));
926 		bcopy((void *)ktext, (void *)newkv,
927 		    ktsize);
928 		BDPRINTF(PDB_BOOT1, ("done.  Swapping maps..unmap new\r\n"));
929 		prom_unmap_virt((vaddr_t)newkv, 4*MEG);
930 		BDPRINTF(PDB_BOOT, ("remap old "));
931 #if 0
932 		/*
933 		 * calling the prom will probably require reading part of the
934 		 * text segment so we can't do this.
935 		 */
936 		prom_unmap_virt((vaddr_t)ktextp, ktsize);
937 #endif
938 		prom_map_phys(newkp, ktsize, ktext, -1);
939 		/*
940 		 * we will map in 4MB, more than we allocated, to allow
941 		 * further allocation
942 		 */
943 		BDPRINTF(PDB_BOOT1, ("free old\r\n"));
944 		prom_free_phys(ktextp, ktsize);
945 		ktextp = newkp;
946 
947 		BDPRINTF(PDB_BOOT1,
948 			 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
949 			  "avail for kernel\r\n", (u_long)firstaddr,
950 			  (u_long)prom_vtop(firstaddr)));
951 
952 		/*
953 		 * Re-fetch translations -- they've certainly changed.
954 		 */
955 		if (OF_getprop(vmemh, "translations", (void*)prom_map, sz) <=
956 			0) {
957 			prom_printf("no translations installed?");
958 			OF_exit();
959 		}
960 #ifdef DEBUG
961 		if (pmapdebug & PDB_BOOT) {
962 			/* print out mem list */
963 			prom_printf("New prom xlations:\r\n");
964 			for (i = 0; i < prom_map_size; i++) {
965 				prom_printf("start %016lx size %016lx tte %016lx\r\n",
966 					    (u_long)prom_map[i].vstart,
967 					    (u_long)prom_map[i].vsize,
968 					    (u_long)prom_map[i].tte);
969 			}
970 			prom_printf("End of prom xlations\r\n");
971 		}
972 #endif
973 	}
974 	ektextp = ktextp + ktsize;
975 
976 	/*
977 	 * Here's a quick in-lined reverse bubble sort.  It gets rid of
978 	 * any translations inside the kernel data VA range.
979 	 */
980 	for(i = 0; i < prom_map_size; i++) {
981 		if (prom_map[i].vstart >= kdata &&
982 		    prom_map[i].vstart <= firstaddr) {
983 			prom_map[i].vstart = 0;
984 			prom_map[i].vsize = 0;
985 		}
986 		if (prom_map[i].vstart >= ktext &&
987 		    prom_map[i].vstart <= ektext) {
988 			prom_map[i].vstart = 0;
989 			prom_map[i].vsize = 0;
990 		}
991 		for(j = i; j < prom_map_size; j++) {
992 			if (prom_map[j].vstart >= kdata &&
993 			    prom_map[j].vstart <= firstaddr)
994 				continue;	/* this is inside the kernel */
995 			if (prom_map[j].vstart >= ktext &&
996 			    prom_map[j].vstart <= ektext)
997 				continue;	/* this is inside the kernel */
998 			if (prom_map[j].vstart > prom_map[i].vstart) {
999 				struct prom_map tmp;
1000 				tmp = prom_map[i];
1001 				prom_map[i] = prom_map[j];
1002 				prom_map[j] = tmp;
1003 			}
1004 		}
1005 	}
1006 #ifdef DEBUG
1007 	if (pmapdebug & PDB_BOOT) {
1008 		/* print out mem list */
1009 		prom_printf("Prom xlations:\r\n");
1010 		for (i = 0; i < prom_map_size; i++) {
1011 			prom_printf("start %016lx size %016lx tte %016lx\r\n",
1012 				    (u_long)prom_map[i].vstart,
1013 				    (u_long)prom_map[i].vsize,
1014 				    (u_long)prom_map[i].tte);
1015 		}
1016 		prom_printf("End of prom xlations\r\n");
1017 	}
1018 #endif
1019 
1020 	/*
1021 	 * Allocate a 64KB block (8 pages) for the cpu_info structure now.
1022 	 */
1023 	if ((cpu0paddr = prom_alloc_phys(8*NBPG, 8*NBPG)) == 0 ) {
1024 		prom_printf("Cannot allocate new cpu_info\r\n");
1025 		OF_exit();
1026 	}
1027 
1028 
1029 	/*
1030 	 * Now that the kernel text segment is in its final location, we can
1031 	 * try to find out how much memory really is free.
1032 	 */
1033 	sz = OF_getproplen(memh, "available") + sizeof(struct mem_region);
1034 	valloc(orig, struct mem_region, sz);
1035 	bzero((caddr_t)orig, sz);
1036 	if (OF_getprop(memh, "available", orig, sz) <= 0) {
1037 		prom_printf("no available RAM?");
1038 		OF_exit();
1039 	}
1040 #ifdef DEBUG
1041 	if (pmapdebug & PDB_BOOT1) {
1042 		/* print out mem list */
1043 		prom_printf("Available physical memory:\r\n");
1044 		for (mp = orig; mp->size; mp++) {
1045 			prom_printf("memlist start %lx size %lx\r\n",
1046 				    (u_long)mp->start, (u_long)mp->size);
1047 		}
1048 		prom_printf("End of available physical memory\r\n");
1049 	}
1050 #endif
1051 	valloc(avail, struct mem_region, sz);
1052 	bzero((caddr_t)avail, sz);
1053 	for (pcnt = 0, mp = orig, mp1 = avail; (mp1->size = mp->size);
1054 	    mp++, mp1++) {
1055 		mp1->start = mp->start;
1056 		pcnt++;
1057 	}
1058 
1059 	/*
1060 	 * Allocate and initialize a context table
1061 	 */
1062 	numctx = maxctx;
1063 	valloc(ctxbusy, paddr_t, CTXSIZE);
1064 	bzero((caddr_t)ctxbusy, CTXSIZE);
1065 
1066 	/*
1067 	 * Allocate our TSB.
1068 	 *
1069 	 * We will use the left over space to flesh out the kernel pmap.
1070 	 */
1071 	BDPRINTF(PDB_BOOT1, ("firstaddr before TSB=%lx\r\n",
1072 		(u_long)firstaddr));
1073 	firstaddr = ((firstaddr + TSBSIZE - 1) & ~(TSBSIZE-1));
1074 #ifdef DEBUG
1075 	i = (firstaddr + (NBPG-1)) & ~(NBPG-1);	/* First, page align */
1076 	if ((int)firstaddr < i) {
1077 		prom_printf("TSB alloc fixup failed\r\n");
1078 		prom_printf("frobbed i, firstaddr before TSB=%x, %lx\r\n",
1079 		    (int)i, (u_long)firstaddr);
1080 		panic("TSB alloc\n");
1081 		OF_exit();
1082 	}
1083 #endif
1084 	BDPRINTF(PDB_BOOT, ("frobbed i, firstaddr before TSB=%x, %lx\r\n",
1085 			    (int)i, (u_long)firstaddr));
1086 	valloc(tsb, pte_t, TSBSIZE);
1087 	bzero(tsb, TSBSIZE);
1088 
1089 	BDPRINTF(PDB_BOOT1, ("firstaddr after TSB=%lx\r\n", (u_long)firstaddr));
1090 	BDPRINTF(PDB_BOOT1, ("TSB allocated at %p size %08x\r\n", (void*)tsb,
1091 	    (int)TSBSIZE));
1092 
1093 	first_phys_addr = mem->start;
1094 	BDPRINTF(PDB_BOOT1, ("firstaddr after pmap=%08lx\r\n",
1095 		(u_long)firstaddr));
1096 
1097 	/*
1098 	 * Page align all regions.
1099 	 * Non-page memory isn't very interesting to us.
1100 	 * Also, sort the entries for ascending addresses.
1101 	 *
1102 	 * And convert from virtual to physical addresses.
1103 	 */
1104 
1105 	BDPRINTF(PDB_BOOT, ("kernel virtual size %08lx - %08lx\r\n",
1106 			    (u_long)kernelstart, (u_long)firstaddr));
1107 	kdata = kdata & ~PGOFSET;
1108 	ekdata = firstaddr;
1109 	ekdata = (ekdata + PGOFSET) & ~PGOFSET;
1110 	BDPRINTF(PDB_BOOT1, ("kernel virtual size %08lx - %08lx\r\n",
1111 			     (u_long)kernelstart, (u_long)kernelend));
1112 	ekdatap = ekdata - kdata + kdatap;
1113 	/* Switch from vaddrs to paddrs */
1114 	if(ekdatap > (kdatap + 4*MEG)) {
1115 		prom_printf("Kernel size exceeds 4MB\r\n");
1116 	}
1117 
1118 #ifdef DEBUG
1119 	if (pmapdebug & PDB_BOOT1) {
1120 		/* print out mem list */
1121 		prom_printf("Available %lx physical memory before cleanup:\r\n",
1122 			    (u_long)avail);
1123 		for (mp = avail; mp->size; mp++) {
1124 			prom_printf("memlist start %lx size %lx\r\n",
1125 				    (u_long)mp->start,
1126 				    (u_long)mp->size);
1127 		}
1128 		prom_printf("End of available physical memory before cleanup\r\n");
1129 		prom_printf("kernel physical text size %08lx - %08lx\r\n",
1130 			    (u_long)ktextp, (u_long)ektextp);
1131 		prom_printf("kernel physical data size %08lx - %08lx\r\n",
1132 			    (u_long)kdatap, (u_long)ekdatap);
1133 	}
1134 #endif
1135 	/*
1136 	 * Here's another quick in-lined bubble sort.
1137 	 */
1138 	for (i = 0; i < pcnt; i++) {
1139 		for (j = i; j < pcnt; j++) {
1140 			if (avail[j].start < avail[i].start) {
1141 				struct mem_region tmp;
1142 				tmp = avail[i];
1143 				avail[i] = avail[j];
1144 				avail[j] = tmp;
1145 			}
1146 		}
1147 	}
1148 
1149 	/* Throw away page zero if we have it. */
1150 	if (avail->start == 0) {
1151 		avail->start += NBPG;
1152 		avail->size -= NBPG;
1153 	}
1154 	/*
1155 	 * Now we need to remove the area we valloc'ed from the available
1156 	 * memory lists.  (NB: we may have already alloc'ed the entire space).
1157 	 */
1158 	npgs = 0;
1159 	for (mp = avail; mp->size; mp++) {
1160 		/*
1161 		 * Check whether this region holds all of the kernel.
1162 		 */
1163 		s = mp->start + mp->size;
1164 		if (mp->start < kdatap && s > roundup(ekdatap, 4*MEG)) {
1165 			avail[pcnt].start = roundup(ekdatap, 4*MEG);
1166 			avail[pcnt++].size = s - kdatap;
1167 			mp->size = kdatap - mp->start;
1168 		}
1169 		/*
1170 		 * Look whether this region starts within the kernel.
1171 		 */
1172 		if (mp->start >= kdatap &&
1173 			mp->start < roundup(ekdatap, 4*MEG)) {
1174 			s = ekdatap - mp->start;
1175 			if (mp->size > s)
1176 				mp->size -= s;
1177 			else
1178 				mp->size = 0;
1179 			mp->start = roundup(ekdatap, 4*MEG);
1180 		}
1181 		/*
1182 		 * Now look whether this region ends within the kernel.
1183 		 */
1184 		s = mp->start + mp->size;
1185 		if (s > kdatap && s < roundup(ekdatap, 4*MEG))
1186 			mp->size -= s - kdatap;
1187 		/*
1188 		 * Now page align the start of the region.
1189 		 */
1190 		s = mp->start % NBPG;
1191 		if (mp->size >= s) {
1192 			mp->size -= s;
1193 			mp->start += s;
1194 		}
1195 		/*
1196 		 * And now align the size of the region.
1197 		 */
1198 		mp->size -= mp->size % NBPG;
1199 		/*
1200 		 * Check whether some memory is left here.
1201 		 */
1202 		if (mp->size == 0) {
1203 			bcopy(mp + 1, mp,
1204 			      (pcnt - (mp - avail)) * sizeof *mp);
1205 			pcnt--;
1206 			mp--;
1207 			continue;
1208 		}
1209 		s = mp->start;
1210 		sz = mp->size;
1211 		npgs += btoc(sz);
1212 		for (mp1 = avail; mp1 < mp; mp1++)
1213 			if (s < mp1->start)
1214 				break;
1215 		if (mp1 < mp) {
1216 			bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1);
1217 			mp1->start = s;
1218 			mp1->size = sz;
1219 		}
1220 #ifdef DEBUG
1221 /* Clear all memory we give to the VM system.  I want to make sure
1222  * the PROM isn't using it for something, so this should break the PROM.
1223  */
1224 
1225 /* Calling pmap_zero_page() at this point also hangs some machines
1226  * so don't do it at all. -- pk 26/02/2002
1227  */
1228 #if 0
1229 		{
1230 			paddr_t p;
1231 			for (p = mp->start; p < mp->start+mp->size; p += NBPG)
1232 				pmap_zero_page(p);
1233 		}
1234 #endif
1235 #endif /* DEBUG */
1236 		/*
1237 		 * In future we should be able to specify both allocated
1238 		 * and free.
1239 		 */
1240 		uvm_page_physload(
1241 			atop(mp->start),
1242 			atop(mp->start+mp->size),
1243 			atop(mp->start),
1244 			atop(mp->start+mp->size),
1245 			VM_FREELIST_DEFAULT);
1246 	}
1247 
1248 #if 0
1249 	/* finally, free up any space that valloc did not use */
1250 	prom_unmap_virt((vaddr_t)ekdata, roundup(ekdata, 4*MEG) - ekdata);
1251 	if (ekdatap < roundup(kdatap, 4*MEG)) {
1252 		uvm_page_physload(atop(ekdatap),
1253 			atop(roundup(ekdatap, (4*MEG))),
1254 			atop(ekdatap),
1255 			atop(roundup(ekdatap, (4*MEG))),
1256 			VM_FREELIST_DEFAULT);
1257 	}
1258 #endif
1259 
1260 #ifdef DEBUG
1261 	if (pmapdebug & PDB_BOOT) {
1262 		/* print out mem list */
1263 		prom_printf("Available physical memory after cleanup:\r\n");
1264 		for (mp = avail; mp->size; mp++) {
1265 			prom_printf("avail start %lx size %lx\r\n",
1266 				    (long)mp->start, (long)mp->size);
1267 		}
1268 		prom_printf("End of available physical memory after cleanup\r\n");
1269 	}
1270 #endif
1271 	/*
1272 	 * Allocate and clear out pmap_kernel()->pm_segs[]
1273 	 */
1274 	pmap_pinit(pmap_kernel());
1275 	{
1276 		paddr_t newp;
1277 
1278 		do {
1279 			pmap_get_page(&newp, NULL);
1280 		} while (!newp); /* Throw away page zero */
1281 		pmap_kernel()->pm_segs=(paddr_t *)(u_long)newp;
1282 		pmap_kernel()->pm_physaddr = newp;
1283 		/* mark kernel context as busy */
1284 		((paddr_t*)ctxbusy)[0] = (int)pmap_kernel()->pm_physaddr;
1285 	}
1286 	/*
1287 	 * finish filling out kernel pmap.
1288 	 */
1289 
1290 	BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\r\n",
1291 	    (long)pmap_kernel()->pm_physaddr));
1292 	/*
1293 	 * Tell pmap about our mesgbuf -- Hope this works already
1294 	 */
1295 #ifdef DEBUG
1296 	BDPRINTF(PDB_BOOT1, ("Calling consinit()\r\n"));
1297 	if (pmapdebug & PDB_BOOT1) consinit();
1298 	BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\r\n"));
1299 #endif
1300 	/* it's not safe to call pmap_enter so we need to do this ourselves */
1301 	va = (vaddr_t)msgbufp;
1302 	prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1);
1303 	while (msgbufsiz) {
1304 		int pgsz;
1305 		psize_t psize;
1306 
1307 		PMAP_PAGE_SIZE(va, phys_msgbuf, msgbufsiz, pgsz, psize);
1308 		data = TSB_DATA(0 /* global */,
1309 			pgsz,
1310 			phys_msgbuf,
1311 			1 /* priv */,
1312 			1 /* Write */,
1313 			1 /* Cacheable */,
1314 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1315 			1 /* valid */,
1316 			0 /* IE */);
1317 		do {
1318 			pmap_enter_kpage(va, data);
1319 			va += NBPG;
1320 			msgbufsiz -= NBPG;
1321 			phys_msgbuf += NBPG;
1322 		} while (psize-=NBPG);
1323 	}
1324 	BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\r\n"));
1325 
1326 	BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\r\n"));
1327 	for (i = 0; i < prom_map_size; i++)
1328 		if (prom_map[i].vstart && ((prom_map[i].vstart>>32) == 0))
1329 			for (j = 0; j < prom_map[i].vsize; j += NBPG) {
1330 				int k;
1331 
1332 				for (k = 0; page_size_map[k].mask; k++) {
1333 					if (((prom_map[i].vstart |
1334 					      prom_map[i].tte) &
1335 					      page_size_map[k].mask) == 0 &&
1336 					      page_size_map[k].mask <
1337 					      prom_map[i].vsize)
1338 						break;
1339 				}
1340 #ifdef DEBUG
1341 				page_size_map[k].use++;
1342 #endif
1343 				/* Enter PROM map into pmap_kernel() */
1344 				pmap_enter_kpage(prom_map[i].vstart + j,
1345 					(prom_map[i].tte + j)|
1346 					page_size_map[k].code);
1347 			}
1348 	BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\r\n"));
1349 
1350 	/*
1351 	 * Fix up start of kernel heap.
1352 	 */
1353 	vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
1354 	/* Let's keep 1 page of redzone after the kernel */
1355 	vmmap += NBPG;
1356 	{
1357 		extern vaddr_t u0[2];
1358 		extern struct pcb* proc0paddr;
1359 		extern void main __P((void));
1360 		paddr_t pa;
1361 
1362 		/* Initialize all the pointers to u0 */
1363 		cpcb = (struct pcb *)vmmap;
1364 		proc0paddr = cpcb;
1365 		u0[0] = vmmap;
1366 		/* Allocate some VAs for u0 */
1367 		u0[1] = vmmap + 2*USPACE;
1368 
1369 		BDPRINTF(PDB_BOOT1,
1370 			("Inserting stack 0 into pmap_kernel() at %p\r\n",
1371 				vmmap));
1372 
1373 		while (vmmap < u0[1]) {
1374 			int64_t data;
1375 
1376 			pmap_get_page(&pa, NULL);
1377 			prom_map_phys(pa, NBPG, vmmap, -1);
1378 			data = TSB_DATA(0 /* global */,
1379 				PGSZ_8K,
1380 				pa,
1381 				1 /* priv */,
1382 				1 /* Write */,
1383 				1 /* Cacheable */,
1384 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1385 				1 /* valid */,
1386 				0 /* IE */);
1387 			pmap_enter_kpage(vmmap, data);
1388 			vmmap += NBPG;
1389 		}
1390 		BDPRINTF(PDB_BOOT1,
1391 			 ("Done inserting stack 0 into pmap_kernel()\r\n"));
1392 
1393 		/* Now map in and initialize our cpu_info structure */
1394 #ifdef DIAGNOSTIC
1395 		vmmap += NBPG; /* redzone -- XXXX do we need one? */
1396 #endif
1397 		if ((vmmap ^ INTSTACK) & VA_ALIAS_MASK)
1398 			vmmap += NBPG; /* Match up virtual color for D$ */
1399 		intstk = vmmap;
1400 		cpus = (struct cpu_info *)(intstk+CPUINFO_VA-INTSTACK);
1401 
1402 		BDPRINTF(PDB_BOOT1,
1403 			("Inserting cpu_info into pmap_kernel() at %p\r\n",
1404 				 cpus));
1405 		/* Now map in all 8 pages of cpu_info */
1406 		pa = cpu0paddr;
1407 		prom_map_phys(pa, 64*KB, vmmap, -1);
1408 		/*
1409 		 * Also map it in as the interrupt stack.
1410 		 * This lets the PROM see this if needed.
1411 		 *
1412 		 * XXXX locore.s does not flush these mappings
1413 		 * before installing the locked TTE.
1414 		 */
1415 		prom_map_phys(pa, 64*KB, CPUINFO_VA, -1);
1416 		for (i=0; i<8; i++) {
1417 			int64_t data;
1418 
1419 			data = TSB_DATA(0 /* global */,
1420 				PGSZ_8K,
1421 				pa,
1422 				1 /* priv */,
1423 				1 /* Write */,
1424 				1 /* Cacheable */,
1425 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1426 				1 /* valid */,
1427 				0 /* IE */);
1428 			pmap_enter_kpage(vmmap, data);
1429 			vmmap += NBPG;
1430 			pa += NBPG;
1431 		}
1432 		BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\r\n"));
1433 
1434 		/* Initialize our cpu_info structure */
1435 		bzero((void *)intstk, 8*NBPG);
1436 		cpus->ci_next = NULL; /* Redundant, I know. */
1437 		cpus->ci_curproc = &proc0;
1438 		cpus->ci_cpcb = (struct pcb *)u0[0]; /* Need better source */
1439 		cpus->ci_upaid = CPU_UPAID;
1440 		cpus->ci_number = cpus->ci_upaid; /* How do we figure this out? */
1441 		cpus->ci_fpproc = NULL;
1442 		cpus->ci_spinup = main; /* Call main when we're running. */
1443 		cpus->ci_initstack = (void *)u0[1];
1444 		cpus->ci_paddr = cpu0paddr;
1445 		/* The rest will be done at CPU attach time. */
1446 		BDPRINTF(PDB_BOOT1,
1447 			 ("Done inserting cpu_info into pmap_kernel()\r\n"));
1448 	}
1449 
1450 	vmmap = (vaddr_t)reserve_dumppages((caddr_t)(u_long)vmmap);
1451 	/*
1452 	 * Set up bounds of allocatable memory for vmstat et al.
1453 	 */
1454 	nextavail = avail->start;
1455 	avail_start = nextavail;
1456 	for (mp = avail; mp->size; mp++)
1457 		avail_end = mp->start+mp->size;
1458 	BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\r\n"));
1459 
1460 }
1461 
1462 /*
1463  * Initialize anything else for pmap handling.
1464  * Called during vm_init().
1465  */
1466 void
1467 pmap_init()
1468 {
1469 	struct vm_page *m;
1470 	paddr_t pa;
1471 	psize_t size;
1472 	vaddr_t va;
1473 	struct pglist mlist;
1474 	vsize_t		s;
1475 	int		bank;
1476 	struct pv_entry	*pvh;
1477 
1478 	BDPRINTF(PDB_BOOT1, ("pmap_init()\r\n"));
1479 	if (PAGE_SIZE != NBPG)
1480 		panic("pmap_init: PAGE_SIZE!=NBPG");
1481 
1482 	size = sizeof(struct pv_entry) * physmem;
1483 	TAILQ_INIT(&mlist);
1484 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
1485 		(paddr_t)NBPG, (paddr_t)0, &mlist, 1, 0) != 0)
1486 		panic("cpu_start: no memory");
1487 
1488 	va = uvm_km_valloc(kernel_map, size);
1489 	if (va == 0)
1490 		panic("cpu_start: no memory");
1491 
1492 	pv_table = (struct pv_entry *)va;
1493 	m = TAILQ_FIRST(&mlist);
1494 
1495 	/* Map the pages */
1496 	for (; m != NULL; m = TAILQ_NEXT(m,pageq)) {
1497 		u_int64_t data;
1498 
1499 		pa = VM_PAGE_TO_PHYS(m);
1500 		pmap_zero_page(pa);
1501 		data = TSB_DATA(0 /* global */,
1502 			PGSZ_8K,
1503 			pa,
1504 			1 /* priv */,
1505 			1 /* Write */,
1506 			1 /* Cacheable */,
1507 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1508 			1 /* valid */,
1509 			0 /* IE */);
1510 		pmap_enter_kpage(va, data);
1511 		va += NBPG;
1512 	}
1513 
1514 	/*
1515 	 * Memory for the pv heads has already been allocated.
1516 	 * Initialize the physical memory segments.
1517 	 */
1518 	pvh = pv_table;
1519 	for (bank = 0; bank < vm_nphysseg; bank++) {
1520 		s = vm_physmem[bank].end - vm_physmem[bank].start;
1521 		vm_physmem[bank].pmseg.pvent = pvh;
1522 		pvh += s;
1523 	}
1524 
1525 	/* Setup a pool for additional pvlist structures */
1526 	pool_init(&pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pv_entry", NULL);
1527 
1528 	vm_first_phys = avail_start;
1529 	vm_num_phys = avail_end - avail_start;
1530 }
1531 
1532 /*
1533  * How much virtual space is available to the kernel?
1534  */
1535 static vaddr_t kbreak; /* End of kernel VA */
1536 void
1537 pmap_virtual_space(start, end)
1538 	vaddr_t *start, *end;
1539 {
1540 	/*
1541 	 * Reserve one segment for kernel virtual memory
1542 	 */
1543 	/* Reserve two pages for pmap_copy_page && /dev/mem */
1544 	*start = kbreak = (vaddr_t)(vmmap + 2*NBPG);
1545 	*end = VM_MAX_KERNEL_ADDRESS;
1546 	BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\r\n", *start, *end));
1547 }
1548 
1549 #ifdef PMAP_GROWKERNEL
1550 /*
1551  * Preallocate kernel page tables to a specified VA.
1552  * This simply loops through the first TTE for each
1553  * page table from the beginning of the kernel pmap,
1554  * reads the entry, and if the result is
1555  * zero (either invalid entry or no page table) it stores
1556  * a zero there, populating page tables in the process.
1557  * This is not the most efficient technique but i don't
1558  * expect it to be called that often.
1559  */
1560 vaddr_t
1561 pmap_growkernel(maxkvaddr)
1562         vaddr_t maxkvaddr;
1563 {
1564 	int s;
1565 	paddr_t pg;
1566 	struct pmap *pm = pmap_kernel();
1567 
1568 	if (maxkvaddr >= KERNEND) {
1569 		printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
1570 		       (void *)KERNEND, (void *)maxkvaddr);
1571 		return (kbreak);
1572 	}
1573 	s = splvm();
1574 	simple_lock(&pm->pm_lock);
1575 	DPRINTF(PDB_GROW,
1576 		("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr));
1577 	/* Align with the start of a page table */
1578 	for (kbreak &= (-1<<PDSHIFT); kbreak < maxkvaddr;
1579 	     kbreak += (1<<PDSHIFT)) {
1580 		if (pseg_get(pm, kbreak)) continue;
1581 
1582 		pg = 0;
1583 		while (pseg_set(pm, kbreak, 0, pg) == 1) {
1584 			DPRINTF(PDB_GROW,
1585 				("pmap_growkernel: extending %lx\n", kbreak));
1586 			pg = 0;
1587 			if (!pmap_get_page(&pg, NULL))
1588 				panic("pmap_growkernel: no pages");
1589 #ifdef DEBUG
1590 			enter_stats.ptpneeded ++;
1591 #endif
1592 		}
1593 
1594 	}
1595 	simple_unlock(&pm->pm_lock);
1596 	splx(s);
1597 	return (kbreak);
1598 }
1599 #endif
1600 
1601 /*
1602  * Create and return a physical map.
1603  */
1604 struct pmap *
1605 pmap_create()
1606 {
1607 	struct pmap *pm;
1608 
1609 	DPRINTF(PDB_CREATE, ("pmap_create()\n"));
1610 
1611 	pm = (struct pmap *)malloc(sizeof *pm, M_VMPMAP, M_WAITOK);
1612 	bzero((caddr_t)pm, sizeof *pm);
1613 #ifdef DEBUG
1614 	if (pmapdebug & PDB_CREATE)
1615 		printf("pmap_create(): created %p\n", pm);
1616 #endif
1617 	pmap_pinit(pm);
1618 	return pm;
1619 }
1620 
1621 /*
1622  * Initialize a preallocated and zeroed pmap structure.
1623  */
1624 void
1625 pmap_pinit(pm)
1626 	struct pmap *pm;
1627 {
1628 
1629 	/*
1630 	 * Allocate some segment registers for this pmap.
1631 	 */
1632 	simple_lock_init(&pm->pm_lock);
1633 	simple_lock(&pm->pm_lock);
1634 	pm->pm_refs = 1;
1635 	if(pm != pmap_kernel()) {
1636 		pmap_get_page(&pm->pm_physaddr , "pmap_pinit");
1637 		pm->pm_segs = (paddr_t *)(u_long)pm->pm_physaddr;
1638 		if (!pm->pm_physaddr) panic("pmap_pinit");
1639 		ctx_alloc(pm);
1640 	}
1641 #ifdef DEBUG
1642 	if (pmapdebug & PDB_CREATE)
1643 		printf("pmap_pinit(%p): ctx %d\n", pm, pm->pm_ctx);
1644 #endif
1645 	simple_unlock(&pm->pm_lock);
1646 }
1647 
1648 /*
1649  * Add a reference to the given pmap.
1650  */
1651 void
1652 pmap_reference(pm)
1653 	struct pmap *pm;
1654 {
1655 	int s;
1656 
1657 	s = splvm();
1658 	simple_lock(&pm->pm_lock);
1659 	pm->pm_refs++;
1660 	simple_unlock(&pm->pm_lock);
1661 	splx(s);
1662 }
1663 
1664 /*
1665  * Retire the given pmap from service.
1666  * Should only be called if the map contains no valid mappings.
1667  */
1668 void
1669 pmap_destroy(pm)
1670 	struct pmap *pm;
1671 {
1672 	if (--pm->pm_refs == 0) {
1673 #ifdef DEBUG
1674 		if (pmapdebug & PDB_DESTROY)
1675 			printf("pmap_destroy: freeing pmap %p\n", pm);
1676 #endif
1677 		pmap_release(pm);
1678 		free((caddr_t)pm, M_VMPMAP);
1679 	}
1680 }
1681 
1682 /*
1683  * Release any resources held by the given physical map.
1684  * Called when a pmap initialized by pmap_pinit is being released.
1685  */
1686 void
1687 pmap_release(pm)
1688 	struct pmap *pm;
1689 {
1690 	int i, j, k, s;
1691 	paddr_t *pdir, *ptbl, tmp;
1692 
1693 #ifdef DIAGNOSTIC
1694 	if(pm == pmap_kernel())
1695 		panic("pmap_release: releasing pmap_kernel()");
1696 #endif
1697 
1698 	s=splvm();
1699 	simple_lock(&pm->pm_lock);
1700 	for(i=0; i<STSZ; i++) {
1701 		paddr_t psegentp = (paddr_t)(u_long)&pm->pm_segs[i];
1702 		if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)psegentp,
1703 			ASI_PHYS_CACHED))) {
1704 			for (k=0; k<PDSZ; k++) {
1705 				paddr_t pdirentp = (paddr_t)(u_long)&pdir[k];
1706 				if ((ptbl = (paddr_t *)(u_long)ldxa(
1707 					(vaddr_t)pdirentp, ASI_PHYS_CACHED))) {
1708 					for (j=0; j<PTSZ; j++) {
1709 						int64_t data;
1710 						data  = ldxa((vaddr_t)&ptbl[j],
1711 							ASI_PHYS_CACHED);
1712 						if (data&TLB_V &&
1713 						    IS_VM_PHYSADDR(data&TLB_PA_MASK)) {
1714 							paddr_t pa;
1715 							pv_entry_t pv;
1716 
1717 #ifdef DEBUG
1718 							printf("pmap_release: pm=%p page %llx still in use\n", pm,
1719 							       (unsigned long long)(((u_int64_t)i<<STSHIFT)|((u_int64_t)k<<PDSHIFT)|((u_int64_t)j<<PTSHIFT)));
1720 							Debugger();
1721 #endif
1722 							/* Save REF/MOD info */
1723 							pa = data&TLB_PA_MASK;
1724 							pv = pa_to_pvh(pa);
1725 							if (data & TLB_ACCESS)
1726 								pv->pv_va |=
1727 									PV_REF;
1728 							if (data & (TLB_MODIFY))
1729 								pv->pv_va |=
1730 									PV_MOD;
1731 
1732 							pmap_remove_pv(pm,
1733 								       (long)((u_int64_t)i<<STSHIFT)|((long)k<<PDSHIFT)|((long)j<<PTSHIFT),
1734 								       pa);
1735 						}
1736 					}
1737 					stxa(pdirentp, ASI_PHYS_CACHED, NULL);
1738 					pmap_free_page((paddr_t)(u_long)ptbl);
1739 				}
1740 			}
1741 			stxa(psegentp, ASI_PHYS_CACHED, NULL);
1742 			pmap_free_page((paddr_t)(u_long)pdir);
1743 		}
1744 	}
1745 	tmp = (paddr_t)(u_long)pm->pm_segs;
1746 	pm->pm_segs = NULL;
1747 	pmap_free_page(tmp);
1748 #ifdef NOTDEF_DEBUG
1749 	for (i=0; i<physmem; i++) {
1750 		struct pv_entry *pv;
1751 		for (pv = &pv_table[i]; pv; pv=pv->pv_next) {
1752 			if (pv->pv_pmap == pm) {
1753 				printf("pmap_release(): unreferenced pv#%d pa=%lx va=%lx pm=%p\n",
1754 				       i, (u_long)ptoa(first_phys_addr+i), (u_long)pv->pv_va, pm);
1755 				Debugger();
1756 				pmap_remove_pv(pm, pv->pv_va, i);
1757 				break;
1758 			}
1759 		}
1760 	}
1761 #endif
1762 	splx(s);
1763 	simple_unlock(&pm->pm_lock);
1764 	ctx_free(pm);
1765 }
1766 
1767 /*
1768  * Copy the range specified by src_addr/len
1769  * from the source map to the range dst_addr/len
1770  * in the destination map.
1771  *
1772  * This routine is only advisory and need not do anything.
1773  */
1774 void
1775 pmap_copy(dst_pmap, src_pmap, dst_addr, len, src_addr)
1776 	struct pmap *dst_pmap, *src_pmap;
1777 	vaddr_t dst_addr, src_addr;
1778 	vsize_t len;
1779 {
1780 #ifdef DEBUG
1781 	if (pmapdebug&PDB_CREATE)
1782 		printf("pmap_copy(%p, %p, %p, %lx, %p)\n",
1783 		       dst_pmap, src_pmap, (void *)(u_long)dst_addr,
1784 		       (u_long)len, (void *)(u_long)src_addr);
1785 #endif
1786 }
1787 
1788 /*
1789  * Garbage collects the physical map system for
1790  * pages which are no longer used.
1791  * Success need not be guaranteed -- that is, there
1792  * may well be pages which are not referenced, but
1793  * others may be collected.
1794  * Called by the pageout daemon when pages are scarce.
1795  */
1796 void
1797 pmap_collect(pm)
1798 	struct pmap *pm;
1799 {
1800 #if 1
1801 	int i, j, k, n, m, s;
1802 	paddr_t *pdir, *ptbl;
1803 	/* This is a good place to scan the pmaps for page tables with
1804 	 * no valid mappings in them and free them. */
1805 
1806 	/* NEVER GARBAGE COLLECT THE KERNEL PMAP */
1807 	if (pm == pmap_kernel()) return;
1808 
1809 	s = splvm();
1810 	simple_lock(&pm->pm_lock);
1811 	for (i=0; i<STSZ; i++) {
1812 		if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
1813 			m = 0;
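			/*
			 * m counts the page table pages found under this
			 * directory and n counts the valid PTEs in each page
			 * table page; only completely empty levels are freed.
			 */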
1814 			for (k=0; k<PDSZ; k++) {
1815 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
1816 					m++;
1817 					n = 0;
1818 					for (j=0; j<PTSZ; j++) {
1819 						int64_t data = ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED);
1820 						if (data&TLB_V)
1821 							n++;
1822 					}
1823 					if (!n) {
1824 						/* Free the damn thing */
1825 						stxa((paddr_t)(u_long)&pdir[k], ASI_PHYS_CACHED, 0);
1826 						pmap_free_page((paddr_t)
1827 							(u_long)ptbl);
1828 					}
1829 				}
1830 			}
1831 			if (!m) {
1832 				/* Free the damn thing */
1833 				stxa((paddr_t)(u_long)&pm->pm_segs[i], ASI_PHYS_CACHED, 0);
1834 				pmap_free_page((paddr_t)(u_long)pdir);
1835 			}
1836 		}
1837 	}
1838 	simple_unlock(&pm->pm_lock);
1839 	splx(s);
1840 #endif
1841 }
1842 
1843 #if 0
1844 /*
1845  * The following two routines are now in locore.s so I can code them in assembly
1846  * They can bypass the MMU or use VIS bcopy extensions for speed.
1847  */
1848 /*
1849  * Fill the given physical page with zeroes.
1850  */
1851 void
1852 pmap_zero_page(pa)
1853 	paddr_t pa;
1854 {
1855 	/*
1856 	 * We don't need to worry about flushing caches
1857 	 * since all our virtual caches are write-through.
1858 	 * All we need to do is map the page in somewhere, bzero it,
1859 	 * and unmap it.  However, we need to be sure we don't
1860 	 * map it in anywhere near the kernel or we may lose, badly.
1861 	 */
1862 	bzero((caddr_t)pa, NBPG);
1863 }
1864 
1865 /*
1866  * Copy the given physical source page to its destination.
1867  *
1868  * I will code this in assembly RSN.
1869  */
1870 void
1871 pmap_copy_page(src, dst)
1872 	paddr_t src, dst;
1873 {
1874 	bcopy((caddr_t)src, (caddr_t)dst, NBPG);
1875 }
1876 #endif
1877 
1878 /*
1879  * Activate the address space for the specified process.  If the
1880  * process is the current process, load the new MMU context.
1881  */
1882 void
1883 pmap_activate(p)
1884 	struct proc *p;
1885 {
1886 	pmap_t pmap = p->p_vmspace->vm_map.pmap;
1887 	int s;
1888 
1889 	/*
1890 	 * This is essentially the same thing that happens in cpu_switch()
1891 	 * when the newly selected process is about to run, except that we
1892 	 * have to make sure to clean the register windows before we set
1893 	 * the new context.
1894 	 */
1895 
1896 	s = splvm();
1897 	if (p == curproc) {
1898 		write_user_windows();
1899 		if (pmap->pm_ctx == NULL)
1900 			ctx_alloc(pmap);
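		/* Load the new context number into the DMMU secondary context register. */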
1901 		stxa(CTX_SECONDARY, ASI_DMMU, pmap->pm_ctx);
1902 	}
1903 	splx(s);
1904 }
1905 
1906 /*
1907  * Deactivate the address space of the specified process.
1908  */
1909 void
1910 pmap_deactivate(p)
1911 	struct proc *p;
1912 {
1913 }
1914 
1915 /*
1916  * pmap_kenter_pa:		[ INTERFACE ]
1917  *
1918  *	Enter a va -> pa mapping into the kernel pmap without any
1919  *	physical->virtual tracking.
1920  *
1921  *	Note: no locking is necessary in this function.
1922  */
1923 void
1924 pmap_kenter_pa(va, pa, prot)
1925 	vaddr_t va;
1926 	paddr_t pa;
1927 	vm_prot_t prot;
1928 {
1929 	pte_t tte;
1930 	paddr_t pg;
1931 	struct pmap *pm = pmap_kernel();
1932 	int i, s;
1933 
1934 	ASSERT(va < INTSTACK || va > EINTSTACK);
1935 	ASSERT(va < kdata || va > ekdata);
1936 
1937 	/*
1938 	 * Construct the TTE.
1939 	 */
1940 	s = splvm();
1941 #if 0
1942 	/* Not needed -- all operations are atomic. */
1943 	simple_lock(&pm->pm_lock);
1944 #endif
1945 #ifdef DEBUG
1946 	enter_stats.unmanaged ++;
1947 #endif
1948 #ifdef DEBUG
1949 	if (pa & (PMAP_NVC|PMAP_NC))
1950 		enter_stats.ci ++;
1951 #endif
1952 	tte.tag = TSB_TAG(0,pm->pm_ctx,va);
1953 	tte.data = TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1954 				 (VM_PROT_WRITE & prot),
1955 				 (!(pa & PMAP_NC)), pa & (PMAP_NVC), 1, 0);
1956 	/* We don't track modification here. */
1957 	if (VM_PROT_WRITE & prot) tte.data |= TLB_REAL_W|TLB_W; /* HWREF -- XXXX */
1958 	tte.data |= TLB_TSB_LOCK;	/* wired */
1959 	ASSERT((tte.data & TLB_NFO) == 0);
1960 	pg = NULL;
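	/*
	 * pseg_set() returns 1 when it needs a spare page to grow the
	 * page table tree, in which case we allocate one and retry; a
	 * return of 2 means the spare we handed it was not consumed and
	 * is freed below.
	 */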
1961 	while ((i = pseg_set(pm, va, tte.data, pg)) == 1) {
1962 		pg = NULL;
1963 		if (!pmap_get_page(&pg, NULL))
1964 			panic("pmap_kenter_pa: no pages");
1965 #ifdef DEBUG
1966 		enter_stats.ptpneeded ++;
1967 #endif
1968 	}
1969 	if (i == 2) {
1970 		/* We allocated a spare page but didn't use it.  Free it. */
1971 		printf("pmap_kenter_pa: freeing unused page %llx\n",
1972 		       (long long)pg);
1973 		pmap_free_page(pg);
1974 	}
1975 #ifdef DEBUG
1976 	i = ptelookup_va(va);
1977 	if( pmapdebug & PDB_ENTER )
1978 		prom_printf("pmap_kenter_pa: va=%08x tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n", va,
1979 			    (int)(tte.tag>>32), (int)tte.tag,
1980 			    (int)(tte.data>>32), (int)tte.data,
1981 			    i, &tsb[i]);
1982 	if( pmapdebug & PDB_MMU_STEAL && tsb[i].data ) {
1983 		prom_printf("pmap_kenter_pa: evicting entry tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n",
1984 			    (int)(tsb[i].tag>>32), (int)tsb[i].tag,
1985 			    (int)(tsb[i].data>>32), (int)tsb[i].data,
1986 			    i, &tsb[i]);
1987 		prom_printf("with va=%08x tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n", va,
1988 			    (int)(tte.tag>>32), (int)tte.tag,
1989 			    (int)(tte.data>>32), (int)tte.data,
1990 			    i, &tsb[i]);
1991 	}
1992 #endif
1993 #if 0
1994 /* Not needed -- all operations are atomic. */
1995 	simple_unlock(&pm->pm_lock);
1996 #endif
1997 	splx(s);
1998 	ASSERT((tsb[i].data & TLB_NFO) == 0);
1999 }
2000 
2001 /*
2002  * pmap_kremove:		[ INTERFACE ]
2003  *
2004  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
2005  *	for size bytes (assumed to be page rounded).
2006  */
2007 #if 0
2008 void
2009 pmap_kremove(va, size)
2010 	vaddr_t va;
2011 	vsize_t size;
2012 {
2013 	return pmap_remove(pmap_kernel(), va, va+size);
2014 }
2015 #else
2016 void
2017 pmap_kremove(va, size)
2018 	vaddr_t va;
2019 	vsize_t size;
2020 {
2021 	struct pmap *pm = pmap_kernel();
2022 	int64_t data;
2023 	int i, s, flush = 0;
2024 
2025 	ASSERT(va < INTSTACK || va > EINTSTACK);
2026 	ASSERT(va < kdata || va > ekdata);
2027 
2028 	s = splvm();
2029 	simple_lock(&pm->pm_lock);
2030 #ifdef DEBUG
2031 	if (pmapdebug & PDB_DEMAP) {
2032 		printf("pmap_kremove: start %p size %lx\n",
2033 		    (void *)(u_long)va, size);
2034 	}
2035 #endif
2036 	while (size >= NBPG) {
2037 		/*
2038 		 * Is this part of the permanent 4MB mapping?
2039 		 */
2040 #ifdef DIAGNOSTIC
2041 		if (pm == pmap_kernel() &&
2042 			(va >= ktext && va < roundup(ekdata, 4*MEG)))
2043 			panic("pmap_kremove: va=%08x in locked TLB\r\n",
2044 				(u_int)va);
2045 #endif
2046 		/* Shouldn't need to do this if the entry's not valid. */
2047 		if ((data = pseg_get(pm, va))) {
2048 			paddr_t entry;
2049 
2050 			flush |= 1;
2051 			entry = (data&TLB_PA_MASK);
2052 			/* We need to flip the valid bit and clear the access statistics. */
2053 			if (pseg_set(pm, va, 0, 0)) {
2054 				printf("pmap_kremove: gotten pseg empty!\n");
2055 				Debugger();
2056 				/* panic? */
2057 			}
2058 #ifdef DEBUG
2059 			if (pmapdebug & PDB_DEMAP)
2060 				printf("pmap_kremove: clearing seg %x pdir %x pte %x\n",
2061 				       (int)va_to_seg(va), (int)va_to_dir(va),
2062 				       (int)va_to_pte(va));
2063 			remove_stats.removes ++;
2064 #endif
2065 
2066 			i = ptelookup_va(va);
2067 			if (tsb[i].tag > 0
2068 			    && tsb[i].tag == TSB_TAG(0,pm->pm_ctx,va))
2069 			{
2070 				/*
2071 				 * Invalidate the TSB
2072 				 *
2073 				 * While we can invalidate it by clearing the
2074 				 * valid bit:
2075 				 *
2076 				 * ptp->data_v = 0;
2077 				 *
2078 				 * it's faster to just store 1 doubleword.
2079 				 */
2080 #ifdef DEBUG
2081 				if (pmapdebug & PDB_DEMAP)
2082 					printf(" clearing TSB [%d]\n", i);
2083 #endif
2084 				tsb[i].data = 0LL;
2085 				ASSERT((tsb[i].data & TLB_NFO) == 0);
2086 				/* Flush the TLB */
2087 			}
2088 #ifdef DEBUG
2089 			remove_stats.tflushes ++;
2090 #endif
2091 			/* Here we assume nothing can get into the TLB unless it has a PTE */
2092 			tlb_flush_pte(va, pm->pm_ctx);
2093 		}
2094 		va += NBPG;
2095 		size -= NBPG;
2096 	}
2097 	if (flush) {
2098 #ifdef DEBUG
2099 		remove_stats.flushes ++;
2100 #endif
2101 	}
2102 	simple_unlock(&pm->pm_lock);
2103 	splx(s);
2104 }
2105 #endif
2106 
2107 /*
2108  * Insert physical page at pa into the given pmap at virtual address va.
2109  * Supports 64-bit pa so we can map I/O space.
2110  */
2111 int
2112 pmap_enter(pm, va, pa, prot, flags)
2113 	struct pmap *pm;
2114 	vaddr_t va;
2115 	u_int64_t pa;
2116 	vm_prot_t prot;
2117 	int flags;
2118 {
2119 	pte_t tte;
2120 	paddr_t pg;
2121 	int i, s, aliased = 0;
2122 	pv_entry_t pv = NULL;
2123 	int size = 0; /* PMAP_SZ_TO_TTE(pa); */
2124 	boolean_t wired = (flags & PMAP_WIRED) != 0;
2125 
2126 	/*
2127 	 * Is this part of the permanent mappings?
2128 	 */
2129 	ASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
2130 	ASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
2131 
2132 #ifdef DEBUG
2133 	/* Trap mapping of page zero */
2134 	if (va == 0) {
2135 		prom_printf("pmap_enter: NULL va=%08x pa=%x:%08x\r\n",
2136 			    va, (int)(pa>>32), (int)pa);
2137 		OF_enter();
2138 	}
2139 #endif
2140 	/*
2141 	 * XXXX If a mapping at this address already exists, remove it.
2142 	 */
2143 	s = splvm();
2144 	simple_lock(&pm->pm_lock);
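	/*
	 * The TTE valid bit is bit 63, so a negative value from pseg_get()
	 * means a mapping is already present; remove it before installing
	 * the new one.
	 */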
2145 	if ((tte.data = pseg_get(pm, va))<0) {
2146 		simple_unlock(&pm->pm_lock);
2147 		pmap_remove(pm, va, va+NBPG-1);
2148 		simple_lock(&pm->pm_lock);
2149 		tte.data = pseg_get(pm, va);
2150 	}
2151 
2152 	/*
2153 	 * Construct the TTE.
2154 	 */
2155 	if (IS_VM_PHYSADDR(pa)) {
2156 		pv = pa_to_pvh(pa);
2157 		aliased = (pv->pv_va&(PV_ALIAS|PV_NVC));
2158 #ifdef DIAGNOSTIC
2159 		if ((flags & VM_PROT_ALL) & ~prot)
2160 			panic("pmap_enter: access_type exceeds prot");
2161 #endif
2162 		/* If we don't have the traphandler do it, set the ref/mod bits now */
2163 		if (flags & VM_PROT_ALL)
2164 			pv->pv_va |= PV_REF;
2165 		if (flags & VM_PROT_WRITE)
2166 			pv->pv_va |= PV_MOD;
2167 #ifdef DEBUG
2168 		enter_stats.managed ++;
2169 #endif
2170 	} else {
2171 #ifdef DEBUG
2172 		enter_stats.unmanaged ++;
2173 #endif
2174 		aliased = 0;
2175 	}
2176 	if (pa & PMAP_NVC) aliased = 1;
2177 #ifdef NO_VCACHE
2178 	aliased = 1; /* Disable D$ */
2179 #endif
2180 #ifdef DEBUG
2181 	enter_stats.ci ++;
2182 #endif
2183 	tte.data = TSB_DATA(0, size, pa, pm == pmap_kernel(),
2184 		(flags & VM_PROT_WRITE), (!(pa & PMAP_NC)),
2185 		aliased, 1, (pa & PMAP_LITTLE));
2186 #ifdef HWREF
2187 	if (prot & VM_PROT_WRITE) tte.data |= TLB_REAL_W;
2188 #else
2189 	/* If it needs ref accounting do nothing. */
2190 	if (!(flags & VM_PROT_READ)) {
2191 		simple_unlock(&pm->pm_lock);
2192 		splx(s);
2193 		if (wired) {
2194 			printf("pmap_enter: wired but not readable\n");
2195 			Debugger();
2196 		}
2197 		return 0;
2198 	}
2199 #endif
2200 	if (flags & VM_PROT_EXECUTE) {
2201 		if ((flags & (VM_PROT_READ|VM_PROT_WRITE)) == 0)
2202 			tte.data |= TLB_EXEC_ONLY|TLB_EXEC;
2203 		else
2204 			tte.data |= TLB_EXEC;
2205 	}
2206 	if (wired) tte.data |= TLB_TSB_LOCK;
2207 	ASSERT((tte.data & TLB_NFO) == 0);
2208 	pg = NULL;
2209 	while (pseg_set(pm, va, tte.data, pg) == 1) {
2210 		char *wmsg;
2211 
2212 		pg = NULL;
2213 		if ((flags & PMAP_CANFAIL) || (pm==pmap_kernel()))
2214 			wmsg = NULL;
2215 		else
2216 			wmsg = "pmap_enter";
2217 
2218 		if (!pmap_get_page(&pg, wmsg)) {
2219 			if (flags & PMAP_CANFAIL) {
				/* Don't return with the pmap locked at splvm. */
				simple_unlock(&pm->pm_lock);
				splx(s);
2220 				return (ENOMEM);
2221 			} else
2222 				panic("pmap_enter: no pages");
2223 		}
2224 #ifdef DEBUG
2225 		enter_stats.ptpneeded ++;
2226 #endif
2227 	}
2228 
2229 	if (pv)
2230 		pmap_enter_pv(pm, va, pa);
2231 	simple_unlock(&pm->pm_lock);
2232 	splx(s);
2233 	i = ptelookup_va(va);
2234 #ifdef DEBUG
2235 	if( pmapdebug & PDB_ENTER )
2236 		prom_printf("pmap_enter: va=%08x tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n", va,
2237 			    (int)(tte.tag>>32), (int)tte.tag,
2238 			    (int)(tte.data>>32), (int)tte.data,
2239 			    i, &tsb[i]);
2240 	if( pmapdebug & PDB_MMU_STEAL && tsb[i].data ) {
2241 		prom_printf("pmap_enter: evicting entry tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n",
2242 			    (int)(tsb[i].tag>>32), (int)tsb[i].tag,
2243 			    (int)(tsb[i].data>>32), (int)tsb[i].data,
2244 			    i, &tsb[i]);
2245 		prom_printf("with va=%08x tag=%x:%08x data=%08x:%08x tsb[%d]=%08x\r\n", va,
2246 			    (int)(tte.tag>>32), (int)tte.tag,
2247 			    (int)(tte.data>>32), (int)tte.data,
2248 			    i, &tsb[i]);
2249 	}
2250 #endif
2251 	if (pm->pm_ctx || pm == pmap_kernel()) {
2252 		if (tsb[i].tag > 0 &&
2253 		    tsb[i].tag == TSB_TAG(0,pm->pm_ctx,va)) {
2254 			/*
2255 			 * Invalidate the TSB
2256 			 *
2257 			 * While we can invalidate it by clearing the
2258 			 * valid bit:
2259 			 *
2260 			 * ptp->data_v = 0;
2261 			 *
2262 			 * it's faster to just store 1 doubleword.
2263 			 */
2264 			tsb[i].data = 0LL;
2265 			ASSERT((tsb[i].data & TLB_NFO) == 0);
2266 		}
2267 		/* Force reload -- protections may be changed */
2268 		tlb_flush_pte(va, pm->pm_ctx);
2269 		ASSERT((tsb[i].data & TLB_NFO) == 0);
2270 	}
2271 	if (pm != pmap_kernel() && (flags & VM_PROT_EXECUTE) != 0)
2272 		icache_flush_page(pa);
2273 
2274 	/* We will let the fast mmu miss interrupt load the new translation */
2275 	pv_check();
2276 	return 0;
2277 }
2278 
2279 /*
2280  * Remove the given range of mapping entries.
2281  */
2282 void
2283 pmap_remove(pm, va, endva)
2284 	struct pmap *pm;
2285 	vaddr_t va, endva;
2286 {
2287 	int i, s, flush=0;
2288 	int64_t data;
2289 	vaddr_t flushva = va;
2290 
2291 	/*
2292 	 * In here we should check each pseg and if there are no more entries,
2293 	 * free it.  It's just that linear scans of 8K pages get expensive.
2294 	 */
2295 
2296 	ASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
2297 	ASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
2298 
2299 	s = splvm();
2300 	simple_lock(&pm->pm_lock);
2301 #ifdef DEBUG
2302 	if (pmapdebug & PDB_REMOVE)
2303 		printf("pmap_remove(pm=%p, va=%p, endva=%p):", pm,
2304 		    (void *)(u_long)va, (void *)(u_long)endva);
2305 	remove_stats.calls ++;
2306 #endif
2307 
2308 	/* Now do the real work */
2309 	while (va < endva) {
2310 		/*
2311 		 * Is this part of the permanent 4MB mapping?
2312 		 */
2313 #ifdef DIAGNOSTIC
2314 		if (pm == pmap_kernel() && va >= ktext &&
2315 			va < roundup(ekdata, 4*MEG))
2316 			panic("pmap_remove: va=%08x in locked TLB\r\n", (u_int)va);
2317 #endif
2318 		/* We don't really need to do this if the valid bit is not set... */
2319 		if ((data = pseg_get(pm, va))) {
2320 			paddr_t entry;
2321 
2322 			flush |= 1;
2323 			/* First remove it from the pv_table */
2324 			entry = (data&TLB_PA_MASK);
2325 			if (IS_VM_PHYSADDR(entry)) {
2326 				pv_entry_t pv;
2327 
2328 				/* Save REF/MOD info */
2329 				pv = pa_to_pvh(entry);
2330 				if (data & TLB_ACCESS) pv->pv_va |= PV_REF;
2331 				if (data & (TLB_MODIFY))  pv->pv_va |= PV_MOD;
2332 
2333 				pmap_remove_pv(pm, va, entry);
2334 			}
2335 			/* We need to flip the valid bit and clear the access statistics. */
2336 			if (pseg_set(pm, va, 0, 0)) {
2337 				printf("pmap_remove: gotten pseg empty!\n");
2338 				Debugger();
2339 				/* panic? */
2340 			}
2341 #ifdef DEBUG
2342 			if (pmapdebug & PDB_REMOVE)
2343 				printf(" clearing seg %x pte %x\n", (int)va_to_seg(va), (int)va_to_pte(va));
2344 			remove_stats.removes ++;
2345 #endif
2346 			if (!pm->pm_ctx && pm != pmap_kernel()) {
				/* No context: no TSB/TLB entries to flush. */
				va += NBPG;
				continue;
			}
2347 			i = ptelookup_va(va);
2348 			if (tsb[i].tag > 0
2349 			    && tsb[i].tag == TSB_TAG(0,pm->pm_ctx,va))
2350 			{
2351 				/*
2352 				 * Invalidate the TSB
2353 				 *
2354 				 * While we can invalidate it by clearing the
2355 				 * valid bit:
2356 				 *
2357 				 * ptp->data_v = 0;
2358 				 *
2359 				 * it's faster to just store 1 doubleword.
2360 				 */
2361 #ifdef DEBUG
2362 				if (pmapdebug & PDB_REMOVE)
2363 					printf(" clearing TSB [%d]\n", i);
2364 #endif
2365 				tsb[i].data = 0LL;
2366 				ASSERT((tsb[i].data & TLB_NFO) == 0);
2367 				/* Flush the TLB */
2368 			}
2369 #ifdef DEBUG
2370 			remove_stats.tflushes ++;
2371 #endif
2372 			/* Here we assume nothing can get into the TLB unless it has a PTE */
2373 			tlb_flush_pte(va, pm->pm_ctx);
2374 		}
2375 		va += NBPG;
2376 	}
2377 	simple_unlock(&pm->pm_lock);
2378 	splx(s);
2379 	if (flush) {
2380 #ifdef DEBUG
2381 		remove_stats.flushes ++;
2382 #endif
2383 		cache_flush_virt(flushva, endva - flushva);
2384 	}
2385 #ifdef DEBUG
2386 	if (pmapdebug & PDB_REMOVE)
2387 		printf("\n");
2388 #endif
2389 	pv_check();
2390 }
2391 
2392 /*
2393  * Change the protection on the specified range of this pmap.
2394  */
2395 void
2396 pmap_protect(pm, sva, eva, prot)
2397 	struct pmap *pm;
2398 	vaddr_t sva, eva;
2399 	vm_prot_t prot;
2400 {
2401 	int i, s;
2402 	paddr_t pa;
2403 	int64_t data;
2404 
2405 	ASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
2406 	ASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
2407 
2408 	if ((prot & (VM_PROT_WRITE|PMAP_WIRED)) == VM_PROT_WRITE)
2409 		return;
2410 
2411 	if (prot == VM_PROT_NONE) {
2412 		pmap_remove(pm, sva, eva);
2413 		return;
2414 	}
2415 
2416 	s = splvm();
2417 	simple_lock(&pm->pm_lock);
2418 	sva = sva & ~PGOFSET;
2419 	while (sva < eva) {
2420 		/*
2421 		 * Is this part of the permanent 4MB mapping?
2422 		 */
2423 		if (pm == pmap_kernel() && sva >= ktext &&
2424 			sva < roundup(ekdata, 4*MEG)) {
2425 			prom_printf("pmap_protect: va=%08x in locked TLB\r\n", sva);
2426 			OF_enter();
2427 			return;
2428 		}
2429 
2430 #ifdef DEBUG
2431 		if (pmapdebug & PDB_CHANGEPROT)
2432 			printf("pmap_protect: va %p\n", (void *)(u_long)sva);
2433 #endif
2434 		if (((data = pseg_get(pm, sva))&TLB_V) /*&& ((data&TLB_TSB_LOCK) == 0)*/) {
2435 			pa = data&TLB_PA_MASK;
2436 #ifdef DEBUG
2437 			if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2438 				printf("pmap_protect: va=%08x data=%x:%08x seg=%08x pte=%08x\r\n",
2439 					    (u_int)sva, (int)(pa>>32), (int)pa, (int)va_to_seg(sva), (int)va_to_pte(sva));
2440 /* Catch this before the assertion */
2441 			if (data & TLB_NFO) {
2442 				printf("pmap_protect: pm=%p  NFO mapping va=%x data=%x:%x\n",
2443 				       pm, (u_int)sva, (int)(data>>32), (int)data);
2444 				Debugger();
2445 			}
2446 #endif
2447 			if (IS_VM_PHYSADDR(pa)) {
2448 				pv_entry_t pv;
2449 
2450 				/* Save REF/MOD info */
2451 				pv = pa_to_pvh(pa);
2452 				if (data & TLB_ACCESS) pv->pv_va |= PV_REF;
2453 				if (data & (TLB_MODIFY))
2454 					pv->pv_va |= PV_MOD;
2455 			}
2456 			/* Just do the pmap and TSB, not the pv_list */
2457 			data &= ~(TLB_W|TLB_REAL_W);
2458 			/* Turn *ON* write to wired mappings. */
2459 			if ((prot & (VM_PROT_WRITE|PMAP_WIRED)) ==
2460 				(VM_PROT_WRITE|PMAP_WIRED))
2461 				data |= (TLB_W|TLB_REAL_W);
2462 			ASSERT((data & TLB_NFO) == 0);
2463 			if (pseg_set(pm, sva, data, 0)) {
2464 				printf("pmap_protect: gotten pseg empty!\n");
2465 				Debugger();
2466 				/* panic? */
2467 			}
2468 
2469 			if (!pm->pm_ctx && pm != pmap_kernel()) {
				/* No context: no TSB/TLB entries to flush. */
				sva += NBPG;
				continue;
			}
2470 			i = ptelookup_va(sva);
2471 			if (tsb[i].tag > 0
2472 			    && tsb[i].tag == TSB_TAG(0,pm->pm_ctx,sva)) {
2473 				tsb[i].data = data;
2474 				ASSERT((tsb[i].data & TLB_NFO) == 0);
2475 
2476 			}
2477 			tlb_flush_pte(sva, pm->pm_ctx);
2478 		}
2479 		sva += NBPG;
2480 	}
2481 	simple_unlock(&pm->pm_lock);
2482 	splx(s);
2483 	pv_check();
2484 }
2485 
2486 /*
2487  * Extract the physical page address associated
2488  * with the given map/virtual_address pair.
2489  */
2490 boolean_t
2491 pmap_extract(pm, va, pap)
2492 	register struct pmap *pm;
2493 	vaddr_t va;
2494 	paddr_t *pap;
2495 {
2496 	paddr_t pa;
2497 
2498 	if (pm == pmap_kernel() && va >= kdata &&
2499 		va < roundup(ekdata, 4*MEG)) {
2500 		/* Need to deal w/locked TLB entry specially. */
2501 		pa = (paddr_t) (kdatap - kdata + va);
2502 #ifdef DEBUG
2503 		if (pmapdebug & PDB_EXTRACT) {
2504 			printf("pmap_extract: va=%lx pa=%llx\n", (u_long)va, (unsigned long long)pa);
2505 		}
2506 #endif
2507 	} else if( pm == pmap_kernel() && va >= ktext && va < ektext ) {
2508 		/* Need to deal w/locked TLB entry specially. */
2509 		pa = (paddr_t) (ktextp - ktext + va);
2510 #ifdef DEBUG
2511 		if (pmapdebug & PDB_EXTRACT) {
2512 			printf("pmap_extract: va=%lx pa=%llx\n",
2513 			    (u_long)va, (unsigned long long)pa);
2514 		}
2515 #endif
2516 	} else {
2517 		int s;
2518 
2519 		s = splvm();
2520 		simple_lock(&pm->pm_lock);
2521 		pa = (pseg_get(pm, va)&TLB_PA_MASK)+(va&PGOFSET);
2522 #ifdef DEBUG
2523 		if (pmapdebug & PDB_EXTRACT) {
2524 			pa = ldxa((vaddr_t)&pm->pm_segs[va_to_seg(va)], ASI_PHYS_CACHED);
2525 			printf("pmap_extract: va=%p segs[%ld]=%llx", (void *)(u_long)va, (long)va_to_seg(va), (unsigned long long)pa);
2526 			if (pa) {
2527 				pa = (paddr_t)ldxa((vaddr_t)&((paddr_t*)(u_long)pa)[va_to_dir(va)], ASI_PHYS_CACHED);
2528 				printf(" segs[%ld][%ld]=%lx", (long)va_to_seg(va), (long)va_to_dir(va), (long)pa);
2529 			}
2530 			if (pa)	{
2531 				pa = (paddr_t)ldxa((vaddr_t)&((paddr_t*)(u_long)pa)[va_to_pte(va)], ASI_PHYS_CACHED);
2532 				printf(" segs[%ld][%ld][%ld]=%lx", (long)va_to_seg(va),
2533 				       (long)va_to_dir(va), (long)va_to_pte(va), (long)pa);
2534 			}
2535 			printf(" pseg_get: %lx\n", (long)pa);
2536 		}
2537 #endif
2538 		simple_unlock(&pm->pm_lock);
2539 		splx(s);
2540 	}
2541 	if (pa == 0)
2542 		return (FALSE);
2543 	if (pap != NULL)
2544 		*pap = pa;
2545 	return (TRUE);
2546 }
2547 
2548 /*
2549  * Return the number of bytes that pmap_dumpmmu() will dump.
2550  */
2551 int
2552 pmap_dumpsize()
2553 {
2554 	int	sz;
2555 
2556 	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
2557 	sz += memsize * sizeof(phys_ram_seg_t);
2558 
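	/* Round the total up to a whole number of disk blocks. */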
2559 	return btodb(sz + DEV_BSIZE - 1);
2560 }
2561 
2562 /*
2563  * Write the mmu contents to the dump device.
2564  * This gets appended to the end of a crash dump since
2565  * there is no in-core copy of the kernel's memory mappings.
2566  *
2567  * Write the core dump headers and MD data to the dump device.
2568  * We dump the following items:
2569  *
2570  *	kcore_seg_t		 MI header (defined in <sys/kcore.h>)
2571  *	cpu_kcore_hdr_t		 MD header (defined in <machine/kcore.h>)
2572  *	phys_ram_seg_t[memsize]  physical memory segments
2573  */
2574 int
2575 pmap_dumpmmu(dump, blkno)
2576 	register daddr_t blkno;
2577 	register int (*dump)	__P((dev_t, daddr_t, caddr_t, size_t));
2578 {
2579 	kcore_seg_t	*kseg;
2580 	cpu_kcore_hdr_t	*kcpu;
2581 	phys_ram_seg_t	memseg;
2582 	register int	error = 0;
2583 	register int	i, memsegoffset;
2584 	int		buffer[dbtob(1) / sizeof(int)];
2585 	int		*bp, *ep;
2586 
2587 #define EXPEDITE(p,n) do {						\
2588 	int *sp = (int *)(p);						\
2589 	int sz = (n);							\
2590 	while (sz > 0) {						\
2591 		*bp++ = *sp++;						\
2592 		if (bp >= ep) {						\
2593 			error = (*dump)(dumpdev, blkno,			\
2594 					(caddr_t)buffer, dbtob(1));	\
2595 			if (error != 0)					\
2596 				return (error);				\
2597 			++blkno;					\
2598 			bp = buffer;					\
2599 		}							\
2600 		sz -= 4;						\
2601 	}								\
2602 } while (0)
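/*
 * EXPEDITE() streams an object into the one-disk-block buffer[] a word at a
 * time and writes the buffer to the dump device whenever it fills; any
 * partial block left at the end is written out below after the memsegs.
 */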
2603 
2604 	/* Setup bookkeeping pointers */
2605 	bp = buffer;
2606 	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
2607 
2608 	/* Fill in MI segment header */
2609 	kseg = (kcore_seg_t *)bp;
2610 	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2611 	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
2612 
2613 	/* Fill in MD segment header (interpreted by MD part of libkvm) */
2614 	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
2615 	kcpu->cputype = CPU_SUN4U;
2616 	kcpu->kernbase = (u_int64_t)KERNBASE;
2617 	kcpu->cpubase = (u_int64_t)CPUINFO_VA;
2618 
2619 	/* Describe the locked text segment */
2620 	kcpu->ktextbase = (u_int64_t)ktext;
2621 	kcpu->ktextp = (u_int64_t)ktextp;
2622 	kcpu->ktextsz = (u_int64_t)ektextp - ktextp;
2623 
2624 	/* Describe locked data segment */
2625 	kcpu->kdatabase = (u_int64_t)kdata;
2626 	kcpu->kdatap = (u_int64_t)kdatap;
2627 	kcpu->kdatasz = (u_int64_t)ekdatap - kdatap;
2628 
2629 	/* Now the memsegs */
2630 	kcpu->nmemseg = memsize;
2631 	kcpu->memsegoffset = memsegoffset = ALIGN(sizeof(cpu_kcore_hdr_t));
2632 
2633 	/* Now we need to point this at our kernel pmap. */
2634 	kcpu->nsegmap = STSZ;
2635 	kcpu->segmapoffset = (u_int64_t)pmap_kernel()->pm_physaddr;
2636 
2637 	/* Note: we have assumed everything fits in buffer[] so far... */
2638 	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
2639 
2640 	for (i = 0; i < memsize; i++) {
2641 		memseg.start = mem[i].start;
2642 		memseg.size = mem[i].size;
2643 		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
2644 	}
2645 
2646 	if (bp != buffer)
2647 		error = (*dump)(dumpdev, blkno++, (caddr_t)buffer, dbtob(1));
2648 
2649 	return (error);
2650 }
2651 
2652  * Determine (non)existence of physical page
2653  * Determine (non)existance of physical page
2654  */
2655 int pmap_pa_exists(pa)
2656 paddr_t pa;
2657 {
2658 	register struct mem_region *mp;
2659 
2660 	/* Just go through physical memory list & see if we're there */
2661 	for (mp = mem; mp->size && mp->start <= pa; mp++)
2662 		if (mp->start <= pa && mp->start + mp->size >= pa)
2663 			return 1;
2664 	return 0;
2665 }
2666 
2667 /*
2668  * Lookup the appropriate TSB entry.
2669  *
2670  * Here is the full official pseudo code:
2671  *
2672  */
2673 
2674 #ifdef NOTYET
2675 int64 GenerateTSBPointer(
2676  	int64 va,		/* Missing VA			*/
2677  	PointerType type,	/* 8K_POINTER or 16K_POINTER	*/
2678  	int64 TSBBase,		/* TSB Register[63:13] << 13	*/
2679  	Boolean split,		/* TSB Register[12]		*/
2680  	int TSBSize)		/* TSB Register[2:0]		*/
2681 {
2682  	int64 vaPortion;
2683  	int64 TSBBaseMask;
2684  	int64 splitMask;
2685 
2686 	/* TSBBaseMask marks the bits from TSB Base Reg		*/
2687 	TSBBaseMask = 0xffffffffffffe000 <<
2688 		(split? (TSBsize + 1) : TSBsize);
2689 
2690 	/* Shift va towards lsb appropriately and		*/
2691 	/* zero out the original va page offset			*/
2692 	vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
2693 		0xfffffffffffffff0;
2694 
2695 	if (split) {
2696 		/* There's only one bit in question for split	*/
2697 		splitMask = 1 << (13 + TSBsize);
2698 		if (type == 8K_POINTER)
2699 			/* Make sure we're in the lower half	*/
2700 			vaPortion &= ~splitMask;
2701 		else
2702 			/* Make sure we're in the upper half	*/
2703 			vaPortion |= splitMask;
2704 	}
2705 	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
2706 }
2707 #endif
2708 /*
2709  * Of course, since we are not using a split TSB or variable page sizes,
2710  * we can optimize this a bit.
2711  *
2712  * The following only works for a unified 8K TSB.  It will find the slot
2713  * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
2714  */
2715 int
2716 ptelookup_va(va)
2717 	vaddr_t va;
2718 {
2719 	long tsbptr;
2720 #define TSBBASEMASK	(0xffffffffffffe000LL<<tsbsize)
2721 
2722 	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK );
2723 	return (tsbptr/sizeof(pte_t));
2724 }
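/*
 * A worked example (a sketch, assuming tsbsize == 0, i.e. a 512-entry TSB):
 * TSBBASEMASK is then 0xffffffffffffe000, so the expression above keeps bits
 * <12:4> of (va >> 9), and after dividing by sizeof(pte_t) (16 bytes) the
 * returned index is simply (va >> 13) & 511 -- the 8K virtual page number
 * modulo the number of TSB entries.
 */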
2725 
2726 #if notyet
2727 void
2728 tsb_enter(ctx, va, data)
2729 	int ctx;
2730 	int64_t va;
2731 	int64_t data;
2732 {
2733 	int i, s;
2734 	int64_t pa;
2735 
2736 	i = ptelookup_va(va);
2737 	s = splvm();
2738 	pa = tsb[i].data&TLB_PA_MASK;
2739 	/*
2740 	 * If we use fast DMMU access fault handlers to track
2741 	 * referenced and modified bits, we should save the
2742 	 * TSB entry's state here.  Since we don't, we don't.
2743 	 */
2744 	/* Do not use global entries */
2745 	tsb[i].tag = TSB_TAG(0,ctx,va);
2746 	tsb[i].data = data;
2747 	tlb_flush_pte(va, ctx);	/* Force reload -- protections may be changed */
2748 	splx(s);
2749 }
2750 #endif
2751 
2752 /*
2753  * Do whatever is needed to sync the MOD/REF flags
2754  */
2755 
2756 boolean_t
2757 pmap_clear_modify(pg)
2758 	struct vm_page *pg;
2759 {
2760 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2761 	int changed = 0;
2762 #ifdef DEBUG
2763 	int modified = 0;
2764 #endif
2765 	int i, s;
2766 	register pv_entry_t pv;
2767 
2768 #ifdef DEBUG
2769 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2770 		printf("pmap_clear_modify(%llx)\n", (unsigned long long)pa);
2771 #endif
2772 
2773 #if defined(DEBUG)
2774 	modified = pmap_is_modified(pg);
2775 #endif
2776 	/* Clear all mappings */
2777 	s = splvm();
2778 	pv = pa_to_pvh(pa);
2779 #ifdef DEBUG
2780 	if (pv->pv_va & PV_MOD)
2781 		pv->pv_va |= PV_WE;	/* Remember this was modified */
2782 #endif
2783 	if (pv->pv_va & PV_MOD)
2784 		changed |= 1;
2785 	pv->pv_va &= ~(PV_MOD);
2786 #ifdef DEBUG
2787 	if (pv->pv_next && !pv->pv_pmap) {
2788 		printf("pmap_clear_modify: npv but no pmap for pv %p\n", pv);
2789 		Debugger();
2790 	}
2791 #endif
2792 	if (pv->pv_pmap != NULL)
2793 		for (; pv; pv = pv->pv_next) {
2794 			int64_t data;
2795 
2796 
2797 			simple_lock(&pv->pv_pmap->pm_lock);
2798 			/* First clear the mod bit in the PTE and make it R/O */
2799 			data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
2800 			/* Need to both clear the modify and write bits */
2801 			if (data & (TLB_MODIFY))
2802 				changed |= 1;
2803 #ifdef HWREF
2804 			data &= ~(TLB_MODIFY|TLB_W);
2805 #else
2806 			data &= ~(TLB_MODIFY|TLB_W|TLB_REAL_W);
2807 #endif
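			/*
			 * With HWREF, TLB_REAL_W is left set so a later
			 * write can fault and turn TLB_W/TLB_MODIFY back on;
			 * without HWREF the software write bit is cleared
			 * here as well.
			 */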
2808 			ASSERT((data & TLB_NFO) == 0);
2809 			if (pseg_set(pv->pv_pmap, pv->pv_va&PV_VAMASK, data, 0)) {
2810 				printf("pmap_clear_modify: gotten pseg empty!\n");
2811 				Debugger();
2812 				/* panic? */
2813 			}
2814 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2815 				i = ptelookup_va(pv->pv_va&PV_VAMASK);
2816 				if (tsb[i].tag == TSB_TAG(0, pv->pv_pmap->pm_ctx, pv->pv_va&PV_VAMASK))
2817 					tsb[i].data = /* data */ 0;
2818 				tlb_flush_pte(pv->pv_va&PV_VAMASK,
2819 					pv->pv_pmap->pm_ctx);
2820 			}
2821 			/* Then clear the mod bit in the pv */
2822 			if (pv->pv_va & PV_MOD)
2823 				changed |= 1;
2824 			pv->pv_va &= ~(PV_MOD);
2825 			simple_unlock(&pv->pv_pmap->pm_lock);
2826 		}
2827 	splx(s);
2828 	pv_check();
2829 #ifdef DEBUG
2830 	if (pmap_is_modified(pg)) {
2831 		printf("pmap_clear_modify(): %p still modified!\n", pg);
2832 		Debugger();
2833 	}
2834 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2835 		printf("pmap_clear_modify: page %lx %s\n", (long)pa,
2836 		       (changed?"was modified":"was not modified"));
2837 	if (modified != changed) {
2838 		printf("pmap_clear_modify: modified %d changed %d\n", modified, changed);
2839 		Debugger();
2840 	} else return (modified);
2841 #endif
2842 	return (changed);
2843 }
2844 
2845 boolean_t
2846 pmap_clear_reference(pg)
2847 	struct vm_page* pg;
2848 {
2849 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2850 	int changed = 0;
2851 #ifdef DEBUG
2852 	int referenced = 0;
2853 #endif
2854 	int i, s;
2855 	register pv_entry_t pv;
2856 
2857 #ifdef DEBUG
2858 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2859 		printf("pmap_clear_reference(%llx)\n", (unsigned long long)pa);
2860 	referenced = pmap_is_referenced(pg);
2861 #endif
2862 	/* Clear all references */
2863 	s = splvm();
2864 	pv = pa_to_pvh(pa);
2865 #ifdef NOT_DEBUG
2866 	if (pv->pv_va & PV_MOD)
2867 		printf("pmap_clear_reference(): pa %lx still modified\n", (long)pa);
2868 #endif
2869 	if (pv->pv_va & PV_REF)
2870 		changed |= 1;
2871 	pv->pv_va &= ~(PV_REF);
2872 #ifdef DEBUG
2873 	if (pv->pv_next && !pv->pv_pmap) {
2874 		printf("pmap_clear_reference: npv but no pmap for pv %p\n", pv);
2875 		Debugger();
2876 	}
2877 #endif
2878 	if (pv->pv_pmap != NULL) {
2879 		for (; pv; pv = pv->pv_next) {
2880 			int64_t data;
2881 
2882 			simple_lock(&pv->pv_pmap->pm_lock);
2883 			data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
2884 #ifdef DEBUG
2885 			if (pmapdebug & PDB_CHANGEPROT)
2886 				printf("clearing ref pm:%p va:%p ctx:%lx data:%x:%x\n", pv->pv_pmap,
2887 				       (void *)(u_long)pv->pv_va, (u_long)pv->pv_pmap->pm_ctx, (int)(data>>32), (int)data);
2888 #endif
2889 #ifdef HWREF
2890 			if (data & TLB_ACCESS)
2891 				changed |= 1;
2892 			data &= ~TLB_ACCESS;
2893 #else
2894 			if (data < 0)
2895 				changed |= 1;
2896 			data = 0;
2897 #endif
2898 			ASSERT((data & TLB_NFO) == 0);
2899 			if (pseg_set(pv->pv_pmap, pv->pv_va&PV_VAMASK, data, 0)) {
2900 				printf("pmap_clear_reference: gotten pseg empty!\n");
2901 				Debugger();
2902 				/* panic? */
2903 			}
2904 			if (pv->pv_pmap->pm_ctx ||
2905 				pv->pv_pmap == pmap_kernel()) {
2906 				i = ptelookup_va(pv->pv_va&PV_VAMASK);
2907 				/* Invalidate our TSB entry since ref info is in the PTE */
2908 				if (tsb[i].tag ==
2909 					TSB_TAG(0,pv->pv_pmap->pm_ctx,pv->pv_va&
2910 						PV_VAMASK))
2911 					tsb[i].data = 0;
2912 /*
2913 				tlb_flush_pte(pv->pv_va&PV_VAMASK,
2914 					pv->pv_pmap->pm_ctx);
2915 */
2916 			}
2917 			if (pv->pv_va & PV_REF)
2918 				changed |= 1;
2919 			pv->pv_va &= ~(PV_REF);
2920 			simple_unlock(&pv->pv_pmap->pm_lock);
2921 		}
2922 	}
2923 	splx(s);
2924 	pv_check();
2925 #ifdef DEBUG
2926 	if (pmap_is_referenced(pg)) {
2927 		printf("pmap_clear_reference(): %p still referenced!\n", pg);
2928 		Debugger();
2929 	}
2930 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF))
2931 		printf("pmap_clear_reference: page %lx %s\n", (long)pa,
2932 		       (changed?"was referenced":"was not referenced"));
2933 	if (referenced != changed) {
2934 		printf("pmap_clear_reference: referenced %d changed %d\n", referenced, changed);
2935 		Debugger();
2936 	} else return (referenced);
2937 #endif
2938 	return (changed);
2939 }
2940 
2941 boolean_t
2942 pmap_is_modified(pg)
2943 	struct vm_page* pg;
2944 {
2945 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2946 	int i=0, s;
2947 	register pv_entry_t pv, npv;
2948 
2949 	/* Check if any mapping has been modified */
2950 	s = splvm();
2951 	pv = pa_to_pvh(pa);
2952 	if (pv->pv_va&PV_MOD) i = 1;
2953 #ifdef HWREF
2954 #ifdef DEBUG
2955 	if (pv->pv_next && !pv->pv_pmap) {
2956 		printf("pmap_is_modified: npv but no pmap for pv %p\n", pv);
2957 		Debugger();
2958 	}
2959 #endif
2960 	if (!i && (pv->pv_pmap != NULL))
2961 		for (npv = pv; i == 0 && npv && npv->pv_pmap; npv = npv->pv_next) {
2962 			int64_t data;
2963 
2964 			data = pseg_get(npv->pv_pmap, npv->pv_va&PV_VAMASK);
2965 			if (data & (TLB_MODIFY)) i = 1;
2966 			/* Migrate modify info to head pv */
2967 			if (npv->pv_va & PV_MOD) i = 1;
2968 			npv->pv_va &= ~PV_MOD;
2969 		}
2970 	/* Save modify info */
2971 	if (i) pv->pv_va |= PV_MOD;
2972 #ifdef DEBUG
2973 	if (i) pv->pv_va |= PV_WE;
2974 #endif
2975 #endif
2976 	splx(s);
2977 
2978 #ifdef DEBUG
2979 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF)) {
2980 		printf("pmap_is_modified(%llx) = %d\n", (unsigned long long)pa, i);
2981 		/* if (i) Debugger(); */
2982 	}
2983 #endif
2984 	pv_check();
2985 	return (i);
2986 }
2987 
2988 boolean_t
2989 pmap_is_referenced(pg)
2990 	struct vm_page* pg;
2991 {
2992 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2993 	int i=0, s;
2994 	register pv_entry_t pv, npv;
2995 
2996 	/* Check if any mapping has been referenced */
2997 	s = splvm();
2998 	pv = pa_to_pvh(pa);
2999 	if (pv->pv_va&PV_REF) i = 1;
3000 #ifdef HWREF
3001 #ifdef DEBUG
3002 	if (pv->pv_next && !pv->pv_pmap) {
3003 		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
3004 		Debugger();
3005 	}
3006 #endif
3007 	if (!i && (pv->pv_pmap != NULL))
3008 		for (npv = pv; npv; npv = npv->pv_next) {
3009 			int64_t data;
3010 
3011 			data = pseg_get(npv->pv_pmap, npv->pv_va&PV_VAMASK);
3012 			if (data & TLB_ACCESS) i = 1;
3013 			/* Migrate ref info to head pv */
3014 			if (npv->pv_va & PV_REF) i = 1;
3015 			npv->pv_va &= ~PV_REF;
3016 		}
3017 	/* Save ref info */
3018 	if (i) pv->pv_va |= PV_REF;
3019 #endif
3020 	splx(s);
3021 
3022 #ifdef DEBUG
3023 	if (pmapdebug & (PDB_CHANGEPROT|PDB_REF)) {
3024 		printf("pmap_is_referenced(%llx) = %d\n", (unsigned long long)pa, i);
3025 		/* if (i) Debugger(); */
3026 	}
3027 #endif
3028 	pv_check();
3029 	return i;
3030 }
3031 
3032 
3033 
3034 /*
3035  *	Routine:	pmap_unwire
3036  *	Function:	Clear the wired attribute for a map/virtual-address
3037  *			pair.
3038  *	In/out conditions:
3039  *			The mapping must already exist in the pmap.
3040  */
3041 void
3042 pmap_unwire(pmap, va)
3043 	register pmap_t	pmap;
3044 	vaddr_t va;
3045 {
3046 	int64_t data;
3047 	int s;
3048 
3049 #ifdef DEBUG
3050 	if (pmapdebug & (PDB_MMU_STEAL)) /* XXXX Need another flag for this */
3051 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
3052 #endif
3053 	if (pmap == NULL) {
3054 		pv_check();
3055 		return;
3056 	}
3057 
3058 	/*
3059 	 * Is this part of the permanent 4MB mapping?
3060 	 */
3061 	if (pmap == pmap_kernel() && va >= ktext &&
3062 		va < roundup(ekdata, 4*MEG)) {
3063 		prom_printf("pmap_unwire: va=%08x in locked TLB\r\n", va);
3064 		OF_enter();
3065 		return;
3066 	}
3067 	s = splvm();
3068 	simple_lock(&pmap->pm_lock);
3069 	data = pseg_get(pmap, va&PV_VAMASK);
3070 
3071 	data &= ~TLB_TSB_LOCK;
3072 
3073 	if (pseg_set(pmap, va&PV_VAMASK, data, 0)) {
3074 		printf("pmap_unwire: gotten pseg empty!\n");
3075 		Debugger();
3076 		/* panic? */
3077 	}
3078 	simple_unlock(&pmap->pm_lock);
3079 	splx(s);
3080 	pv_check();
3081 }
3082 
3083 /*
3084  * Lower the protection on the specified physical page.
3085  *
3086  * Never enable writing as it will break COW
3087  */
3088 
3089 void
3090 pmap_page_protect(pg, prot)
3091 	struct vm_page* pg;
3092 	vm_prot_t prot;
3093 {
3094 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
3095 	register pv_entry_t pv;
3096 	register int i, s;
3097 	long long clear, set;
3098 	int64_t data = 0LL;
3099 
3100 #ifdef DEBUG
3101 	if (pmapdebug & PDB_CHANGEPROT)
3102 		printf("pmap_page_protect: pa %llx prot %x\n",
3103 			(unsigned long long)pa, prot);
3104 #endif
3105 
3106 	if (prot & VM_PROT_WRITE) {
3107 		pv_check();
3108 		return;
3109 	}
3110 
3111 	if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
3112 		/* copy_on_write */
3113 
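		/*
		 * Build the set/clear masks once: keep the mapping valid,
		 * strip the write bits, and grant or revoke execute
		 * permission according to prot; they are applied to every
		 * mapping on the pv list below.
		 */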
3114 		set = TLB_V;
3115 		clear = TLB_REAL_W|TLB_W;
3116 		if (VM_PROT_EXECUTE & prot)
3117 			set |= TLB_EXEC;
3118 		else
3119 			clear |= TLB_EXEC;
3120 		if (VM_PROT_EXECUTE == prot)
3121 			set |= TLB_EXEC_ONLY;
3122 
3123 		pv = pa_to_pvh(pa);
3124 		s = splvm();
3125 #ifdef DEBUG
3126 		if (pv->pv_next && !pv->pv_pmap) {
3127 			printf("pmap_page_protect: npv but no pmap for pv %p\n", pv);
3128 			Debugger();
3129 		}
3130 #endif
3131 		if (pv->pv_pmap != NULL) {
3132 			for (; pv; pv = pv->pv_next) {
3133 				simple_lock(&pv->pv_pmap->pm_lock);
3134 #ifdef DEBUG
3135 				if (pmapdebug & (PDB_CHANGEPROT|PDB_REF)) {
3136 					printf("pmap_page_protect: RO va %p of pa %p...\n",
3137 					    (void *)(u_long)pv->pv_va, (void *)(u_long)pa);
3138 				}
3139 #if 0
3140 				if (!pv->pv_pmap->pm_segs[va_to_seg(pv->pv_va&PV_VAMASK)]) {
3141 					printf("pmap_page_protect(%x:%x,%x): pv %x va %x not in pmap %x\n",
3142 					       (int)(pa>>32), (int)pa, prot, pv, pv->pv_va, pv->pv_pmap);
3143 					Debugger();
3144 					continue;
3145 				}
3146 #endif
3147 #endif
3148 				data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
3149 
3150 				/* Save REF/MOD info */
3151 				if (data & TLB_ACCESS) pv->pv_va |= PV_REF;
3152 				if (data & (TLB_MODIFY))
3153 					pv->pv_va |= PV_MOD;
3154 
3155 				data &= ~(clear);
3156 				data |= (set);
3157 				ASSERT((data & TLB_NFO) == 0);
3158 				if (pseg_set(pv->pv_pmap, pv->pv_va&PV_VAMASK, data, 0)) {
3159 					printf("pmap_page_protect: gotten pseg empty!\n");
3160 					Debugger();
3161 					/* panic? */
3162 				}
3163 				if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
3164 					i = ptelookup_va(pv->pv_va&PV_VAMASK);
3165 					/* since we already know the va for each mapping we don't need to scan the entire TSB */
3166 					if (tsb[i].tag == TSB_TAG(0, pv->pv_pmap->pm_ctx, pv->pv_va&PV_VAMASK))
3167 						tsb[i].data = /* data */ 0;
3168 					tlb_flush_pte(pv->pv_va&PV_VAMASK, pv->pv_pmap->pm_ctx);
3169 				}
3170 				simple_unlock(&pv->pv_pmap->pm_lock);
3171 			}
3172 		}
3173 		splx(s);
3174 	} else {
3175 		pv_entry_t npv, firstpv;
3176 		/* remove mappings */
3177 
3178 #ifdef DEBUG
3179 		if (pmapdebug & PDB_REMOVE)
3180 			printf("pmap_page_protect: demapping pa %lx\n", (long)pa);
3181 #endif
3182 
3183 		firstpv = pv = pa_to_pvh(pa);
3184 		s = splvm();
3185 
3186 		/* First remove the entire list of continuation pv's. */
3187 		for (npv = pv->pv_next; npv; npv = pv->pv_next) {
3188 			/* We're removing npv from pv->pv_next */
3189 			simple_lock(&npv->pv_pmap->pm_lock);
3190 #ifdef DEBUG
3191 			if (pmapdebug & (PDB_CHANGEPROT|PDB_REF|PDB_REMOVE)) {
3192 				printf("pmap_page_protect: demap va %p of pa %p in pmap %p...\n",
3193 				       (void *)(u_long)npv->pv_va, (void *)(u_long)pa, npv->pv_pmap);
3194 			}
3195 #if 0
3196 			if (!npv->pv_pmap->pm_segs[va_to_seg(npv->pv_va&PV_VAMASK)]) {
3197 				printf("pmap_page_protect(%x:%x,%x): pv %x va %x not in pmap %x\n",
3198 				       (int)(pa>>32), (int)pa, prot, npv, npv->pv_va, npv->pv_pmap);
3199 				Debugger();
3200 				continue;
3201 			}
3202 #endif
3203 #endif
3204 			/* clear the entry in the page table */
3205 			data = pseg_get(npv->pv_pmap, npv->pv_va&PV_VAMASK);
3206 
3207 			/* Save ref/mod info */
3208 			if (data & TLB_ACCESS)
3209 				firstpv->pv_va |= PV_REF;
3210 			if (data & (TLB_MODIFY))
3211 				firstpv->pv_va |= PV_MOD;
3212 			/* Clear mapping */
3213 			if (pseg_set(npv->pv_pmap, npv->pv_va&PV_VAMASK, 0, 0)) {
3214 				printf("pmap_page_protect: gotten pseg empty!\n");
3215 				Debugger();
3216 				/* panic? */
3217 			}
3218 			if (npv->pv_pmap->pm_ctx || npv->pv_pmap == pmap_kernel()) {
3219 				/* clear the entry in the TSB */
3220 				i = ptelookup_va(npv->pv_va&PV_VAMASK);
3221 				/* since we already know the va for each mapping we don't need to scan the entire TSB */
3222 				if (tsb[i].tag == TSB_TAG(0, npv->pv_pmap->pm_ctx, npv->pv_va&PV_VAMASK))
3223 					tsb[i].data = 0LL;
3224 				tlb_flush_pte(npv->pv_va&PV_VAMASK, npv->pv_pmap->pm_ctx);
3225 			}
3226 			simple_unlock(&npv->pv_pmap->pm_lock);
3227 
3228 			/* free the pv */
3229 			pv->pv_next = npv->pv_next;
3230 			pool_put(&pv_pool, npv);
3231 		}
3232 
3233 		pv = firstpv;
3234 
3235 		/* Then remove the primary pv */
3236 #ifdef DEBUG
3237 		if (pv->pv_next && !pv->pv_pmap) {
3238 			printf("pmap_page_protect: npv but no pmap for pv %p\n", pv);
3239 			Debugger();
3240 		}
3241 #endif
3242 		if (pv->pv_pmap != NULL) {
3243 			simple_lock(&pv->pv_pmap->pm_lock);
3244 #ifdef DEBUG
3245 			if (pmapdebug & (PDB_CHANGEPROT|PDB_REF|PDB_REMOVE)) {
3246 				printf("pmap_page_protect: demap va %p of pa %lx from pm %p...\n",
3247 				       (void *)(u_long)pv->pv_va, (long)pa, pv->pv_pmap);
3248 			}
3249 #endif
3250 			data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
3251 			/* Save ref/mod info */
3252 			if (data & TLB_ACCESS)
3253 				pv->pv_va |= PV_REF;
3254 			if (data & (TLB_MODIFY))
3255 				pv->pv_va |= PV_MOD;
3256 			if (data & TLB_TSB_LOCK) {
3257 #ifdef DIAGNOSTIC
3258 				printf("pmap_page_protect: Removing wired page pm %p va %p\n",
3259 				       (void *)(u_long)pv->pv_pmap, (void *)(u_long)pv->pv_va);
3260 #endif
3261 			}
3262 			if (pseg_set(pv->pv_pmap, pv->pv_va&PV_VAMASK, 0, 0)) {
3263 				printf("pmap_page_protect: gotten pseg empty!\n");
3264 				Debugger();
3265 				/* panic? */
3266 			}
3267 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
3268 				i = ptelookup_va(pv->pv_va&PV_VAMASK);
3269 				/* since we already know the va for each mapping we don't need to scan the entire TSB */
3270 				if (tsb[i].tag == TSB_TAG(0, pv->pv_pmap->pm_ctx, pv->pv_va&PV_VAMASK))
3271 					tsb[i].data = 0LL;
3272 				tlb_flush_pte(pv->pv_va&PV_VAMASK, pv->pv_pmap->pm_ctx);
3273 			}
3274 			simple_unlock(&pv->pv_pmap->pm_lock);
3275 			npv = pv->pv_next;
3276 			/* dump the first pv */
3277 			if (npv) {
3278 				/* First save mod/ref bits */
3279 				pv->pv_va |= (npv->pv_va&PV_MASK);
3280 				pv->pv_next = npv->pv_next;
3281 				pv->pv_pmap = npv->pv_pmap;
3282 				pool_put(&pv_pool, npv);
3283 			} else {
3284 				pv->pv_pmap = NULL;
3285 				pv->pv_next = NULL;
3286 			}
3287 		}
3288 		splx(s);
3289 	}
3290 	/* We should really only flush the pages we demapped. */
3291 	pv_check();
3292 }
3293 
3294 /*
3295  * count pages in pmap -- this can be slow.
3296  */
3297 int
3298 pmap_count_res(pm)
3299 	pmap_t pm;
3300 {
3301 	int i, j, k, n, s;
3302 	paddr_t *pdir, *ptbl;
3303 	/* Almost the same as pmap_collect() */
3304 
3305 	/* Don't want one of these pages reused while we're reading it. */
3306 	s = splvm();
3307 	simple_lock(&pm->pm_lock);
3308 	n = 0;
3309 	for (i=0; i<STSZ; i++) {
3310 		if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
3311 			for (k=0; k<PDSZ; k++) {
3312 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
3313 					for (j=0; j<PTSZ; j++) {
3314 						int64_t data = (int64_t)ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED);
3315 						if (data&TLB_V)
3316 							n++;
3317 					}
3318 				}
3319 			}
3320 		}
3321 	}
3322 	simple_unlock(&pm->pm_lock);
3323 	splx(s);
3324 	return n;
3325 }
3326 
3327 /*
3328  * count wired pages in pmap -- this can be slow.
3329  */
3330 int
3331 pmap_count_wired(pm)
3332 	pmap_t pm;
3333 {
3334 	int i, j, k, n, s;
3335 	paddr_t *pdir, *ptbl;
3336 	/* Almost the same as pmap_collect() */
3337 
3338 	/* Don't want one of these pages reused while we're reading it. */
3339 	s = splvm();
3340 	simple_lock(&pm->pm_lock);
3341 	n = 0;
3342 	for (i = 0; i < STSZ; i++) {
3343 		if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
3344 			for (k = 0; k < PDSZ; k++) {
3345 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
3346 					for (j = 0; j < PTSZ; j++) {
3347 						int64_t data = (int64_t)ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED);
3348 						if (data & TLB_TSB_LOCK)
3349 							n++;
3350 					}
3351 				}
3352 			}
3353 		}
3354 	}
3355 	simple_unlock(&pm->pm_lock);
3356 	splx(s);
3357 	return n;
3358 }
3359 
3360 /*
3361  * Allocate a context.  If necessary, steal one from someone else.
3362  * The context number is only recorded in the pmap; the caller loads it
3363  * into the MMU.  This routine is called from locore.s just after the
3364  * previous process has been saved away (and from pmap_activate()), so
3365  * there are no active user windows.
3366  *
3367  * If a context has to be stolen, its TSB and TLB entries are flushed first.
3368  */
3369 int
3370 ctx_alloc(pm)
3371 	struct pmap* pm;
3372 {
3373 	register int s, cnum;
3374 	static int next = 0;
3375 
3376 	if (pm == pmap_kernel()) {
3377 #ifdef DIAGNOSTIC
3378 		printf("ctx_alloc: kernel pmap!\n");
3379 #endif
3380 		return (0);
3381 	}
3382 	s = splvm();
3383 	cnum = next;
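	/*
	 * Scan circularly for a free context number, starting just past the
	 * last one handed out; if we wrap all the way around, the context we
	 * stop on is stolen below.  Context 0 is never used -- it belongs to
	 * the kernel.
	 */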
3384 	do {
3385 		if (cnum >= numctx-1)
3386 			cnum = 0;
3387 	} while (ctxbusy[++cnum] != NULL && cnum != next);
3388 	if (cnum==0) cnum++; /* Never steal ctx 0 */
3389 	if (ctxbusy[cnum]) {
3390 		int i;
3391 #ifdef DEBUG
3392 		/* We should identify this pmap and clear it */
3393 		printf("Warning: stealing context %d\n", cnum);
3394 		remove_stats.pidflushes ++;
3395 #endif
3396 		/* We gotta steal this context */
3397 		for (i = 0; i < TSBENTS; i++) {
3398 			if (TSB_TAG_CTX(tsb[i].tag) == cnum)
3399 				tsb[i].data = 0LL;
3400 		}
3401 		tlb_flush_ctx(cnum);
3402 	}
3403 	ctxbusy[cnum] = pm->pm_physaddr;
3404 	next = cnum;
3405 	splx(s);
3406 	pm->pm_ctx = cnum;
3407 #ifdef DEBUG
3408 	if (pmapdebug & PDB_CTX_ALLOC)
3409 		printf("ctx_alloc: allocated ctx %d\n", cnum);
3410 #endif
3411 	return cnum;
3412 }
3413 
3414 /*
3415  * Give away a context.
3416  */
3417 void
3418 ctx_free(pm)
3419 	struct pmap* pm;
3420 {
3421 	int oldctx;
3422 
3423 	oldctx = pm->pm_ctx;
3424 
3425 	if (oldctx == 0)
3426 		panic("ctx_free: freeing kernel context");
3427 #ifdef DIAGNOSTIC
3428 	if (ctxbusy[oldctx] == 0)
3429 		printf("ctx_free: freeing free context %d\n", oldctx);
3430 	if (ctxbusy[oldctx] != pm->pm_physaddr) {
3431 		printf("ctx_free: freeing someone else's context\n"
3432 		       "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
3433 		       oldctx, (void *)(u_long)ctxbusy[oldctx], pm,
3434 		       (void *)(u_long)pm->pm_physaddr);
3435 		Debugger();
3436 	}
3437 #endif
3438 	/* We should verify it has not been stolen and reallocated... */
3439 #ifdef DEBUG
3440 	if (pmapdebug & PDB_CTX_ALLOC) {
3441 		printf("ctx_free: freeing ctx %d\n", oldctx);
3442 		Debugger();
3443 	}
3444 #endif
3445 	ctxbusy[oldctx] = NULL;
3446 }
3447 
3448 /*
3449  * Enter the pmap and virtual address into the
3450  * physical to virtual map table.
3451  *
3452  * We enter here with the pmap locked.
3453  */
3454 void
3455 pmap_enter_pv(pmap, va, pa)
3456 	pmap_t pmap;
3457 	vaddr_t va;
3458 	paddr_t pa;
3459 {
3460 	pv_entry_t pv, npv;
3461 	int s;
3462 
3463 	pv = pa_to_pvh(pa);
3464 	s = splvm();
3465 #ifdef DEBUG
3466 	if (pmapdebug & PDB_ENTER)
3467 		printf("pmap_enter: pv %p: was %lx/%p/%p\n",
3468 		       pv, pv->pv_va, pv->pv_pmap, pv->pv_next);
3469 #endif
3470 	if (pv->pv_pmap == NULL) {
3471 		/*
3472 		 * No entries yet, use header as the first entry
3473 		 */
3474 #ifdef DEBUG
3475 		if (pmapdebug & PDB_ENTER)
3476 			printf("pmap_enter: first pv: pmap %p va %lx\n",
3477 				pmap, va);
3478 		enter_stats.firstpv++;
3479 #endif
3480 		PV_SETVA(pv, va);
3481 		pv->pv_pmap = pmap;
3482 		pv->pv_next = NULL;
3483 	} else {
3484 		if (!(pv->pv_va & PV_ALIAS)) {
3485 			/*
3486 			 * There is at least one other VA mapping this page.
3487 			 * Check if they are cache index compatible. If not
3488 			 * remove all mappings, flush the cache and set page
3489 			 * to be mapped uncached. Caching will be restored
3490 			 * when pages are mapped compatible again.
3491 			 * XXX - caching is not currently being restored, but
3492 			 * XXX - I haven't seen the pages uncached since
3493 			 * XXX - using pmap_prefer().	mhitch
3494 			 */
3495 			if ((pv->pv_va^va)&VA_ALIAS_MASK) {
3496 				pv->pv_va |= PV_ALIAS;
3497 				pmap_page_cache(pmap, pa, 0);
3498 #ifdef DEBUG
3499 				enter_stats.ci++;
3500 #endif
3501 			}
3502 		}
3503 		/*
3504 		 * There is at least one other VA mapping this page.
3505 		 * Place this entry after the header.
3506 		 *
3507 		 * Note: the entry may already be in the table if
3508 		 * we are only changing the protection bits.
3509 		 */
3510 		for (npv = pv; npv; npv = npv->pv_next) {
3511 			if (pmap == npv->pv_pmap && PV_MATCH(npv, va)) {
3512 #ifdef PARANOIADIAG
3513 				int64_t data;
3514 
3515 				data = pseg_get(pmap, va);
3516 				if (data >= 0 ||
3517 				    (data & TLB_PA_MASK) != pa)
3518 					printf(
3519 		"pmap_enter: found va %lx pa %lx in pv_table but != %lx\n",
3520 						va, pa, (long)data);
3521 #endif
3522 				goto fnd;
3523 			}
3524 		}
3525 #ifdef DEBUG
3526 		if (pmapdebug & PDB_ENTER)
3527 			printf("pmap_enter: new pv: pmap %p va %lx\n",
3528 				pmap, va);
3529 #endif
3530 		/*
3531 		 * XXXXX can this cause us to recurse forever?
3532 		 *
3533 		 * We need to drop the lock on the kernel_pmap
3534 		 * to do memory allocation.  But that should not
3535 		 * cause any real problems unless someone tries to
3536 		 * touch the particular mapping we're adding.
3537 		 */
3538 		npv = pool_get(&pv_pool, PR_NOWAIT);
3539 		if (npv == NULL)
3540 			panic("pmap_enter_pv: new pv pool_get() failed");
3541 		npv->pv_va = va&PV_VAMASK;
3542 		npv->pv_pmap = pmap;
3543 		npv->pv_next = pv->pv_next;
3544 		pv->pv_next = npv;
3545 #ifdef DEBUG
3546 		if (!npv->pv_next)
3547 			enter_stats.secondpv++;
3548 #endif
3549 	fnd:
3550 		;
3551 	}
3552 	splx(s);
3553 }
3554 
3555 /*
3556  * Remove a physical to virtual address translation.
3557  */
3558 
3559 void
3560 pmap_remove_pv(pmap, va, pa)
3561 	pmap_t pmap;
3562 	vaddr_t va;
3563 	paddr_t pa;
3564 {
3565 	register pv_entry_t pv, npv, opv;
3566 	int64_t data = 0LL;
3567 	int s;
3568 
3569 #ifdef DEBUG
3570 	if (pmapdebug & (PDB_REMOVE))
3571 		printf("pmap_remove_pv(pm=%p, va=%p, pa=%llx)\n", pmap,
3572 		    (void *)(u_long)va, (unsigned long long)pa);
3573 #endif
3574 	/*
3575 	 * Remove page from the PV table (raise IPL since we
3576 	 * may be called at interrupt time).
3577 	 */
3578 	pv_check();
3579 	opv = pv = pa_to_pvh(pa);
3580 	s = splvm();
3581 	/*
3582 	 * If it is the first entry on the list, it is actually
3583 	 * in the header and we must copy the following entry up
3584 	 * to the header.  Otherwise we must search the list for
3585 	 * the entry.  In either case we free the now unused entry.
3586 	 */
3587 	if (pmap == pv->pv_pmap && PV_MATCH(pv,va)) {
3588 		/* Save modified/ref bits */
3589 		data = pseg_get(pv->pv_pmap, pv->pv_va&PV_VAMASK);
3590 		npv = pv->pv_next;
3591 		if (npv) {
3592 			/* First save mod/ref bits */
3593 			pv->pv_va = (pv->pv_va&PV_MASK) | npv->pv_va;
3594 			pv->pv_next = npv->pv_next;
3595 			pv->pv_pmap = npv->pv_pmap;
3596 			pool_put(&pv_pool, npv);
3597 		} else {
3598 			pv->pv_pmap = NULL;
3599 			pv->pv_next = NULL;
3600 			pv->pv_va &= (PV_REF|PV_MOD); /* Only save ref/mod bits */
3601 		}
3602 #ifdef DEBUG
3603 		remove_stats.pvfirst++;
3604 #endif
3605 	} else {
3606 		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
3607 #ifdef DEBUG
3608 			remove_stats.pvsearch++;
3609 #endif
3610 			if (pmap == npv->pv_pmap && PV_MATCH(npv,va))
3611 				goto fnd;
3612 		}
3613 
3614 		/*
3615 		 * Sometimes UVM gets confused and calls pmap_remove() instead
3616 		 * of pmap_kremove()
3617 		 */
		splx(s);
3618 		return;
3619 #ifdef DIAGNOSTIC
3620 		printf("pmap_remove_pv(%lx, %x, %x) not found\n", (u_long)pmap, (u_int)va, (u_int)pa);
3621 
3622 		Debugger();
3623 		splx(s);
3624 		return;
3625 #endif
3626 	fnd:
3627 		pv->pv_next = npv->pv_next;
3628 		/*
3629 		 * move any referenced/modified info to the base pv
3630 		 */
3631 		data = pseg_get(npv->pv_pmap, npv->pv_va&PV_VAMASK);
3632 		/*
3633 		 * Here, if this page was aliased, we should try clear out any
3634 		 * alias that may have occurred.  However, that's a complicated
3635 		 * operation involving multiple scans of the pv list.
3636 		 */
3637 		pool_put(&pv_pool, npv);
3638 	}
3639 
3640 	/* Save ref/mod info */
3641 	if (data & TLB_ACCESS)
3642 		opv->pv_va |= PV_REF;
3643 	if (data & (TLB_MODIFY))
3644 		opv->pv_va |= PV_MOD;
3645 
3646 	/* Check to see if the alias went away */
3647 	if (opv->pv_va & PV_ALIAS) {
3648 		opv->pv_va &= ~PV_ALIAS;
3649 		for (npv = opv; npv; npv = npv->pv_next) {
3650 			if ((npv->pv_va^opv->pv_va)&VA_ALIAS_MASK) {
3651 				opv->pv_va |= PV_ALIAS;
3652 			}
3653 		}
3654 		if (!(opv->pv_va & PV_ALIAS))
3655 			pmap_page_cache(pmap, pa, 1);
3656 	}
3657 	splx(s);
3658 	pv_check();
3659 }
3660 
3661 /*
3662  *	pmap_page_cache:
3663  *
3664  *	Change all mappings of a page to cached/uncached.
3665  */
3666 void
3667 pmap_page_cache(pm, pa, mode)
3668 	struct pmap *pm;
3669 	paddr_t pa;
3670 	int mode;
3671 {
3672 	pv_entry_t pv;
3673 	int i, s;
3674 
3675 #ifdef DEBUG
3676 	if (pmapdebug & (PDB_ENTER))
3677 		printf("pmap_page_cache(%llx)\n", (unsigned long long)pa);
3678 #endif
3679 	if (!IS_VM_PHYSADDR(pa))
3680 		return;
3681 
3682 	pv = pa_to_pvh(pa);
3683 	s = splvm();
3684 
3685 	while (pv) {
3686 		vaddr_t va;
3687 
3688 		va = (pv->pv_va & PV_VAMASK);
3689 		if (pv->pv_pmap != pm)
3690 			simple_lock(&pv->pv_pmap->pm_lock);
3691 		if (pv->pv_va & PV_NC) {
3692 			/* Non-cached -- I/O mapping */
3693 			if (pseg_set(pv->pv_pmap, va,
3694 				     pseg_get(pv->pv_pmap, va) & ~(TLB_CV|TLB_CP),
3695 				     0)) {
3696 				printf("pmap_page_cache: aliased pseg empty!\n");
3697 				Debugger();
3698 				/* panic? */
3699 			}
3700 		} else if (mode && (!(pv->pv_va & PV_NVC))) {
3701 			/* Enable caching */
3702 			if (pseg_set(pv->pv_pmap, va,
3703 				     pseg_get(pv->pv_pmap, va) | TLB_CV, 0)) {
3704 				printf("pmap_page_cache: aliased pseg empty!\n");
3705 				Debugger();
3706 				/* panic? */
3707 			}
3708 		} else {
3709 			/* Disable caching */
3710 			if (pseg_set(pv->pv_pmap, va,
3711 				     pseg_get(pv->pv_pmap, va) & ~TLB_CV, 0)) {
3712 				printf("pmap_page_cache: aliased pseg empty!\n");
3713 				Debugger();
3714 				/* panic? */
3715 			}
3716 		}
3717 		if (pv->pv_pmap != pm)
3718 			simple_unlock(&pv->pv_pmap->pm_lock);
3719 		if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
3720 			i = ptelookup_va(va);
3721 			if (tsb[i].tag > 0 && tsb[i].tag ==
3722 			    TSB_TAG(0, pv->pv_pmap->pm_ctx, va)) {
3723 				/*
3724 				 * Invalidate the TSB
3725 				 *
3726 				 * While we can invalidate it by clearing the
3727 				 * valid bit:
3728 				 *
3729 				 * ptp->data_v = 0;
3730 				 *
3731 				 * it's faster to simply store one doubleword.
3732 				 */
3733 				tsb[i].data = 0LL;
3734 				ASSERT((tsb[i].data & TLB_NFO) == 0);
3735 			}
3736 			/* Force reload -- protections may be changed */
3737 			tlb_flush_pte(va, pv->pv_pmap->pm_ctx);
3738 		}
3739 
3740 		pv = pv->pv_next;
3741 	}
3742 
3743 	splx(s);
3744 }
3745 
3746 
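/*
 * "leftovers" heads a simple free list of physical pages that were
 * allocated before the VM system came up and have since been returned
 * through pmap_free_page().  Each page on the list stores the physical
 * address of the next free page in its first doubleword; pmap_get_page()
 * reads the link back with ldxa()/ASI_PHYS_CACHED.
 */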
3747 static paddr_t leftovers = 0;
3748 
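/*
 * Allocate a single zeroed physical page for pmap metadata.  The page
 * comes from the leftovers list if one is available, from uvm_pagealloc()
 * once the VM system has been initialized, or from uvm_page_physget()
 * during bootstrap.  Returns 1 and stores the physical address in *p on
 * success; returns 0 if no page could be obtained (only possible when
 * "wait" is NULL or when uvm_page_physget() fails during bootstrap).
 */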
3749 static int
3750 pmap_get_page(paddr_t *p, char *wait)
3751 {
3752 	struct vm_page *pg;
3753 	paddr_t pa;
3754 
3755 	if ((pa = leftovers)) {
3756 		/* Use any leftover pages. */
3757 		leftovers = ldxa(pa, ASI_PHYS_CACHED);
3758 		pmap_zero_page(pa);
3759 	} else if (uvm.page_init_done) {
3760 		while ((pg = uvm_pagealloc(NULL, 0, NULL,
3761 			UVM_PGA_ZERO|UVM_PGA_USERESERVE)) == NULL) {
3762 			if (!wait) return (0);
3763 			uvm_wait(wait);
3764 		}
3765 		pg->wire_count = 1;	/* no mappings yet */
3766 		pg->flags &= ~PG_BUSY;	/* never busy */
3767 		pa = (paddr_t)VM_PAGE_TO_PHYS(pg);
3768 	} else {
3769 		if (!uvm_page_physget(&pa))
3770 			return (0);
3771 		pmap_zero_page(pa);
3772 	}
3773 	*p = pa;
3774 	return (1);
3775 }
3776 
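/*
 * Release a page obtained from pmap_get_page().  Managed pages (those
 * with a vm_page structure, expected to carry only PG_FAKE) are handed
 * back to UVM with uvm_pagefree(); pages allocated before the VM system
 * was initialized are pushed onto the leftovers free list instead.
 */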
3777 static void
3778 pmap_free_page(paddr_t pa)
3779 {
3780 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
3781 
3782 	if (pg) {
3783 		if (pg->flags != (PG_FAKE)) {
3784 			printf("Freeing invalid page %p\n", pg);
3785 			printf("pa = %llx (pg = %llx)\n",
3786 				(unsigned long long)pa,
3787 				(unsigned long long)VM_PAGE_TO_PHYS(pg));
3788 #ifdef DDB
3789 			Debugger();
3790 #endif
3791 			return;
3792 		}
3793 		pg->flags |= PG_BUSY;
3794 		pg->wire_count = 0;
3795 		uvm_pagefree(pg);
3796 	} else {
3797 		/*
3798 		 * This is not a VM page.  It must have been allocated before
3799 		 * the VM system was initialized.  We could hand it over to
3800 		 * the VM system, but that involves extra overhead.  Instead
3801 		 * we'll just link it into a list of available pages for
3802 		 * the next time pmap_get_page() is called.
3803 		 */
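		/* The page's first doubleword links to the old list head. */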
3804 		stxa(pa, ASI_PHYS_CACHED, leftovers);
3805 		leftovers = pa;
3806 	}
3807 }
3808 
3809 
3810 #ifdef DDB
3811 
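/*
 * DDB helper: dump the pv_entry chain for the physical page whose
 * address is given, one line per mapping (entry, next, pmap, va).
 */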
3812 void db_dump_pv __P((db_expr_t, int, db_expr_t, char *));
3813 void
3814 db_dump_pv(addr, have_addr, count, modif)
3815 	db_expr_t addr;
3816 	int have_addr;
3817 	db_expr_t count;
3818 	char *modif;
3819 {
3820 	struct pv_entry *pv;
3821 
3822 	if (!have_addr) {
3823 		db_printf("Need addr for pv\n");
3824 		return;
3825 	}
3826 
3827 	for (pv = pa_to_pvh(addr); pv; pv = pv->pv_next)
3828 		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
3829 			  pv, pv->pv_next, pv->pv_pmap,
3830 			  (unsigned long long)pv->pv_va);
3831 
3832 }
3833 
3834 #endif
3835 
3836 #ifdef DEBUG
3837 /*
3838  * Test ref/modify handling:
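 *
 * Map a scratch page at vmmap - NBPG and walk it through
 * pmap_clear_reference()/pmap_clear_modify(), pmap_is_referenced()/
 * pmap_is_modified(), pmap_protect(), pmap_page_protect() and finally
 * pmap_remove(), printing the reference/modify state after each step
 * so the ref/mod tracking can be checked by hand.
 */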
3839 void pmap_testout __P((void));
3840 void
3841 pmap_testout()
3842 {
3843 	vaddr_t va;
3844 	volatile int *loc;
3845 	int val = 0;
3846 	paddr_t pa;
3847 	struct vm_page *pg;
3848 	int ref, mod;
3849 
3850 	/* Allocate a page */
3851 	va = (vaddr_t)(vmmap - NBPG);
3852 	ASSERT(va != 0);
3853 	loc = (int*)va;
3854 
3855 	pmap_get_page(&pa, NULL);
3856 	pg = PHYS_TO_VM_PAGE(pa);
3857 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3858 	pmap_update(pmap_kernel());
3859 
3860 	/* Now clear reference and modify */
3861 	ref = pmap_clear_reference(pg);
3862 	mod = pmap_clear_modify(pg);
3863 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3864 	       (void *)(u_long)va, (long)pa,
3865 	       ref, mod);
3866 
3867 	/* Check it's properly cleared */
3868 	ref = pmap_is_referenced(pg);
3869 	mod = pmap_is_modified(pg);
3870 	printf("Checking cleared page: ref %d, mod %d\n",
3871 	       ref, mod);
3872 
3873 	/* Reference page */
3874 	val = *loc;
3875 
3876 	ref = pmap_is_referenced(pg);
3877 	mod = pmap_is_modified(pg);
3878 	printf("Referenced page: ref %d, mod %d val %x\n",
3879 	       ref, mod, val);
3880 
3881 	/* Now clear reference and modify */
3882 	ref = pmap_clear_reference(pg);
3883 	mod = pmap_clear_modify(pg);
3884 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3885 	       (void *)(u_long)va, (long)pa,
3886 	       ref, mod);
3887 
3888 	/* Modify page */
3889 	*loc = 1;
3890 
3891 	ref = pmap_is_referenced(pg);
3892 	mod = pmap_is_modified(pg);
3893 	printf("Modified page: ref %d, mod %d\n",
3894 	       ref, mod);
3895 
3896 	/* Now clear reference and modify */
3897 	ref = pmap_clear_reference(pg);
3898 	mod = pmap_clear_modify(pg);
3899 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3900 	       (void *)(u_long)va, (long)pa,
3901 	       ref, mod);
3902 
3903 	/* Check it's properly cleared */
3904 	ref = pmap_is_referenced(pg);
3905 	mod = pmap_is_modified(pg);
3906 	printf("Checking cleared page: ref %d, mod %d\n",
3907 	       ref, mod);
3908 
3909 	/* Modify page */
3910 	*loc = 1;
3911 
3912 	ref = pmap_is_referenced(pg);
3913 	mod = pmap_is_modified(pg);
3914 	printf("Modified page: ref %d, mod %d\n",
3915 	       ref, mod);
3916 
3917 	/* Check pmap_protect() */
3918 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_READ);
3919 	pmap_update(pmap_kernel());
3920 	ref = pmap_is_referenced(pg);
3921 	mod = pmap_is_modified(pg);
3922 	printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n",
3923 	       ref, mod);
3924 
3925 	/* Now clear reference and modify */
3926 	ref = pmap_clear_reference(pg);
3927 	mod = pmap_clear_modify(pg);
3928 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3929 	       (void *)(u_long)va, (long)pa,
3930 	       ref, mod);
3931 
3932 	/* Modify page */
3933 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3934 	pmap_update(pmap_kernel());
3935 	*loc = 1;
3936 
3937 	ref = pmap_is_referenced(pg);
3938 	mod = pmap_is_modified(pg);
3939 	printf("Modified page: ref %d, mod %d\n",
3940 	       ref, mod);
3941 
3942 	/* Check pmap_protect() */
3943 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_NONE);
3944 	pmap_update(pmap_kernel());
3945 	ref = pmap_is_referenced(pg);
3946 	mod = pmap_is_modified(pg);
3947 	printf("pmap_protect(VM_PROT_NONE): ref %d, mod %d\n",
3948 	       ref, mod);
3949 
3950 	/* Now clear reference and modify */
3951 	ref = pmap_clear_reference(pg);
3952 	mod = pmap_clear_modify(pg);
3953 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3954 	       (void *)(u_long)va, (long)pa,
3955 	       ref, mod);
3956 
3957 	/* Modify page */
3958 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3959 	pmap_update(pmap_kernel());
3960 	*loc = 1;
3961 
3962 	ref = pmap_is_referenced(pg);
3963 	mod = pmap_is_modified(pg);
3964 	printf("Modified page: ref %d, mod %d\n",
3965 	       ref, mod);
3966 
3967 	/* Check pmap_page_protect() */
3968 	pmap_page_protect(pg, VM_PROT_READ);
3969 	ref = pmap_is_referenced(pg);
3970 	mod = pmap_is_modified(pg);
3971 	printf("pmap_page_protect(VM_PROT_READ): ref %d, mod %d\n",
3972 	       ref, mod);
3973 
3974 	/* Now clear reference and modify */
3975 	ref = pmap_clear_reference(pg);
3976 	mod = pmap_clear_modify(pg);
3977 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3978 	       (void *)(u_long)va, (long)pa,
3979 	       ref, mod);
3980 
3981 
3982 	/* Modify page */
3983 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3984 	pmap_update(pmap_kernel());
3985 	*loc = 1;
3986 
3987 	ref = pmap_is_referenced(pg);
3988 	mod = pmap_is_modified(pg);
3989 	printf("Modified page: ref %d, mod %d\n",
3990 	       ref, mod);
3991 
3992 	/* Check pmap_page_protect() */
3993 	pmap_page_protect(pg, VM_PROT_NONE);
3994 	ref = pmap_is_referenced(pg);
3995 	mod = pmap_is_modified(pg);
3996 	printf("pmap_page_protect(VM_PROT_NONE): ref %d, mod %d\n",
3997 	       ref, mod);
3998 
3999 	/* Now clear reference and modify */
4000 	ref = pmap_clear_reference(pg);
4001 	mod = pmap_clear_modify(pg);
4002 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
4003 	       (void *)(u_long)va, (long)pa,
4004 	       ref, mod);
4005 
4006 	/* Unmap page */
4007 	pmap_remove(pmap_kernel(), va, va+1);
4008 	pmap_update(pmap_kernel());
4009 	ref = pmap_is_referenced(pg);
4010 	mod = pmap_is_modified(pg);
4011 	printf("Unmapped page: ref %d, mod %d\n", ref, mod);
4012 
4013 	/* Now clear reference and modify */
4014 	ref = pmap_clear_reference(pg);
4015 	mod = pmap_clear_modify(pg);
4016 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
4017 	       (void *)(u_long)va, (long)pa, ref, mod);
4018 
4019 	/* Check it's properly cleared */
4020 	ref = pmap_is_referenced(pg);
4021 	mod = pmap_is_modified(pg);
4022 	printf("Checking cleared page: ref %d, mod %d\n",
4023 	       ref, mod);
4024 
4025 	pmap_remove(pmap_kernel(), va, va+1);
4026 	pmap_update(pmap_kernel());
4027 	pmap_free_page(pa);
4028 }
4029 #endif
4030