1 /*	$OpenBSD: pmap.c,v 1.111 2023/04/13 15:23:22 miod Exp $	*/
2 /*	$NetBSD: pmap.c,v 1.107 2001/08/31 16:47:41 eeh Exp $	*/
3 /*
4  *
5  * Copyright (C) 1996-1999 Eduardo Horvath.
6  * All rights reserved.
7  *
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  *
27  */
28 
29 #include <sys/atomic.h>
30 #include <sys/param.h>
31 #include <sys/malloc.h>
32 #include <sys/queue.h>
33 #include <sys/systm.h>
34 #include <sys/proc.h>
35 #include <sys/msgbuf.h>
36 #include <sys/pool.h>
37 #include <sys/exec.h>
38 #include <sys/core.h>
39 #include <sys/kcore.h>
40 
41 #include <uvm/uvm.h>
42 
43 #include <machine/pcb.h>
44 #include <machine/sparc64.h>
45 #include <machine/ctlreg.h>
46 #include <machine/hypervisor.h>
47 #include <machine/openfirm.h>
48 #include <machine/kcore.h>
49 
50 #include "cache.h"
51 
52 #ifdef DDB
53 #include <machine/db_machdep.h>
54 #include <ddb/db_command.h>
55 #include <ddb/db_sym.h>
56 #include <ddb/db_variables.h>
57 #include <ddb/db_extern.h>
58 #include <ddb/db_access.h>
59 #include <ddb/db_output.h>
60 #define db_enter()	__asm volatile("ta 1; nop");
61 #else
62 #define db_enter()
63 #define db_printf	printf
64 #endif
65 
66 #define	MEG		(1<<20) /* 1MB */
67 #define	KB		(1<<10)	/* 1KB */
68 
69 paddr_t cpu0paddr;/* XXXXXXXXXXXXXXXX */
70 
71 /* These routines are in assembly to allow access through physical mappings */
72 extern int64_t pseg_get(struct pmap*, vaddr_t addr);
73 extern int pseg_set(struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare);
74 
75 extern void pmap_zero_phys(paddr_t pa);
76 extern void pmap_copy_phys(paddr_t src, paddr_t dst);
77 
78 /*
79  * Diatribe on ref/mod counting:
80  *
81  * First of all, ref/mod info must be non-volatile.  Hence we need to keep it
82  * in the pv_entry structure for each page.  (We could bypass this for the
83  * vm_page, but that's a long story....)
84  *
85  * This architecture has nice, fast traps with lots of space for software bits
86  * in the TTE.  To accelerate ref/mod counts we make use of these features.
87  *
88  * When we map a page initially, we place a TTE in the page table.  It's
89  * inserted with the TLB_W and TLB_ACCESS bits cleared.  If a page is really
90  * writeable we set the TLB_REAL_W bit for the trap handler.
91  *
92  * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
93  * bit in the appropriate TTE in the page table.  Whenever we take a protection
94  * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
95  * bits to enable writing and mark the page as modified.
96  *
97  * This means that we may have ref/mod information all over the place.  The
98  * pmap routines must traverse the page tables of all pmaps with a given page
99  * and collect/clear all the ref/mod information and copy it into the pv_entry.
100  */
101 
102 #define	PV_ALIAS	0x1LL
103 #define PV_REF		0x2LL
104 #define PV_MOD		0x4LL
105 #define PV_MASK		(0x03fLL)
106 #define PV_VAMASK	(~(NBPG - 1))
107 #define PV_MATCH(pv,va)	(!((((pv)->pv_va) ^ (va)) & PV_VAMASK))
108 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | (((pv)->pv_va) & PV_MASK)))
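/*
 * Note: pages are NBPG-aligned, so the low bits of pv_va are free and are
 * used to hold the PV_* flag bits above.  PV_VAMASK/PV_MASK split the two
 * halves, and PV_MATCH/PV_SETVA only ever compare or update the address
 * part.
 */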
109 
110 static struct pool pv_pool;
111 static struct pool pmap_pool;
112 
113 pv_entry_t pmap_remove_pv(struct pmap *pm, vaddr_t va, paddr_t pa);
114 pv_entry_t pmap_enter_pv(struct pmap *pm, pv_entry_t, vaddr_t va, paddr_t pa);
115 void	pmap_page_cache(struct pmap *pm, paddr_t pa, int mode);
116 
117 void	pmap_bootstrap_cpu(paddr_t);
118 
119 void	pmap_pinit(struct pmap *);
120 void	pmap_release(struct pmap *);
121 pv_entry_t pa_to_pvh(paddr_t);
122 
123 pv_entry_t
124 pa_to_pvh(paddr_t pa)
125 {
126 	struct vm_page *pg;
127 
128 	pg = PHYS_TO_VM_PAGE(pa);
129 	return pg ? &pg->mdpage.pvent : NULL;
130 }
131 
132 static __inline u_int
133 pmap_tte2flags(u_int64_t tte)
134 {
135 	if (CPU_ISSUN4V)
136 		return (((tte & SUN4V_TLB_ACCESS) ? PV_REF : 0) |
137 		    ((tte & SUN4V_TLB_MODIFY) ? PV_MOD : 0));
138 	else
139 		return (((tte & SUN4U_TLB_ACCESS) ? PV_REF : 0) |
140 		    ((tte & SUN4U_TLB_MODIFY) ? PV_MOD : 0));
141 }
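/*
 * Illustrative sketch (not part of the original source): the
 * "collect/clear" step described in the ref/mod diatribe above amounts to
 * walking every mapping of a page and folding the TTE software bits into
 * the pv_entry, roughly:
 *
 *	pv_entry_t pv, p;
 *	int64_t data;
 *
 *	for (p = pv; p != NULL; p = p->pv_next) {
 *		if (p->pv_pmap == NULL)
 *			continue;
 *		data = pseg_get(p->pv_pmap, p->pv_va & PV_VAMASK);
 *		pv->pv_va |= pmap_tte2flags(data);
 *	}
 *
 * The real routines later in this file (pmap_is_referenced() and friends)
 * also clear the bits in the TTE and flush the matching TSB/TLB entries.
 */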
142 
143 /*
144  * Here's the CPU TSB stuff.  It's allocated in pmap_bootstrap.
145  */
146 pte_t *tsb_dmmu;
147 pte_t *tsb_immu;
148 int tsbsize;		/* tsbents = 512 * 2^tsbsize */
149 #define TSBENTS (512 << tsbsize)
150 #define	TSBSIZE	(TSBENTS * 16)
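/*
 * Worked example: with tsbsize == 0 that is 512 entries of 16 bytes each
 * (8-byte tag plus 8-byte data), i.e. 8KB per TSB; every increment of
 * tsbsize doubles it, so the ceiling of 7 used in pmap_bootstrap() gives
 * 64K entries, or 1MB per TSB.
 */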
151 
152 /*
153  * The invalid tsb tag uses the fact that the last context we have is
154  * never allocated.
155  */
156 #define TSB_TAG_INVALID	(~0LL << 48)
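/*
 * The context number lives in the upper bits of a TSB tag, so ~0LL << 48
 * forges a tag whose context is the very last one, which ctx_alloc()
 * never hands out; such a tag can never match a real lookup.
 */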
157 
158 #define TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) \
159   (CPU_ISSUN4V ?\
160     SUN4V_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) : \
161     SUN4U_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie))
162 
163 /* The same for sun4u and sun4v. */
164 #define TLB_V		SUN4U_TLB_V
165 
166 /* Only used for DEBUG. */
167 #define TLB_NFO		(CPU_ISSUN4V ? SUN4V_TLB_NFO : SUN4U_TLB_NFO)
168 
169 /*
170  * UltraSPARC T1 & T2 implement only a 40-bit real address range, just
171  * like older UltraSPARC CPUs.
172  */
173 #define TLB_PA_MASK	SUN4U_TLB_PA_MASK
174 
175 /* XXX */
176 #define TLB_TSB_LOCK	(CPU_ISSUN4V ? SUN4V_TLB_TSB_LOCK : SUN4U_TLB_TSB_LOCK)
177 
178 #ifdef SUN4V
179 struct tsb_desc *tsb_desc;
180 #endif
181 
182 struct pmap kernel_pmap_;
183 
184 /*
185  * Virtual and physical addresses of the start and end of kernel text
186  * and data segments.
187  */
188 vaddr_t ktext;
189 paddr_t ktextp;
190 vaddr_t ektext;
191 paddr_t ektextp;
192 vaddr_t kdata;
193 paddr_t kdatap;
194 vaddr_t ekdata;
195 paddr_t ekdatap;
196 
197 static struct mem_region memlist[8]; /* Pick a random size here */
198 
199 vaddr_t	vmmap;			/* one reserved MI vpage for /dev/mem */
200 
201 struct mem_region *mem, *avail, *orig;
202 int memsize;
203 
204 static int memh = 0, vmemh = 0;	/* Handles to OBP devices */
205 
206 static int ptelookup_va(vaddr_t va); /* sun4u */
207 
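/*
 * Drop any cached TSB entries for (ctx, va): look up the slot, and only
 * if the tag still matches, atomically swap it for TSB_TAG_INVALID so
 * that a concurrent refill of the same slot is not clobbered.
 */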
208 static __inline void
209 tsb_invalidate(int ctx, vaddr_t va)
210 {
211 	int i;
212 	int64_t tag;
213 
214 	i = ptelookup_va(va);
215 	tag = TSB_TAG(0, ctx, va);
216 	if (tsb_dmmu[i].tag == tag)
217 		atomic_cas_ulong((volatile unsigned long *)&tsb_dmmu[i].tag,
218 		    tag, TSB_TAG_INVALID);
219 	if (tsb_immu[i].tag == tag)
220 		atomic_cas_ulong((volatile unsigned long *)&tsb_immu[i].tag,
221 		    tag, TSB_TAG_INVALID);
222 }
223 
224 struct prom_map *prom_map;
225 int prom_map_size;
226 
227 #ifdef DEBUG
228 #define	PDB_BOOT	0x20000
229 #define	PDB_BOOT1	0x40000
230 int	pmapdebug = 0;
231 
232 #define	BDPRINTF(n, f)	if (pmapdebug & (n)) prom_printf f
233 #else
234 #define	BDPRINTF(n, f)
235 #endif
236 
237 /*
238  *
239  * A context is simply a small number that differentiates multiple mappings
240  * of the same address.  Contexts on the spitfire are 13 bits, but could
241  * be as large as 17 bits.
242  *
243  * Each context is either free or attached to a pmap.
244  *
245  * The context table is an array of pointers to psegs.  Just dereference
246  * the right pointer and you get to the pmap segment tables.  These are
247  * physical addresses, of course.
248  *
249  */
250 paddr_t *ctxbusy;
251 int numctx;
252 #define CTXENTRY	(sizeof(paddr_t))
253 #define CTXSIZE		(numctx * CTXENTRY)
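/*
 * In other words, ctxbusy[ctx] holds the physical address of that pmap's
 * segment table (pm_physaddr); a free context has a zero entry (the table
 * is bzero'ed at bootstrap).  A minimal sketch of claiming a context --
 * the real work happens in ctx_alloc(), which is not in this excerpt:
 *
 *	ctxbusy[ctx] = pm->pm_physaddr;
 *	pm->pm_ctx = ctx;
 *
 * pmap_bootstrap() does exactly this by hand for kernel context 0.
 */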
254 
255 int pmap_get_page(paddr_t *, const char *, struct pmap *);
256 void pmap_free_page(paddr_t, struct pmap *);
257 
258 /*
259  * Support for big page sizes.  This maps each page size to its
260  * alignment mask, i.e. the address bits between 8K pages and the
261  * larger page size -- the bits that can cause aliasing.
262  */
263 const struct page_size_map page_size_map[] = {
264 	{ (4*1024*1024-1) & ~(8*1024-1), PGSZ_4M },
265 	{ (512*1024-1) & ~(8*1024-1), PGSZ_512K  },
266 	{ (64*1024-1) & ~(8*1024-1), PGSZ_64K  },
267 	{ (8*1024-1) & ~(8*1024-1), PGSZ_8K  },
268 	{ 0, 0  }
269 };
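/*
 * The table is scanned largest-first; callers pick the first entry whose
 * mask is clear in both the VA and the PA (the mapping is sufficiently
 * aligned) and that fits within the region, as in this paraphrase of the
 * PROM-mapping loop in pmap_bootstrap():
 *
 *	for (k = 0; page_size_map[k].mask; k++)
 *		if (((va | pa) & page_size_map[k].mask) == 0 &&
 *		    page_size_map[k].mask < size)
 *			break;
 *	tte |= page_size_map[k].code;
 */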
270 
271 /*
272  * Enter a TTE into the kernel pmap only.  Don't do anything else.
273  *
274  * Use only during bootstrapping since it does no locking and
275  * can lose ref/mod info!!!!
276  *
277  */
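/*
 * pseg_set() returns 1 when an intermediate page-table page is missing;
 * the caller is expected to supply one through the "spare" argument and
 * retry, which is what the loop below (and those in pmap_enter() and
 * pmap_growkernel()) does.
 */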
278 static void
279 pmap_enter_kpage(vaddr_t va, int64_t data)
280 {
281 	paddr_t newp;
282 
283 	newp = 0;
284 	while (pseg_set(pmap_kernel(), va, data, newp) == 1) {
285 		newp = 0;
286 		if (!pmap_get_page(&newp, NULL, pmap_kernel())) {
287 			prom_printf("pmap_enter_kpage: out of pages\n");
288 			panic("pmap_enter_kpage");
289 		}
290 
291 		BDPRINTF(PDB_BOOT1,
292 			 ("pseg_set: pm=%p va=%p data=%lx newp %lx\r\n",
293 			  pmap_kernel(), va, (long)data, (long)newp));
294 	}
295 }
296 
297 /*
298  * Check bootargs to see if we need to enable bootdebug.
299  */
300 #ifdef DEBUG
301 void
302 pmap_bootdebug(void)
303 {
304 	int chosen;
305 	char *cp;
306 	char buf[128];
307 
308 	/*
309 	 * Grab boot args from PROM
310 	 */
311 	chosen = OF_finddevice("/chosen");
312 	/* Setup pointer to boot flags */
313 	OF_getprop(chosen, "bootargs", buf, sizeof(buf));
314 	cp = buf;
315 	while (*cp != '-')
316 		if (*cp++ == '\0')
317 			return;
318 	for (;;)
319 		switch (*++cp) {
320 		case '\0':
321 			return;
322 		case 'V':
323 			pmapdebug |= PDB_BOOT|PDB_BOOT1;
324 			break;
325 		case 'D':
326 			pmapdebug |= PDB_BOOT1;
327 			break;
328 		}
329 }
330 #endif
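/*
 * So, on a DEBUG kernel, booting with a "-D" flag turns on PDB_BOOT1 and
 * "-V" turns on both PDB_BOOT and PDB_BOOT1 before any of the BDPRINTF
 * output below is reached.
 */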
331 
332 /*
333  * This is called during bootstrap, before the system is really initialized.
334  *
335  * It's called with the start and end virtual addresses of the kernel.  We
336  * bootstrap the pmap allocator now.  We will allocate the basic structures we
337  * need to bootstrap the VM system here: the page frame tables, the TSB, and
338  * the free memory lists.
339  *
340  * Now all this is becoming a bit obsolete.  maxctx is still important, but by
341  * separating the kernel text and data segments we really would need to
342  * provide the start and end of each segment.  But we can't.  The rodata
343  * segment is attached to the end of the kernel text segment and has nothing to
344  * delimit its end.  We could still pass in the beginning of the kernel and
345  * the beginning and end of the data segment but we could also just as easily
346  * calculate that all in here.
347  *
348  * To handle the kernel text, we need to do a reverse mapping of the start of
349  * the kernel, then traverse the free memory lists to find out how big it is.
350  */
351 
352 void
353 pmap_bootstrap(u_long kernelstart, u_long kernelend, u_int maxctx, u_int numcpus)
354 {
355 	extern int data_start[], end[];	/* start of data segment */
356 	extern int msgbufmapped;
357 	struct mem_region *mp, *mp1;
358 	int msgbufsiz;
359 	int pcnt;
360 	size_t s, sz;
361 	int i, j;
362 	int64_t data;
363 	vaddr_t va;
364 	u_int64_t phys_msgbuf;
365 	paddr_t newkp;
366 	vaddr_t newkv, firstaddr, intstk;
367 	vsize_t kdsize, ktsize;
368 
369 #ifdef DEBUG
370 	pmap_bootdebug();
371 #endif
372 
373 	BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\r\n"));
374 	/*
375 	 * set machine page size
376 	 */
377 	uvmexp.pagesize = NBPG;
378 	uvm_setpagesize();
379 
380 	/*
381 	 * Find out how big the kernel's virtual address
382 	 * space is.  The *$#@$ prom loses this info
383 	 */
384 	if ((vmemh = OF_finddevice("/virtual-memory")) == -1) {
385 		prom_printf("no virtual-memory?");
386 		OF_exit();
387 	}
388 	bzero((caddr_t)memlist, sizeof(memlist));
389 	if (OF_getprop(vmemh, "available", memlist, sizeof(memlist)) <= 0) {
390 		prom_printf("no vmemory avail?");
391 		OF_exit();
392 	}
393 
394 #ifdef DEBUG
395 	if (pmapdebug & PDB_BOOT) {
396 		/* print out mem list */
397 		prom_printf("Available virtual memory:\r\n");
398 		for (mp = memlist; mp->size; mp++) {
399 			prom_printf("memlist start %p size %lx\r\n",
400 				    (void *)(u_long)mp->start,
401 				    (u_long)mp->size);
402 		}
403 		prom_printf("End of available virtual memory\r\n");
404 	}
405 #endif
406 	/*
407 	 * Get hold of the message buffer.
408 	 */
409 	msgbufp = (struct msgbuf *)(vaddr_t)MSGBUF_VA;
410 /* XXXXX -- increase msgbufsiz for uvmhist printing */
411 	msgbufsiz = 4*NBPG /* round_page(sizeof(struct msgbuf)) */;
412 	BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\r\n",
413 			    (long)msgbufp, (long)msgbufsiz));
414 	if ((long)msgbufp !=
415 	    (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
416 		prom_printf(
417 		    "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\r\n",
418 		    (void *)msgbufp, (long)phys_msgbuf);
419 	phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
420 	BDPRINTF(PDB_BOOT,
421 		("We should have the memory at %lx, let's map it in\r\n",
422 			phys_msgbuf));
423 	if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
424 			  -1/* sunos does this */) == -1)
425 		prom_printf("Failed to map msgbuf\r\n");
426 	else
427 		BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\r\n",
428 			(void *)msgbufp));
429 	msgbufmapped = 1;	/* enable message buffer */
430 	initmsgbuf((caddr_t)msgbufp, msgbufsiz);
431 
432 	/*
433 	 * Record kernel mapping -- we will map these with a permanent 4MB
434 	 * TLB entry when we initialize the CPU later.
435 	 */
436 	BDPRINTF(PDB_BOOT, ("translating kernelstart %p\r\n",
437 		(void *)kernelstart));
438 	ktext = kernelstart;
439 	ktextp = prom_vtop(kernelstart);
440 
441 	kdata = (vaddr_t)data_start;
442 	kdatap = prom_vtop(kdata);
443 	ekdata = (vaddr_t)end;
444 
445 	/*
446 	 * Find the real size of the kernel.  Locate the smallest starting
447 	 * address > kernelstart.
448 	 */
449 	for (mp1 = mp = memlist; mp->size; mp++) {
450 		/*
451 		 * Check whether this region is at the end of the kernel.
452 		 */
453 		if (mp->start >= ekdata && (mp1->start < ekdata ||
454 						mp1->start > mp->start))
455 			mp1 = mp;
456 	}
457 	if (mp1->start < kdata)
458 		prom_printf("Kernel at end of vmem???\r\n");
459 
460 	BDPRINTF(PDB_BOOT1,
461 		("Kernel data is mapped at %lx, next free seg: %lx, %lx\r\n",
462 			(long)kdata, (u_long)mp1->start, (u_long)mp1->size));
463 
464 	/*
465 	 * We save where we can start allocating memory.
466 	 */
467 	firstaddr = (ekdata + 07) & ~ 07;	/* Longword align */
468 
469 	/*
470 	 * We reserve 100K to grow.
471 	 */
472 	ekdata += 100*KB;
473 
474 	/*
475 	 * And set the end of the data segment to the end of what our
476 	 * bootloader allocated for us, if we still fit in there.
477 	 */
478 	if (ekdata < mp1->start)
479 		ekdata = mp1->start;
480 
481 #define	valloc(name, type, num) (name) = (type *)firstaddr; firstaddr += (num)
482 
483 	/*
484 	 * Since we can't always give the loader the hint to align us on a 4MB
485 	 * boundary, we will need to do the alignment ourselves.  First
486 	 * allocate a new 4MB aligned segment for the kernel, then map it
487 	 * in, copy the kernel over, swap mappings, then finally, free the
488 	 * old kernel.  Then we can continue with this.
489 	 *
490 	 * We'll do the data segment up here since we know how big it is.
491 	 * We'll do the text segment after we've read in the PROM translations
492 	 * so we can figure out its size.
493 	 *
494 	 * The ctxbusy table takes about 64KB, the TSB up to 32KB, and the
495 	 * rest should be less than 1K, so 100KB extra should be plenty.
496 	 */
497 	kdsize = round_page(ekdata - kdata);
498 	BDPRINTF(PDB_BOOT1, ("Kernel data size is %lx\r\n", (long)kdsize));
499 
500 	if ((kdatap & (4*MEG-1)) == 0) {
501 		/* We were at a 4MB boundary -- claim the rest */
502 		psize_t szdiff = (4*MEG - kdsize) & (4*MEG - 1);
503 
504 		BDPRINTF(PDB_BOOT1, ("Need to extend dseg by %lx\r\n",
505 			(long)szdiff));
506 		if (szdiff) {
507 			/* Claim the rest of the physical page. */
508 			newkp = kdatap + kdsize;
509 			newkv = kdata + kdsize;
510 			if (newkp != prom_claim_phys(newkp, szdiff)) {
511 				prom_printf("pmap_bootstrap: could not claim "
512 					"physical dseg extension "
513 					"at %lx size %lx\r\n",
514 					newkp, szdiff);
515 				goto remap_data;
516 			}
517 
518 			/* And the rest of the virtual page. */
519 			if (prom_claim_virt(newkv, szdiff) != newkv)
520 				prom_printf("pmap_bootstrap: could not claim "
521 					"virtual dseg extension "
522 					"at %lx size %lx\r\n", newkv, szdiff);
523 
524 			/* Make sure all 4MB are mapped */
525 			prom_map_phys(newkp, szdiff, newkv, -1);
526 		}
527 	} else {
528 		psize_t sz;
529 remap_data:
530 		/*
531 		 * Either we're not at a 4MB boundary or we can't get the rest
532 		 * of the 4MB extension.  We need to move the data segment.
533 		 * Leave 1MB of extra fiddle space in the calculations.
534 		 */
535 
536 		sz = (kdsize + 4*MEG - 1) & ~(4*MEG-1);
537 		BDPRINTF(PDB_BOOT1,
538 			 ("Allocating new %lx kernel data at 4MB boundary\r\n",
539 			  (u_long)sz));
540 		if ((newkp = prom_alloc_phys(sz, 4*MEG)) == (paddr_t)-1 ) {
541 			prom_printf("Cannot allocate new kernel\r\n");
542 			OF_exit();
543 		}
544 		BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
545 				     (u_int64_t)newkp));
546 		if ((newkv = (vaddr_t)prom_alloc_virt(sz, 8)) ==
547 		    (vaddr_t)-1) {
548 			prom_printf("Cannot allocate new kernel va\r\n");
549 			OF_exit();
550 		}
551 		BDPRINTF(PDB_BOOT1, ("Mapping in buffer %llx at %llx\r\n",
552 		    (u_int64_t)newkp, (u_int64_t)newkv));
553 		prom_map_phys(newkp, sz, (vaddr_t)newkv, -1);
554 		BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel data...",
555 			kdsize));
556 		bzero((void *)newkv, sz);
557 		bcopy((void *)kdata, (void *)newkv, kdsize);
558 		BDPRINTF(PDB_BOOT1, ("done.  Swapping maps..unmap new\r\n"));
559 		prom_unmap_virt((vaddr_t)newkv, sz);
560 		BDPRINTF(PDB_BOOT, ("remap old "));
561 #if 0
562 		/*
563 		 * calling the prom will probably require reading part of the
564 		 * data segment so we can't do this.  */
565 		prom_unmap_virt((vaddr_t)kdatap, kdsize);
566 #endif
567 		prom_map_phys(newkp, sz, kdata, -1);
568 		/*
569 		 * we will map in 4MB, more than we allocated, to allow
570 		 * further allocation
571 		 */
572 		BDPRINTF(PDB_BOOT1, ("free old\r\n"));
573 		prom_free_phys(kdatap, kdsize);
574 		kdatap = newkp;
575 		BDPRINTF(PDB_BOOT1,
576 			 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
577 			  "avail for kernel\r\n", (u_long)firstaddr,
578 			  (u_long)prom_vtop(firstaddr)));
579 	}
580 
581 	/*
582 	 * Find out how much RAM we have installed.
583 	 */
584 	BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\r\n"));
585 	if ((memh = OF_finddevice("/memory")) == -1) {
586 		prom_printf("no memory?");
587 		OF_exit();
588 	}
589 	memsize = OF_getproplen(memh, "reg") + 2 * sizeof(struct mem_region);
590 	valloc(mem, struct mem_region, memsize);
591 	bzero((caddr_t)mem, memsize);
592 	if (OF_getprop(memh, "reg", mem, memsize) <= 0) {
593 		prom_printf("no memory installed?");
594 		OF_exit();
595 	}
596 
597 #ifdef DEBUG
598 	if (pmapdebug & PDB_BOOT1) {
599 		/* print out mem list */
600 		prom_printf("Installed physical memory:\r\n");
601 		for (mp = mem; mp->size; mp++) {
602 			prom_printf("memlist start %lx size %lx\r\n",
603 				    (u_long)mp->start, (u_long)mp->size);
604 		}
605 	}
606 #endif
607 	BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
608 
609 	for (mp = mem; mp->size; mp++)
610 		physmem += atop(mp->size);
611 	BDPRINTF(PDB_BOOT1, (" result %x or %d pages\r\n",
612 			     (int)physmem, (int)physmem));
613 
614 	/*
615 	 * Calculate approx TSB size.
616 	 */
617 	tsbsize = 0;
618 #ifdef SMALL_KERNEL
619 	while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 2)
620 #else
621 	while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 7)
622 #endif
623 		tsbsize++;
624 
625 	/*
626 	 * Save the prom translations
627 	 */
628 	sz = OF_getproplen(vmemh, "translations");
629 	valloc(prom_map, struct prom_map, sz);
630 	if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <= 0) {
631 		prom_printf("no translations installed?");
632 		OF_exit();
633 	}
634 	prom_map_size = sz / sizeof(struct prom_map);
635 #ifdef DEBUG
636 	if (pmapdebug & PDB_BOOT) {
637 		/* print out mem list */
638 		prom_printf("Prom xlations:\r\n");
639 		for (i = 0; i < prom_map_size; i++) {
640 			prom_printf("start %016lx size %016lx tte %016lx\r\n",
641 				    (u_long)prom_map[i].vstart,
642 				    (u_long)prom_map[i].vsize,
643 				    (u_long)prom_map[i].tte);
644 		}
645 		prom_printf("End of prom xlations\r\n");
646 	}
647 #endif
648 	/*
649  * Hunt for the kernel text segment and figure out its size and
650 	 * alignment.
651 	 */
652 	ktsize = 0;
653 	for (i = 0; i < prom_map_size; i++)
654 		if (prom_map[i].vstart == ktext + ktsize)
655 			ktsize += prom_map[i].vsize;
656 	if (ktsize == 0)
657 		panic("No kernel text segment!");
658 	ektext = ktext + ktsize;
659 
660 	if (ktextp & (4*MEG-1)) {
661 		/* Kernel text is not 4MB aligned -- need to fix that */
662 		BDPRINTF(PDB_BOOT1,
663 			 ("Allocating new %lx kernel text at 4MB boundary\r\n",
664 			  (u_long)ktsize));
665 		if ((newkp = prom_alloc_phys(ktsize, 4*MEG)) == 0 ) {
666 			prom_printf("Cannot allocate new kernel text\r\n");
667 			OF_exit();
668 		}
669 		BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
670 				     (u_int64_t)newkp));
671 		if ((newkv = (vaddr_t)prom_alloc_virt(ktsize, 8)) ==
672 		    (vaddr_t)-1) {
673 			prom_printf("Cannot allocate new kernel text va\r\n");
674 			OF_exit();
675 		}
676 		BDPRINTF(PDB_BOOT1, ("Mapping in buffer %lx at %lx\r\n",
677 				     (u_long)newkp, (u_long)newkv));
678 		prom_map_phys(newkp, ktsize, (vaddr_t)newkv, -1);
679 		BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel text...",
680 			ktsize));
681 		bcopy((void *)ktext, (void *)newkv,
682 		    ktsize);
683 		BDPRINTF(PDB_BOOT1, ("done.  Swapping maps..unmap new\r\n"));
684 		prom_unmap_virt((vaddr_t)newkv, 4*MEG);
685 		BDPRINTF(PDB_BOOT, ("remap old "));
686 #if 0
687 		/*
688 		 * calling the prom will probably require reading part of the
689 		 * text segment so we can't do this.
690 		 */
691 		prom_unmap_virt((vaddr_t)ktextp, ktsize);
692 #endif
693 		prom_map_phys(newkp, ktsize, ktext, -1);
694 		/*
695 		 * we will map in 4MB, more than we allocated, to allow
696 		 * further allocation
697 		 */
698 		BDPRINTF(PDB_BOOT1, ("free old\r\n"));
699 		prom_free_phys(ktextp, ktsize);
700 		ktextp = newkp;
701 
702 		BDPRINTF(PDB_BOOT1,
703 			 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
704 			  "avail for kernel\r\n", (u_long)firstaddr,
705 			  (u_long)prom_vtop(firstaddr)));
706 
707 		/*
708 		 * Re-fetch translations -- they've certainly changed.
709 		 */
710 		if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <=
711 			0) {
712 			prom_printf("no translations installed?");
713 			OF_exit();
714 		}
715 #ifdef DEBUG
716 		if (pmapdebug & PDB_BOOT) {
717 			/* print out mem list */
718 			prom_printf("New prom xlations:\r\n");
719 			for (i = 0; i < prom_map_size; i++) {
720 				prom_printf("start %016lx size %016lx tte %016lx\r\n",
721 					    (u_long)prom_map[i].vstart,
722 					    (u_long)prom_map[i].vsize,
723 					    (u_long)prom_map[i].tte);
724 			}
725 			prom_printf("End of prom xlations\r\n");
726 		}
727 #endif
728 	}
729 	ektextp = ktextp + ktsize;
730 
731 	/*
732 	 * Here's a quick in-lined reverse bubble sort.  It gets rid of
733 	 * any translations inside the kernel data VA range.
734 	 */
735 	for(i = 0; i < prom_map_size; i++) {
736 		if (prom_map[i].vstart >= kdata &&
737 		    prom_map[i].vstart <= firstaddr) {
738 			prom_map[i].vstart = 0;
739 			prom_map[i].vsize = 0;
740 		}
741 		if (prom_map[i].vstart >= ktext &&
742 		    prom_map[i].vstart <= ektext) {
743 			prom_map[i].vstart = 0;
744 			prom_map[i].vsize = 0;
745 		}
746 		for(j = i; j < prom_map_size; j++) {
747 			if (prom_map[j].vstart >= kdata &&
748 			    prom_map[j].vstart <= firstaddr)
749 				continue;	/* this is inside the kernel */
750 			if (prom_map[j].vstart >= ktext &&
751 			    prom_map[j].vstart <= ektext)
752 				continue;	/* this is inside the kernel */
753 			if (prom_map[j].vstart > prom_map[i].vstart) {
754 				struct prom_map tmp;
755 				tmp = prom_map[i];
756 				prom_map[i] = prom_map[j];
757 				prom_map[j] = tmp;
758 			}
759 		}
760 	}
761 #ifdef DEBUG
762 	if (pmapdebug & PDB_BOOT) {
763 		/* print out mem list */
764 		prom_printf("Prom xlations:\r\n");
765 		for (i = 0; i < prom_map_size; i++) {
766 			prom_printf("start %016lx size %016lx tte %016lx\r\n",
767 				    (u_long)prom_map[i].vstart,
768 				    (u_long)prom_map[i].vsize,
769 				    (u_long)prom_map[i].tte);
770 		}
771 		prom_printf("End of prom xlations\r\n");
772 	}
773 #endif
774 
775 	/*
776 	 * Allocate 64KB (8 pages) per CPU for the cpu_info structures now.
777 	 */
778 	if ((cpu0paddr = prom_alloc_phys(numcpus * 8*NBPG, 8*NBPG)) == 0 ) {
779 		prom_printf("Cannot allocate new cpu_info\r\n");
780 		OF_exit();
781 	}
782 
783 	/*
784 	 * Now that the kernel text segment is in its final location, we can try to
785 	 * find out how much memory really is free.
786 	 */
787 	sz = OF_getproplen(memh, "available") + sizeof(struct mem_region);
788 	valloc(orig, struct mem_region, sz);
789 	bzero((caddr_t)orig, sz);
790 	if (OF_getprop(memh, "available", orig, sz) <= 0) {
791 		prom_printf("no available RAM?");
792 		OF_exit();
793 	}
794 #ifdef DEBUG
795 	if (pmapdebug & PDB_BOOT1) {
796 		/* print out mem list */
797 		prom_printf("Available physical memory:\r\n");
798 		for (mp = orig; mp->size; mp++) {
799 			prom_printf("memlist start %lx size %lx\r\n",
800 				    (u_long)mp->start, (u_long)mp->size);
801 		}
802 		prom_printf("End of available physical memory\r\n");
803 	}
804 #endif
805 	valloc(avail, struct mem_region, sz);
806 	bzero((caddr_t)avail, sz);
807 	for (pcnt = 0, mp = orig, mp1 = avail; (mp1->size = mp->size);
808 	    mp++, mp1++) {
809 		mp1->start = mp->start;
810 		pcnt++;
811 	}
812 
813 	/*
814 	 * Allocate and initialize a context table
815 	 */
816 	numctx = maxctx;
817 	valloc(ctxbusy, paddr_t, CTXSIZE);
818 	bzero((caddr_t)ctxbusy, CTXSIZE);
819 
820 	/*
821 	 * Allocate our TSB.
822 	 *
823 	 * We will use the left over space to flesh out the kernel pmap.
824 	 */
825 	BDPRINTF(PDB_BOOT1, ("firstaddr before TSB=%lx\r\n",
826 		(u_long)firstaddr));
827 	firstaddr = ((firstaddr + TSBSIZE - 1) & ~(TSBSIZE-1));
828 #ifdef DEBUG
829 	i = (firstaddr + (NBPG-1)) & ~(NBPG-1);	/* First, page align */
830 	if ((int)firstaddr < i) {
831 		prom_printf("TSB alloc fixup failed\r\n");
832 		prom_printf("frobbed i, firstaddr before TSB=%x, %lx\r\n",
833 		    (int)i, (u_long)firstaddr);
834 		panic("TSB alloc");
835 		OF_exit();
836 	}
837 #endif
838 	BDPRINTF(PDB_BOOT, ("frobbed i, firstaddr before TSB=%x, %lx\r\n",
839 			    (int)i, (u_long)firstaddr));
840 	valloc(tsb_dmmu, pte_t, TSBSIZE);
841 	bzero(tsb_dmmu, TSBSIZE);
842 	valloc(tsb_immu, pte_t, TSBSIZE);
843 	bzero(tsb_immu, TSBSIZE);
844 
845 	BDPRINTF(PDB_BOOT1, ("firstaddr after TSB=%lx\r\n", (u_long)firstaddr));
846 	BDPRINTF(PDB_BOOT1, ("TSB allocated at %p size %08x\r\n", (void *)tsb_dmmu,
847 	    (int)TSBSIZE));
848 
849 #ifdef SUN4V
850 	if (CPU_ISSUN4V) {
851 		valloc(tsb_desc, struct tsb_desc, sizeof(struct tsb_desc));
852 		bzero(tsb_desc, sizeof(struct tsb_desc));
853 		tsb_desc->td_idxpgsz = 0;
854 		tsb_desc->td_assoc = 1;
855 		tsb_desc->td_size = TSBENTS;
856 		tsb_desc->td_ctxidx = -1;
857 		tsb_desc->td_pgsz = 0xf;
858 		tsb_desc->td_pa = (paddr_t)tsb_dmmu + kdatap - kdata;
859 	}
860 #endif
861 
862 	BDPRINTF(PDB_BOOT1, ("firstaddr after pmap=%08lx\r\n",
863 		(u_long)firstaddr));
864 
865 	/*
866 	 * Page align all regions.
867 	 * Non-page memory isn't very interesting to us.
868 	 * Also, sort the entries for ascending addresses.
869 	 *
870 	 * And convert from virtual to physical addresses.
871 	 */
872 
873 	BDPRINTF(PDB_BOOT, ("kernel virtual size %08lx - %08lx\r\n",
874 			    (u_long)kernelstart, (u_long)firstaddr));
875 	kdata = kdata & ~PGOFSET;
876 	ekdata = firstaddr;
877 	ekdata = (ekdata + PGOFSET) & ~PGOFSET;
878 	BDPRINTF(PDB_BOOT1, ("kernel virtual size %08lx - %08lx\r\n",
879 			     (u_long)kernelstart, (u_long)kernelend));
880 	ekdatap = ekdata - kdata + kdatap;
881 	/* Switch from vaddrs to paddrs */
882 	if(ekdatap > (kdatap + 4*MEG)) {
883 		prom_printf("Kernel size exceeds 4MB\r\n");
884 	}
885 
886 #ifdef DEBUG
887 	if (pmapdebug & PDB_BOOT1) {
888 		/* print out mem list */
889 		prom_printf("Available %lx physical memory before cleanup:\r\n",
890 			    (u_long)avail);
891 		for (mp = avail; mp->size; mp++) {
892 			prom_printf("memlist start %lx size %lx\r\n",
893 				    (u_long)mp->start,
894 				    (u_long)mp->size);
895 		}
896 		prom_printf("End of available physical memory before cleanup\r\n");
897 		prom_printf("kernel physical text size %08lx - %08lx\r\n",
898 			    (u_long)ktextp, (u_long)ektextp);
899 		prom_printf("kernel physical data size %08lx - %08lx\r\n",
900 			    (u_long)kdatap, (u_long)ekdatap);
901 	}
902 #endif
903 	/*
904 	 * Here's another quick in-lined bubble sort.
905 	 */
906 	for (i = 0; i < pcnt; i++) {
907 		for (j = i; j < pcnt; j++) {
908 			if (avail[j].start < avail[i].start) {
909 				struct mem_region tmp;
910 				tmp = avail[i];
911 				avail[i] = avail[j];
912 				avail[j] = tmp;
913 			}
914 		}
915 	}
916 
917 	/* Throw away page zero if we have it. */
918 	if (avail->start == 0) {
919 		avail->start += NBPG;
920 		avail->size -= NBPG;
921 	}
922 	/*
923 	 * Now we need to remove the area we valloc'ed from the available
924 	 * memory lists.  (NB: we may have already alloc'ed the entire space).
925 	 */
926 	for (mp = avail; mp->size; mp++) {
927 		/*
928 		 * Check whether this region holds all of the kernel.
929 		 */
930 		s = mp->start + mp->size;
931 		if (mp->start < kdatap && s > roundup(ekdatap, 4*MEG)) {
932 			avail[pcnt].start = roundup(ekdatap, 4*MEG);
933 			avail[pcnt++].size = s - kdatap;
934 			mp->size = kdatap - mp->start;
935 		}
936 		/*
937 		 * Look whether this region starts within the kernel.
938 		 */
939 		if (mp->start >= kdatap &&
940 			mp->start < roundup(ekdatap, 4*MEG)) {
941 			s = ekdatap - mp->start;
942 			if (mp->size > s)
943 				mp->size -= s;
944 			else
945 				mp->size = 0;
946 			mp->start = roundup(ekdatap, 4*MEG);
947 		}
948 		/*
949 		 * Now look whether this region ends within the kernel.
950 		 */
951 		s = mp->start + mp->size;
952 		if (s > kdatap && s < roundup(ekdatap, 4*MEG))
953 			mp->size -= s - kdatap;
954 		/*
955 		 * Now page align the start of the region.
956 		 */
957 		s = mp->start % NBPG;
958 		if (mp->size >= s) {
959 			mp->size -= s;
960 			mp->start += s;
961 		}
962 		/*
963 		 * And now align the size of the region.
964 		 */
965 		mp->size -= mp->size % NBPG;
966 		/*
967 		 * Check whether some memory is left here.
968 		 */
969 		if (mp->size == 0) {
970 			bcopy(mp + 1, mp,
971 			      (pcnt - (mp - avail)) * sizeof *mp);
972 			pcnt--;
973 			mp--;
974 			continue;
975 		}
976 		s = mp->start;
977 		sz = mp->size;
978 		for (mp1 = avail; mp1 < mp; mp1++)
979 			if (s < mp1->start)
980 				break;
981 		if (mp1 < mp) {
982 			bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1);
983 			mp1->start = s;
984 			mp1->size = sz;
985 		}
986 		/*
987 		 * In future we should be able to specify both allocated
988 		 * and free.
989 		 */
990 		uvm_page_physload(
991 			atop(mp->start),
992 			atop(mp->start+mp->size),
993 			atop(mp->start),
994 			atop(mp->start+mp->size), 0);
995 	}
996 
997 #if 0
998 	/* finally, free up any space that valloc did not use */
999 	prom_unmap_virt((vaddr_t)ekdata, roundup(ekdata, 4*MEG) - ekdata);
1000 	if (ekdatap < roundup(kdatap, 4*MEG))) {
1001 		uvm_page_physload(atop(ekdatap),
1002 			atop(roundup(ekdatap, (4*MEG))),
1003 			atop(ekdatap),
1004 			atop(roundup(ekdatap, (4*MEG))), 0);
1005 	}
1006 #endif
1007 
1008 #ifdef DEBUG
1009 	if (pmapdebug & PDB_BOOT) {
1010 		/* print out mem list */
1011 		prom_printf("Available physical memory after cleanup:\r\n");
1012 		for (mp = avail; mp->size; mp++) {
1013 			prom_printf("avail start %lx size %lx\r\n",
1014 				    (long)mp->start, (long)mp->size);
1015 		}
1016 		prom_printf("End of available physical memory after cleanup\r\n");
1017 	}
1018 #endif
1019 	/*
1020 	 * Allocate and clear out pmap_kernel()->pm_segs[]
1021 	 */
1022 	mtx_init(&pmap_kernel()->pm_mtx, IPL_VM);
1023 	pmap_kernel()->pm_refs = 1;
1024 	pmap_kernel()->pm_ctx = 0;
1025 	{
1026 		paddr_t newp;
1027 
1028 		do {
1029 			pmap_get_page(&newp, NULL, pmap_kernel());
1030 		} while (!newp); /* Throw away page zero */
1031 		pmap_kernel()->pm_segs=(int64_t *)(u_long)newp;
1032 		pmap_kernel()->pm_physaddr = newp;
1033 		/* mark kernel context as busy */
1034 		((paddr_t*)ctxbusy)[0] = pmap_kernel()->pm_physaddr;
1035 	}
1036 	/*
1037 	 * finish filling out kernel pmap.
1038 	 */
1039 
1040 	BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\r\n",
1041 	    (long)pmap_kernel()->pm_physaddr));
1042 	/*
1043 	 * Tell pmap about our msgbuf -- hope this works already
1044 	 */
1045 #ifdef DEBUG
1046 	BDPRINTF(PDB_BOOT1, ("Calling consinit()\r\n"));
1047 	if (pmapdebug & PDB_BOOT1) consinit();
1048 	BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\r\n"));
1049 #endif
1050 	/* it's not safe to call pmap_enter so we need to do this ourselves */
1051 	va = (vaddr_t)msgbufp;
1052 	prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1);
1053 	while (msgbufsiz) {
1054 		data = TSB_DATA(0 /* global */,
1055 			PGSZ_8K,
1056 			phys_msgbuf,
1057 			1 /* priv */,
1058 			1 /* Write */,
1059 			1 /* Cacheable */,
1060 			0 /* ALIAS -- Disable D$ */,
1061 			1 /* valid */,
1062 			0 /* IE */);
1063 		pmap_enter_kpage(va, data);
1064 		va += PAGE_SIZE;
1065 		msgbufsiz -= PAGE_SIZE;
1066 		phys_msgbuf += PAGE_SIZE;
1067 	}
1068 	BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\r\n"));
1069 
1070 	BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\r\n"));
1071 	data = (CPU_ISSUN4V ? SUN4V_TLB_EXEC : SUN4U_TLB_EXEC);
1072 	for (i = 0; i < prom_map_size; i++) {
1073 		if (prom_map[i].vstart && ((prom_map[i].vstart>>32) == 0)) {
1074 			for (j = 0; j < prom_map[i].vsize; j += NBPG) {
1075 				int k;
1076 				uint64_t tte;
1077 
1078 				for (k = 0; page_size_map[k].mask; k++) {
1079 					if (((prom_map[i].vstart |
1080 					      prom_map[i].tte) &
1081 					      page_size_map[k].mask) == 0 &&
1082 					      page_size_map[k].mask <
1083 					      prom_map[i].vsize)
1084 						break;
1085 				}
1086 				/* Enter PROM map into pmap_kernel() */
1087 				tte = prom_map[i].tte;
1088 				if (CPU_ISSUN4V)
1089 					tte &= ~SUN4V_TLB_SOFT_MASK;
1090 				else
1091 					tte &= ~(SUN4U_TLB_SOFT2_MASK |
1092 					    SUN4U_TLB_SOFT_MASK);
1093 				pmap_enter_kpage(prom_map[i].vstart + j,
1094 				    (tte + j) | data | page_size_map[k].code);
1095 			}
1096 		}
1097 	}
1098 	BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\r\n"));
1099 
1100 	/*
1101 	 * Fix up start of kernel heap.
1102 	 */
1103 	vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
1104 	/* Let's keep 1 page of redzone after the kernel */
1105 	vmmap += NBPG;
1106 	{
1107 		extern vaddr_t u0[2];
1108 		extern struct pcb* proc0paddr;
1109 		extern void main(void);
1110 		paddr_t pa;
1111 
1112 		/* Initialize all the pointers to u0 */
1113 		u0[0] = vmmap;
1114 		/* Allocate some VAs for u0 */
1115 		u0[1] = vmmap + 2*USPACE;
1116 
1117 		BDPRINTF(PDB_BOOT1,
1118 			("Inserting stack 0 into pmap_kernel() at %p\r\n",
1119 				vmmap));
1120 
1121 		while (vmmap < u0[1]) {
1122 			int64_t data;
1123 
1124 			pmap_get_page(&pa, NULL, pmap_kernel());
1125 			prom_map_phys(pa, NBPG, vmmap, -1);
1126 			data = TSB_DATA(0 /* global */,
1127 				PGSZ_8K,
1128 				pa,
1129 				1 /* priv */,
1130 				1 /* Write */,
1131 				1 /* Cacheable */,
1132 				0 /* ALIAS -- Disable D$ */,
1133 				1 /* valid */,
1134 				0 /* IE */);
1135 			pmap_enter_kpage(vmmap, data);
1136 			vmmap += NBPG;
1137 		}
1138 		BDPRINTF(PDB_BOOT1,
1139 			 ("Done inserting stack 0 into pmap_kernel()\r\n"));
1140 
1141 		/* Now map in and initialize our cpu_info structure */
1142 #ifdef DIAGNOSTIC
1143 		vmmap += NBPG; /* redzone -- XXXX do we need one? */
1144 #endif
1145 		intstk = vmmap = roundup(vmmap, 64*KB);
1146 		cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK);
1147 
1148 		BDPRINTF(PDB_BOOT1,
1149 			("Inserting cpu_info into pmap_kernel() at %p\r\n",
1150 				 cpus));
1151 		/* Now map in all 8 pages of cpu_info */
1152 		pa = cpu0paddr;
1153 		prom_map_phys(pa, 64*KB, vmmap, -1);
1154 		/*
1155 		 * Also map it in as the interrupt stack.
1156 		 * This lets the PROM see this if needed.
1157 		 *
1158 		 * XXXX locore.s does not flush these mappings
1159 		 * before installing the locked TTE.
1160 		 */
1161 		prom_map_phys(pa, 64*KB, CPUINFO_VA, -1);
1162 		for (i=0; i<8; i++) {
1163 			int64_t data;
1164 
1165 			data = TSB_DATA(0 /* global */,
1166 				PGSZ_8K,
1167 				pa,
1168 				1 /* priv */,
1169 				1 /* Write */,
1170 				1 /* Cacheable */,
1171 				0 /* ALIAS -- Disable D$ */,
1172 				1 /* valid */,
1173 				0 /* IE */);
1174 			pmap_enter_kpage(vmmap, data);
1175 			vmmap += NBPG;
1176 			pa += NBPG;
1177 		}
1178 		BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\r\n"));
1179 
1180 		/* Initialize our cpu_info structure */
1181 		bzero((void *)intstk, 8*NBPG);
1182 		cpus->ci_self = cpus;
1183 		cpus->ci_next = NULL; /* Redundant, I know. */
1184 		cpus->ci_curproc = &proc0;
1185 		cpus->ci_cpcb = (struct pcb *)u0[0]; /* Need better source */
1186 		cpus->ci_upaid = cpu_myid();
1187 		cpus->ci_cpuid = 0;
1188 		cpus->ci_flags = CPUF_RUNNING;
1189 		cpus->ci_fpproc = NULL;
1190 		cpus->ci_spinup = main; /* Call main when we're running. */
1191 		cpus->ci_initstack = (void *)u0[1];
1192 		cpus->ci_paddr = cpu0paddr;
1193 #ifdef SUN4V
1194 		cpus->ci_mmfsa = cpu0paddr;
1195 #endif
1196 		proc0paddr = cpus->ci_cpcb;
1197 
1198 		cpu0paddr += 64 * KB;
1199 
1200 		/* The rest will be done at CPU attach time. */
1201 		BDPRINTF(PDB_BOOT1,
1202 			 ("Done inserting cpu_info into pmap_kernel()\r\n"));
1203 	}
1204 
1205 	vmmap = (vaddr_t)reserve_dumppages((caddr_t)(u_long)vmmap);
1206 	BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\r\n"));
1207 
1208 	pmap_bootstrap_cpu(cpus->ci_paddr);
1209 }
1210 
1211 void sun4u_bootstrap_cpu(paddr_t);
1212 void sun4v_bootstrap_cpu(paddr_t);
1213 
1214 void
1215 pmap_bootstrap_cpu(paddr_t intstack)
1216 {
1217 	if (CPU_ISSUN4V)
1218 		sun4v_bootstrap_cpu(intstack);
1219 	else
1220 		sun4u_bootstrap_cpu(intstack);
1221 }
1222 
1223 extern void sun4u_set_tsbs(void);
1224 
1225 void
1226 sun4u_bootstrap_cpu(paddr_t intstack)
1227 {
1228 	u_int64_t data;
1229 	paddr_t pa;
1230 	vaddr_t va;
1231 	int index;
1232 	int impl;
1233 
1234 	impl = (getver() & VER_IMPL) >> VER_IMPL_SHIFT;
1235 
1236 	/*
1237 	 * Establish the 4MB locked mappings for kernel data and text.
1238 	 *
1239 	 * The text segment needs to be mapped into the DTLB too,
1240 	 * because of .rodata.
1241 	 */
1242 
1243 	index = 15; /* XXX */
1244 	for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) {
1245 		data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, 0, 1, 0);
1246 		data |= SUN4U_TLB_L;
1247 		prom_itlb_load(index, data, va);
1248 		prom_dtlb_load(index, data, va);
1249 		index--;
1250 	}
1251 
1252 	for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) {
1253 		data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, 0, 1, 0);
1254 		data |= SUN4U_TLB_L;
1255 		prom_dtlb_load(index, data, va);
1256 		index--;
1257 	}
1258 
1259 #ifdef MULTIPROCESSOR
1260 	if (impl >= IMPL_OLYMPUS_C && impl <= IMPL_JUPITER) {
1261 		/*
1262 		 * On SPARC64-VI and SPARC64-VII processors, the MMU is
1263 		 * shared between threads, so we can't establish a locked
1264 		 * mapping for the interrupt stack since the mappings would
1265 		 * conflict.  Instead we stick the address in a scratch
1266 		 * register, like we do for sun4v.
1267 		 */
1268 		pa = intstack + (CPUINFO_VA - INTSTACK);
1269 		pa += offsetof(struct cpu_info, ci_self);
1270 		va = ldxa(pa, ASI_PHYS_CACHED);
1271 		stxa(0x00, ASI_SCRATCH, va);
1272 
1273 		if ((CPU_JUPITERID % 2) == 1)
1274 			index--;
1275 
1276 		data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0);
1277 		data |= SUN4U_TLB_L;
1278 		prom_dtlb_load(index, data, va - (CPUINFO_VA - INTSTACK));
1279 
1280 		sun4u_set_tsbs();
1281 		return;
1282 	}
1283 #endif
1284 
1285 	/*
1286 	 * Establish the 64KB locked mapping for the interrupt stack.
1287 	 */
1288 
1289 	data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0);
1290 	data |= SUN4U_TLB_L;
1291 	prom_dtlb_load(index, data, INTSTACK);
1292 
1293 	sun4u_set_tsbs();
1294 }
1295 
1296 void
1297 sun4v_bootstrap_cpu(paddr_t intstack)
1298 {
1299 #ifdef SUN4V
1300 	u_int64_t data;
1301 	paddr_t pa;
1302 	vaddr_t va;
1303 	int err;
1304 
1305 	/*
1306 	 * Establish the 4MB locked mappings for kernel data and text.
1307 	 *
1308 	 * The text segment needs to be mapped into the DTLB too,
1309 	 * because of .rodata.
1310 	 */
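	/*
	 * Unlike the sun4u path above, there is no direct TLB access here:
	 * the permanent mappings are installed through the hypervisor with
	 * hv_mmu_map_perm_addr() instead of prom_itlb_load()/prom_dtlb_load().
	 */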
1311 
1312 	for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) {
1313 		data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, 0, 1, 0);
1314 		data |= SUN4V_TLB_X;
1315 		err = hv_mmu_map_perm_addr(va, data, MAP_ITLB|MAP_DTLB);
1316 		if (err != H_EOK)
1317 			prom_printf("err: %d\r\n", err);
1318 	}
1319 
1320 	for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) {
1321 		data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, 0, 1, 0);
1322 		err = hv_mmu_map_perm_addr(va, data, MAP_DTLB);
1323 		if (err != H_EOK)
1324 			prom_printf("err: %d\r\n", err);
1325 	}
1326 
1327 #ifndef MULTIPROCESSOR
1328 	/*
1329 	 * Establish the 64KB locked mapping for the interrupt stack.
1330 	 */
1331 	data = SUN4V_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0);
1332 	err = hv_mmu_map_perm_addr(INTSTACK, data, MAP_DTLB);
1333 	if (err != H_EOK)
1334 		prom_printf("err: %d\r\n", err);
1335 #else
1336 	pa = intstack + (CPUINFO_VA - INTSTACK);
1337 	pa += offsetof(struct cpu_info, ci_self);
1338 	stxa(0x00, ASI_SCRATCHPAD, ldxa(pa, ASI_PHYS_CACHED));
1339 #endif
1340 
1341 	stxa(0x10, ASI_SCRATCHPAD, intstack + (CPUINFO_VA - INTSTACK));
1342 
1343 	err = hv_mmu_tsb_ctx0(1, (paddr_t)tsb_desc + kdatap - kdata);
1344 	if (err != H_EOK)
1345 		prom_printf("err: %d\r\n", err);
1346 	err = hv_mmu_tsb_ctxnon0(1, (paddr_t)tsb_desc + kdatap - kdata);
1347 	if (err != H_EOK)
1348 		prom_printf("err: %d\r\n", err);
1349 #endif
1350 }
1351 
1352 /*
1353  * Initialize anything else for pmap handling.
1354  * Called during uvm_init().
1355  */
1356 void
1357 pmap_init(void)
1358 {
1359 	BDPRINTF(PDB_BOOT1, ("pmap_init()\r\n"));
1360 	if (PAGE_SIZE != NBPG)
1361 		panic("pmap_init: CLSIZE!=1");
1362 
1363 	/* Setup a pool for additional pvlist structures */
1364 	pool_init(&pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0,
1365 	    "pv_entry", NULL);
1366 	pool_init(&pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
1367 	    "pmappl", NULL);
1368 }
1369 
1370 /* Start of non-cacheable physical memory on UltraSPARC-III. */
1371 #define VM_MAXPHYS_ADDRESS	((vaddr_t)0x0000040000000000L)
1372 
1373 static vaddr_t kbreak; /* End of kernel VA */
1374 
1375 /*
1376  * How much virtual space is available to the kernel?
1377  */
1378 void
1379 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1380 {
1381 	/*
1382 	 * Make sure virtual memory and physical memory don't overlap
1383 	 * to avoid problems with ASI_PHYS_CACHED on UltraSPARC-III.
1384 	 */
1385 	if (vmmap < VM_MAXPHYS_ADDRESS)
1386 		vmmap = VM_MAXPHYS_ADDRESS;
1387 
1388 	/* Reserve two pages for pmap_copy_page && /dev/mem */
1389 	*start = kbreak = (vaddr_t)(vmmap + 2*NBPG);
1390 	*end = VM_MAX_KERNEL_ADDRESS;
1391 	BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\r\n", *start, *end));
1392 }
1393 
1394 /*
1395  * Preallocate kernel page tables to a specified VA.
1396  * This simply loops through the first TTE for each
1397  * page table from the beginning of the kernel pmap,
1398  * reads the entry, and if the result is
1399  * zero (either invalid entry or no page table) it stores
1400  * a zero there, populating page tables in the process.
1401  * This is not the most efficient technique but I don't
1402  * expect it to be called that often.
1403  */
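/*
 * ("Stores a zero" refers to pseg_set(pm, kbreak, 0, pg) below: the zero
 * is simply an invalid TTE; the call is made so that pseg_set() consumes
 * the spare page and populates the missing page-table levels.)
 */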
1404 vaddr_t
1405 pmap_growkernel(vaddr_t maxkvaddr)
1406 {
1407 	paddr_t pg;
1408 	struct pmap *pm = pmap_kernel();
1409 
1410 	if (maxkvaddr >= VM_MAX_KERNEL_ADDRESS) {
1411 		printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
1412 		       (void *)VM_MAX_KERNEL_ADDRESS, (void *)maxkvaddr);
1413 		return (kbreak);
1414 	}
1415 
1416 	/* Align with the start of a page table */
1417 	for (kbreak &= (-1<<PDSHIFT); kbreak < maxkvaddr;
1418 	     kbreak += (1<<PDSHIFT)) {
1419 		if (pseg_get(pm, kbreak))
1420 			continue;
1421 
1422 		pg = 0;
1423 		while (pseg_set(pm, kbreak, 0, pg) == 1) {
1424 			pg = 0;
1425 			pmap_get_page(&pg, "growk", pm);
1426 		}
1427 
1428 	}
1429 
1430 	return (kbreak);
1431 }
1432 
1433 /*
1434  * Create and return a physical map.
1435  */
1436 struct pmap *
1437 pmap_create(void)
1438 {
1439 	struct pmap *pm;
1440 
1441 	pm = pool_get(&pmap_pool, PR_WAITOK | PR_ZERO);
1442 
1443 	mtx_init(&pm->pm_mtx, IPL_VM);
1444 	pm->pm_refs = 1;
1445 	pmap_get_page(&pm->pm_physaddr, "pmap_create", pm);
1446 	pm->pm_segs = (int64_t *)(u_long)pm->pm_physaddr;
1447 	ctx_alloc(pm);
1448 
1449 	return (pm);
1450 }
1451 
1452 /*
1453  * Add a reference to the given pmap.
1454  */
1455 void
1456 pmap_reference(struct pmap *pm)
1457 {
1458 	atomic_inc_int(&pm->pm_refs);
1459 }
1460 
1461 /*
1462  * Retire the given pmap from service.
1463  * Should only be called if the map contains no valid mappings.
1464  */
1465 void
1466 pmap_destroy(struct pmap *pm)
1467 {
1468 	if (atomic_dec_int_nv(&pm->pm_refs) == 0) {
1469 		pmap_release(pm);
1470 		pool_put(&pmap_pool, pm);
1471 	}
1472 }
1473 
1474 /*
1475  * Release any resources held by the given physical map.
1476  * Called when a pmap initialized by pmap_pinit is being released.
1477  */
1478 void
1479 pmap_release(struct pmap *pm)
1480 {
1481 	int i, j, k;
1482 	paddr_t *pdir, *ptbl, tmp;
1483 
1484 #ifdef DIAGNOSTIC
1485 	if(pm == pmap_kernel())
1486 		panic("pmap_release: releasing pmap_kernel()");
1487 #endif
1488 
1489 	mtx_enter(&pm->pm_mtx);
1490 	for(i=0; i<STSZ; i++) {
1491 		paddr_t psegentp = (paddr_t)(u_long)&pm->pm_segs[i];
1492 		if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)psegentp,
1493 		    ASI_PHYS_CACHED))) {
1494 			for (k=0; k<PDSZ; k++) {
1495 				paddr_t pdirentp = (paddr_t)(u_long)&pdir[k];
1496 				if ((ptbl = (paddr_t *)(u_long)ldxa(
1497 					(vaddr_t)pdirentp, ASI_PHYS_CACHED))) {
1498 					for (j=0; j<PTSZ; j++) {
1499 						int64_t data;
1500 						paddr_t pa;
1501 						pv_entry_t pv;
1502 
1503 						data  = ldxa((vaddr_t)&ptbl[j],
1504 							ASI_PHYS_CACHED);
1505 						if (!(data & TLB_V))
1506 							continue;
1507 						pa = data & TLB_PA_MASK;
1508 						pv = pa_to_pvh(pa);
1509 						if (pv != NULL) {
1510 							printf("pmap_release: pm=%p page %llx still in use\n", pm,
1511 							       (unsigned long long)(((u_int64_t)i<<STSHIFT)|((u_int64_t)k<<PDSHIFT)|((u_int64_t)j<<PTSHIFT)));
1512 							db_enter();
1513 						}
1514 					}
1515 					stxa(pdirentp, ASI_PHYS_CACHED, 0);
1516 					pmap_free_page((paddr_t)ptbl, pm);
1517 				}
1518 			}
1519 			stxa(psegentp, ASI_PHYS_CACHED, 0);
1520 			pmap_free_page((paddr_t)pdir, pm);
1521 		}
1522 	}
1523 	tmp = (paddr_t)(u_long)pm->pm_segs;
1524 	pm->pm_segs = NULL;
1525 	pmap_free_page(tmp, pm);
1526 	mtx_leave(&pm->pm_mtx);
1527 	ctx_free(pm);
1528 }
1529 
1530 /*
1531  * Garbage collects the physical map system for
1532  * pages which are no longer used.
1533  * Success need not be guaranteed -- that is, there
1534  * may well be pages which are not referenced, but
1535  * others may be collected.
1536  * Called by the pageout daemon when pages are scarce.
1537  */
1538 void
1539 pmap_collect(struct pmap *pm)
1540 {
1541 	int i, j, k, n, m, s;
1542 	paddr_t *pdir, *ptbl;
1543 	/* This is a good place to scan the pmaps for page tables with
1544 	 * no valid mappings in them and free them. */
1545 
1546 	/* NEVER GARBAGE COLLECT THE KERNEL PMAP */
1547 	if (pm == pmap_kernel())
1548 		return;
1549 
1550 	s = splvm();
1551 	for (i=0; i<STSZ; i++) {
1552 		if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
1553 			m = 0;
1554 			for (k=0; k<PDSZ; k++) {
1555 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
1556 					m++;
1557 					n = 0;
1558 					for (j=0; j<PTSZ; j++) {
1559 						int64_t data = ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED);
1560 						if (data&TLB_V)
1561 							n++;
1562 					}
1563 					if (!n) {
1564 						/* Free the damn thing */
1565 						stxa((paddr_t)(u_long)&pdir[k], ASI_PHYS_CACHED, 0);
1566 						pmap_free_page((paddr_t)ptbl, pm);
1567 					}
1568 				}
1569 			}
1570 			if (!m) {
1571 				/* Free the damn thing */
1572 				stxa((paddr_t)(u_long)&pm->pm_segs[i], ASI_PHYS_CACHED, 0);
1573 				pmap_free_page((paddr_t)pdir, pm);
1574 			}
1575 		}
1576 	}
1577 	splx(s);
1578 }
1579 
1580 void
1581 pmap_zero_page(struct vm_page *pg)
1582 {
1583 	pmap_zero_phys(VM_PAGE_TO_PHYS(pg));
1584 }
1585 
1586 void
1587 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1588 {
1589 	paddr_t src = VM_PAGE_TO_PHYS(srcpg);
1590 	paddr_t dst = VM_PAGE_TO_PHYS(dstpg);
1591 
1592 	pmap_copy_phys(src, dst);
1593 }
1594 
1595 /*
1596  * Activate the address space for the specified process.  If the
1597  * process is the current process, load the new MMU context.
1598  */
1599 void
1600 pmap_activate(struct proc *p)
1601 {
1602 	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1603 	int s;
1604 
1605 	/*
1606 	 * This is essentially the same thing that happens in cpu_switch()
1607 	 * when the newly selected process is about to run, except that we
1608 	 * have to make sure to clean the register windows before we set
1609 	 * the new context.
1610 	 */
1611 
1612 	s = splvm();
1613 	if (p == curproc) {
1614 		write_user_windows();
1615 		if (pmap->pm_ctx == 0)
1616 			ctx_alloc(pmap);
1617 		if (CPU_ISSUN4V)
1618 			stxa(CTX_SECONDARY, ASI_MMU_CONTEXTID, pmap->pm_ctx);
1619 		else
1620 			stxa(CTX_SECONDARY, ASI_DMMU, pmap->pm_ctx);
1621 	}
1622 	splx(s);
1623 }
1624 
1625 /*
1626  * Deactivate the address space of the specified process.
1627  */
1628 void
1629 pmap_deactivate(struct proc *p)
1630 {
1631 }
1632 
1633 /*
1634  * pmap_kenter_pa:		[ INTERFACE ]
1635  *
1636  *	Enter a va -> pa mapping into the kernel pmap without any
1637  *	physical->virtual tracking.
1638  *
1639  *	Note: no locking is necessary in this function.
1640  */
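/*
 *	Hypothetical usage sketch (not from this file): wiring one kernel
 *	page read/write and tearing it down again:
 *
 *		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
 *		...
 *		pmap_kremove(va, PAGE_SIZE);
 *
 *	The caller owns the VA and must not rely on ref/mod tracking for
 *	these mappings.
 */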
1641 void
1642 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1643 {
1644 	struct pmap *pm = pmap_kernel();
1645 	pte_t tte;
1646 
1647 	KDASSERT(va < INTSTACK || va > EINTSTACK);
1648 	KDASSERT(va < kdata || va > ekdata);
1649 
1650 #ifdef DIAGNOSTIC
1651 	if (pa & (PMAP_NVC|PMAP_NC|PMAP_LITTLE))
1652 		panic("%s: illegal cache flags 0x%lx", __func__, pa);
1653 #endif
1654 
1655 	/*
1656 	 * Construct the TTE.
1657 	 */
1658 	tte.tag = TSB_TAG(0, pm->pm_ctx,va);
1659 	if (CPU_ISSUN4V) {
1660 		tte.data = SUN4V_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1661 		    (PROT_WRITE & prot), 1, 0, 1, 0);
1662 		/*
1663 		 * We don't track modification on kenter mappings.
1664 		 */
1665 		if (prot & PROT_WRITE)
1666 			tte.data |= SUN4V_TLB_REAL_W|SUN4V_TLB_W;
1667 		if (prot & PROT_EXEC)
1668 			tte.data |= SUN4V_TLB_EXEC;
1669 		tte.data |= SUN4V_TLB_TSB_LOCK;	/* wired */
1670 	} else {
1671 		tte.data = SUN4U_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1672 		    (PROT_WRITE & prot), 1, 0, 1, 0);
1673 		/*
1674 		 * We don't track modification on kenter mappings.
1675 		 */
1676 		if (prot & PROT_WRITE)
1677 			tte.data |= SUN4U_TLB_REAL_W|SUN4U_TLB_W;
1678 		if (prot & PROT_EXEC)
1679 			tte.data |= SUN4U_TLB_EXEC;
1680 		if (prot == PROT_EXEC)
1681 			tte.data |= SUN4U_TLB_EXEC_ONLY;
1682 		tte.data |= SUN4U_TLB_TSB_LOCK;	/* wired */
1683 	}
1684 	KDASSERT((tte.data & TLB_NFO) == 0);
1685 
1686 	/* Kernel page tables are pre-allocated. */
1687 	if (pseg_set(pm, va, tte.data, 0) != 0)
1688 		panic("%s: no pseg", __func__);
1689 
1690 	/* this is correct */
1691 	dcache_flush_page(pa);
1692 }
1693 
1694 /*
1695  * pmap_kremove:		[ INTERFACE ]
1696  *
1697  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1698  *	for size bytes (assumed to be page rounded).
1699  */
1700 void
1701 pmap_kremove(vaddr_t va, vsize_t size)
1702 {
1703 	struct pmap *pm = pmap_kernel();
1704 
1705 	KDASSERT(va < INTSTACK || va > EINTSTACK);
1706 	KDASSERT(va < kdata || va > ekdata);
1707 
1708 	while (size >= NBPG) {
1709 		/*
1710 		 * Is this part of the permanent 4MB mapping?
1711 		 */
1712 #ifdef DIAGNOSTIC
1713 		if (pm == pmap_kernel() &&
1714 		    (va >= ktext && va < roundup(ekdata, 4*MEG)))
1715 			panic("%s: va=0x%lx in locked TLB", __func__, va);
1716 #endif
1717 		/* Shouldn't need to do this if the entry's not valid. */
1718 		if (pseg_get(pm, va)) {
1719 			/* We need to flip the valid bit and clear the access statistics. */
1720 			if (pseg_set(pm, va, 0, 0)) {
1721 				printf("pmap_kremove: gotten pseg empty!\n");
1722 				db_enter();
1723 				/* panic? */
1724 			}
1725 
1726 			tsb_invalidate(pm->pm_ctx, va);
1727 			/* Here we assume nothing can get into the TLB unless it has a PTE */
1728 			tlb_flush_pte(va, pm->pm_ctx);
1729 		}
1730 		va += NBPG;
1731 		size -= NBPG;
1732 	}
1733 }
1734 
1735 /*
1736  * Insert physical page at pa into the given pmap at virtual address va.
1737  * Supports 64-bit pa so we can map I/O space.
1738  */
1739 int
1740 pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1741 {
1742 	pte_t tte;
1743 	paddr_t pg;
1744 	int aliased = 0;
1745 	pv_entry_t pv, npv;
1746 	int size = 0; /* PMAP_SZ_TO_TTE(pa); */
1747 	boolean_t wired = (flags & PMAP_WIRED) != 0;
1748 
1749 	/*
1750 	 * Is this part of the permanent mappings?
1751 	 */
1752 	KDASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
1753 	KDASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
1754 
1755 	npv = pool_get(&pv_pool, PR_NOWAIT);
1756 	if (npv == NULL && (flags & PMAP_CANFAIL))
1757 		return (ENOMEM);
1758 
1759 	/*
1760 	 * XXXX If a mapping at this address already exists, remove it.
1761 	 */
1762 	mtx_enter(&pm->pm_mtx);
1763 	tte.data = pseg_get(pm, va);
1764 	if (tte.data & TLB_V) {
1765 		mtx_leave(&pm->pm_mtx);
1766 		pmap_remove(pm, va, va + NBPG-1);
1767 		mtx_enter(&pm->pm_mtx);
1768 		tte.data = pseg_get(pm, va);
1769 	}
1770 
1771 	/*
1772 	 * Construct the TTE.
1773 	 */
1774 	pv = pa_to_pvh(pa);
1775 	if (pv != NULL) {
1776 		struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
1777 
1778 		mtx_enter(&pg->mdpage.pvmtx);
1779 		aliased = (pv->pv_va & PV_ALIAS);
1780 #ifdef DIAGNOSTIC
1781 		if ((flags & PROT_MASK) & ~prot)
1782 			panic("pmap_enter: access_type exceeds prot");
1783 #endif
1784 		/* If we don't have the traphandler do it, set the ref/mod bits now */
1785 		if (flags & PROT_MASK)
1786 			pv->pv_va |= PV_REF;
1787 		if (flags & PROT_WRITE)
1788 			pv->pv_va |= PV_MOD;
1789 		pv->pv_va |= pmap_tte2flags(tte.data);
1790 		mtx_leave(&pg->mdpage.pvmtx);
1791 	} else {
1792 		aliased = 0;
1793 	}
1794 	if (pa & PMAP_NVC)
1795 		aliased = 1;
1796 	if (CPU_ISSUN4V) {
1797 		tte.data = SUN4V_TSB_DATA(0, size, pa, pm == pmap_kernel(),
1798 		    (flags & PROT_WRITE), (!(pa & PMAP_NC)),
1799 		    aliased, 1, (pa & PMAP_LITTLE));
1800 		if (prot & PROT_WRITE)
1801 			tte.data |= SUN4V_TLB_REAL_W;
1802 		if (prot & PROT_EXEC)
1803 			tte.data |= SUN4V_TLB_EXEC;
1804 		if (wired)
1805 			tte.data |= SUN4V_TLB_TSB_LOCK;
1806 	} else {
1807 		tte.data = SUN4U_TSB_DATA(0, size, pa, pm == pmap_kernel(),
1808 		    (flags & PROT_WRITE), (!(pa & PMAP_NC)),
1809 		    aliased, 1, (pa & PMAP_LITTLE));
1810 		if (prot & PROT_WRITE)
1811 			tte.data |= SUN4U_TLB_REAL_W;
1812 		if (prot & PROT_EXEC)
1813 			tte.data |= SUN4U_TLB_EXEC;
1814 		if (prot == PROT_EXEC)
1815 			tte.data |= SUN4U_TLB_EXEC_ONLY;
1816 		if (wired)
1817 			tte.data |= SUN4U_TLB_TSB_LOCK;
1818 	}
1819 	KDASSERT((tte.data & TLB_NFO) == 0);
1820 
1821 	pg = 0;
1822 	while (pseg_set(pm, va, tte.data, pg) == 1) {
1823 		pg = 0;
1824 		if (!pmap_get_page(&pg, NULL, pm)) {
1825 			if ((flags & PMAP_CANFAIL) == 0)
1826 				panic("pmap_enter: no memory");
1827 			mtx_leave(&pm->pm_mtx);
1828 			if (npv != NULL)
1829 				pool_put(&pv_pool, npv);
1830 			return (ENOMEM);
1831 		}
1832 	}
1833 
1834 	if (pv != NULL)
1835 		npv = pmap_enter_pv(pm, npv, va, pa);
1836 	atomic_inc_long(&pm->pm_stats.resident_count);
1837 	mtx_leave(&pm->pm_mtx);
1838 	if (pm->pm_ctx || pm == pmap_kernel()) {
1839 		tsb_invalidate(pm->pm_ctx, va);
1840 
1841 		/* Force reload -- protections may be changed */
1842 		tlb_flush_pte(va, pm->pm_ctx);
1843 	}
1844 	/* this is correct */
1845 	dcache_flush_page(pa);
1846 
1847 	if (npv != NULL)
1848 		pool_put(&pv_pool, npv);
1849 
1850 	/* We will let the fast mmu miss interrupt load the new translation */
1851 	return 0;
1852 }
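
/*
 * A minimal sketch (disabled) of a caller that uses PMAP_CANFAIL: both
 * the pv entry and the page table page allocations in pmap_enter() can
 * fail, so ENOMEM must be handled.  The retry-via-uvm_wait() strategy
 * and the wait channel name are assumptions for illustration only.
 */
#if 0
static int
example_enter_with_retry(struct pmap *pm, vaddr_t va, paddr_t pa)
{
	while (pmap_enter(pm, va, pa, PROT_READ | PROT_WRITE,
	    PROT_READ | PROT_WRITE | PMAP_CANFAIL) == ENOMEM)
		uvm_wait("pmflt");	/* hypothetical wait channel */
	return (0);
}
#endif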
1853 
1854 /*
1855  * Remove the given range of mapping entries.
1856  */
1857 void
1858 pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva)
1859 {
1860 	pv_entry_t pv, freepvs = NULL;
1861 	int flush = 0;
1862 	int64_t data;
1863 	vaddr_t flushva = va;
1864 
1865 	/*
1866 	 * In here we should check each pseg and if there are no more entries,
1867  * free it.  It's just that linear scans of 8K pages get expensive.
1868 	 */
1869 
1870 	KDASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
1871 	KDASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
1872 
1873 	mtx_enter(&pm->pm_mtx);
1874 
1875 	/* Now do the real work */
1876 	while (va < endva) {
1877 		/*
1878 		 * Is this part of the permanent 4MB mapping?
1879 		 */
1880 #ifdef DIAGNOSTIC
1881 		if (pm == pmap_kernel() && va >= ktext &&
1882 			va < roundup(ekdata, 4*MEG))
1883 			panic("pmap_remove: va=%08x in locked TLB", (u_int)va);
1884 #endif
1885 		/* We don't really need to do this if the valid bit is not set... */
1886 		if ((data = pseg_get(pm, va)) && (data & TLB_V) != 0) {
1887 			paddr_t entry;
1888 
1889 			flush |= 1;
1890 			/* First remove it from the pv_table */
1891 			entry = (data & TLB_PA_MASK);
1892 			pv = pa_to_pvh(entry);
1893 			if (pv != NULL) {
1894 				pv = pmap_remove_pv(pm, va, entry);
1895 				if (pv != NULL) {
1896 					pv->pv_next = freepvs;
1897 					freepvs = pv;
1898 				}
1899 			}
1900 			/* We need to flip the valid bit and clear the access statistics. */
1901 			if (pseg_set(pm, va, 0, 0)) {
1902 				printf("pmap_remove: gotten pseg empty!\n");
1903 				db_enter();
1904 				/* panic? */
1905 			}
1906 			atomic_dec_long(&pm->pm_stats.resident_count);
1907 			if (!pm->pm_ctx && pm != pmap_kernel())
1908 				continue;
1909 			tsb_invalidate(pm->pm_ctx, va);
1910 			/* Here we assume nothing can get into the TLB unless it has a PTE */
1911 			tlb_flush_pte(va, pm->pm_ctx);
1912 		}
1913 		va += NBPG;
1914 	}
1915 
1916 	mtx_leave(&pm->pm_mtx);
1917 
1918 	while ((pv = freepvs) != NULL) {
1919 		freepvs = pv->pv_next;
1920 		pool_put(&pv_pool, pv);
1921 	}
1922 
1923 	if (flush)
1924 		cache_flush_virt(flushva, endva - flushva);
1925 }
1926 
1927 /*
1928  * Change the protection on the specified range of this pmap.
1929  */
1930 void
1931 pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1932 {
1933 	paddr_t pa;
1934 	pv_entry_t pv;
1935 	int64_t data;
1936 
1937 	KDASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
1938 	KDASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
1939 
1940 	if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC))
1941 		return;
1942 
1943 	if (prot == PROT_NONE) {
1944 		pmap_remove(pm, sva, eva);
1945 		return;
1946 	}
1947 
1948 	mtx_enter(&pm->pm_mtx);
1949 	sva = sva & ~PGOFSET;
1950 	while (sva < eva) {
1951 		/*
1952 		 * Is this part of the permanent 4MB mapping?
1953 		 */
1954 		if (pm == pmap_kernel() && sva >= ktext &&
1955 			sva < roundup(ekdata, 4*MEG)) {
1956 			prom_printf("pmap_protect: va=%08lx in locked TLB\r\n", sva);
1957 			OF_enter();
1958 			mtx_leave(&pm->pm_mtx);
1959 			return;
1960 		}
1961 
1962 		if (((data = pseg_get(pm, sva))&TLB_V) /*&& ((data&TLB_TSB_LOCK) == 0)*/) {
1963 			pa = data & TLB_PA_MASK;
1964 			pv = pa_to_pvh(pa);
1965 			if (pv != NULL) {
1966 				struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
1967 
1968 				/* Save REF/MOD info */
1969 				mtx_enter(&pg->mdpage.pvmtx);
1970 				pv->pv_va |= pmap_tte2flags(data);
1971 				mtx_leave(&pg->mdpage.pvmtx);
1972 			}
1973 			/* Just do the pmap and TSB, not the pv_list */
1974 			if (CPU_ISSUN4V) {
1975 				if ((prot & PROT_WRITE) == 0)
1976 					data &= ~(SUN4V_TLB_W|SUN4V_TLB_REAL_W);
1977 				if ((prot & PROT_EXEC) == 0)
1978 					data &= ~(SUN4V_TLB_EXEC);
1979 			} else {
1980 				if ((prot & PROT_WRITE) == 0)
1981 					data &= ~(SUN4U_TLB_W|SUN4U_TLB_REAL_W);
1982 				if ((prot & PROT_EXEC) == 0)
1983 					data &= ~(SUN4U_TLB_EXEC | SUN4U_TLB_EXEC_ONLY);
1984 			}
1985 			KDASSERT((data & TLB_NFO) == 0);
1986 			if (pseg_set(pm, sva, data, 0)) {
1987 				printf("pmap_protect: gotten pseg empty!\n");
1988 				db_enter();
1989 				/* panic? */
1990 			}
1991 
1992 			if (!pm->pm_ctx && pm != pmap_kernel())
1993 				continue;
1994 			tsb_invalidate(pm->pm_ctx, sva);
1995 			tlb_flush_pte(sva, pm->pm_ctx);
1996 		}
1997 		sva += NBPG;
1998 	}
1999 	mtx_leave(&pm->pm_mtx);
2000 }
2001 
2002 /*
2003  * Extract the physical page address associated
2004  * with the given map/virtual_address pair.
2005  */
2006 boolean_t
2007 pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
2008 {
2009 	paddr_t pa;
2010 
2011 	if (pm == pmap_kernel()) {
2012 		if (va >= kdata && va < roundup(ekdata, 4*MEG)) {
2013 			/* Need to deal w/locked TLB entry specially. */
2014 			pa = (paddr_t)(kdatap - kdata + va);
2015 		} else if (va >= ktext && va < ektext) {
2016 			/* Need to deal w/locked TLB entry specially. */
2017 			pa = (paddr_t)(ktextp - ktext + va);
2018 		} else if (va >= INTSTACK && va < EINTSTACK) {
2019 			pa = curcpu()->ci_paddr + va - INTSTACK;
2020 		} else {
2021 			goto check_pseg;
2022 		}
2023 	} else {
2024 check_pseg:
2025 		mtx_enter(&pm->pm_mtx);
2026 		pa = pseg_get(pm, va) & TLB_PA_MASK;
2027 		mtx_leave(&pm->pm_mtx);
2028 		if (pa == 0)
2029 			return FALSE;
2030 		pa |= va & PAGE_MASK;
2031 	}
2032 	if (pap != NULL)
2033 		*pap = pa;
2034 	return TRUE;
2035 }
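
/*
 * A minimal sketch (disabled) of the usual pmap_extract() calling
 * pattern: the boolean return must be checked, since unmapped addresses
 * yield FALSE and leave *pap untouched.  The helper name is made up for
 * illustration.
 */
#if 0
static paddr_t
example_kva_to_pa(vaddr_t va)
{
	paddr_t pa;

	if (!pmap_extract(pmap_kernel(), va, &pa))
		panic("example_kva_to_pa: 0x%lx not mapped", va);
	return (pa);
}
#endif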
2036 
2037 /*
2038  * Return the number of bytes that pmap_dumpmmu() will dump.
2039  */
2040 int
2041 pmap_dumpsize(void)
2042 {
2043 	int	sz;
2044 
2045 	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
2046 	sz += memsize * sizeof(phys_ram_seg_t);
2047 
2048 	return btodb(sz + DEV_BSIZE - 1);
2049 }
2050 
2051 /*
2052  * Write the mmu contents to the dump device.
2053  * This gets appended to the end of a crash dump since
2054  * there is no in-core copy of kernel memory mappings.
2055  *
2056  * Write the core dump headers and MD data to the dump device.
2057  * We dump the following items:
2058  *
2059  *	kcore_seg_t		 (MI header defined in <sys/kcore.h>)
2060  *	cpu_kcore_hdr_t		 (MD header defined in <machine/kcore.h>)
2061  *	phys_ram_seg_t[memsize]  physical memory segments
2062  */
2063 int
2064 pmap_dumpmmu(int (*dump)(dev_t, daddr_t, caddr_t, size_t), daddr_t blkno)
2065 {
2066 	kcore_seg_t	*kseg;
2067 	cpu_kcore_hdr_t	*kcpu;
2068 	phys_ram_seg_t	memseg;
2069 	register int	error = 0;
2070 	register int	i, memsegoffset;
2071 	int		buffer[dbtob(1) / sizeof(int)];
2072 	int		*bp, *ep;
2073 
2074 #define EXPEDITE(p,n) do {						\
2075 	int *sp = (int *)(p);						\
2076 	int sz = (n);							\
2077 	while (sz > 0) {						\
2078 		*bp++ = *sp++;						\
2079 		if (bp >= ep) {						\
2080 			error = (*dump)(dumpdev, blkno,			\
2081 					(caddr_t)buffer, dbtob(1));	\
2082 			if (error != 0)					\
2083 				return (error);				\
2084 			++blkno;					\
2085 			bp = buffer;					\
2086 		}							\
2087 		sz -= 4;						\
2088 	}								\
2089 } while (0)
2090 
2091 	/* Setup bookkeeping pointers */
2092 	bp = buffer;
2093 	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
2094 
2095 	/* Fill in MI segment header */
2096 	kseg = (kcore_seg_t *)bp;
2097 	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2098 	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
2099 
2100 	/* Fill in MD segment header (interpreted by MD part of libkvm) */
2101 	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
2102 	kcpu->cputype = CPU_SUN4U;
2103 	kcpu->kernbase = (u_int64_t)KERNBASE;
2104 	kcpu->cpubase = (u_int64_t)CPUINFO_VA;
2105 
2106 	/* Describe the locked text segment */
2107 	kcpu->ktextbase = (u_int64_t)ktext;
2108 	kcpu->ktextp = (u_int64_t)ktextp;
2109 	kcpu->ktextsz = (u_int64_t)(roundup(ektextp, 4*MEG) - ktextp);
2110 
2111 	/* Describe locked data segment */
2112 	kcpu->kdatabase = (u_int64_t)kdata;
2113 	kcpu->kdatap = (u_int64_t)kdatap;
2114 	kcpu->kdatasz = (u_int64_t)(roundup(ekdatap, 4*MEG) - kdatap);
2115 
2116 	/* Now the memsegs */
2117 	kcpu->nmemseg = memsize;
2118 	kcpu->memsegoffset = memsegoffset = ALIGN(sizeof(cpu_kcore_hdr_t));
2119 
2120 	/* Now we need to point this at our kernel pmap. */
2121 	kcpu->nsegmap = STSZ;
2122 	kcpu->segmapoffset = (u_int64_t)pmap_kernel()->pm_physaddr;
2123 
2124 	/* Note: we have assumed everything fits in buffer[] so far... */
2125 	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
2126 
2127 	for (i = 0; i < memsize; i++) {
2128 		memseg.start = mem[i].start;
2129 		memseg.size = mem[i].size;
2130 		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
2131 	}
2132 
2133 	if (bp != buffer)
2134 		error = (*dump)(dumpdev, blkno++, (caddr_t)buffer, dbtob(1));
2135 
2136 	return (error);
2137 }
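
/*
 * A minimal sketch (disabled) of the dump layout that pmap_dumpsize()
 * and pmap_dumpmmu() agree on.  Offsets are relative to the start of
 * the dump header written above; the local names are for illustration
 * only.
 */
#if 0
static void
example_dump_layout(void)
{
	size_t off_kcpu, off_memseg, total;

	off_kcpu = ALIGN(sizeof(kcore_seg_t));
	off_memseg = off_kcpu + ALIGN(sizeof(cpu_kcore_hdr_t));
	total = off_memseg + memsize * sizeof(phys_ram_seg_t);

	/* pmap_dumpsize() reports this, rounded up to whole disk blocks. */
	printf("dump header: %zu bytes, %d blocks\n",
	    total, (int)btodb(total + DEV_BSIZE - 1));
}
#endif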
2138 
2139 /*
2140  * Determine (non)existence of physical page
2141  */
2142 int pmap_pa_exists(paddr_t pa)
2143 {
2144 	struct mem_region *mp;
2145 
2146 	/* Just go through physical memory list & see if we're there */
2147 	for (mp = mem; mp->size && mp->start <= pa; mp++)
2148 		if (mp->start <= pa && mp->start + mp->size >= pa)
2149 			return 1;
2150 	return 0;
2151 }
2152 
2153 /*
2154  * Lookup the appropriate TSB entry.
2155  *
2156  * Here is the full official pseudo code:
2157  *
2158  */
2159 
2160 #ifdef NOTYET
2161 int64 GenerateTSBPointer(
2162  	int64 va,		/* Missing VA			*/
2163  	PointerType type,	/* 8K_POINTER or 16K_POINTER	*/
2164  	int64 TSBBase,		/* TSB Register[63:13] << 13	*/
2165  	Boolean split,		/* TSB Register[12]		*/
2166  	int TSBsize)		/* TSB Register[2:0]		*/
2167 {
2168  	int64 vaPortion;
2169  	int64 TSBBaseMask;
2170  	int64 splitMask;
2171 
2172 	/* TSBBaseMask marks the bits from TSB Base Reg		*/
2173 	TSBBaseMask = 0xffffffffffffe000 <<
2174 		(split? (TSBsize + 1) : TSBsize);
2175 
2176 	/* Shift va towards lsb appropriately and		*/
2177 	/* zero out the original va page offset			*/
2178 	vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
2179 		0xfffffffffffffff0;
2180 
2181 	if (split) {
2182 		/* There's only one bit in question for split	*/
2183 		splitMask = 1 << (13 + TSBsize);
2184 		if (type == 8K_POINTER)
2185 			/* Make sure we're in the lower half	*/
2186 			vaPortion &= ~splitMask;
2187 		else
2188 			/* Make sure we're in the upper half	*/
2189 			vaPortion |= splitMask;
2190 	}
2191 	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
2192 }
2193 #endif
2194 /*
2195  * Of course, since we are not using a split TSB or variable page sizes,
2196  * we can optimize this a bit.
2197  *
2198  * The following only works for a unified 8K TSB.  It will find the slot
2199  * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
2200  */
2201 int
2202 ptelookup_va(vaddr_t va)
2203 {
2204 	long tsbptr;
2205 #define TSBBASEMASK	(0xffffffffffffe000LL<<tsbsize)
2206 
2207 	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK );
2208 	return (tsbptr/sizeof(pte_t));
2209 }
2210 
2211 /*
2212  * Do whatever is needed to sync the MOD/REF flags
2213  */
2214 
2215 boolean_t
2216 pmap_clear_modify(struct vm_page *pg)
2217 {
2218 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2219 	int changed = 0;
2220 	pv_entry_t pv;
2221 
2222 	/* Clear all mappings */
2223 	mtx_enter(&pg->mdpage.pvmtx);
2224 	pv = pa_to_pvh(pa);
2225 	if (pv->pv_va & PV_MOD) {
2226 		changed |= 1;
2227 		pv->pv_va &= ~PV_MOD;
2228 	}
2229 	if (pv->pv_pmap != NULL) {
2230 		for (; pv; pv = pv->pv_next) {
2231 			int64_t data;
2232 
2233 			/* First clear the mod bit in the PTE and make it R/O */
2234 			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2235 
2236 			/* Need to both clear the modify and write bits */
2237 			if (CPU_ISSUN4V) {
2238 				if (data & (SUN4V_TLB_MODIFY))
2239 					changed |= 1;
2240 				data &= ~(SUN4V_TLB_MODIFY|SUN4V_TLB_W);
2241 			} else {
2242 				if (data & (SUN4U_TLB_MODIFY))
2243 					changed |= 1;
2244 				data &= ~(SUN4U_TLB_MODIFY|SUN4U_TLB_W);
2245 			}
2246 			KDASSERT((data & TLB_NFO) == 0);
2247 			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2248 				printf("pmap_clear_modify: gotten pseg empty!\n");
2249 				db_enter();
2250 				/* panic? */
2251 			}
2252 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2253 				tsb_invalidate(pv->pv_pmap->pm_ctx,
2254 				    (pv->pv_va & PV_VAMASK));
2255 				tlb_flush_pte((pv->pv_va & PV_VAMASK),
2256 				    pv->pv_pmap->pm_ctx);
2257 			}
2258 			/* Then clear the mod bit in the pv */
2259 			if (pv->pv_va & PV_MOD) {
2260 				changed |= 1;
2261 				pv->pv_va &= ~PV_MOD;
2262 			}
2263 			dcache_flush_page(pa);
2264 		}
2265 	}
2266 	mtx_leave(&pg->mdpage.pvmtx);
2267 
2268 	return (changed);
2269 }
2270 
2271 boolean_t
2272 pmap_clear_reference(struct vm_page *pg)
2273 {
2274 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2275 	int changed = 0;
2276 	pv_entry_t pv;
2277 
2278 	/* Clear all references */
2279 	mtx_enter(&pg->mdpage.pvmtx);
2280 	pv = pa_to_pvh(pa);
2281 	if (pv->pv_va & PV_REF) {
2282 		changed = 1;
2283 		pv->pv_va &= ~PV_REF;
2284 	}
2285 	if (pv->pv_pmap != NULL) {
2286 		for (; pv; pv = pv->pv_next) {
2287 			int64_t data;
2288 
2289 			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2290 			if (CPU_ISSUN4V) {
2291 				if (data & SUN4V_TLB_ACCESS)
2292 					changed = 1;
2293 				data &= ~SUN4V_TLB_ACCESS;
2294 			} else {
2295 				if (data & SUN4U_TLB_ACCESS)
2296 					changed = 1;
2297 				data &= ~SUN4U_TLB_ACCESS;
2298 			}
2299 			KDASSERT((data & TLB_NFO) == 0);
2300 			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2301 				printf("pmap_clear_reference: gotten pseg empty!\n");
2302 				db_enter();
2303 				/* panic? */
2304 			}
2305 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2306 				tsb_invalidate(pv->pv_pmap->pm_ctx,
2307 				    (pv->pv_va & PV_VAMASK));
2308 /*
2309 				tlb_flush_pte(pv->pv_va & PV_VAMASK,
2310 					pv->pv_pmap->pm_ctx);
2311 */
2312 			}
2313 			if (pv->pv_va & PV_REF) {
2314 				changed = 1;
2315 				pv->pv_va &= ~PV_REF;
2316 			}
2317 		}
2318 	}
2319 	/* Stupid here will take a cache hit even on unmapped pages 8^( */
2320 	dcache_flush_page(VM_PAGE_TO_PHYS(pg));
2321 	mtx_leave(&pg->mdpage.pvmtx);
2322 
2323 	return (changed);
2324 }
2325 
2326 boolean_t
2327 pmap_is_modified(struct vm_page *pg)
2328 {
2329 	pv_entry_t pv, npv;
2330 	int mod = 0;
2331 
2332 	/* Check if any mapping has been modified */
2333 	mtx_enter(&pg->mdpage.pvmtx);
2334 	pv = &pg->mdpage.pvent;
2335 	if (pv->pv_va & PV_MOD)
2336 		mod = 1;
2337 	if (!mod && (pv->pv_pmap != NULL)) {
2338 		for (npv = pv; mod == 0 && npv && npv->pv_pmap; npv = npv->pv_next) {
2339 			int64_t data;
2340 
2341 			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2342 			if (pmap_tte2flags(data) & PV_MOD)
2343 				mod = 1;
2344 			/* Migrate modify info to head pv */
2345 			if (npv->pv_va & PV_MOD) {
2346 				mod = 1;
2347 				npv->pv_va &= ~PV_MOD;
2348 			}
2349 		}
2350 	}
2351 	/* Save modify info */
2352 	if (mod)
2353 		pv->pv_va |= PV_MOD;
2354 	mtx_leave(&pg->mdpage.pvmtx);
2355 
2356 	return (mod);
2357 }
2358 
2359 boolean_t
2360 pmap_is_referenced(struct vm_page *pg)
2361 {
2362 	pv_entry_t pv, npv;
2363 	int ref = 0;
2364 
2365 	/* Check if any mapping has been referenced */
2366 	mtx_enter(&pg->mdpage.pvmtx);
2367 	pv = &pg->mdpage.pvent;
2368 	if (pv->pv_va & PV_REF)
2369 		ref = 1;
2370 	if (!ref && (pv->pv_pmap != NULL)) {
2371 		for (npv = pv; npv; npv = npv->pv_next) {
2372 			int64_t data;
2373 
2374 			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2375 			if (pmap_tte2flags(data) & PV_REF)
2376 				ref = 1;
2377 			/* Migrate referenced info to head pv */
2378 			if (npv->pv_va & PV_REF) {
2379 				ref = 1;
2380 				npv->pv_va &= ~PV_REF;
2381 			}
2382 		}
2383 	}
2384 	/* Save ref info */
2385 	if (ref)
2386 		pv->pv_va |= PV_REF;
2387 	mtx_leave(&pg->mdpage.pvmtx);
2388 
2389 	return (ref);
2390 }
2391 
2392 /*
2393  *	Routine:	pmap_unwire
2394  *	Function:	Clear the wired attribute for a map/virtual-address
2395  *			pair.
2396  *	In/out conditions:
2397  *			The mapping must already exist in the pmap.
2398  */
2399 void
2400 pmap_unwire(struct pmap *pmap, vaddr_t va)
2401 {
2402 	int64_t data;
2403 
2404 	if (pmap == NULL)
2405 		return;
2406 
2407 	/*
2408 	 * Is this part of the permanent 4MB mapping?
2409 	 */
2410 	if (pmap == pmap_kernel() && va >= ktext &&
2411 		va < roundup(ekdata, 4*MEG)) {
2412 		prom_printf("pmap_unwire: va=%08lx in locked TLB\r\n", va);
2413 		OF_enter();
2414 		return;
2415 	}
2416 	mtx_enter(&pmap->pm_mtx);
2417 	data = pseg_get(pmap, va & PV_VAMASK);
2418 
2419 	if (CPU_ISSUN4V)
2420 		data &= ~SUN4V_TLB_TSB_LOCK;
2421 	else
2422 		data &= ~SUN4U_TLB_TSB_LOCK;
2423 
2424 	if (pseg_set(pmap, va & PV_VAMASK, data, 0)) {
2425 		printf("pmap_unwire: gotten pseg empty!\n");
2426 		db_enter();
2427 		/* panic? */
2428 	}
2429 	mtx_leave(&pmap->pm_mtx);
2430 }
2431 
2432 /*
2433  * Lower the protection on the specified physical page.
2434  *
2435  * Never enable writing as it will break COW
2436  */
2437 void
2438 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2439 {
2440 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2441 	pv_entry_t pv;
2442 	int64_t data, clear, set;
2443 
2444 	if (prot & PROT_WRITE)
2445 		return;
2446 
2447 	if (prot & (PROT_READ | PROT_EXEC)) {
2448 		/* copy_on_write */
2449 
2450 		set = TLB_V;
2451 		if (CPU_ISSUN4V) {
2452 			clear = SUN4V_TLB_REAL_W|SUN4V_TLB_W;
2453 			if (PROT_EXEC & prot)
2454 				set |= SUN4V_TLB_EXEC;
2455 			else
2456 				clear |= SUN4V_TLB_EXEC;
2457 		} else {
2458 			clear = SUN4U_TLB_REAL_W|SUN4U_TLB_W;
2459 			if (PROT_EXEC & prot)
2460 				set |= SUN4U_TLB_EXEC;
2461 			else
2462 				clear |= SUN4U_TLB_EXEC;
2463 			if (PROT_EXEC == prot)
2464 				set |= SUN4U_TLB_EXEC_ONLY;
2465 			else
2466 				clear |= SUN4U_TLB_EXEC_ONLY;
2467 		}
2468 
2469 		pv = pa_to_pvh(pa);
2470 		mtx_enter(&pg->mdpage.pvmtx);
2471 		if (pv->pv_pmap != NULL) {
2472 			for (; pv; pv = pv->pv_next) {
2473 				data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2474 
2475 				/* Save REF/MOD info */
2476 				pv->pv_va |= pmap_tte2flags(data);
2477 
2478 				data &= ~(clear);
2479 				data |= (set);
2480 				KDASSERT((data & TLB_NFO) == 0);
2481 				if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2482 					printf("pmap_page_protect: gotten pseg empty!\n");
2483 					db_enter();
2484 					/* panic? */
2485 				}
2486 				if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2487 					tsb_invalidate(pv->pv_pmap->pm_ctx,
2488 					    (pv->pv_va & PV_VAMASK));
2489 					tlb_flush_pte(pv->pv_va & PV_VAMASK, pv->pv_pmap->pm_ctx);
2490 				}
2491 			}
2492 		}
2493 		mtx_leave(&pg->mdpage.pvmtx);
2494 	} else {
2495 		pv_entry_t firstpv;
2496 		/* remove mappings */
2497 
2498 		firstpv = pa_to_pvh(pa);
2499 		mtx_enter(&pg->mdpage.pvmtx);
2500 
2501 		/* First remove the entire list of continuation pv's */
2502 		while ((pv = firstpv->pv_next) != NULL) {
2503 			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2504 
2505 			/* Save REF/MOD info */
2506 			firstpv->pv_va |= pmap_tte2flags(data);
2507 
2508 			/* Clear mapping */
2509 			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) {
2510 				printf("pmap_page_protect: gotten pseg empty!\n");
2511 				db_enter();
2512 				/* panic? */
2513 			}
2514 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2515 				tsb_invalidate(pv->pv_pmap->pm_ctx,
2516 				    (pv->pv_va & PV_VAMASK));
2517 				tlb_flush_pte(pv->pv_va & PV_VAMASK, pv->pv_pmap->pm_ctx);
2518 			}
2519 			atomic_dec_long(&pv->pv_pmap->pm_stats.resident_count);
2520 
2521 			/* free the pv */
2522 			firstpv->pv_next = pv->pv_next;
2523 			mtx_leave(&pg->mdpage.pvmtx);
2524 			pool_put(&pv_pool, pv);
2525 			mtx_enter(&pg->mdpage.pvmtx);
2526 		}
2527 
2528 		pv = firstpv;
2529 
2530 		/* Then remove the primary pv */
2531 		if (pv->pv_pmap != NULL) {
2532 			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2533 
2534 			/* Save REF/MOD info */
2535 			pv->pv_va |= pmap_tte2flags(data);
2536 			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) {
2537 				printf("pmap_page_protect: gotten pseg empty!\n");
2538 				db_enter();
2539 				/* panic? */
2540 			}
2541 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2542 				tsb_invalidate(pv->pv_pmap->pm_ctx,
2543 				    (pv->pv_va & PV_VAMASK));
2544 				tlb_flush_pte(pv->pv_va & PV_VAMASK,
2545 				    pv->pv_pmap->pm_ctx);
2546 			}
2547 			atomic_dec_long(&pv->pv_pmap->pm_stats.resident_count);
2548 
2549 			KASSERT(pv->pv_next == NULL);
2550 			/* dump the first pv */
2551 			pv->pv_pmap = NULL;
2552 		}
2553 		dcache_flush_page(pa);
2554 		mtx_leave(&pg->mdpage.pvmtx);
2555 	}
2556 	/* We should really only flush the pages we demapped. */
2557 }
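
/*
 * A minimal sketch (disabled) showing how the two branches above are
 * reached: dropping PROT_WRITE write-protects every mapping of the page
 * (the copy-on-write case), while PROT_NONE removes the mappings
 * entirely.
 */
#if 0
static void
example_page_protect(struct vm_page *pg)
{
	/* Make all mappings of the page read-only (COW preparation). */
	pmap_page_protect(pg, PROT_READ);

	/* Remove every remaining mapping of the page. */
	pmap_page_protect(pg, PROT_NONE);
}
#endif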
2558 
2559 /*
2560  * Allocate a context.  If necessary, steal one from someone else.
2561  * Changes hardware context number and loads segment map.
2562  *
2563  * This routine is only ever called from locore.s just after it has
2564  * saved away the previous process, so there are no active user windows.
2565  *
2566  * The new context is flushed from the TLB before returning.
2567  */
2568 int
2569 ctx_alloc(struct pmap *pm)
2570 {
2571 	int s, cnum;
2572 	static int next = 0;
2573 
2574 	if (pm == pmap_kernel()) {
2575 #ifdef DIAGNOSTIC
2576 		printf("ctx_alloc: kernel pmap!\n");
2577 #endif
2578 		return (0);
2579 	}
2580 	s = splvm();
2581 	cnum = next;
2582 	do {
2583 		/*
2584 		 * We use the last context as an "invalid" context in
2585 		 * TSB tags. Never allocate (or bad things will happen).
2586 		 */
2587 		if (cnum >= numctx - 2)
2588 			cnum = 0;
2589 	} while (ctxbusy[++cnum] != 0 && cnum != next);
2590 	if (cnum==0) cnum++; /* Never steal ctx 0 */
2591 	if (ctxbusy[cnum]) {
2592 		int i;
2593 		/* We gotta steal this context */
2594 		for (i = 0; i < TSBENTS; i++) {
2595 			if (TSB_TAG_CTX(tsb_dmmu[i].tag) == cnum)
2596 				tsb_dmmu[i].tag = TSB_TAG_INVALID;
2597 			if (TSB_TAG_CTX(tsb_immu[i].tag) == cnum)
2598 				tsb_immu[i].tag = TSB_TAG_INVALID;
2599 		}
2600 		tlb_flush_ctx(cnum);
2601 	}
2602 	ctxbusy[cnum] = pm->pm_physaddr;
2603 	next = cnum;
2604 	splx(s);
2605 	pm->pm_ctx = cnum;
2606 	return cnum;
2607 }
2608 
2609 /*
2610  * Give away a context.
2611  */
2612 void
2613 ctx_free(struct pmap *pm)
2614 {
2615 	int oldctx;
2616 
2617 	oldctx = pm->pm_ctx;
2618 
2619 	if (oldctx == 0)
2620 		panic("ctx_free: freeing kernel context");
2621 #ifdef DIAGNOSTIC
2622 	if (ctxbusy[oldctx] == 0)
2623 		printf("ctx_free: freeing free context %d\n", oldctx);
2624 	if (ctxbusy[oldctx] != pm->pm_physaddr) {
2625 		printf("ctx_free: freeing someone else's context\n "
2626 		       "ctxbusy[%d] = %p, pm(%p)->pm_physaddr = %p\n",
2627 		       oldctx, (void *)(u_long)ctxbusy[oldctx], pm,
2628 		       (void *)(u_long)pm->pm_physaddr);
2629 		db_enter();
2630 	}
2631 #endif
2632 	/* We should verify it has not been stolen and reallocated... */
2633 	ctxbusy[oldctx] = 0;
2634 }
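
/*
 * A minimal sketch (disabled) of the ownership invariant maintained by
 * ctx_alloc()/ctx_free(): a non-zero context number belongs to a pmap
 * exactly when ctxbusy[] holds that pmap's page table physical address,
 * which is what the DIAGNOSTIC code in ctx_free() checks above.
 */
#if 0
static int
example_ctx_owned_by(struct pmap *pm)
{
	return (pm->pm_ctx != 0 && ctxbusy[pm->pm_ctx] == pm->pm_physaddr);
}
#endif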
2635 
2636 /*
2637  * Enter the pmap and virtual address into the
2638  * physical to virtual map table.
2639  */
2640 pv_entry_t
2641 pmap_enter_pv(struct pmap *pmap, pv_entry_t npv, vaddr_t va, paddr_t pa)
2642 {
2643 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2644 	pv_entry_t pv = &pg->mdpage.pvent;
2645 
2646 	mtx_enter(&pg->mdpage.pvmtx);
2647 
2648 	if (pv->pv_pmap == NULL) {
2649 		/*
2650 		 * No entries yet, use header as the first entry
2651 		 */
2652 		PV_SETVA(pv, va);
2653 		pv->pv_pmap = pmap;
2654 		pv->pv_next = NULL;
2655 
2656 		mtx_leave(&pg->mdpage.pvmtx);
2657 		return (npv);
2658 	}
2659 
2660 	if (npv == NULL)
2661 		panic("%s: no pv entries available", __func__);
2662 
2663 	if (!(pv->pv_va & PV_ALIAS)) {
2664 		/*
2665 		 * There is at least one other VA mapping this page.
2666 		 * Check if they are cache index compatible. If not
2667 		 * remove all mappings, flush the cache and set page
2668 		 * to be mapped uncached. Caching will be restored
2669 		 * when pages are mapped compatible again.
2670 		 */
2671 		if ((pv->pv_va ^ va) & VA_ALIAS_MASK) {
2672 			pv->pv_va |= PV_ALIAS;
2673 			pmap_page_cache(pmap, pa, 0);
2674 		}
2675 	}
2676 
2677 	/*
2678 	 * There is at least one other VA mapping this page.
2679 	 * Place this entry after the header.
2680 	 */
2681 	npv->pv_va = va & PV_VAMASK;
2682 	npv->pv_pmap = pmap;
2683 	npv->pv_next = pv->pv_next;
2684 	pv->pv_next = npv;
2685 
2686 	mtx_leave(&pg->mdpage.pvmtx);
2687 	return (NULL);
2688 }
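
/*
 * A minimal sketch (disabled) of the cache-alias test used above: two
 * virtual mappings of the same physical page may stay cached only if
 * they agree in the bits covered by VA_ALIAS_MASK; otherwise the page
 * is switched to uncached via pmap_page_cache().
 */
#if 0
static int
example_cache_compatible(vaddr_t va1, vaddr_t va2)
{
	return (((va1 ^ va2) & VA_ALIAS_MASK) == 0);
}
#endif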
2689 
2690 /*
2691  * Remove a physical to virtual address translation.
2692  */
2693 pv_entry_t
2694 pmap_remove_pv(struct pmap *pmap, vaddr_t va, paddr_t pa)
2695 {
2696 	pv_entry_t pv, opv, npv = NULL;
2697 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2698 	int64_t data = 0LL;
2699 	int alias;
2700 
2701 	opv = pv = &pg->mdpage.pvent;
2702 	mtx_enter(&pg->mdpage.pvmtx);
2703 
2704 	/*
2705 	 * If it is the first entry on the list, it is actually
2706 	 * in the header and we must copy the following entry up
2707 	 * to the header.  Otherwise we must search the list for
2708 	 * the entry.  In either case we free the now unused entry.
2709 	 */
2710 	if (pmap == pv->pv_pmap && PV_MATCH(pv, va)) {
2711 		/* Save modified/ref bits */
2712 		data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2713 		npv = pv->pv_next;
2714 		if (npv) {
2715 			/* First save mod/ref bits */
2716 			pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va;
2717 			pv->pv_next = npv->pv_next;
2718 			pv->pv_pmap = npv->pv_pmap;
2719 		} else {
2720 			pv->pv_pmap = NULL;
2721 			pv->pv_next = NULL;
2722 			pv->pv_va &= (PV_REF|PV_MOD); /* Only save ref/mod bits */
2723 		}
2724 	} else {
2725 		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
2726 			if (pmap == npv->pv_pmap && PV_MATCH(npv, va))
2727 				goto found;
2728 		}
2729 
2730 		/*
2731 		 * Sometimes UVM gets confused and calls pmap_remove() instead
2732 		 * of pmap_kremove()
2733 		 */
2734 		mtx_leave(&pg->mdpage.pvmtx);
2735 		return (NULL);
2736 found:
2737 		pv->pv_next = npv->pv_next;
2738 
2739 		/*
2740 		 * move any referenced/modified info to the base pv
2741 		 */
2742 		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2743 
2744 		/*
2745 		 * Here, if this page was aliased, we should try clear out any
2746 		 * alias that may have occurred.  However, that's a complicated
2747 		 * operation involving multiple scans of the pv list.
2748 		 */
2749 	}
2750 
2751 	/* Save REF/MOD info */
2752 	opv->pv_va |= pmap_tte2flags(data);
2753 
2754 	/* Check to see if the alias went away */
2755 	if (opv->pv_va & PV_ALIAS) {
2756 		alias = 0;
2757 		for (pv = opv; pv; pv = pv->pv_next) {
2758 			if ((pv->pv_va ^ opv->pv_va) & VA_ALIAS_MASK) {
2759 				alias = 1;
2760 				break;
2761 			}
2762 		}
2763 		if (alias == 0) {
2764 			opv->pv_va &= ~PV_ALIAS;
2765 			pmap_page_cache(pmap, pa, 1);
2766 		}
2767 	}
2768 
2769 	mtx_leave(&pg->mdpage.pvmtx);
2770 	return (npv);
2771 }
2772 
2773 /*
2774  *	pmap_page_cache:
2775  *
2776  *	Change all mappings of a page to cached/uncached.
2777  */
2778 void
2779 pmap_page_cache(struct pmap *pm, paddr_t pa, int mode)
2780 {
2781 	pv_entry_t pv;
2782 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2783 
2784 	if (CPU_ISSUN4US || CPU_ISSUN4V)
2785 		return;
2786 
2787 	pv = &pg->mdpage.pvent;
2788 	if (pv == NULL)
2789 		return;
2790 
2791 	MUTEX_ASSERT_LOCKED(&pg->mdpage.pvmtx);
2792 
2793 	while (pv) {
2794 		vaddr_t va;
2795 
2796 		va = (pv->pv_va & PV_VAMASK);
2797 		if (mode) {
2798 			/* Enable caching */
2799 			if (pseg_set(pv->pv_pmap, va,
2800 			    pseg_get(pv->pv_pmap, va) | SUN4U_TLB_CV, 0)) {
2801 				printf("pmap_page_cache: aliased pseg empty!\n");
2802 				db_enter();
2803 				/* panic? */
2804 			}
2805 		} else {
2806 			/* Disable caching */
2807 			if (pseg_set(pv->pv_pmap, va,
2808 			    pseg_get(pv->pv_pmap, va) & ~SUN4U_TLB_CV, 0)) {
2809 				printf("pmap_page_cache: aliased pseg empty!\n");
2810 				db_enter();
2811 				/* panic? */
2812 			}
2813 		}
2814 		if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2815 			tsb_invalidate(pv->pv_pmap->pm_ctx, va);
2816 			/* Force reload -- protections may be changed */
2817 			tlb_flush_pte(va, pv->pv_pmap->pm_ctx);
2818 		}
2819 
2820 		pv = pv->pv_next;
2821 	}
2822 }
2823 
2824 int
2825 pmap_get_page(paddr_t *pa, const char *wait, struct pmap *pm)
2826 {
2827 	int reserve = pm == pmap_kernel() ? UVM_PGA_USERESERVE : 0;
2828 
2829 	if (uvm.page_init_done) {
2830 		struct vm_page *pg;
2831 
2832 		while ((pg = uvm_pagealloc(NULL, 0, NULL,
2833 		    UVM_PGA_ZERO|reserve)) == NULL) {
2834 			if (wait == NULL)
2835 				return 0;
2836 			uvm_wait(wait);
2837 		}
2838 		pg->wire_count++;
2839 		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
2840 		*pa = VM_PAGE_TO_PHYS(pg);
2841 	} else {
2842 		uvm_page_physget(pa);
2843 		prom_claim_phys(*pa, PAGE_SIZE);
2844 		pmap_zero_phys(*pa);
2845 	}
2846 
2847 	return (1);
2848 }
2849 
2850 void
2851 pmap_free_page(paddr_t pa, struct pmap *pm)
2852 {
2853 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2854 
2855 	pg->wire_count = 0;
2856 	uvm_pagefree(pg);
2857 }
2858 
2859 void
2860 pmap_remove_holes(struct vmspace *vm)
2861 {
2862 	vaddr_t shole, ehole;
2863 	struct vm_map *map = &vm->vm_map;
2864 
2865 	/*
2866 	 * Although the hardware only supports 44-bit virtual addresses
2867 	 * (and thus a hole from 1 << 43 to -1 << 43), this pmap
2868 	 * implementation itself only supports 43-bit virtual addresses,
2869 	 * so we have to narrow the hole a bit more.
2870 	 */
2871 	shole = 1L << (HOLESHIFT - 1);
2872 	ehole = -1L << (HOLESHIFT - 1);
2873 
2874 	shole = ulmax(vm_map_min(map), shole);
2875 	ehole = ulmin(vm_map_max(map), ehole);
2876 
2877 	if (ehole <= shole)
2878 		return;
2879 
2880 	(void)uvm_map(map, &shole, ehole - shole, NULL, UVM_UNKNOWN_OFFSET, 0,
2881 	    UVM_MAPFLAG(PROT_NONE, PROT_NONE, MAP_INHERIT_SHARE, MADV_RANDOM,
2882 	      UVM_FLAG_NOMERGE | UVM_FLAG_HOLE | UVM_FLAG_FIXED));
2883 }
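
/*
 * A minimal sketch (disabled) working out the hole reserved above,
 * assuming HOLESHIFT is 43 (matching the 43-bit limit described in the
 * comment; an assumption for this example).
 */
#if 0
static void
example_va_hole(void)
{
	vaddr_t shole = 1L << 42;	/* 0x0000040000000000 */
	vaddr_t ehole = -1UL << 42;	/* 0xfffffc0000000000 */

	/* uvm_map() above reserves [shole, ehole) as an unmappable hole. */
	printf("va hole: 0x%lx - 0x%lx\n", shole, ehole);
}
#endif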
2884 
2885 #ifdef DDB
2886 
2887 void
2888 db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
2889 {
2890 	struct pv_entry *pv;
2891 
2892 	if (!have_addr) {
2893 		db_printf("Need addr for pv\n");
2894 		return;
2895 	}
2896 
2897 	for (pv = pa_to_pvh(addr); pv; pv = pv->pv_next)
2898 		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
2899 			  pv, pv->pv_next, pv->pv_pmap,
2900 			  (unsigned long long)pv->pv_va);
2901 
2902 }
2903 
2904 #endif
2905 
2906 /*
2907  * Read an instruction from a given virtual memory address.
2908  * EXEC_ONLY mappings are bypassed.
2909  */
2910 int
2911 pmap_copyinsn(pmap_t pmap, vaddr_t va, uint32_t *insn)
2912 {
2913 	paddr_t pa;
2914 
2915 	if (pmap == pmap_kernel())
2916 		return EINVAL;
2917 
2918 	mtx_enter(&pmap->pm_mtx);
2919 	/* inline pmap_extract */
2920 	pa = pseg_get(pmap, va) & TLB_PA_MASK;
2921 	if (pa != 0)
2922 		*insn = lduwa(pa | (va & PAGE_MASK), ASI_PHYS_CACHED);
2923 	mtx_leave(&pmap->pm_mtx);
2924 
2925 	return pa == 0 ? EFAULT : 0;
2926 }
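
/*
 * A minimal sketch (disabled) of a hypothetical caller: pmap_copyinsn()
 * lets the kernel fetch a user instruction even when the mapping is
 * EXEC_ONLY and therefore not readable via copyin().
 */
#if 0
static int
example_fetch_user_insn(struct proc *p, vaddr_t pc, uint32_t *insn)
{
	return (pmap_copyinsn(p->p_vmspace->vm_map.pmap, pc, insn));
}
#endif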
2927