1 /*	$OpenBSD: pmap.c,v 1.91 2015/04/10 18:08:31 kettenis Exp $	*/
2 /*	$NetBSD: pmap.c,v 1.107 2001/08/31 16:47:41 eeh Exp $	*/
3 #undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
4 /*
5  *
6  * Copyright (C) 1996-1999 Eduardo Horvath.
7  * All rights reserved.
8  *
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  */
29 
30 #include <sys/atomic.h>
31 #include <sys/param.h>
32 #include <sys/malloc.h>
33 #include <sys/queue.h>
34 #include <sys/systm.h>
35 #include <sys/proc.h>
36 #include <sys/msgbuf.h>
37 #include <sys/pool.h>
38 #include <sys/exec.h>
39 #include <sys/core.h>
40 #include <sys/kcore.h>
41 
42 #include <uvm/uvm.h>
43 
44 #include <machine/pcb.h>
45 #include <machine/sparc64.h>
46 #include <machine/ctlreg.h>
47 #include <machine/hypervisor.h>
48 #include <machine/openfirm.h>
49 #include <machine/kcore.h>
50 
51 #include "cache.h"
52 
53 #ifdef DDB
54 #include <machine/db_machdep.h>
55 #include <ddb/db_command.h>
56 #include <ddb/db_sym.h>
57 #include <ddb/db_variables.h>
58 #include <ddb/db_extern.h>
59 #include <ddb/db_access.h>
60 #include <ddb/db_output.h>
61 #define Debugger()	__asm volatile("ta 1; nop");
62 #else
63 #define Debugger()
64 #define db_printf	printf
65 #endif
66 
67 #define	MEG		(1<<20) /* 1MB */
68 #define	KB		(1<<10)	/* 1KB */
69 
70 paddr_t cpu0paddr;/* XXXXXXXXXXXXXXXX */
71 
72 extern int64_t asmptechk(int64_t *pseg[], int addr); /* DEBUG XXXXX */
73 
74 /* These routines are in assembly to allow access thru physical mappings */
75 extern int64_t pseg_get(struct pmap*, vaddr_t addr);
76 extern int pseg_set(struct pmap*, vaddr_t addr, int64_t tte, paddr_t spare);
77 
78 /* XXX - temporary workaround for pmap_{copy,zero}_page api change */
79 void pmap_zero_phys(paddr_t pa);
80 void pmap_copy_phys(paddr_t src, paddr_t dst);
81 
82 /*
83  * Diatribe on ref/mod counting:
84  *
85  * First of all, ref/mod info must be non-volatile.  Hence we need to keep it
86  * in the pv_entry structure for each page.  (We could bypass this for the
87  * vm_page, but that's a long story....)
88  *
89  * This architecture has nice, fast traps with lots of space for software bits
90  * in the TTE.  To accelerate ref/mod counts we make use of these features.
91  *
92  * When we map a page initially, we place a TTE in the page table.  It's
93  * inserted with the TLB_W and TLB_ACCESS bits cleared.  If a page is really
94  * writeable we set the TLB_REAL_W bit for the trap handler.
95  *
96  * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
97  * bit in the appropriate TTE in the page table.  Whenever we take a protection
98  * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
99  * bits to enable writing and mark the page as modified.
100  *
101  * This means that we may have ref/mod information all over the place.  The
102  * pmap routines must traverse the page tables of all pmaps with a given page
103  * and collect/clear all the ref/mod information and copy it into the pv_entry.
104  */
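
/*
 * A concrete example of the scheme above, for a writable page (a sketch
 * of the sequence, using the bit names from this comment):
 *
 *	pmap_enter():		TLB_REAL_W set; TLB_W, TLB_ACCESS clear
 *	first TLB miss:		trap handler sets TLB_ACCESS	-> PV_REF
 *	first write fault:	trap handler sets TLB_W|TLB_MOD	-> PV_MOD
 *
 * pmap_tte2flags() below is what folds the per-TTE bits back into the
 * PV_REF/PV_MOD flags kept in the pv_entry.
 */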
105 
106 #ifdef NO_VCACHE
107 #define FORCE_ALIAS	1
108 #else
109 #define FORCE_ALIAS	0
110 #endif
111 
112 #define	PV_ALIAS	0x1LL
113 #define PV_REF		0x2LL
114 #define PV_MOD		0x4LL
115 #define PV_NVC		0x8LL
116 #define PV_NC		0x10LL
117 #define PV_WE		0x20LL		/* Debug -- track if this page was ever writable */
118 #define PV_MASK		(0x03fLL)
119 #define PV_VAMASK	(~(NBPG - 1))
120 #define PV_MATCH(pv,va)	(!((((pv)->pv_va) ^ (va)) & PV_VAMASK))
121 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | (((pv)->pv_va) & PV_MASK)))
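
/*
 * Worked example (illustrative values): with 8K pages, a pv_entry
 * tracking va 0x40000 that has been referenced and modified holds
 *
 *	pv_va == 0x40000 | PV_REF | PV_MOD == 0x40006
 *
 * PV_MATCH(pv, 0x40000) is still true because PV_VAMASK strips the low
 * flag bits, and PV_SETVA() preserves them when the va is rewritten.
 */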
122 
123 pv_entry_t	pv_table;	/* array of entries, one per page */
124 static struct pool pv_pool;
125 static struct pool pmap_pool;
126 
127 void	pmap_remove_pv(struct pmap *pm, vaddr_t va, paddr_t pa);
128 void	pmap_enter_pv(struct pmap *pm, vaddr_t va, paddr_t pa);
129 void	pmap_page_cache(struct pmap *pm, paddr_t pa, int mode);
130 
131 void	pmap_bootstrap_cpu(paddr_t);
132 
133 void	pmap_pinit(struct pmap *);
134 void	pmap_release(struct pmap *);
135 pv_entry_t pa_to_pvh(paddr_t);
136 
137 u_int64_t first_phys_addr;
138 
139 pv_entry_t
140 pa_to_pvh(paddr_t pa)
141 {
142 	struct vm_page *pg;
143 
144 	pg = PHYS_TO_VM_PAGE(pa);
145 	return pg ? &pg->mdpage.pvent : NULL;
146 }
147 
148 static __inline u_int
149 pmap_tte2flags(u_int64_t tte)
150 {
151 	if (CPU_ISSUN4V)
152 		return (((tte & SUN4V_TLB_ACCESS) ? PV_REF : 0) |
153 		    ((tte & SUN4V_TLB_MODIFY) ? PV_MOD : 0));
154 	else
155 		return (((tte & SUN4U_TLB_ACCESS) ? PV_REF : 0) |
156 		    ((tte & SUN4U_TLB_MODIFY) ? PV_MOD : 0));
157 }
158 
159 /*
160  * Here's the CPU TSB stuff.  It's allocated in pmap_bootstrap.
161  */
162 pte_t *tsb_dmmu;
163 pte_t *tsb_immu;
164 int tsbsize;		/* tsbents = 512 * 2^tsbsize */
165 #define TSBENTS (512 << tsbsize)
166 #define	TSBSIZE	(TSBENTS * 16)
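
/*
 * For example, tsbsize == 2 gives TSBENTS == 2048 and, at 16 bytes per
 * entry (tag + data), TSBSIZE == 32KB for each of the two TSBs.
 */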
167 
168 /*
169  * The invalid tsb tag uses the fact that the last context we have is
170  * never allocated.
171  */
172 #define TSB_TAG_INVALID	(~0LL << 48)
173 
174 #define TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) \
175   (CPU_ISSUN4V ?\
176     SUN4V_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie) : \
177     SUN4U_TSB_DATA(g,sz,pa,priv,write,cache,aliased,valid,ie))
178 
179 /* The same for sun4u and sun4v. */
180 #define TLB_V		SUN4U_TLB_V
181 
182 /* Only used for DEBUG. */
183 #define TLB_NFO		(CPU_ISSUN4V ? SUN4V_TLB_NFO : SUN4U_TLB_NFO)
184 
185 /*
186  * UltraSPARC T1 & T2 implement only a 40-bit real address range, just
187  * like older UltraSPARC CPUs.
188  */
189 #define TLB_PA_MASK	SUN4U_TLB_PA_MASK
190 
191 /* XXX */
192 #define TLB_TSB_LOCK	(CPU_ISSUN4V ? SUN4V_TLB_TSB_LOCK : SUN4U_TLB_TSB_LOCK)
193 
194 #ifdef SUN4V
195 struct tsb_desc *tsb_desc;
196 #endif
197 
198 struct pmap kernel_pmap_;
199 
200 extern int physmem;
201 /*
202  * Virtual and physical addresses of the start and end of kernel text
203  * and data segments.
204  */
205 vaddr_t ktext;
206 paddr_t ktextp;
207 vaddr_t ektext;
208 paddr_t ektextp;
209 vaddr_t kdata;
210 paddr_t kdatap;
211 vaddr_t ekdata;
212 paddr_t ekdatap;
213 
214 static int npgs;
215 static struct mem_region memlist[8]; /* Pick a random size here */
216 
217 vaddr_t	vmmap;			/* one reserved MI vpage for /dev/mem */
218 
219 struct mem_region *mem, *avail, *orig;
220 int memsize;
221 
222 static int memh = 0, vmemh = 0;	/* Handles to OBP devices */
223 
224 static int ptelookup_va(vaddr_t va); /* sun4u */
225 
226 static __inline void
227 tsb_invalidate(int ctx, vaddr_t va)
228 {
229 	int i;
230 	int64_t tag;
231 
232 	i = ptelookup_va(va);
233 	tag = TSB_TAG(0, ctx, va);
234 	if (tsb_dmmu[i].tag == tag)
235 		atomic_cas_ulong((volatile unsigned long *)&tsb_dmmu[i].tag,
236 		    tag, TSB_TAG_INVALID);
237 	if (tsb_immu[i].tag == tag)
238 		atomic_cas_ulong((volatile unsigned long *)&tsb_immu[i].tag,
239 		    tag, TSB_TAG_INVALID);
240 }
241 
242 struct prom_map *prom_map;
243 int prom_map_size;
244 
245 #ifdef DEBUG
246 #define	PDB_BOOT	0x20000
247 #define	PDB_BOOT1	0x40000
248 int	pmapdebug = 0;
249 
250 #define	BDPRINTF(n, f)	if (pmapdebug & (n)) prom_printf f
251 #else
252 #define	BDPRINTF(n, f)
253 #endif
254 
255 /*
256  *
257  * A context is simply a small number that differentiates multiple mappings
258  * of the same address.  Contexts on the spitfire are 13 bits, but could
259  * be as large as 17 bits.
260  *
261  * Each context is either free or attached to a pmap.
262  *
263  * The context table is an array of pointers to psegs.  Just dereference
264  * the right pointer and you get to the pmap segment tables.  These are
265  * physical addresses, of course.
266  *
267  */
268 paddr_t *ctxbusy;
269 int numctx;
270 #define CTXENTRY	(sizeof(paddr_t))
271 #define CTXSIZE		(numctx * CTXENTRY)
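
/*
 * Example: a 13-bit context field (spitfire) gives numctx == 8192, so
 * CTXSIZE == 8192 * sizeof(paddr_t) == 64KB -- the "about 64KB" budgeted
 * for ctxbusy in pmap_bootstrap() below.  Entry 0 always holds
 * pmap_kernel()->pm_physaddr once the kernel pmap is set up.
 */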
272 
273 int pmap_get_page(paddr_t *, const char *, struct pmap *);
274 void pmap_free_page(paddr_t, struct pmap *);
275 
276 /*
277  * Support for big page sizes.  This maps the page size to the
278  * page bits.  That is: these are the bits between 8K pages and
279  * larger page sizes that cause aliasing.
280  */
281 struct page_size_map page_size_map[] = {
282 	{ (4*1024*1024-1) & ~(8*1024-1), PGSZ_4M },
283 	{ (512*1024-1) & ~(8*1024-1), PGSZ_512K  },
284 	{ (64*1024-1) & ~(8*1024-1), PGSZ_64K  },
285 	{ (8*1024-1) & ~(8*1024-1), PGSZ_8K  },
286 	{ 0, PGSZ_8K&0  }
287 };
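
/*
 * A minimal sketch (kept under #if 0, not compiled; the helper name is
 * made up for illustration) of how this table is meant to be scanned.
 * The real loop is inlined in pmap_bootstrap() when the PROM mappings
 * are re-entered: pick the first (largest) entry whose alignment mask
 * is clear in both the va and the tte and that still fits inside the
 * mapping; .code then supplies the size bits for the TTE.
 */
#if 0
static int
pmap_example_pick_pgsz(vaddr_t va, u_int64_t tte, vsize_t size)
{
	int k;

	for (k = 0; page_size_map[k].mask; k++)
		if (((va | tte) & page_size_map[k].mask) == 0 &&
		    page_size_map[k].mask < size)
			break;
	return (k);		/* index into page_size_map[] */
}
#endif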
288 
289 /*
290  * Enter a TTE into the kernel pmap only.  Don't do anything else.
291  *
292  * Use only during bootstrapping since it does no locking and
293  * can lose ref/mod info!!!!
294  *
295  */
296 static void
297 pmap_enter_kpage(vaddr_t va, int64_t data)
298 {
299 	paddr_t newp;
300 
301 	newp = 0;
302 	while (pseg_set(pmap_kernel(), va, data, newp) == 1) {
303 		newp = 0;
304 		if (!pmap_get_page(&newp, NULL, pmap_kernel())) {
305 			prom_printf("pmap_enter_kpage: out of pages\n");
306 			panic("pmap_enter_kpage");
307 		}
308 		pmap_kernel()->pm_stats.resident_count++;
309 
310 		BDPRINTF(PDB_BOOT1,
311 			 ("pseg_set: pm=%p va=%p data=%lx newp %lx\r\n",
312 			  pmap_kernel(), va, (long)data, (long)newp));
313 	}
314 }
315 
316 /*
317  * Check bootargs to see if we need to enable bootdebug.
318  */
319 #ifdef DEBUG
320 void
321 pmap_bootdebug(void)
322 {
323 	int chosen;
324 	char *cp;
325 	char buf[128];
326 
327 	/*
328 	 * Grab boot args from PROM
329 	 */
330 	chosen = OF_finddevice("/chosen");
331 	/* Setup pointer to boot flags */
332 	OF_getprop(chosen, "bootargs", buf, sizeof(buf));
333 	cp = buf;
334 	while (*cp != '-')
335 		if (*cp++ == '\0')
336 			return;
337 	for (;;)
338 		switch (*++cp) {
339 		case '\0':
340 			return;
341 		case 'V':
342 			pmapdebug |= PDB_BOOT|PDB_BOOT1;
343 			break;
344 		case 'D':
345 			pmapdebug |= PDB_BOOT1;
346 			break;
347 		}
348 }
349 #endif
350 
351 /*
352  * This is called during bootstrap, before the system is really initialized.
353  *
354  * It's called with the start and end virtual addresses of the kernel.  We
355  * bootstrap the pmap allocator now.  We will allocate the basic structures we
356  * need to bootstrap the VM system here: the page frame tables, the TSB, and
357  * the free memory lists.
358  *
359  * Now all this is becoming a bit obsolete.  maxctx is still important, but by
360  * separating the kernel text and data segments we really would need to
361  * provide the start and end of each segment.  But we can't.  The rodata
362  * segment is attached to the end of the kernel segment and has nothing to
363  * delimit its end.  We could still pass in the beginning of the kernel and
364  * the beginning and end of the data segment but we could also just as easily
365  * calculate that all in here.
366  *
367  * To handle the kernel text, we need to do a reverse mapping of the start of
368  * the kernel, then traverse the free memory lists to find out how big it is.
369  */
370 
371 void
372 pmap_bootstrap(u_long kernelstart, u_long kernelend, u_int maxctx, u_int numcpus)
373 {
374 	extern int data_start[], end[];	/* start of data segment */
375 	extern int msgbufmapped;
376 	struct mem_region *mp, *mp1;
377 	int msgbufsiz;
378 	int pcnt;
379 	size_t s, sz;
380 	int i, j;
381 	int64_t data;
382 	vaddr_t va;
383 	u_int64_t phys_msgbuf;
384 	paddr_t newkp;
385 	vaddr_t newkv, firstaddr, intstk;
386 	vsize_t kdsize, ktsize;
387 
388 #ifdef DEBUG
389 	pmap_bootdebug();
390 #endif
391 
392 	BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\r\n"));
393 	/*
394 	 * set machine page size
395 	 */
396 	uvmexp.pagesize = NBPG;
397 	uvm_setpagesize();
398 
399 	/*
400 	 * Find out how big the kernel's virtual address
401 	 * space is.  The *$#@$ prom loses this info
402 	 */
403 	if ((vmemh = OF_finddevice("/virtual-memory")) == -1) {
404 		prom_printf("no virtual-memory?");
405 		OF_exit();
406 	}
407 	bzero((caddr_t)memlist, sizeof(memlist));
408 	if (OF_getprop(vmemh, "available", memlist, sizeof(memlist)) <= 0) {
409 		prom_printf("no vmemory avail?");
410 		OF_exit();
411 	}
412 
413 #ifdef DEBUG
414 	if (pmapdebug & PDB_BOOT) {
415 		/* print out mem list */
416 		prom_printf("Available virtual memory:\r\n");
417 		for (mp = memlist; mp->size; mp++) {
418 			prom_printf("memlist start %p size %lx\r\n",
419 				    (void *)(u_long)mp->start,
420 				    (u_long)mp->size);
421 		}
422 		prom_printf("End of available virtual memory\r\n");
423 	}
424 #endif
425 	/*
426 	 * Get hold of the message buffer.
427 	 */
428 	msgbufp = (struct msgbuf *)(vaddr_t)MSGBUF_VA;
429 /* XXXXX -- increase msgbufsiz for uvmhist printing */
430 	msgbufsiz = 4*NBPG /* round_page(sizeof(struct msgbuf)) */;
431 	BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\r\n",
432 			    (long)msgbufp, (long)msgbufsiz));
433 	if ((long)msgbufp !=
434 	    (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
435 		prom_printf(
436 		    "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\r\n",
437 		    (void *)msgbufp, (long)phys_msgbuf);
438 	phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
439 	BDPRINTF(PDB_BOOT,
440 		("We should have the memory at %lx, let's map it in\r\n",
441 			phys_msgbuf));
442 	if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
443 			  -1/* sunos does this */) == -1)
444 		prom_printf("Failed to map msgbuf\r\n");
445 	else
446 		BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\r\n",
447 			(void *)msgbufp));
448 	msgbufmapped = 1;	/* enable message buffer */
449 	initmsgbuf((caddr_t)msgbufp, msgbufsiz);
450 
451 	/*
452 	 * Record kernel mapping -- we will map these with a permanent 4MB
453 	 * TLB entry when we initialize the CPU later.
454 	 */
455 	BDPRINTF(PDB_BOOT, ("translating kernelstart %p\r\n",
456 		(void *)kernelstart));
457 	ktext = kernelstart;
458 	ktextp = prom_vtop(kernelstart);
459 
460 	kdata = (vaddr_t)data_start;
461 	kdatap = prom_vtop(kdata);
462 	ekdata = (vaddr_t)end;
463 
464 	/*
465 	 * Find the real size of the kernel.  Locate the smallest starting
466 	 * address > kernelstart.
467 	 */
468 	for (mp1 = mp = memlist; mp->size; mp++) {
469 		/*
470 		 * Check whether this region is at the end of the kernel.
471 		 */
472 		if (mp->start >= ekdata && (mp1->start < ekdata ||
473 						mp1->start > mp->start))
474 			mp1 = mp;
475 	}
476 	if (mp1->start < kdata)
477 		prom_printf("Kernel at end of vmem???\r\n");
478 
479 	BDPRINTF(PDB_BOOT1,
480 		("Kernel data is mapped at %lx, next free seg: %lx, %lx\r\n",
481 			(long)kdata, (u_long)mp1->start, (u_long)mp1->size));
482 
483 	/*
484 	 * We save where we can start allocating memory.
485 	 */
486 	firstaddr = (ekdata + 07) & ~ 07;	/* Longword align */
487 
488 	/*
489 	 * We reserve 100K to grow.
490 	 */
491 	ekdata += 100*KB;
492 
493 	/*
494 	 * And set the end of the data segment to the end of what our
495 	 * bootloader allocated for us, if we still fit in there.
496 	 */
497 	if (ekdata < mp1->start)
498 		ekdata = mp1->start;
499 
500 #define	valloc(name, type, num) (name) = (type *)firstaddr; firstaddr += (num)
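
/*
 * valloc() just bumps firstaddr: e.g. valloc(ctxbusy, paddr_t, CTXSIZE)
 * below carves CTXSIZE bytes out of the space reserved past the kernel
 * data segment.  It does no alignment of its own; callers align
 * firstaddr beforehand where it matters (see the TSB allocation below).
 */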
501 
502 	/*
503 	 * Since we can't always give the loader the hint to align us on a 4MB
504 	 * boundary, we will need to do the alignment ourselves.  First
505 	 * allocate a new 4MB aligned segment for the kernel, then map it
506 	 * in, copy the kernel over, swap mappings, then finally, free the
507 	 * old kernel.  Then we can continue with this.
508 	 *
509 	 * We'll do the data segment up here since we know how big it is.
510 	 * We'll do the text segment after we've read in the PROM translations
511 	 * so we can figure out its size.
512 	 *
513 	 * The ctxbusy table takes about 64KB, the TSB up to 32KB, and the
514 	 * rest should be less than 1K, so 100KB extra should be plenty.
515 	 */
516 	kdsize = round_page(ekdata - kdata);
517 	BDPRINTF(PDB_BOOT1, ("Kernel data size is %lx\r\n", (long)kdsize));
518 
519 	if ((kdatap & (4*MEG-1)) == 0) {
520 		/* We were at a 4MB boundary -- claim the rest */
521 		psize_t szdiff = (4*MEG - kdsize) & (4*MEG - 1);
522 
523 		BDPRINTF(PDB_BOOT1, ("Need to extend dseg by %lx\r\n",
524 			(long)szdiff));
525 		if (szdiff) {
526 			/* Claim the rest of the physical page. */
527 			newkp = kdatap + kdsize;
528 			newkv = kdata + kdsize;
529 			if (newkp != prom_claim_phys(newkp, szdiff)) {
530 				prom_printf("pmap_bootstrap: could not claim "
531 					"physical dseg extension "
532 					"at %lx size %lx\r\n",
533 					newkp, szdiff);
534 				goto remap_data;
535 			}
536 
537 			/* And the rest of the virtual page. */
538 			if (prom_claim_virt(newkv, szdiff) != newkv)
539 				prom_printf("pmap_bootstrap: could not claim "
540 					"virtual dseg extension "
541 					"at %lx size %lx\r\n", newkv, szdiff);
542 
543 			/* Make sure all 4MB are mapped */
544 			prom_map_phys(newkp, szdiff, newkv, -1);
545 		}
546 	} else {
547 		psize_t sz;
548 remap_data:
549 		/*
550 		 * Either we're not at a 4MB boundary or we can't get the rest
551 		 * of the 4MB extension.  We need to move the data segment.
552 		 * Leave 1MB of extra fiddle space in the calculations.
553 		 */
554 
555 		sz = (kdsize + 4*MEG - 1) & ~(4*MEG-1);
556 		BDPRINTF(PDB_BOOT1,
557 			 ("Allocating new %lx kernel data at 4MB boundary\r\n",
558 			  (u_long)sz));
559 		if ((newkp = prom_alloc_phys(sz, 4*MEG)) == (paddr_t)-1 ) {
560 			prom_printf("Cannot allocate new kernel\r\n");
561 			OF_exit();
562 		}
563 		BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
564 				     (u_int64_t)newkp));
565 		if ((newkv = (vaddr_t)prom_alloc_virt(sz, 8)) ==
566 		    (vaddr_t)-1) {
567 			prom_printf("Cannot allocate new kernel va\r\n");
568 			OF_exit();
569 		}
570 		BDPRINTF(PDB_BOOT1, ("Mapping in buffer %llx at %llx\r\n",
571 		    (u_int64_t)newkp, (u_int64_t)newkv));
572 		prom_map_phys(newkp, sz, (vaddr_t)newkv, -1);
573 		BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel data...",
574 			kdsize));
575 		bzero((void *)newkv, sz);
576 		bcopy((void *)kdata, (void *)newkv, kdsize);
577 		BDPRINTF(PDB_BOOT1, ("done.  Swapping maps..unmap new\r\n"));
578 		prom_unmap_virt((vaddr_t)newkv, sz);
579 		BDPRINTF(PDB_BOOT, ("remap old "));
580 #if 0
581 		/*
582 		 * calling the prom will probably require reading part of the
583 		 * data segment so we can't do this.  */
584 		prom_unmap_virt((vaddr_t)kdatap, kdsize);
585 #endif
586 		prom_map_phys(newkp, sz, kdata, -1);
587 		/*
588 		 * we will map in 4MB, more than we allocated, to allow
589 		 * further allocation
590 		 */
591 		BDPRINTF(PDB_BOOT1, ("free old\r\n"));
592 		prom_free_phys(kdatap, kdsize);
593 		kdatap = newkp;
594 		BDPRINTF(PDB_BOOT1,
595 			 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
596 			  "avail for kernel\r\n", (u_long)firstaddr,
597 			  (u_long)prom_vtop(firstaddr)));
598 	}
599 
600 	/*
601 	 * Find out how much RAM we have installed.
602 	 */
603 	BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\r\n"));
604 	if ((memh = OF_finddevice("/memory")) == -1) {
605 		prom_printf("no memory?");
606 		OF_exit();
607 	}
608 	memsize = OF_getproplen(memh, "reg") + 2 * sizeof(struct mem_region);
609 	valloc(mem, struct mem_region, memsize);
610 	bzero((caddr_t)mem, memsize);
611 	if (OF_getprop(memh, "reg", mem, memsize) <= 0) {
612 		prom_printf("no memory installed?");
613 		OF_exit();
614 	}
615 
616 #ifdef DEBUG
617 	if (pmapdebug & PDB_BOOT1) {
618 		/* print out mem list */
619 		prom_printf("Installed physical memory:\r\n");
620 		for (mp = mem; mp->size; mp++) {
621 			prom_printf("memlist start %lx size %lx\r\n",
622 				    (u_long)mp->start, (u_long)mp->size);
623 		}
624 	}
625 #endif
626 	BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
627 
628 	for (mp = mem; mp->size; mp++)
629 		physmem += atop(mp->size);
630 	BDPRINTF(PDB_BOOT1, (" result %x or %d pages\r\n",
631 			     (int)physmem, (int)physmem));
632 
633 	/*
634 	 * Calculate approx TSB size.
635 	 */
636 	tsbsize = 0;
637 #ifdef SMALL_KERNEL
638 	while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 2)
639 #else
640 	while ((physmem >> tsbsize) > atop(64 * MEG) && tsbsize < 7)
641 #endif
642 		tsbsize++;
643 
644 	/*
645 	 * Save the prom translations
646 	 */
647 	sz = OF_getproplen(vmemh, "translations");
648 	valloc(prom_map, struct prom_map, sz);
649 	if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <= 0) {
650 		prom_printf("no translations installed?");
651 		OF_exit();
652 	}
653 	prom_map_size = sz / sizeof(struct prom_map);
654 #ifdef DEBUG
655 	if (pmapdebug & PDB_BOOT) {
656 		/* print out mem list */
657 		prom_printf("Prom xlations:\r\n");
658 		for (i = 0; i < prom_map_size; i++) {
659 			prom_printf("start %016lx size %016lx tte %016lx\r\n",
660 				    (u_long)prom_map[i].vstart,
661 				    (u_long)prom_map[i].vsize,
662 				    (u_long)prom_map[i].tte);
663 		}
664 		prom_printf("End of prom xlations\r\n");
665 	}
666 #endif
667 	/*
668 	 * Hunt for the kernel text segment and figure out its size and
669 	 * alignment.
670 	 */
671 	ktsize = 0;
672 	for (i = 0; i < prom_map_size; i++)
673 		if (prom_map[i].vstart == ktext + ktsize)
674 			ktsize += prom_map[i].vsize;
675 	if (ktsize == 0)
676 		panic("No kernel text segment!");
677 	ektext = ktext + ktsize;
678 
679 	if (ktextp & (4*MEG-1)) {
680 		/* Kernel text is not 4MB aligned -- need to fix that */
681 		BDPRINTF(PDB_BOOT1,
682 			 ("Allocating new %lx kernel text at 4MB boundary\r\n",
683 			  (u_long)ktsize));
684 		if ((newkp = prom_alloc_phys(ktsize, 4*MEG)) == 0 ) {
685 			prom_printf("Cannot allocate new kernel text\r\n");
686 			OF_exit();
687 		}
688 		BDPRINTF(PDB_BOOT1, ("Allocating new va for buffer at %llx\r\n",
689 				     (u_int64_t)newkp));
690 		if ((newkv = (vaddr_t)prom_alloc_virt(ktsize, 8)) ==
691 		    (vaddr_t)-1) {
692 			prom_printf("Cannot allocate new kernel text va\r\n");
693 			OF_exit();
694 		}
695 		BDPRINTF(PDB_BOOT1, ("Mapping in buffer %lx at %lx\r\n",
696 				     (u_long)newkp, (u_long)newkv));
697 		prom_map_phys(newkp, ktsize, (vaddr_t)newkv, -1);
698 		BDPRINTF(PDB_BOOT1, ("Copying %ld bytes kernel text...",
699 			ktsize));
700 		bcopy((void *)ktext, (void *)newkv,
701 		    ktsize);
702 		BDPRINTF(PDB_BOOT1, ("done.  Swapping maps..unmap new\r\n"));
703 		prom_unmap_virt((vaddr_t)newkv, 4*MEG);
704 		BDPRINTF(PDB_BOOT, ("remap old "));
705 #if 0
706 		/*
707 		 * calling the prom will probably require reading part of the
708 		 * text segment so we can't do this.
709 		 */
710 		prom_unmap_virt((vaddr_t)ktextp, ktsize);
711 #endif
712 		prom_map_phys(newkp, ktsize, ktext, -1);
713 		/*
714 		 * we will map in 4MB, more than we allocated, to allow
715 		 * further allocation
716 		 */
717 		BDPRINTF(PDB_BOOT1, ("free old\r\n"));
718 		prom_free_phys(ktextp, ktsize);
719 		ktextp = newkp;
720 
721 		BDPRINTF(PDB_BOOT1,
722 			 ("pmap_bootstrap: firstaddr is %lx virt (%lx phys)"
723 			  "avail for kernel\r\n", (u_long)firstaddr,
724 			  (u_long)prom_vtop(firstaddr)));
725 
726 		/*
727 		 * Re-fetch translations -- they've certainly changed.
728 		 */
729 		if (OF_getprop(vmemh, "translations", (void *)prom_map, sz) <=
730 			0) {
731 			prom_printf("no translations installed?");
732 			OF_exit();
733 		}
734 #ifdef DEBUG
735 		if (pmapdebug & PDB_BOOT) {
736 			/* print out mem list */
737 			prom_printf("New prom xlations:\r\n");
738 			for (i = 0; i < prom_map_size; i++) {
739 				prom_printf("start %016lx size %016lx tte %016lx\r\n",
740 					    (u_long)prom_map[i].vstart,
741 					    (u_long)prom_map[i].vsize,
742 					    (u_long)prom_map[i].tte);
743 			}
744 			prom_printf("End of prom xlations\r\n");
745 		}
746 #endif
747 	}
748 	ektextp = ktextp + ktsize;
749 
750 	/*
751 	 * Here's a quick in-lined reverse bubble sort.  It gets rid of
752 	 * any translations inside the kernel data VA range.
753 	 */
754 	for(i = 0; i < prom_map_size; i++) {
755 		if (prom_map[i].vstart >= kdata &&
756 		    prom_map[i].vstart <= firstaddr) {
757 			prom_map[i].vstart = 0;
758 			prom_map[i].vsize = 0;
759 		}
760 		if (prom_map[i].vstart >= ktext &&
761 		    prom_map[i].vstart <= ektext) {
762 			prom_map[i].vstart = 0;
763 			prom_map[i].vsize = 0;
764 		}
765 		for(j = i; j < prom_map_size; j++) {
766 			if (prom_map[j].vstart >= kdata &&
767 			    prom_map[j].vstart <= firstaddr)
768 				continue;	/* this is inside the kernel */
769 			if (prom_map[j].vstart >= ktext &&
770 			    prom_map[j].vstart <= ektext)
771 				continue;	/* this is inside the kernel */
772 			if (prom_map[j].vstart > prom_map[i].vstart) {
773 				struct prom_map tmp;
774 				tmp = prom_map[i];
775 				prom_map[i] = prom_map[j];
776 				prom_map[j] = tmp;
777 			}
778 		}
779 	}
780 #ifdef DEBUG
781 	if (pmapdebug & PDB_BOOT) {
782 		/* print out mem list */
783 		prom_printf("Prom xlations:\r\n");
784 		for (i = 0; i < prom_map_size; i++) {
785 			prom_printf("start %016lx size %016lx tte %016lx\r\n",
786 				    (u_long)prom_map[i].vstart,
787 				    (u_long)prom_map[i].vsize,
788 				    (u_long)prom_map[i].tte);
789 		}
790 		prom_printf("End of prom xlations\r\n");
791 	}
792 #endif
793 
794 	/*
795 	 * Allocate a 64KB page for the cpu_info structure now.
796 	 */
797 	if ((cpu0paddr = prom_alloc_phys(numcpus * 8*NBPG, 8*NBPG)) == 0 ) {
798 		prom_printf("Cannot allocate new cpu_info\r\n");
799 		OF_exit();
800 	}
801 
802 
803 	/*
804 	 * Now that the kernel text segment is in its final location, we can try to
805 	 * find out how much memory really is free.
806 	 */
807 	sz = OF_getproplen(memh, "available") + sizeof(struct mem_region);
808 	valloc(orig, struct mem_region, sz);
809 	bzero((caddr_t)orig, sz);
810 	if (OF_getprop(memh, "available", orig, sz) <= 0) {
811 		prom_printf("no available RAM?");
812 		OF_exit();
813 	}
814 #ifdef DEBUG
815 	if (pmapdebug & PDB_BOOT1) {
816 		/* print out mem list */
817 		prom_printf("Available physical memory:\r\n");
818 		for (mp = orig; mp->size; mp++) {
819 			prom_printf("memlist start %lx size %lx\r\n",
820 				    (u_long)mp->start, (u_long)mp->size);
821 		}
822 		prom_printf("End of available physical memory\r\n");
823 	}
824 #endif
825 	valloc(avail, struct mem_region, sz);
826 	bzero((caddr_t)avail, sz);
827 	for (pcnt = 0, mp = orig, mp1 = avail; (mp1->size = mp->size);
828 	    mp++, mp1++) {
829 		mp1->start = mp->start;
830 		pcnt++;
831 	}
832 
833 	/*
834 	 * Allocate and initialize a context table
835 	 */
836 	numctx = maxctx;
837 	valloc(ctxbusy, paddr_t, CTXSIZE);
838 	bzero((caddr_t)ctxbusy, CTXSIZE);
839 
840 	/*
841 	 * Allocate our TSB.
842 	 *
843 	 * We will use the left over space to flesh out the kernel pmap.
844 	 */
845 	BDPRINTF(PDB_BOOT1, ("firstaddr before TSB=%lx\r\n",
846 		(u_long)firstaddr));
847 	firstaddr = ((firstaddr + TSBSIZE - 1) & ~(TSBSIZE-1));
848 #ifdef DEBUG
849 	i = (firstaddr + (NBPG-1)) & ~(NBPG-1);	/* First, page align */
850 	if ((int)firstaddr < i) {
851 		prom_printf("TSB alloc fixup failed\r\n");
852 		prom_printf("frobbed i, firstaddr before TSB=%x, %lx\r\n",
853 		    (int)i, (u_long)firstaddr);
854 		panic("TSB alloc");
855 		OF_exit();
856 	}
857 #endif
858 	BDPRINTF(PDB_BOOT, ("frobbed i, firstaddr before TSB=%x, %lx\r\n",
859 			    (int)i, (u_long)firstaddr));
860 	valloc(tsb_dmmu, pte_t, TSBSIZE);
861 	bzero(tsb_dmmu, TSBSIZE);
862 	valloc(tsb_immu, pte_t, TSBSIZE);
863 	bzero(tsb_immu, TSBSIZE);
864 
865 	BDPRINTF(PDB_BOOT1, ("firstaddr after TSB=%lx\r\n", (u_long)firstaddr));
866 	BDPRINTF(PDB_BOOT1, ("TSB allocated at %p size %08x\r\n", (void *)tsb_dmmu,
867 	    (int)TSBSIZE));
868 
869 #ifdef SUN4V
870 	if (CPU_ISSUN4V) {
871 		valloc(tsb_desc, struct tsb_desc, sizeof(struct tsb_desc));
872 		bzero(tsb_desc, sizeof(struct tsb_desc));
873 		tsb_desc->td_idxpgsz = 0;
874 		tsb_desc->td_assoc = 1;
875 		tsb_desc->td_size = TSBENTS;
876 		tsb_desc->td_ctxidx = -1;
877 		tsb_desc->td_pgsz = 0xf;
878 		tsb_desc->td_pa = (paddr_t)tsb_dmmu + kdatap - kdata;
879 	}
880 #endif
881 
882 	first_phys_addr = mem->start;
883 	BDPRINTF(PDB_BOOT1, ("firstaddr after pmap=%08lx\r\n",
884 		(u_long)firstaddr));
885 
886 	/*
887 	 * Page align all regions.
888 	 * Non-page memory isn't very interesting to us.
889 	 * Also, sort the entries for ascending addresses.
890 	 *
891 	 * And convert from virtual to physical addresses.
892 	 */
893 
894 	BDPRINTF(PDB_BOOT, ("kernel virtual size %08lx - %08lx\r\n",
895 			    (u_long)kernelstart, (u_long)firstaddr));
896 	kdata = kdata & ~PGOFSET;
897 	ekdata = firstaddr;
898 	ekdata = (ekdata + PGOFSET) & ~PGOFSET;
899 	BDPRINTF(PDB_BOOT1, ("kernel virtual size %08lx - %08lx\r\n",
900 			     (u_long)kernelstart, (u_long)kernelend));
901 	ekdatap = ekdata - kdata + kdatap;
902 	/* Switch from vaddrs to paddrs */
903 	if(ekdatap > (kdatap + 4*MEG)) {
904 		prom_printf("Kernel size exceeds 4MB\r\n");
905 	}
906 
907 #ifdef DEBUG
908 	if (pmapdebug & PDB_BOOT1) {
909 		/* print out mem list */
910 		prom_printf("Available %lx physical memory before cleanup:\r\n",
911 			    (u_long)avail);
912 		for (mp = avail; mp->size; mp++) {
913 			prom_printf("memlist start %lx size %lx\r\n",
914 				    (u_long)mp->start,
915 				    (u_long)mp->size);
916 		}
917 		prom_printf("End of available physical memory before cleanup\r\n");
918 		prom_printf("kernel physical text size %08lx - %08lx\r\n",
919 			    (u_long)ktextp, (u_long)ektextp);
920 		prom_printf("kernel physical data size %08lx - %08lx\r\n",
921 			    (u_long)kdatap, (u_long)ekdatap);
922 	}
923 #endif
924 	/*
925 	 * Here's another quick in-lined bubble sort.
926 	 */
927 	for (i = 0; i < pcnt; i++) {
928 		for (j = i; j < pcnt; j++) {
929 			if (avail[j].start < avail[i].start) {
930 				struct mem_region tmp;
931 				tmp = avail[i];
932 				avail[i] = avail[j];
933 				avail[j] = tmp;
934 			}
935 		}
936 	}
937 
938 	/* Throw away page zero if we have it. */
939 	if (avail->start == 0) {
940 		avail->start += NBPG;
941 		avail->size -= NBPG;
942 	}
943 	/*
944 	 * Now we need to remove the area we valloc'ed from the available
945 	 * memory lists.  (NB: we may have already alloc'ed the entire space).
946 	 */
947 	npgs = 0;
948 	for (mp = avail; mp->size; mp++) {
949 		/*
950 		 * Check whether this region holds all of the kernel.
951 		 */
952 		s = mp->start + mp->size;
953 		if (mp->start < kdatap && s > roundup(ekdatap, 4*MEG)) {
954 			avail[pcnt].start = roundup(ekdatap, 4*MEG);
955 			avail[pcnt++].size = s - kdatap;
956 			mp->size = kdatap - mp->start;
957 		}
958 		/*
959 		 * Look whether this region starts within the kernel.
960 		 */
961 		if (mp->start >= kdatap &&
962 			mp->start < roundup(ekdatap, 4*MEG)) {
963 			s = ekdatap - mp->start;
964 			if (mp->size > s)
965 				mp->size -= s;
966 			else
967 				mp->size = 0;
968 			mp->start = roundup(ekdatap, 4*MEG);
969 		}
970 		/*
971 		 * Now look whether this region ends within the kernel.
972 		 */
973 		s = mp->start + mp->size;
974 		if (s > kdatap && s < roundup(ekdatap, 4*MEG))
975 			mp->size -= s - kdatap;
976 		/*
977 		 * Now page align the start of the region.
978 		 */
979 		s = mp->start % NBPG;
980 		if (mp->size >= s) {
981 			mp->size -= s;
982 			mp->start += s;
983 		}
984 		/*
985 		 * And now align the size of the region.
986 		 */
987 		mp->size -= mp->size % NBPG;
988 		/*
989 		 * Check whether some memory is left here.
990 		 */
991 		if (mp->size == 0) {
992 			bcopy(mp + 1, mp,
993 			      (pcnt - (mp - avail)) * sizeof *mp);
994 			pcnt--;
995 			mp--;
996 			continue;
997 		}
998 		s = mp->start;
999 		sz = mp->size;
1000 		npgs += atop(sz);
1001 		for (mp1 = avail; mp1 < mp; mp1++)
1002 			if (s < mp1->start)
1003 				break;
1004 		if (mp1 < mp) {
1005 			bcopy(mp1, mp1 + 1, (char *)mp - (char *)mp1);
1006 			mp1->start = s;
1007 			mp1->size = sz;
1008 		}
1009 		/*
1010 		 * In future we should be able to specify both allocated
1011 		 * and free.
1012 		 */
1013 		uvm_page_physload(
1014 			atop(mp->start),
1015 			atop(mp->start+mp->size),
1016 			atop(mp->start),
1017 			atop(mp->start+mp->size), 0);
1018 	}
1019 
1020 #if 0
1021 	/* finally, free up any space that valloc did not use */
1022 	prom_unmap_virt((vaddr_t)ekdata, roundup(ekdata, 4*MEG) - ekdata);
1023 	if (ekdatap < roundup(kdatap, 4*MEG)) {
1024 		uvm_page_physload(atop(ekdatap),
1025 			atop(roundup(ekdatap, (4*MEG))),
1026 			atop(ekdatap),
1027 			atop(roundup(ekdatap, (4*MEG))), 0);
1028 	}
1029 #endif
1030 
1031 #ifdef DEBUG
1032 	if (pmapdebug & PDB_BOOT) {
1033 		/* print out mem list */
1034 		prom_printf("Available physical memory after cleanup:\r\n");
1035 		for (mp = avail; mp->size; mp++) {
1036 			prom_printf("avail start %lx size %lx\r\n",
1037 				    (long)mp->start, (long)mp->size);
1038 		}
1039 		prom_printf("End of available physical memory after cleanup\r\n");
1040 	}
1041 #endif
1042 	/*
1043 	 * Allocate and clear out pmap_kernel()->pm_segs[]
1044 	 */
1045 	pmap_kernel()->pm_refs = 1;
1046 	pmap_kernel()->pm_ctx = 0;
1047 	{
1048 		paddr_t newp;
1049 
1050 		do {
1051 			pmap_get_page(&newp, NULL, pmap_kernel());
1052 		} while (!newp); /* Throw away page zero */
1053 		pmap_kernel()->pm_segs=(int64_t *)(u_long)newp;
1054 		pmap_kernel()->pm_physaddr = newp;
1055 		/* mark kernel context as busy */
1056 		((paddr_t*)ctxbusy)[0] = pmap_kernel()->pm_physaddr;
1057 	}
1058 	/*
1059 	 * finish filling out kernel pmap.
1060 	 */
1061 
1062 	BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\r\n",
1063 	    (long)pmap_kernel()->pm_physaddr));
1064 	/*
1065 	 * Tell pmap about our mesgbuf -- Hope this works already
1066 	 */
1067 #ifdef DEBUG
1068 	BDPRINTF(PDB_BOOT1, ("Calling consinit()\r\n"));
1069 	if (pmapdebug & PDB_BOOT1) consinit();
1070 	BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\r\n"));
1071 #endif
1072 	/* it's not safe to call pmap_enter so we need to do this ourselves */
1073 	va = (vaddr_t)msgbufp;
1074 	prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp, -1);
1075 	while (msgbufsiz) {
1076 		data = TSB_DATA(0 /* global */,
1077 			PGSZ_8K,
1078 			phys_msgbuf,
1079 			1 /* priv */,
1080 			1 /* Write */,
1081 			1 /* Cacheable */,
1082 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1083 			1 /* valid */,
1084 			0 /* IE */);
1085 		pmap_enter_kpage(va, data);
1086 		va += PAGE_SIZE;
1087 		msgbufsiz -= PAGE_SIZE;
1088 		phys_msgbuf += PAGE_SIZE;
1089 	}
1090 	BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\r\n"));
1091 
1092 	BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\r\n"));
1093 	data = 0;
1094 	if (CPU_ISSUN4U || CPU_ISSUN4US)
1095 		data = SUN4U_TLB_EXEC;
1096 	for (i = 0; i < prom_map_size; i++) {
1097 		if (prom_map[i].vstart && ((prom_map[i].vstart>>32) == 0)) {
1098 			for (j = 0; j < prom_map[i].vsize; j += NBPG) {
1099 				int k;
1100 
1101 				for (k = 0; page_size_map[k].mask; k++) {
1102 					if (((prom_map[i].vstart |
1103 					      prom_map[i].tte) &
1104 					      page_size_map[k].mask) == 0 &&
1105 					      page_size_map[k].mask <
1106 					      prom_map[i].vsize)
1107 						break;
1108 				}
1109 				/* Enter PROM map into pmap_kernel() */
1110 				pmap_enter_kpage(prom_map[i].vstart + j,
1111 					(prom_map[i].tte + j)|data|
1112 					page_size_map[k].code);
1113 			}
1114 		}
1115 	}
1116 	BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\r\n"));
1117 
1118 	/*
1119 	 * Fix up start of kernel heap.
1120 	 */
1121 	vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
1122 	/* Let's keep 1 page of redzone after the kernel */
1123 	vmmap += NBPG;
1124 	{
1125 		extern vaddr_t u0[2];
1126 		extern struct pcb* proc0paddr;
1127 		extern void main(void);
1128 		paddr_t pa;
1129 
1130 		/* Initialize all the pointers to u0 */
1131 		u0[0] = vmmap;
1132 		/* Allocate some VAs for u0 */
1133 		u0[1] = vmmap + 2*USPACE;
1134 
1135 		BDPRINTF(PDB_BOOT1,
1136 			("Inserting stack 0 into pmap_kernel() at %p\r\n",
1137 				vmmap));
1138 
1139 		while (vmmap < u0[1]) {
1140 			int64_t data;
1141 
1142 			pmap_get_page(&pa, NULL, pmap_kernel());
1143 			prom_map_phys(pa, NBPG, vmmap, -1);
1144 			data = TSB_DATA(0 /* global */,
1145 				PGSZ_8K,
1146 				pa,
1147 				1 /* priv */,
1148 				1 /* Write */,
1149 				1 /* Cacheable */,
1150 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1151 				1 /* valid */,
1152 				0 /* IE */);
1153 			pmap_enter_kpage(vmmap, data);
1154 			vmmap += NBPG;
1155 		}
1156 		BDPRINTF(PDB_BOOT1,
1157 			 ("Done inserting stack 0 into pmap_kernel()\r\n"));
1158 
1159 		/* Now map in and initialize our cpu_info structure */
1160 #ifdef DIAGNOSTIC
1161 		vmmap += NBPG; /* redzone -- XXXX do we need one? */
1162 #endif
1163 		intstk = vmmap = roundup(vmmap, 64*KB);
1164 		cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK);
1165 
1166 		BDPRINTF(PDB_BOOT1,
1167 			("Inserting cpu_info into pmap_kernel() at %p\r\n",
1168 				 cpus));
1169 		/* Now map in all 8 pages of cpu_info */
1170 		pa = cpu0paddr;
1171 		prom_map_phys(pa, 64*KB, vmmap, -1);
1172 		/*
1173 		 * Also map it in as the interrupt stack.
1174 		 * This lets the PROM see this if needed.
1175 		 *
1176 		 * XXXX locore.s does not flush these mappings
1177 		 * before installing the locked TTE.
1178 		 */
1179 		prom_map_phys(pa, 64*KB, CPUINFO_VA, -1);
1180 		for (i=0; i<8; i++) {
1181 			int64_t data;
1182 
1183 			data = TSB_DATA(0 /* global */,
1184 				PGSZ_8K,
1185 				pa,
1186 				1 /* priv */,
1187 				1 /* Write */,
1188 				1 /* Cacheable */,
1189 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1190 				1 /* valid */,
1191 				0 /* IE */);
1192 			pmap_enter_kpage(vmmap, data);
1193 			vmmap += NBPG;
1194 			pa += NBPG;
1195 		}
1196 		BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\r\n"));
1197 
1198 		/* Initialize our cpu_info structure */
1199 		bzero((void *)intstk, 8*NBPG);
1200 		cpus->ci_self = cpus;
1201 		cpus->ci_next = NULL; /* Redundant, I know. */
1202 		cpus->ci_curproc = &proc0;
1203 		cpus->ci_cpcb = (struct pcb *)u0[0]; /* Need better source */
1204 		cpus->ci_upaid = cpu_myid();
1205 		cpus->ci_number = 0;
1206 		cpus->ci_flags = CPUF_RUNNING;
1207 		cpus->ci_fpproc = NULL;
1208 		cpus->ci_spinup = main; /* Call main when we're running. */
1209 		cpus->ci_initstack = (void *)u0[1];
1210 		cpus->ci_paddr = cpu0paddr;
1211 #ifdef SUN4V
1212 		cpus->ci_mmfsa = cpu0paddr;
1213 #endif
1214 		proc0paddr = cpus->ci_cpcb;
1215 
1216 		cpu0paddr += 64 * KB;
1217 
1218 		/* The rest will be done at CPU attach time. */
1219 		BDPRINTF(PDB_BOOT1,
1220 			 ("Done inserting cpu_info into pmap_kernel()\r\n"));
1221 	}
1222 
1223 	vmmap = (vaddr_t)reserve_dumppages((caddr_t)(u_long)vmmap);
1224 	BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\r\n"));
1225 
1226 	pmap_bootstrap_cpu(cpus->ci_paddr);
1227 }
1228 
1229 void sun4u_bootstrap_cpu(paddr_t);
1230 void sun4v_bootstrap_cpu(paddr_t);
1231 
1232 void
1233 pmap_bootstrap_cpu(paddr_t intstack)
1234 {
1235 	if (CPU_ISSUN4V)
1236 		sun4v_bootstrap_cpu(intstack);
1237 	else
1238 		sun4u_bootstrap_cpu(intstack);
1239 }
1240 
1241 extern void sun4u_set_tsbs(void);
1242 
1243 void
1244 sun4u_bootstrap_cpu(paddr_t intstack)
1245 {
1246 	u_int64_t data;
1247 	paddr_t pa;
1248 	vaddr_t va;
1249 	int index;
1250 	int impl;
1251 
1252 	impl = (getver() & VER_IMPL) >> VER_IMPL_SHIFT;
1253 
1254 	/*
1255 	 * Establish the 4MB locked mappings for kernel data and text.
1256 	 *
1257 	 * The text segment needs to be mapped into the DTLB too,
1258 	 * because of .rodata.
1259 	 */
1260 
1261 	index = 15; /* XXX */
1262 	for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) {
1263 		data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, FORCE_ALIAS, 1, 0);
1264 		data |= SUN4U_TLB_L;
1265 		prom_itlb_load(index, data, va);
1266 		prom_dtlb_load(index, data, va);
1267 		index--;
1268 	}
1269 
1270 	for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) {
1271 		data = SUN4U_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, FORCE_ALIAS, 1, 0);
1272 		data |= SUN4U_TLB_L;
1273 		prom_dtlb_load(index, data, va);
1274 		index--;
1275 	}
1276 
1277 #ifdef MULTIPROCESSOR
1278 	if (impl >= IMPL_OLYMPUS_C && impl <= IMPL_JUPITER) {
1279 		/*
1280 		 * On SPARC64-VI and SPARC64-VII processors, the MMU is
1281 		 * shared between threads, so we can't establish a locked
1282 		 * mapping for the interrupt stack since the mappings would
1283 		 * conflict.  Instead we stick the address in a scratch
1284 		 * register, like we do for sun4v.
1285 		 */
1286 		pa = intstack + (CPUINFO_VA - INTSTACK);
1287 		pa += offsetof(struct cpu_info, ci_self);
1288 		va = ldxa(pa, ASI_PHYS_CACHED);
1289 		stxa(0x00, ASI_SCRATCH, va);
1290 
1291 		if ((CPU_JUPITERID % 2) == 1)
1292 			index--;
1293 
1294 		data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, FORCE_ALIAS, 1, 0);
1295 		data |= SUN4U_TLB_L;
1296 		prom_dtlb_load(index, data, va - (CPUINFO_VA - INTSTACK));
1297 
1298 		sun4u_set_tsbs();
1299 		return;
1300 	}
1301 #endif
1302 
1303 	/*
1304 	 * Establish the 64KB locked mapping for the interrupt stack.
1305 	 */
1306 
1307 	data = SUN4U_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, FORCE_ALIAS, 1, 0);
1308 	data |= SUN4U_TLB_L;
1309 	prom_dtlb_load(index, data, INTSTACK);
1310 
1311 	sun4u_set_tsbs();
1312 }
1313 
1314 void
1315 sun4v_bootstrap_cpu(paddr_t intstack)
1316 {
1317 #ifdef SUN4V
1318 	u_int64_t data;
1319 	paddr_t pa;
1320 	vaddr_t va;
1321 	int err;
1322 
1323 	/*
1324 	 * Establish the 4MB locked mappings for kernel data and text.
1325 	 *
1326 	 * The text segment needs to be mapped into the DTLB too,
1327 	 * because of .rodata.
1328 	 */
1329 
1330 	for (va = ktext, pa = ktextp; va < ektext; va += 4*MEG, pa += 4*MEG) {
1331 		data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 0, 1, 0, 1, 0);
1332 		data |= SUN4V_TLB_X;
1333 		err = hv_mmu_map_perm_addr(va, data, MAP_ITLB|MAP_DTLB);
1334 		if (err != H_EOK)
1335 			prom_printf("err: %d\r\n", err);
1336 	}
1337 
1338 	for (va = kdata, pa = kdatap; va < ekdata; va += 4*MEG, pa += 4*MEG) {
1339 		data = SUN4V_TSB_DATA(0, PGSZ_4M, pa, 1, 1, 1, 0, 1, 0);
1340 		err = hv_mmu_map_perm_addr(va, data, MAP_DTLB);
1341 		if (err != H_EOK)
1342 			prom_printf("err: %d\r\n", err);
1343 	}
1344 
1345 #ifndef MULTIPROCESSOR
1346 	/*
1347 	 * Establish the 64KB locked mapping for the interrupt stack.
1348 	 */
1349 	data = SUN4V_TSB_DATA(0, PGSZ_64K, intstack, 1, 1, 1, 0, 1, 0);
1350 	err = hv_mmu_map_perm_addr(INTSTACK, data, MAP_DTLB);
1351 	if (err != H_EOK)
1352 		prom_printf("err: %d\r\n", err);
1353 #else
1354 	pa = intstack + (CPUINFO_VA - INTSTACK);
1355 	pa += offsetof(struct cpu_info, ci_self);
1356 	stxa(0x00, ASI_SCRATCHPAD, ldxa(pa, ASI_PHYS_CACHED));
1357 #endif
1358 
1359 	stxa(0x10, ASI_SCRATCHPAD, intstack + (CPUINFO_VA - INTSTACK));
1360 
1361 	err = hv_mmu_tsb_ctx0(1, (paddr_t)tsb_desc + kdatap - kdata);
1362 	if (err != H_EOK)
1363 		prom_printf("err: %d\r\n", err);
1364 	err = hv_mmu_tsb_ctxnon0(1, (paddr_t)tsb_desc + kdatap - kdata);
1365 	if (err != H_EOK)
1366 		prom_printf("err: %d\r\n", err);
1367 #endif
1368 }
1369 
1370 /*
1371  * Initialize anything else for pmap handling.
1372  * Called during uvm_init().
1373  */
1374 void
1375 pmap_init(void)
1376 {
1377 	BDPRINTF(PDB_BOOT1, ("pmap_init()\r\n"));
1378 	if (PAGE_SIZE != NBPG)
1379 		panic("pmap_init: CLSIZE!=1");
1380 
1381 	/* Setup a pool for additional pvlist structures */
1382 	pool_init(&pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pv_entry", NULL);
1383 	pool_setipl(&pv_pool, IPL_VM);
1384 	pool_init(&pmap_pool, sizeof(struct pmap), 0, 0, 0, "pmappl",
1385 	    &pool_allocator_nointr);
1386 }
1387 
1388 /* Start of non-cachable physical memory on UltraSPARC-III. */
1389 #define VM_MAXPHYS_ADDRESS	((vaddr_t)0x0000040000000000L)
1390 
1391 static vaddr_t kbreak; /* End of kernel VA */
1392 
1393 /*
1394  * How much virtual space is available to the kernel?
1395  */
1396 void
1397 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1398 {
1399 	/*
1400 	 * Make sure virtual memory and physical memory don't overlap
1401 	 * to avoid problems with ASI_PHYS_CACHED on UltraSPARC-III.
1402 	 */
1403 	if (vmmap < VM_MAXPHYS_ADDRESS)
1404 		vmmap = VM_MAXPHYS_ADDRESS;
1405 
1406 	/* Reserve two pages for pmap_copy_page && /dev/mem */
1407 	*start = kbreak = (vaddr_t)(vmmap + 2*NBPG);
1408 	*end = VM_MAX_KERNEL_ADDRESS;
1409 	BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\r\n", *start, *end));
1410 }
1411 
1412 /*
1413  * Preallocate kernel page tables to a specified VA.
1414  * This simply loops through the first TTE for each
1415  * page table from the beginning of the kernel pmap,
1416  * reads the entry, and if the result is
1417  * zero (either invalid entry or no page table) it stores
1418  * a zero there, populating page tables in the process.
1419  * This is not the most efficient technique but I don't
1420  * expect it to be called that often.
1421  */
1422 vaddr_t
1423 pmap_growkernel(vaddr_t maxkvaddr)
1424 {
1425 	paddr_t pg;
1426 	struct pmap *pm = pmap_kernel();
1427 
1428 	if (maxkvaddr >= VM_MAX_KERNEL_ADDRESS) {
1429 		printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
1430 		       (void *)VM_MAX_KERNEL_ADDRESS, (void *)maxkvaddr);
1431 		return (kbreak);
1432 	}
1433 
1434 	/* Align with the start of a page table */
1435 	for (kbreak &= (-1<<PDSHIFT); kbreak < maxkvaddr;
1436 	     kbreak += (1<<PDSHIFT)) {
1437 		if (pseg_get(pm, kbreak))
1438 			continue;
1439 
1440 		pg = 0;
1441 		while (pseg_set(pm, kbreak, 0, pg) == 1) {
1442 			pg = 0;
1443 			pmap_get_page(&pg, "growk", pm);
1444 		}
1445 
1446 	}
1447 
1448 	return (kbreak);
1449 }
1450 
1451 /*
1452  * Create and return a physical map.
1453  */
1454 struct pmap *
1455 pmap_create(void)
1456 {
1457 	struct pmap *pm;
1458 
1459 	pm = pool_get(&pmap_pool, PR_WAITOK | PR_ZERO);
1460 
1461 	mtx_init(&pm->pm_mtx, IPL_VM);
1462 	pm->pm_refs = 1;
1463 	pmap_get_page(&pm->pm_physaddr, "pmap_create", pm);
1464 	pm->pm_segs = (int64_t *)(u_long)pm->pm_physaddr;
1465 	ctx_alloc(pm);
1466 
1467 	return (pm);
1468 }
1469 
1470 /*
1471  * Add a reference to the given pmap.
1472  */
1473 void
1474 pmap_reference(struct pmap *pm)
1475 {
1476 	atomic_inc_int(&pm->pm_refs);
1477 }
1478 
1479 /*
1480  * Retire the given pmap from service.
1481  * Should only be called if the map contains no valid mappings.
1482  */
1483 void
1484 pmap_destroy(struct pmap *pm)
1485 {
1486 	if (atomic_dec_int_nv(&pm->pm_refs) == 0) {
1487 		pmap_release(pm);
1488 		pool_put(&pmap_pool, pm);
1489 	}
1490 }
1491 
1492 /*
1493  * Release any resources held by the given physical map.
1494  * Called when a pmap initialized by pmap_pinit is being released.
1495  */
1496 void
1497 pmap_release(struct pmap *pm)
1498 {
1499 	int i, j, k;
1500 	paddr_t *pdir, *ptbl, tmp;
1501 
1502 #ifdef DIAGNOSTIC
1503 	if(pm == pmap_kernel())
1504 		panic("pmap_release: releasing pmap_kernel()");
1505 #endif
1506 
1507 	mtx_enter(&pm->pm_mtx);
1508 	for(i=0; i<STSZ; i++) {
1509 		paddr_t psegentp = (paddr_t)(u_long)&pm->pm_segs[i];
1510 		if((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)psegentp,
1511 		    ASI_PHYS_CACHED))) {
1512 			for (k=0; k<PDSZ; k++) {
1513 				paddr_t pdirentp = (paddr_t)(u_long)&pdir[k];
1514 				if ((ptbl = (paddr_t *)(u_long)ldxa(
1515 					(vaddr_t)pdirentp, ASI_PHYS_CACHED))) {
1516 					for (j=0; j<PTSZ; j++) {
1517 						int64_t data;
1518 						paddr_t pa;
1519 						pv_entry_t pv;
1520 
1521 						data  = ldxa((vaddr_t)&ptbl[j],
1522 							ASI_PHYS_CACHED);
1523 						if (!(data & TLB_V))
1524 							continue;
1525 						pa = data & TLB_PA_MASK;
1526 						pv = pa_to_pvh(pa);
1527 						if (pv != NULL) {
1528 							printf("pmap_release: pm=%p page %llx still in use\n", pm,
1529 							       (unsigned long long)(((u_int64_t)i<<STSHIFT)|((u_int64_t)k<<PDSHIFT)|((u_int64_t)j<<PTSHIFT)));
1530 							Debugger();
1531 						}
1532 					}
1533 					stxa(pdirentp, ASI_PHYS_CACHED, 0);
1534 					pmap_free_page((paddr_t)ptbl, pm);
1535 				}
1536 			}
1537 			stxa(psegentp, ASI_PHYS_CACHED, 0);
1538 			pmap_free_page((paddr_t)pdir, pm);
1539 		}
1540 	}
1541 	tmp = (paddr_t)(u_long)pm->pm_segs;
1542 	pm->pm_segs = NULL;
1543 	pmap_free_page(tmp, pm);
1544 	mtx_leave(&pm->pm_mtx);
1545 	ctx_free(pm);
1546 }
1547 
1548 /*
1549  * Copy the range specified by src_addr/len
1550  * from the source map to the range dst_addr/len
1551  * in the destination map.
1552  *
1553  * This routine is only advisory and need not do anything.
1554  */
1555 void
1556 pmap_copy(struct pmap *dst_pmap, struct pmap *src_pmap, vaddr_t dst_addr,
1557     vsize_t len, vaddr_t src_addr)
1558 {
1559 }
1560 
1561 /*
1562  * Garbage collects the physical map system for
1563  * pages which are no longer used.
1564  * Success need not be guaranteed -- that is, there
1565  * may well be pages which are not referenced, but
1566  * others may be collected.
1567  * Called by the pageout daemon when pages are scarce.
1568  */
1569 void
1570 pmap_collect(struct pmap *pm)
1571 {
1572 #if 1
1573 	int i, j, k, n, m, s;
1574 	paddr_t *pdir, *ptbl;
1575 	/* This is a good place to scan the pmaps for page tables with
1576 	 * no valid mappings in them and free them. */
1577 
1578 	/* NEVER GARBAGE COLLECT THE KERNEL PMAP */
1579 	if (pm == pmap_kernel())
1580 		return;
1581 
1582 	s = splvm();
1583 	for (i=0; i<STSZ; i++) {
1584 		if ((pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i], ASI_PHYS_CACHED))) {
1585 			m = 0;
1586 			for (k=0; k<PDSZ; k++) {
1587 				if ((ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k], ASI_PHYS_CACHED))) {
1588 					m++;
1589 					n = 0;
1590 					for (j=0; j<PTSZ; j++) {
1591 						int64_t data = ldxa((vaddr_t)&ptbl[j], ASI_PHYS_CACHED);
1592 						if (data&TLB_V)
1593 							n++;
1594 					}
1595 					if (!n) {
1596 						/* Free the damn thing */
1597 						stxa((paddr_t)(u_long)&pdir[k], ASI_PHYS_CACHED, 0);
1598 						pmap_free_page((paddr_t)ptbl, pm);
1599 					}
1600 				}
1601 			}
1602 			if (!m) {
1603 				/* Free the damn thing */
1604 				stxa((paddr_t)(u_long)&pm->pm_segs[i], ASI_PHYS_CACHED, 0);
1605 				pmap_free_page((paddr_t)pdir, pm);
1606 			}
1607 		}
1608 	}
1609 	splx(s);
1610 #endif
1611 }
1612 
1613 void
1614 pmap_zero_page(struct vm_page *pg)
1615 {
1616 	pmap_zero_phys(VM_PAGE_TO_PHYS(pg));
1617 }
1618 
1619 void
1620 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1621 {
1622 	paddr_t src = VM_PAGE_TO_PHYS(srcpg);
1623 	paddr_t dst = VM_PAGE_TO_PHYS(dstpg);
1624 
1625 	pmap_copy_phys(src, dst);
1626 }
1627 
1628 /*
1629  * Activate the address space for the specified process.  If the
1630  * process is the current process, load the new MMU context.
1631  */
1632 void
1633 pmap_activate(struct proc *p)
1634 {
1635 	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
1636 	int s;
1637 
1638 	/*
1639 	 * This is essentially the same thing that happens in cpu_switch()
1640 	 * when the newly selected process is about to run, except that we
1641 	 * have to make sure to clean the register windows before we set
1642 	 * the new context.
1643 	 */
1644 
1645 	s = splvm();
1646 	if (p == curproc) {
1647 		write_user_windows();
1648 		if (pmap->pm_ctx == 0)
1649 			ctx_alloc(pmap);
1650 		if (CPU_ISSUN4V)
1651 			stxa(CTX_SECONDARY, ASI_MMU_CONTEXTID, pmap->pm_ctx);
1652 		else
1653 			stxa(CTX_SECONDARY, ASI_DMMU, pmap->pm_ctx);
1654 	}
1655 	splx(s);
1656 }
1657 
1658 /*
1659  * Deactivate the address space of the specified process.
1660  */
1661 void
1662 pmap_deactivate(struct proc *p)
1663 {
1664 }
1665 
1666 /*
1667  * pmap_kenter_pa:		[ INTERFACE ]
1668  *
1669  *	Enter a va -> pa mapping into the kernel pmap without any
1670  *	physical->virtual tracking.
1671  *
1672  *	Note: no locking is necessary in this function.
1673  */
1674 void
1675 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1676 {
1677 	pte_t tte;
1678 	struct pmap *pm = pmap_kernel();
1679 	int s;
1680 
1681 	KDASSERT(va < INTSTACK || va > EINTSTACK);
1682 	KDASSERT(va < kdata || va > ekdata);
1683 
1684 #ifdef DIAGNOSTIC
1685 	if (pa & (PMAP_NVC|PMAP_NC|PMAP_LITTLE))
1686 		panic("pmap_kenter_pa: illegal cache flags %ld", pa);
1687 #endif
1688 
1689 	/*
1690 	 * Construct the TTE.
1691 	 */
1692 	s = splvm();
1693 	tte.tag = TSB_TAG(0,pm->pm_ctx,va);
1694 	if (CPU_ISSUN4V) {
1695 		tte.data = SUN4V_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1696 		    (PROT_WRITE & prot), 1, 0, 1, 0);
1697 		/*
1698 		 * We don't track modification on kenter mappings.
1699 		 */
1700 		if (prot & PROT_WRITE)
1701 			tte.data |= SUN4V_TLB_REAL_W|SUN4V_TLB_W;
1702 		if (prot & PROT_EXEC)
1703 			tte.data |= SUN4V_TLB_EXEC;
1704 		tte.data |= SUN4V_TLB_TSB_LOCK;	/* wired */
1705 	} else {
1706 		tte.data = SUN4U_TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1707 		    (PROT_WRITE & prot), 1, 0, 1, 0);
1708 		/*
1709 		 * We don't track modification on kenter mappings.
1710 		 */
1711 		if (prot & PROT_WRITE)
1712 			tte.data |= SUN4U_TLB_REAL_W|SUN4U_TLB_W;
1713 		if (prot & PROT_EXEC)
1714 			tte.data |= SUN4U_TLB_EXEC;
1715 		tte.data |= SUN4U_TLB_TSB_LOCK;	/* wired */
1716 	}
1717 	KDASSERT((tte.data & TLB_NFO) == 0);
1718 
1719 	/* Kernel page tables are pre-allocated. */
1720 	if (pseg_set(pmap_kernel(), va, tte.data, 0) != 0)
1721 		panic("pmap_kenter_pa: no pseg");
1722 
1723 	pmap_kernel()->pm_stats.resident_count++;
1724 
1725 	splx(s);
1726 	/* this is correct */
1727 	dcache_flush_page(pa);
1728 }
1729 
1730 /*
1731  * pmap_kremove:		[ INTERFACE ]
1732  *
1733  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1734  *	for size bytes (assumed to be page rounded).
1735  */
1736 void
1737 pmap_kremove(vaddr_t va, vsize_t size)
1738 {
1739 	struct pmap *pm = pmap_kernel();
1740 	int64_t data;
1741 	int s;
1742 
1743 	KDASSERT(va < INTSTACK || va > EINTSTACK);
1744 	KDASSERT(va < kdata || va > ekdata);
1745 
1746 	s = splvm();
1747 	while (size >= NBPG) {
1748 		/*
1749 		 * Is this part of the permanent 4MB mapping?
1750 		 */
1751 #ifdef DIAGNOSTIC
1752 		if (pm == pmap_kernel() &&
1753 			(va >= ktext && va < roundup(ekdata, 4*MEG)))
1754 			panic("pmap_kremove: va=%08x in locked TLB",
1755 				(u_int)va);
1756 #endif
1757 		/* Shouldn't need to do this if the entry's not valid. */
1758 		if ((data = pseg_get(pm, va))) {
1759 			/* We need to flip the valid bit and clear the access statistics. */
1760 			if (pseg_set(pm, va, 0, 0)) {
1761 				printf("pmap_kremove: gotten pseg empty!\n");
1762 				Debugger();
1763 				/* panic? */
1764 			}
1765 
1766 			pmap_kernel()->pm_stats.resident_count--;
1767 			tsb_invalidate(pm->pm_ctx, va);
1768 			/* Here we assume nothing can get into the TLB unless it has a PTE */
1769 			tlb_flush_pte(va, pm->pm_ctx);
1770 		}
1771 		va += NBPG;
1772 		size -= NBPG;
1773 	}
1774 	splx(s);
1775 }
1776 
1777 /*
1778  * Insert physical page at pa into the given pmap at virtual address va.
1779  * Supports 64-bit pa so we can map I/O space.
1780  */
1781 int
1782 pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1783 {
1784 	pte_t tte;
1785 	paddr_t pg;
1786 	int aliased = 0;
1787 	pv_entry_t pv = NULL;
1788 	int size = 0; /* PMAP_SZ_TO_TTE(pa); */
1789 	boolean_t wired = (flags & PMAP_WIRED) != 0;
1790 
1791 	/*
1792 	 * Is this part of the permanent mappings?
1793 	 */
1794 	KDASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
1795 	KDASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
1796 
1797 	/*
1798 	 * XXXX If a mapping at this address already exists, remove it.
1799 	 */
1800 	mtx_enter(&pm->pm_mtx);
1801 	tte.data = pseg_get(pm, va);
1802 	if (tte.data & TLB_V) {
1803 		mtx_leave(&pm->pm_mtx);
1804 		pmap_remove(pm, va, va + NBPG-1);
1805 		mtx_enter(&pm->pm_mtx);
1806 		tte.data = pseg_get(pm, va);
1807 	}
1808 
1809 	/*
1810 	 * Construct the TTE.
1811 	 */
1812 	pv = pa_to_pvh(pa);
1813 	if (pv != NULL) {
1814 		struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
1815 
1816 		mtx_enter(&pg->mdpage.pvmtx);
1817 		aliased = (pv->pv_va & (PV_ALIAS|PV_NVC));
1818 #ifdef DIAGNOSTIC
1819 		if ((flags & PROT_MASK) & ~prot)
1820 			panic("pmap_enter: access_type exceeds prot");
1821 #endif
1822 		/* If we don't have the traphandler do it, set the ref/mod bits now */
1823 		if (flags & PROT_MASK)
1824 			pv->pv_va |= PV_REF;
1825 		if (flags & PROT_WRITE)
1826 			pv->pv_va |= PV_MOD;
1827 		pv->pv_va |= pmap_tte2flags(tte.data);
1828 		mtx_leave(&pg->mdpage.pvmtx);
1829 	} else {
1830 		aliased = 0;
1831 	}
1832 	if (pa & PMAP_NVC)
1833 		aliased = 1;
1834 #ifdef NO_VCACHE
1835 	aliased = 1; /* Disable D$ */
1836 #endif
1837 	if (CPU_ISSUN4V) {
1838 		tte.data = SUN4V_TSB_DATA(0, size, pa, pm == pmap_kernel(),
1839 		    (flags & PROT_WRITE), (!(pa & PMAP_NC)),
1840 		    aliased, 1, (pa & PMAP_LITTLE));
1841 		if (prot & PROT_WRITE)
1842 			tte.data |= SUN4V_TLB_REAL_W;
1843 		if (prot & PROT_EXEC)
1844 			tte.data |= SUN4V_TLB_EXEC;
1845 		if (wired)
1846 			tte.data |= SUN4V_TLB_TSB_LOCK;
1847 	} else {
1848 		tte.data = SUN4U_TSB_DATA(0, size, pa, pm == pmap_kernel(),
1849 		    (flags & PROT_WRITE), (!(pa & PMAP_NC)),
1850 		    aliased, 1, (pa & PMAP_LITTLE));
1851 		if (prot & PROT_WRITE)
1852 			tte.data |= SUN4U_TLB_REAL_W;
1853 		if (prot & PROT_EXEC)
1854 			tte.data |= SUN4U_TLB_EXEC;
1855 		if (wired)
1856 			tte.data |= SUN4U_TLB_TSB_LOCK;
1857 	}
1858 	KDASSERT((tte.data & TLB_NFO) == 0);
1859 
1860 	pg = 0;
1861 	while (pseg_set(pm, va, tte.data, pg) == 1) {
1862 		pg = 0;
1863 		if (!pmap_get_page(&pg, NULL, pm)) {
1864 			if ((flags & PMAP_CANFAIL) == 0)
1865 				panic("pmap_enter: no memory");
1866 			mtx_leave(&pm->pm_mtx);
1867 			return (ENOMEM);
1868 		}
1869 	}
1870 
1871 	if (pv)
1872 		pmap_enter_pv(pm, va, pa);
1873 	pm->pm_stats.resident_count++;
1874 	mtx_leave(&pm->pm_mtx);
1875 	if (pm->pm_ctx || pm == pmap_kernel()) {
1876 		tsb_invalidate(pm->pm_ctx, va);
1877 
1878 		/* Force reload -- protections may be changed */
1879 		tlb_flush_pte(va, pm->pm_ctx);
1880 	}
1881 	/* this is correct */
1882 	dcache_flush_page(pa);
1883 
1884 	/* We will let the fast mmu miss interrupt load the new translation */
1885 	return 0;
1886 }
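
/*
 * Editor's illustration, not part of the original source: a hedged
 * sketch of a pmap_enter() caller using PMAP_CANFAIL and backing off
 * when the pmap cannot allocate a page table page.  The function name
 * and the wait channel string are hypothetical.
 */
#ifdef NOTYET
void
example_enter(struct pmap *pm, vaddr_t va, paddr_t pa)
{
	/*
	 * Access type PROT_READ pre-sets the REF bit; PMAP_CANFAIL turns
	 * the "no memory" panic above into an ENOMEM return instead.
	 */
	while (pmap_enter(pm, va, pa, PROT_READ | PROT_WRITE,
	    PROT_READ | PMAP_CANFAIL) == ENOMEM) {
		/* Let the pagedaemon make progress, then retry. */
		uvm_wait("example_enter");
	}
}
#endif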
1887 
1888 /*
1889  * Remove the given range of mapping entries.
1890  */
1891 void
1892 pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva)
1893 {
1894 	int flush = 0;
1895 	int64_t data;
1896 	vaddr_t flushva = va;
1897 
1898 	/*
1899 	 * Here we should check each pseg and, if there are no more entries,
1900 	 * free it.  It's just that linear scans of 8K pages get expensive.
1901 	 */
1902 
1903 	KDASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
1904 	KDASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
1905 
1906 	mtx_enter(&pm->pm_mtx);
1907 
1908 	/* Now do the real work */
1909 	while (va < endva) {
1910 		/*
1911 		 * Is this part of the permanent 4MB mapping?
1912 		 */
1913 #ifdef DIAGNOSTIC
1914 		if (pm == pmap_kernel() && va >= ktext &&
1915 			va < roundup(ekdata, 4*MEG))
1916 			panic("pmap_remove: va=%08x in locked TLB", (u_int)va);
1917 #endif
1918 		/* We don't really need to do this if the valid bit is not set... */
1919 		if ((data = pseg_get(pm, va)) && (data & TLB_V) != 0) {
1920 			paddr_t entry;
1921 			pv_entry_t pv;
1922 
1923 			flush |= 1;
1924 			/* First remove it from the pv_table */
1925 			entry = (data & TLB_PA_MASK);
1926 			pv = pa_to_pvh(entry);
1927 			if (pv != NULL)
1928 				pmap_remove_pv(pm, va, entry);
1929 			/* We need to flip the valid bit and clear the access statistics. */
1930 			if (pseg_set(pm, va, 0, 0)) {
1931 				printf("pmap_remove: gotten pseg empty!\n");
1932 				Debugger();
1933 				/* panic? */
1934 			}
1935 			pm->pm_stats.resident_count--;
1936 			if (pm->pm_ctx || pm == pmap_kernel()) {
1937 				tsb_invalidate(pm->pm_ctx, va);
1938 				/* Here we assume nothing can get into the TLB unless it has a PTE */
1939 				tlb_flush_pte(va, pm->pm_ctx);
1940 			}
1941 		}
1942 		va += NBPG;
1943 	}
1944 	mtx_leave(&pm->pm_mtx);
1945 	if (flush) {
1946 		cache_flush_virt(flushva, endva - flushva);
1947 	}
1948 }
1949 
1950 /*
1951  * Change the protection on the specified range of this pmap.
1952  */
1953 void
1954 pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1955 {
1956 	paddr_t pa;
1957 	pv_entry_t pv;
1958 	int64_t data;
1959 
1960 	KDASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
1961 	KDASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
1962 
1963 	if ((prot & (PROT_WRITE | PROT_EXEC)) ==
1964 	    (PROT_WRITE | PROT_EXEC))
1965 		return;
1966 
1967 	if (prot == PROT_NONE) {
1968 		pmap_remove(pm, sva, eva);
1969 		return;
1970 	}
1971 
1972 	mtx_enter(&pm->pm_mtx);
1973 	sva = sva & ~PGOFSET;
1974 	while (sva < eva) {
1975 		/*
1976 		 * Is this part of the permanent 4MB mapping?
1977 		 */
1978 		if (pm == pmap_kernel() && sva >= ktext &&
1979 			sva < roundup(ekdata, 4*MEG)) {
1980 			prom_printf("pmap_protect: va=%08x in locked TLB\r\n", sva);
1981 			OF_enter();
1982 			mtx_leave(&pm->pm_mtx);
1983 			return;
1984 		}
1985 
1986 		if (((data = pseg_get(pm, sva))&TLB_V) /*&& ((data&TLB_TSB_LOCK) == 0)*/) {
1987 			pa = data & TLB_PA_MASK;
1988 			pv = pa_to_pvh(pa);
1989 			if (pv != NULL) {
1990 				struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
1991 
1992 				/* Save REF/MOD info */
1993 				mtx_enter(&pg->mdpage.pvmtx);
1994 				pv->pv_va |= pmap_tte2flags(data);
1995 				mtx_leave(&pg->mdpage.pvmtx);
1996 			}
1997 			/* Just do the pmap and TSB, not the pv_list */
1998 			if (CPU_ISSUN4V) {
1999 				if ((prot & PROT_WRITE) == 0)
2000 					data &= ~(SUN4V_TLB_W|SUN4V_TLB_REAL_W);
2001 				if ((prot & PROT_EXEC) == 0)
2002 					data &= ~(SUN4V_TLB_EXEC);
2003 			} else {
2004 				if ((prot & PROT_WRITE) == 0)
2005 					data &= ~(SUN4U_TLB_W|SUN4U_TLB_REAL_W);
2006 				if ((prot & PROT_EXEC) == 0)
2007 					data &= ~(SUN4U_TLB_EXEC);
2008 			}
2009 			KDASSERT((data & TLB_NFO) == 0);
2010 			if (pseg_set(pm, sva, data, 0)) {
2011 				printf("pmap_protect: gotten pseg empty!\n");
2012 				Debugger();
2013 				/* panic? */
2014 			}
2015 
2016 			if (pm->pm_ctx || pm == pmap_kernel()) {
2017 				tsb_invalidate(pm->pm_ctx, sva);
2018 				tlb_flush_pte(sva, pm->pm_ctx);
2019 			}
2020 		}
2021 		sva += NBPG;
2022 	}
2023 	mtx_leave(&pm->pm_mtx);
2024 }
2025 
2026 /*
2027  * Extract the physical page address associated
2028  * with the given map/virtual_address pair.
2029  */
2030 boolean_t
2031 pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
2032 {
2033 	paddr_t pa;
2034 
2035 	if (pm == pmap_kernel() && va >= kdata &&
2036 		va < roundup(ekdata, 4*MEG)) {
2037 		/* Need to deal w/locked TLB entry specially. */
2038 		pa = (paddr_t) (kdatap - kdata + va);
2039 	} else if (pm == pmap_kernel() && va >= ktext && va < ektext) {
2040 		/* Need to deal w/locked TLB entry specially. */
2041 		pa = (paddr_t) (ktextp - ktext + va);
2042 	} else if (pm == pmap_kernel() && va >= INTSTACK && va < EINTSTACK) {
2043 		pa = curcpu()->ci_paddr + va - INTSTACK;
2044 	} else {
2045 		int s;
2046 
2047 		s = splvm();
2048 		pa = (pseg_get(pm, va) & TLB_PA_MASK) + (va & PGOFSET);
2049 		splx(s);
2050 	}
2051 	if (pa == 0)
2052 		return (FALSE);
2053 	if (pap != NULL)
2054 		*pap = pa;
2055 	return (TRUE);
2056 }
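
/*
 * Editor's illustration, not part of the original source: typical
 * pmap_extract() usage, translating a kernel virtual address into the
 * physical address that backs it.  "buf" is a hypothetical buffer.
 */
#ifdef NOTYET
paddr_t
example_kva_to_pa(void *buf)
{
	paddr_t pa;

	if (!pmap_extract(pmap_kernel(), (vaddr_t)buf, &pa))
		panic("example_kva_to_pa: %p is not mapped", buf);
	return (pa);
}
#endif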
2057 
2058 /*
2059  * Return the number of bytes that pmap_dumpmmu() will dump.
2060  */
2061 int
2062 pmap_dumpsize(void)
2063 {
2064 	int	sz;
2065 
2066 	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
2067 	sz += memsize * sizeof(phys_ram_seg_t);
2068 
2069 	return btodb(sz + DEV_BSIZE - 1);
2070 }
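
/*
 * Editor's note, not part of the original source: the size above is the
 * two aligned headers plus one phys_ram_seg_t record per physical memory
 * segment, and btodb(sz + DEV_BSIZE - 1) rounds that byte count up to
 * whole DEV_BSIZE disk blocks.  With only a handful of memory segments
 * the headers and records will typically fit in a single block.
 */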
2071 
2072 /*
2073  * Write the mmu contents to the dump device.
2074  * This gets appended to the end of a crash dump since
2075  * there is no in-core copy of the kernel memory mappings.
2076  *
2077  * Write the core dump headers and MD data to the dump device.
2078  * We dump the following items:
2079  *
2080  *	kcore_seg_t		 (MI header defined in <sys/kcore.h>)
2081  *	cpu_kcore_hdr_t		 (MD header defined in <machine/kcore.h>)
2082  *	phys_ram_seg_t[memsize]  physical memory segments
2083  */
2084 int
2085 pmap_dumpmmu(int (*dump)(dev_t, daddr_t, caddr_t, size_t), daddr_t blkno)
2086 {
2087 	kcore_seg_t	*kseg;
2088 	cpu_kcore_hdr_t	*kcpu;
2089 	phys_ram_seg_t	memseg;
2090 	register int	error = 0;
2091 	register int	i, memsegoffset;
2092 	int		buffer[dbtob(1) / sizeof(int)];
2093 	int		*bp, *ep;
2094 
2095 #define EXPEDITE(p,n) do {						\
2096 	int *sp = (int *)(p);						\
2097 	int sz = (n);							\
2098 	while (sz > 0) {						\
2099 		*bp++ = *sp++;						\
2100 		if (bp >= ep) {						\
2101 			error = (*dump)(dumpdev, blkno,			\
2102 					(caddr_t)buffer, dbtob(1));	\
2103 			if (error != 0)					\
2104 				return (error);				\
2105 			++blkno;					\
2106 			bp = buffer;					\
2107 		}							\
2108 		sz -= 4;						\
2109 	}								\
2110 } while (0)
2111 
2112 	/* Setup bookkeeping pointers */
2113 	bp = buffer;
2114 	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
2115 
2116 	/* Fill in MI segment header */
2117 	kseg = (kcore_seg_t *)bp;
2118 	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2119 	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
2120 
2121 	/* Fill in MD segment header (interpreted by MD part of libkvm) */
2122 	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
2123 	kcpu->cputype = CPU_SUN4U;
2124 	kcpu->kernbase = (u_int64_t)KERNBASE;
2125 	kcpu->cpubase = (u_int64_t)CPUINFO_VA;
2126 
2127 	/* Describe the locked text segment */
2128 	kcpu->ktextbase = (u_int64_t)ktext;
2129 	kcpu->ktextp = (u_int64_t)ktextp;
2130 	kcpu->ktextsz = (u_int64_t)(roundup(ektextp, 4*MEG) - ktextp);
2131 
2132 	/* Describe locked data segment */
2133 	kcpu->kdatabase = (u_int64_t)kdata;
2134 	kcpu->kdatap = (u_int64_t)kdatap;
2135 	kcpu->kdatasz = (u_int64_t)(roundup(ekdatap, 4*MEG) - kdatap);
2136 
2137 	/* Now the memsegs */
2138 	kcpu->nmemseg = memsize;
2139 	kcpu->memsegoffset = memsegoffset = ALIGN(sizeof(cpu_kcore_hdr_t));
2140 
2141 	/* Now we need to point this at our kernel pmap. */
2142 	kcpu->nsegmap = STSZ;
2143 	kcpu->segmapoffset = (u_int64_t)pmap_kernel()->pm_physaddr;
2144 
2145 	/* Note: we have assumed everything fits in buffer[] so far... */
2146 	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
2147 
2148 	for (i = 0; i < memsize; i++) {
2149 		memseg.start = mem[i].start;
2150 		memseg.size = mem[i].size;
2151 		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
2152 	}
2153 
2154 	if (bp != buffer)
2155 		error = (*dump)(dumpdev, blkno++, (caddr_t)buffer, dbtob(1));
2156 
2157 	return (error);
2158 }
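
/*
 * Editor's illustration, not part of the original source: a hedged
 * sketch of how a consumer of the dump (e.g. the MD part of libkvm)
 * could be expected to walk the headers written above, using the
 * memsegoffset recorded in the cpu_kcore_hdr_t.  "hdr" is a
 * hypothetical pointer to the first block read back from the dump.
 */
#ifdef NOTYET
void
example_parse_dump_header(void *hdr)
{
	cpu_kcore_hdr_t *kcpu = (cpu_kcore_hdr_t *)
	    ((char *)hdr + ALIGN(sizeof(kcore_seg_t)));
	phys_ram_seg_t *memsegs = (phys_ram_seg_t *)
	    ((char *)kcpu + kcpu->memsegoffset);
	int i;

	for (i = 0; i < kcpu->nmemseg; i++)
		printf("memseg %d: start %llx size %llx\n", i,
		    (unsigned long long)memsegs[i].start,
		    (unsigned long long)memsegs[i].size);
}
#endif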
2159 
2160 /*
2161  * Determine (non)existence of physical page
2162  */
2163 int pmap_pa_exists(paddr_t pa)
2164 {
2165 	struct mem_region *mp;
2166 
2167 	/* Just go through physical memory list & see if we're there */
2168 	for (mp = mem; mp->size && mp->start <= pa; mp++)
2169 		if (mp->start <= pa && mp->start + mp->size >= pa)
2170 			return 1;
2171 	return 0;
2172 }
2173 
2174 /*
2175  * Lookup the appropriate TSB entry.
2176  *
2177  * Here is the full official pseudo code:
2178  *
2179  */
2180 
2181 #ifdef NOTYET
2182 int64 GenerateTSBPointer(
2183  	int64 va,		/* Missing VA			*/
2184  	PointerType type,	/* 8K_POINTER or 16K_POINTER	*/
2185  	int64 TSBBase,		/* TSB Register[63:13] << 13	*/
2186  	Boolean split,		/* TSB Register[12]		*/
2187  	int TSBSize)		/* TSB Register[2:0]		*/
2188 {
2189  	int64 vaPortion;
2190  	int64 TSBBaseMask;
2191  	int64 splitMask;
2192 
2193 	/* TSBBaseMask marks the bits from TSB Base Reg		*/
2194 	TSBBaseMask = 0xffffffffffffe000 <<
2195 		(split? (TSBsize + 1) : TSBsize);
2196 
2197 	/* Shift va towards lsb appropriately and		*/
2198 	/* zero out the original va page offset			*/
2199 	vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
2200 		0xfffffffffffffff0;
2201 
2202 	if (split) {
2203 		/* There's only one bit in question for split	*/
2204 		splitMask = 1 << (13 + TSBsize);
2205 		if (type == 8K_POINTER)
2206 			/* Make sure we're in the lower half	*/
2207 			vaPortion &= ~splitMask;
2208 		else
2209 			/* Make sure we're in the upper half	*/
2210 			vaPortion |= splitMask;
2211 	}
2212 	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
2213 }
2214 #endif
2215 /*
2216  * Of course, since we are not using a split TSB or variable page sizes,
2217  * we can optimize this a bit.
2218  *
2219  * The following only works for a unified 8K TSB.  It will find the slot
2220  * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
2221  */
2222 int
2223 ptelookup_va(vaddr_t va)
2224 {
2225 	long tsbptr;
2226 #define TSBBASEMASK	(0xffffffffffffe000LL<<tsbsize)
2227 
2228 	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK );
2229 	return (tsbptr/sizeof(pte_t));
2230 }
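
/*
 * Editor's worked example, not part of the original source, assuming
 * tsbsize == 0 (a 512-entry unified TSB) and 16-byte TSB entries:
 * TSBBASEMASK is then 0xffffffffffffe000, so ~TSBBASEMASK keeps the low
 * 13 bits and
 *
 *	tsbptr = (va >> 9) & 0x1ff0
 *	slot   = tsbptr / sizeof(pte_t) = (va >> 13) & 0x1ff
 *
 * i.e. the slot is just the 8K virtual page number modulo 512, which is
 * why the slot found here may belong to another mapping.
 */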
2231 
2232 /*
2233  * Do whatever is needed to sync the MOD/REF flags
2234  */
2235 
2236 boolean_t
2237 pmap_clear_modify(struct vm_page *pg)
2238 {
2239 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2240 	int changed = 0;
2241 	pv_entry_t pv;
2242 
2243 	/* Clear all mappings */
2244 	mtx_enter(&pg->mdpage.pvmtx);
2245 	pv = pa_to_pvh(pa);
2246 	if (pv->pv_va & PV_MOD)
2247 		changed |= 1;
2248 	pv->pv_va &= ~(PV_MOD);
2249 	if (pv->pv_pmap != NULL) {
2250 		for (; pv; pv = pv->pv_next) {
2251 			int64_t data;
2252 
2253 			/* First clear the mod bit in the PTE and make it R/O */
2254 			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2255 
2256 			/* Need to both clear the modify and write bits */
2257 			if (CPU_ISSUN4V) {
2258 				if (data & (SUN4V_TLB_MODIFY))
2259 					changed |= 1;
2260 				data &= ~(SUN4V_TLB_MODIFY|SUN4V_TLB_W);
2261 			} else {
2262 				if (data & (SUN4U_TLB_MODIFY))
2263 					changed |= 1;
2264 				data &= ~(SUN4U_TLB_MODIFY|SUN4U_TLB_W);
2265 			}
2266 			KDASSERT((data & TLB_NFO) == 0);
2267 			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2268 				printf("pmap_clear_modify: gotten pseg empty!\n");
2269 				Debugger();
2270 				/* panic? */
2271 			}
2272 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2273 				tsb_invalidate(pv->pv_pmap->pm_ctx,
2274 				    (pv->pv_va & PV_VAMASK));
2275 				tlb_flush_pte((pv->pv_va & PV_VAMASK),
2276 				    pv->pv_pmap->pm_ctx);
2277 			}
2278 			/* Then clear the mod bit in the pv */
2279 			if (pv->pv_va & PV_MOD)
2280 				changed |= 1;
2281 			pv->pv_va &= ~(PV_MOD);
2282 			dcache_flush_page(pa);
2283 		}
2284 	}
2285 	mtx_leave(&pg->mdpage.pvmtx);
2286 
2287 	return (changed);
2288 }
2289 
2290 boolean_t
2291 pmap_clear_reference(struct vm_page *pg)
2292 {
2293 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2294 	int changed = 0;
2295 	pv_entry_t pv;
2296 
2297 	/* Clear all references */
2298 	mtx_enter(&pg->mdpage.pvmtx);
2299 	pv = pa_to_pvh(pa);
2300 	if (pv->pv_va & PV_REF)
2301 		changed = 1;
2302 	pv->pv_va &= ~(PV_REF);
2303 	if (pv->pv_pmap != NULL) {
2304 		for (; pv; pv = pv->pv_next) {
2305 			int64_t data;
2306 
2307 			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2308 			if (CPU_ISSUN4V) {
2309 				if (data & SUN4V_TLB_ACCESS)
2310 					changed = 1;
2311 				data &= ~SUN4V_TLB_ACCESS;
2312 			} else {
2313 				if (data & SUN4U_TLB_ACCESS)
2314 					changed = 1;
2315 				data &= ~SUN4U_TLB_ACCESS;
2316 			}
2317 			KDASSERT((data & TLB_NFO) == 0);
2318 			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2319 				printf("pmap_clear_reference: gotten pseg empty!\n");
2320 				Debugger();
2321 				/* panic? */
2322 			}
2323 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2324 				tsb_invalidate(pv->pv_pmap->pm_ctx,
2325 				    (pv->pv_va & PV_VAMASK));
2326 /*
2327 				tlb_flush_pte(pv->pv_va & PV_VAMASK,
2328 					pv->pv_pmap->pm_ctx);
2329 */
2330 			}
2331 			if (pv->pv_va & PV_REF)
2332 				changed = 1;
2333 			pv->pv_va &= ~(PV_REF);
2334 		}
2335 	}
2336 	/* Stupidly, this will take a cache hit even on unmapped pages 8^( */
2337 	dcache_flush_page(VM_PAGE_TO_PHYS(pg));
2338 	mtx_leave(&pg->mdpage.pvmtx);
2339 
2340 	return (changed);
2341 }
2342 
2343 boolean_t
2344 pmap_is_modified(struct vm_page *pg)
2345 {
2346 	pv_entry_t pv, npv;
2347 	int mod = 0;
2348 
2349 	/* Check if any mapping has been modified */
2350 	mtx_enter(&pg->mdpage.pvmtx);
2351 	pv = &pg->mdpage.pvent;
2352 	if (pv->pv_va & PV_MOD)
2353 		mod = 1;
2354 	if (!mod && (pv->pv_pmap != NULL)) {
2355 		for (npv = pv; mod == 0 && npv && npv->pv_pmap; npv = npv->pv_next) {
2356 			int64_t data;
2357 
2358 			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2359 			if (pmap_tte2flags(data) & PV_MOD)
2360 				mod = 1;
2361 			/* Migrate modify info to head pv */
2362 			if (npv->pv_va & PV_MOD)
2363 				mod = 1;
2364 			npv->pv_va &= ~PV_MOD;
2365 		}
2366 	}
2367 	/* Save modify info */
2368 	if (mod)
2369 		pv->pv_va |= PV_MOD;
2370 	mtx_leave(&pg->mdpage.pvmtx);
2371 
2372 	return (mod);
2373 }
2374 
2375 boolean_t
2376 pmap_is_referenced(struct vm_page *pg)
2377 {
2378 	pv_entry_t pv, npv;
2379 	int ref = 0;
2380 
2381 	/* Check if any mapping has been referenced */
2382 	mtx_enter(&pg->mdpage.pvmtx);
2383 	pv = &pg->mdpage.pvent;
2384 	if (pv->pv_va & PV_REF)
2385 		ref = 1;
2386 	if (!ref && (pv->pv_pmap != NULL)) {
2387 		for (npv = pv; npv; npv = npv->pv_next) {
2388 			int64_t data;
2389 
2390 			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2391 			if (pmap_tte2flags(data) & PV_REF)
2392 				ref = 1;
2393 			/* Migrate reference info to head pv */
2394 			if (npv->pv_va & PV_REF)
2395 				ref = 1;
2396 			npv->pv_va &= ~PV_REF;
2397 		}
2398 	}
2399 	/* Save ref info */
2400 	if (ref)
2401 		pv->pv_va |= PV_REF;
2402 	mtx_leave(&pg->mdpage.pvmtx);
2403 
2404 	return (ref);
2405 }
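
/*
 * Editor's illustration, not part of the original source: a hedged
 * sketch of how the MI VM code can be expected to combine the four
 * routines above, e.g. when deciding whether a page must be cleaned
 * before it is reused.  The function name is hypothetical.
 */
#ifdef NOTYET
int
example_page_needs_cleaning(struct vm_page *pg)
{
	/* Pull MOD state out of the TTEs and the pv list... */
	if (pmap_is_modified(pg))
		return (1);

	/* ...and reset REF so future use of the page is noticed again. */
	pmap_clear_reference(pg);
	return (0);
}
#endif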
2406 
2407 /*
2408  *	Routine:	pmap_unwire
2409  *	Function:	Clear the wired attribute for a map/virtual-address
2410  *			pair.
2411  *	In/out conditions:
2412  *			The mapping must already exist in the pmap.
2413  */
2414 void
2415 pmap_unwire(struct pmap *pmap, vaddr_t va)
2416 {
2417 	int64_t data;
2418 
2419 	if (pmap == NULL)
2420 		return;
2421 
2422 	/*
2423 	 * Is this part of the permanent 4MB mapping?
2424 	 */
2425 	if (pmap == pmap_kernel() && va >= ktext &&
2426 		va < roundup(ekdata, 4*MEG)) {
2427 		prom_printf("pmap_unwire: va=%08x in locked TLB\r\n", va);
2428 		OF_enter();
2429 		return;
2430 	}
2431 	mtx_enter(&pmap->pm_mtx);
2432 	data = pseg_get(pmap, va & PV_VAMASK);
2433 
2434 	if (CPU_ISSUN4V)
2435 		data &= ~SUN4V_TLB_TSB_LOCK;
2436 	else
2437 		data &= ~SUN4U_TLB_TSB_LOCK;
2438 
2439 	if (pseg_set(pmap, va & PV_VAMASK, data, 0)) {
2440 		printf("pmap_unwire: gotten pseg empty!\n");
2441 		Debugger();
2442 		/* panic? */
2443 	}
2444 	mtx_leave(&pmap->pm_mtx);
2445 }
2446 
2447 /*
2448  * Lower the protection on the specified physical page.
2449  *
2450  * Never enable writing as it will break COW
2451  */
2452 void
2453 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2454 {
2455 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
2456 	pv_entry_t pv;
2457 	int64_t data, clear, set;
2458 
2459 	if (prot & PROT_WRITE)
2460 		return;
2461 
2462 	if (prot & (PROT_READ | PROT_EXEC)) {
2463 		/* copy_on_write */
2464 
2465 		set = TLB_V;
2466 		if (CPU_ISSUN4V) {
2467 			clear = SUN4V_TLB_REAL_W|SUN4V_TLB_W;
2468 			if (PROT_EXEC & prot)
2469 				set |= SUN4V_TLB_EXEC;
2470 			else
2471 				clear |= SUN4V_TLB_EXEC;
2472 		} else {
2473 			clear = SUN4U_TLB_REAL_W|SUN4U_TLB_W;
2474 			if (PROT_EXEC & prot)
2475 				set |= SUN4U_TLB_EXEC;
2476 			else
2477 				clear |= SUN4U_TLB_EXEC;
2478 			if (PROT_EXEC == prot)
2479 				set |= SUN4U_TLB_EXEC_ONLY;
2480 		}
2481 
2482 		pv = pa_to_pvh(pa);
2483 		mtx_enter(&pg->mdpage.pvmtx);
2484 		if (pv->pv_pmap != NULL) {
2485 			for (; pv; pv = pv->pv_next) {
2486 				data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2487 
2488 				/* Save REF/MOD info */
2489 				pv->pv_va |= pmap_tte2flags(data);
2490 
2491 				data &= ~(clear);
2492 				data |= (set);
2493 				KDASSERT((data & TLB_NFO) == 0);
2494 				if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, data, 0)) {
2495 					printf("pmap_page_protect: gotten pseg empty!\n");
2496 					Debugger();
2497 					/* panic? */
2498 				}
2499 				if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2500 					tsb_invalidate(pv->pv_pmap->pm_ctx,
2501 					    (pv->pv_va & PV_VAMASK));
2502 					tlb_flush_pte(pv->pv_va & PV_VAMASK, pv->pv_pmap->pm_ctx);
2503 				}
2504 			}
2505 		}
2506 		mtx_leave(&pg->mdpage.pvmtx);
2507 	} else {
2508 		pv_entry_t firstpv;
2509 		/* remove mappings */
2510 
2511 		firstpv = pa_to_pvh(pa);
2512 		mtx_enter(&pg->mdpage.pvmtx);
2513 
2514 		/* First remove the entire list of continuation pv's */
2515 		while ((pv = firstpv->pv_next) != NULL) {
2516 			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2517 
2518 			/* Save REF/MOD info */
2519 			firstpv->pv_va |= pmap_tte2flags(data);
2520 
2521 			/* Clear mapping */
2522 			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) {
2523 				printf("pmap_page_protect: gotten pseg empty!\n");
2524 				Debugger();
2525 				/* panic? */
2526 			}
2527 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2528 				tsb_invalidate(pv->pv_pmap->pm_ctx,
2529 				    (pv->pv_va & PV_VAMASK));
2530 				tlb_flush_pte(pv->pv_va & PV_VAMASK, pv->pv_pmap->pm_ctx);
2531 			}
2532 			pv->pv_pmap->pm_stats.resident_count--;
2533 
2534 			/* free the pv */
2535 			firstpv->pv_next = pv->pv_next;
2536 			mtx_leave(&pg->mdpage.pvmtx);
2537 			pool_put(&pv_pool, pv);
2538 			mtx_enter(&pg->mdpage.pvmtx);
2539 		}
2540 
2541 		pv = firstpv;
2542 
2543 		/* Then remove the primary pv */
2544 		if (pv->pv_pmap != NULL) {
2545 			data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2546 
2547 			/* Save REF/MOD info */
2548 			pv->pv_va |= pmap_tte2flags(data);
2549 			if (pseg_set(pv->pv_pmap, pv->pv_va & PV_VAMASK, 0, 0)) {
2550 				printf("pmap_page_protect: gotten pseg empty!\n");
2551 				Debugger();
2552 				/* panic? */
2553 			}
2554 			if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2555 				tsb_invalidate(pv->pv_pmap->pm_ctx,
2556 				    (pv->pv_va & PV_VAMASK));
2557 				tlb_flush_pte(pv->pv_va & PV_VAMASK,
2558 				    pv->pv_pmap->pm_ctx);
2559 			}
2560 			pv->pv_pmap->pm_stats.resident_count--;
2561 			KASSERT(pv->pv_next == NULL);
2562 			/* dump the first pv */
2563 			pv->pv_pmap = NULL;
2564 		}
2565 		dcache_flush_page(pa);
2566 		mtx_leave(&pg->mdpage.pvmtx);
2567 	}
2568 	/* We should really only flush the pages we demapped. */
2569 }
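
/*
 * Editor's illustration, not part of the original source: the two ways
 * pmap_page_protect() is typically driven by the MI layer, matching the
 * two branches above.  Both calls are hedged sketches, not taken from
 * this file.
 */
#ifdef NOTYET
void
example_page_protect(struct vm_page *pg)
{
	/* Write-protect every mapping, e.g. to set up copy-on-write. */
	pmap_page_protect(pg, PROT_READ);

	/* Remove every mapping of the page, e.g. before freeing it. */
	pmap_page_protect(pg, PROT_NONE);
}
#endif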
2570 
2571 /*
2572  * Allocate a context.  If necessary, steal one from someone else.
2573  * Changes hardware context number and loads segment map.
2574  *
2575  * This routine is only ever called from locore.s just after it has
2576  * saved away the previous process, so there are no active user windows.
2577  *
2578  * The new context is flushed from the TLB before returning.
2579  */
2580 int
2581 ctx_alloc(struct pmap *pm)
2582 {
2583 	int s, cnum;
2584 	static int next = 0;
2585 
2586 	if (pm == pmap_kernel()) {
2587 #ifdef DIAGNOSTIC
2588 		printf("ctx_alloc: kernel pmap!\n");
2589 #endif
2590 		return (0);
2591 	}
2592 	s = splvm();
2593 	cnum = next;
2594 	do {
2595 		/*
2596 		 * We use the last context as an "invalid" context in
2597 		 * TSB tags. Never allocate it (or bad things will happen).
2598 		 */
2599 		if (cnum >= numctx - 2)
2600 			cnum = 0;
2601 	} while (ctxbusy[++cnum] != 0 && cnum != next);
2602 	if (cnum == 0) cnum++; /* Never steal ctx 0 */
2603 	if (ctxbusy[cnum]) {
2604 		int i;
2605 		/* We gotta steal this context */
2606 		for (i = 0; i < TSBENTS; i++) {
2607 			if (TSB_TAG_CTX(tsb_dmmu[i].tag) == cnum)
2608 				tsb_dmmu[i].tag = TSB_TAG_INVALID;
2609 			if (TSB_TAG_CTX(tsb_immu[i].tag) == cnum)
2610 				tsb_immu[i].tag = TSB_TAG_INVALID;
2611 		}
2612 		tlb_flush_ctx(cnum);
2613 	}
2614 	ctxbusy[cnum] = pm->pm_physaddr;
2615 	next = cnum;
2616 	splx(s);
2617 	pm->pm_ctx = cnum;
2618 	return cnum;
2619 }
2620 
2621 /*
2622  * Give away a context.
2623  */
2624 void
2625 ctx_free(struct pmap *pm)
2626 {
2627 	int oldctx;
2628 
2629 	oldctx = pm->pm_ctx;
2630 
2631 	if (oldctx == 0)
2632 		panic("ctx_free: freeing kernel context");
2633 #ifdef DIAGNOSTIC
2634 	if (ctxbusy[oldctx] == 0)
2635 		printf("ctx_free: freeing free context %d\n", oldctx);
2636 	if (ctxbusy[oldctx] != pm->pm_physaddr) {
2637 		printf("ctx_free: freeing someone else's context\n "
2638 		       "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
2639 		       oldctx, (void *)(u_long)ctxbusy[oldctx], pm,
2640 		       (void *)(u_long)pm->pm_physaddr);
2641 		Debugger();
2642 	}
2643 #endif
2644 	/* We should verify it has not been stolen and reallocated... */
2645 	ctxbusy[oldctx] = 0;
2646 }
2647 
2648 /*
2649  * Enter the pmap and virtual address into the
2650  * physical to virtual map table.
2651  */
2652 void
2653 pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa)
2654 {
2655 	pv_entry_t pv, npv = NULL;
2656 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2657 
2658 	pv = pa_to_pvh(pa);
2659 	mtx_enter(&pg->mdpage.pvmtx);
2660 
2661 retry:
2662 	if (pv->pv_pmap == NULL) {
2663 		/*
2664 		 * No entries yet, use header as the first entry
2665 		 */
2666 		PV_SETVA(pv, va);
2667 		pv->pv_pmap = pmap;
2668 		pv->pv_next = NULL;
2669 		mtx_leave(&pg->mdpage.pvmtx);
2670 		if (npv)
2671 			pool_put(&pv_pool, npv);
2672 		return;
2673 	}
2674 
2675 	if (npv == NULL) {
2676 		mtx_leave(&pg->mdpage.pvmtx);
2677 		npv = pool_get(&pv_pool, PR_NOWAIT);
2678 		if (npv == NULL)
2679 			panic("%s: no pv entries available", __func__);
2680 		mtx_enter(&pg->mdpage.pvmtx);
2681 		goto retry;
2682 	}
2683 
2684 	if (!(pv->pv_va & PV_ALIAS)) {
2685 		/*
2686 		 * There is at least one other VA mapping this page.
2687 		 * Check if they are cache index compatible. If not,
2688 		 * remove all mappings, flush the cache and set the page
2689 		 * to be mapped uncached. Caching will be restored
2690 		 * when pages are mapped compatible again.
2691 		 * XXX - caching is not currently being restored, but
2692 		 * XXX - I haven't seen the pages uncached since
2693 		 * XXX - using pmap_prefer().	mhitch
2694 		 */
2695 		if ((pv->pv_va ^ va) & VA_ALIAS_MASK) {
2696 			pv->pv_va |= PV_ALIAS;
2697 			pmap_page_cache(pmap, pa, 0);
2698 		}
2699 	}
2700 
2701 	/*
2702 	 * There is at least one other VA mapping this page.
2703 	 * Place this entry after the header.
2704 	 */
2705 	npv->pv_va = va & PV_VAMASK;
2706 	npv->pv_pmap = pmap;
2707 	npv->pv_next = pv->pv_next;
2708 	pv->pv_next = npv;
2709 
2710 	mtx_leave(&pg->mdpage.pvmtx);
2711 }
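
/*
 * Editor's worked example, not part of the original source: the alias
 * test above flags two virtual addresses as cache-incompatible when
 * they differ in the VA_ALIAS_MASK bits, i.e. when they would select
 * different D$ indexes for the same physical page:
 *
 *	aliased = ((va1 ^ va2) & VA_ALIAS_MASK) != 0;
 *
 * Once that happens the page is mapped uncached via
 * pmap_page_cache(pmap, pa, 0); pmap_remove_pv() re-checks the alias
 * when a mapping goes away and may re-enable caching.
 */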
2712 
2713 /*
2714  * Remove a physical to virtual address translation.
2715  */
2716 void
2717 pmap_remove_pv(struct pmap *pmap, vaddr_t va, paddr_t pa)
2718 {
2719 	pv_entry_t pv, opv, npv = NULL;
2720 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2721 	int64_t data = 0LL;
2722 
2723 	opv = pv = pa_to_pvh(pa);
2724 	mtx_enter(&pg->mdpage.pvmtx);
2725 
2726 	/*
2727 	 * If it is the first entry on the list, it is actually
2728 	 * in the header and we must copy the following entry up
2729 	 * to the header.  Otherwise we must search the list for
2730 	 * the entry.  In either case we free the now unused entry.
2731 	 */
2732 	if (pmap == pv->pv_pmap && PV_MATCH(pv, va)) {
2733 		/* Save modified/ref bits */
2734 		data = pseg_get(pv->pv_pmap, pv->pv_va & PV_VAMASK);
2735 		npv = pv->pv_next;
2736 		if (npv) {
2737 			/* First save mod/ref bits */
2738 			pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va;
2739 			pv->pv_next = npv->pv_next;
2740 			pv->pv_pmap = npv->pv_pmap;
2741 		} else {
2742 			pv->pv_pmap = NULL;
2743 			pv->pv_next = NULL;
2744 			pv->pv_va &= (PV_REF|PV_MOD); /* Only save ref/mod bits */
2745 		}
2746 	} else {
2747 		for (npv = pv->pv_next; npv; pv = npv, npv = npv->pv_next) {
2748 			if (pmap == npv->pv_pmap && PV_MATCH(npv, va))
2749 				goto found;
2750 		}
2751 
2752 		/*
2753 		 * Sometimes UVM gets confused and calls pmap_remove() instead
2754 		 * of pmap_kremove()
2755 		 */
2756 		mtx_leave(&pg->mdpage.pvmtx);
2757 		return;
2758 found:
2759 		pv->pv_next = npv->pv_next;
2760 
2761 		/*
2762 		 * move any referenced/modified info to the base pv
2763 		 */
2764 		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2765 
2766 		/*
2767 		 * Here, if this page was aliased, we should try clear out any
2768 		 * alias that may have occurred.  However, that's a complicated
2769 		 * operation involving multiple scans of the pv list.
2770 		 */
2771 	}
2772 
2773 	/* Save REF/MOD info */
2774 	opv->pv_va |= pmap_tte2flags(data);
2775 
2776 	/* Check to see if the alias went away */
2777 	if (opv->pv_va & PV_ALIAS) {
2778 		opv->pv_va &= ~PV_ALIAS;
2779 		for (pv = opv; pv; pv = pv->pv_next) {
2780 			if ((pv->pv_va ^ opv->pv_va) & VA_ALIAS_MASK) {
2781 				opv->pv_va |= PV_ALIAS;
2782 			}
2783 		}
2784 		if (!(opv->pv_va & PV_ALIAS))
2785 			pmap_page_cache(pmap, pa, 1);
2786 	}
2787 
2788 	mtx_leave(&pg->mdpage.pvmtx);
2789 
2790 	if (npv)
2791 		pool_put(&pv_pool, npv);
2792 }
2793 
2794 /*
2795  *	pmap_page_cache:
2796  *
2797  *	Change all mappings of a page to cached/uncached.
2798  */
2799 void
2800 pmap_page_cache(struct pmap *pm, paddr_t pa, int mode)
2801 {
2802 	pv_entry_t pv;
2803 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2804 
2805 	if (CPU_ISSUN4US || CPU_ISSUN4V)
2806 		return;
2807 
2808 	pv = &pg->mdpage.pvent;
2809 	if (pv == NULL)
2810 		return;
2811 
2812 	MUTEX_ASSERT_LOCKED(&pg->mdpage.pvmtx);
2813 
2814 	while (pv) {
2815 		vaddr_t va;
2816 
2817 		va = (pv->pv_va & PV_VAMASK);
2818 		if (pv->pv_va & PV_NC) {
2819 			/* Non-cached -- I/O mapping */
2820 			if (pseg_set(pv->pv_pmap, va,
2821 			    pseg_get(pv->pv_pmap, va) & ~(SUN4U_TLB_CV|SUN4U_TLB_CP),
2822 				     0)) {
2823 				printf("pmap_page_cache: aliased pseg empty!\n");
2824 				Debugger();
2825 				/* panic? */
2826 			}
2827 		} else if (mode && (!(pv->pv_va & PV_NVC))) {
2828 			/* Enable caching */
2829 			if (pseg_set(pv->pv_pmap, va,
2830 			    pseg_get(pv->pv_pmap, va) | SUN4U_TLB_CV, 0)) {
2831 				printf("pmap_page_cache: aliased pseg empty!\n");
2832 				Debugger();
2833 				/* panic? */
2834 			}
2835 		} else {
2836 			/* Disable caching */
2837 			if (pseg_set(pv->pv_pmap, va,
2838 			    pseg_get(pv->pv_pmap, va) & ~SUN4U_TLB_CV, 0)) {
2839 				printf("pmap_page_cache: aliased pseg empty!\n");
2840 				Debugger();
2841 				/* panic? */
2842 			}
2843 		}
2844 		if (pv->pv_pmap->pm_ctx || pv->pv_pmap == pmap_kernel()) {
2845 			tsb_invalidate(pv->pv_pmap->pm_ctx, va);
2846 			/* Force reload -- protections may be changed */
2847 			tlb_flush_pte(va, pv->pv_pmap->pm_ctx);
2848 		}
2849 
2850 		pv = pv->pv_next;
2851 	}
2852 }
2853 
2854 int
2855 pmap_get_page(paddr_t *pa, const char *wait, struct pmap *pm)
2856 {
2857 	int reserve = pm == pmap_kernel() ? UVM_PGA_USERESERVE : 0;
2858 
2859 	if (uvm.page_init_done) {
2860 		struct vm_page *pg;
2861 
2862 		while ((pg = uvm_pagealloc(NULL, 0, NULL,
2863 		    UVM_PGA_ZERO|reserve)) == NULL) {
2864 			if (wait == NULL)
2865 				return 0;
2866 			uvm_wait(wait);
2867 		}
2868 		pg->wire_count++;
2869 		atomic_clearbits_int(&pg->pg_flags, PG_BUSY);
2870 		*pa = VM_PAGE_TO_PHYS(pg);
2871 	} else {
2872 		uvm_page_physget(pa);
2873 		pmap_zero_phys(*pa);
2874 	}
2875 
2876 	return (1);
2877 }
2878 
2879 void
2880 pmap_free_page(paddr_t pa, struct pmap *pm)
2881 {
2882 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
2883 
2884 	pg->wire_count = 0;
2885 	uvm_pagefree(pg);
2886 }
2887 
2888 void
2889 pmap_remove_holes(struct vmspace *vm)
2890 {
2891 	vaddr_t shole, ehole;
2892 	struct vm_map *map = &vm->vm_map;
2893 
2894 	/*
2895 	 * Although the hardware only supports 44-bit virtual addresses
2896 	 * (and thus a hole from 1 << 43 to -1 << 43), this pmap
2897 	 * implementation itself only supports 43-bit virtual addresses,
2898 	 * so we have to widen the hole a bit more.
2899 	 */
2900 	shole = 1L << (HOLESHIFT - 1);
2901 	ehole = -1L << (HOLESHIFT - 1);
2902 
2903 	shole = ulmax(vm_map_min(map), shole);
2904 	ehole = ulmin(vm_map_max(map), ehole);
2905 
2906 	if (ehole <= shole)
2907 		return;
2908 
2909 	(void)uvm_map(map, &shole, ehole - shole, NULL, UVM_UNKNOWN_OFFSET, 0,
2910 	    UVM_MAPFLAG(PROT_NONE, PROT_NONE, MAP_INHERIT_SHARE, MADV_RANDOM,
2911 	      UVM_FLAG_NOMERGE | UVM_FLAG_HOLE | UVM_FLAG_FIXED));
2912 }
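
/*
 * Editor's worked example, not part of the original source, assuming
 * HOLESHIFT is 43 on this pmap: the reserved hole then spans
 *
 *	shole = 1L << 42  = 0x0000040000000000
 *	ehole = -1L << 42 = 0xfffffc0000000000
 *
 * clipped to the map's own minimum and maximum addresses before the
 * PROT_NONE placeholder entry is installed.
 */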
2913 
2914 #ifdef DDB
2915 
2916 void
2917 db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, char *modif)
2918 {
2919 	struct pv_entry *pv;
2920 
2921 	if (!have_addr) {
2922 		db_printf("Need addr for pv\n");
2923 		return;
2924 	}
2925 
2926 	for (pv = pa_to_pvh(addr); pv; pv = pv->pv_next)
2927 		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
2928 			  pv, pv->pv_next, pv->pv_pmap,
2929 			  (unsigned long long)pv->pv_va);
2930 
2931 }
2932 
2933 #endif
2934