1 /*	$NetBSD: pmap.c,v 1.303 2016/07/07 06:55:38 msaitoh Exp $	*/
2 /*
3  *
4  * Copyright (C) 1996-1999 Eduardo Horvath.
5  * All rights reserved.
6  *
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.303 2016/07/07 06:55:38 msaitoh Exp $");
30 
31 #undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
32 #define	HWREF
33 
34 #include "opt_ddb.h"
35 #include "opt_multiprocessor.h"
36 #include "opt_modular.h"
37 
38 #include <sys/param.h>
39 #include <sys/malloc.h>
40 #include <sys/queue.h>
41 #include <sys/systm.h>
42 #include <sys/msgbuf.h>
43 #include <sys/pool.h>
44 #include <sys/exec.h>
45 #include <sys/core.h>
46 #include <sys/kcore.h>
47 #include <sys/proc.h>
48 #include <sys/atomic.h>
49 #include <sys/cpu.h>
50 
51 #include <sys/exec_aout.h>	/* for MID_* */
52 #include <sys/reboot.h>
53 
54 #include <uvm/uvm.h>
55 
56 #include <machine/pcb.h>
57 #include <machine/sparc64.h>
58 #include <machine/ctlreg.h>
59 #include <machine/promlib.h>
60 #include <machine/kcore.h>
61 #include <machine/bootinfo.h>
62 #ifdef SUN4V
63 #include <machine/hypervisor.h>
64 #endif
65 #include <machine/mdesc.h>
66 
67 #include <sparc64/sparc64/cache.h>
68 
69 #ifdef DDB
70 #include <machine/db_machdep.h>
71 #include <ddb/db_command.h>
72 #include <ddb/db_sym.h>
73 #include <ddb/db_variables.h>
74 #include <ddb/db_extern.h>
75 #include <ddb/db_access.h>
76 #include <ddb/db_output.h>
77 #else
78 #define Debugger()
79 #define db_printf	printf
80 #endif
81 
82 #define	MEG		(1<<20) /* 1MB */
83 #define	KB		(1<<10)	/* 1KB */
84 
85 paddr_t cpu0paddr;		/* contiguous phys memory preallocated for cpus */
86 
87 /* These routines are in assembly to allow access through physical mappings */
88 extern int64_t pseg_get_real(struct pmap *, vaddr_t);
89 extern int pseg_set_real(struct pmap *, vaddr_t, int64_t, paddr_t);
90 
91 /*
92  * Diatribe on ref/mod counting:
93  *
94  * First of all, ref/mod info must be non-volatile.  Hence we need to keep it
95  * in the pv_entry structure for each page.  (We could bypass this for the
96  * vm_page, but that's a long story....)
97  *
98  * This architecture has nice, fast traps with lots of space for software bits
99  * in the TTE.  To accelerate ref/mod counts we make use of these features.
100  *
101  * When we map a page initially, we place a TTE in the page table.  It's
102  * inserted with the TLB_W and TLB_ACCESS bits cleared.  If a page is really
103  * writable we set the TLB_REAL_W bit for the trap handler.
104  *
105  * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
106  * bit in the appropriate TTE in the page table.  Whenever we take a protection
107  * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
108  * bits to enable writing and mark the page as modified.
109  *
110  * This means that we may have ref/mod information all over the place.  The
111  * pmap routines must traverse the page tables of all pmaps with a given page
112  * and collect/clear all the ref/mod information and copy it into the pv_entry.
113  */
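/*
 * Illustrative sketch (not compiled; the real work is done in the
 * assembly trap handlers) of the promotion described above when a
 * protection fault hits a really-writable page, using the TLB_W/TLB_MOD
 * bit names from the comment above:
 *
 *	data = pseg_get(pm, va);
 *	if (data & TLB_REAL_W) {
 *		data |= TLB_W | TLB_MOD;	(now writable and modified)
 *		pseg_set(pm, va, data, 0);
 *		tlb_flush_pte(va, pm);		(let the new TTE be loaded)
 *	}
 */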
114 
115 #ifdef	NO_VCACHE
116 #define	FORCE_ALIAS	1
117 #else
118 #define FORCE_ALIAS	0
119 #endif
120 
121 #define	PV_ALIAS	0x1LL
122 #define PV_REF		0x2LL
123 #define PV_MOD		0x4LL
124 #define PV_NVC		0x8LL
125 #define PV_NC		0x10LL
126 #define PV_WE		0x20LL	/* Debug -- this page was writable at some time */
127 #define PV_MASK		(0x03fLL)
128 #define PV_VAMASK	(~(PAGE_SIZE - 1))
129 #define PV_MATCH(pv,va)	(!(((pv)->pv_va ^ (va)) & PV_VAMASK))
130 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | \
131 					(((pv)->pv_va) & PV_MASK)))
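/*
 * Example of the packing above (assuming 8KB pages): a referenced and
 * modified mapping at va 0x2000 stores pv_va == 0x2000|PV_REF|PV_MOD
 * == 0x2006, and PV_MATCH(pv, 0x2000) still holds because PV_VAMASK
 * strips the low flag bits before comparing.
 */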
132 
133 struct pool_cache pmap_cache;
134 struct pool_cache pmap_pv_cache;
135 
136 pv_entry_t	pmap_remove_pv(struct pmap *, vaddr_t, struct vm_page *);
137 void	pmap_enter_pv(struct pmap *, vaddr_t, paddr_t, struct vm_page *,
138 			   pv_entry_t);
139 void	pmap_page_cache(struct pmap *, paddr_t, int);
140 
141 /*
142  * First and last managed physical addresses.
143  * XXX only used for dumping the system.
144  */
145 paddr_t	vm_first_phys, vm_num_phys;
146 
147 /*
148  * Here's the CPU TSB stuff.  It's allocated in pmap_bootstrap.
149  */
150 int tsbsize;		/* tsbents = 512 * 2^tsbsize */
151 #define TSBENTS (512<<tsbsize)
152 #define	TSBSIZE	(TSBENTS * 16)
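/*
 * For reference: tsbsize 0/1/2 gives 512/1024/2048 entries per TSB,
 * i.e. 8KB/16KB/32KB per TSB at 16 bytes per tag/data pair.
 */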
153 
154 static struct pmap kernel_pmap_;
155 struct pmap *const kernel_pmap_ptr = &kernel_pmap_;
156 
157 static int ctx_alloc(struct pmap *);
158 static bool pmap_is_referenced_locked(struct vm_page *);
159 
160 static void ctx_free(struct pmap *, struct cpu_info *);
161 
162 /* set dmmu secondary context */
163 static __inline void
164 dmmu_set_secondary_context(uint ctx)
165 {
166 	if (!CPU_ISSUN4V)
167 		__asm volatile(
168 			"stxa %0,[%1]%2;	"
169 			"membar #Sync		"
170 			: : "r" (ctx), "r" (CTX_SECONDARY), "n" (ASI_DMMU)
171 			: "memory");
172 	else
173 		__asm volatile(
174 			"stxa %0,[%1]%2;	"
175 			"membar #Sync		"
176 			: : "r" (ctx), "r" (CTX_SECONDARY), "n" (ASI_MMU_CONTEXTID)
177 			: "memory");
178 
179 }
180 
181 /*
182  * Check if any MMU has a non-zero context
183  */
184 static inline bool
185 pmap_has_ctx(struct pmap *p)
186 {
187 	int i;
188 
189 	/* any context on any cpu? */
190 	for (i = 0; i < sparc_ncpus; i++)
191 		if (p->pm_ctx[i] > 0)
192 			return true;
193 
194 	return false;
195 }
196 
197 #ifdef MULTIPROCESSOR
198 #define pmap_ctx(PM)	((PM)->pm_ctx[cpu_number()])
199 #else
200 #define pmap_ctx(PM)	((PM)->pm_ctx[0])
201 #endif
202 
203 /*
204  * Check if this pmap has a live mapping on some MMU.
205  */
206 static inline bool
207 pmap_is_on_mmu(struct pmap *p)
208 {
209 	/* The kernel pmap is always on all MMUs */
210 	if (p == pmap_kernel())
211 		return true;
212 
213 	return pmap_has_ctx(p);
214 }
215 
216 /*
217  * Virtual and physical addresses of the start and end of kernel text
218  * and data segments.
219  */
220 vaddr_t ktext;
221 paddr_t ktextp;
222 vaddr_t ektext;
223 paddr_t ektextp;
224 vaddr_t kdata;
225 paddr_t kdatap;
226 vaddr_t ekdata;
227 paddr_t ekdatap;
228 
229 /*
230  * Kernel 4MB pages.
231  */
232 extern struct tlb_entry *kernel_tlbs;
233 extern int kernel_dtlb_slots, kernel_itlb_slots;
234 
235 static int npgs;
236 
237 vaddr_t	vmmap;			/* one reserved MI vpage for /dev/mem */
238 
239 int phys_installed_size;		/* Installed physical memory */
240 struct mem_region *phys_installed;
241 
242 paddr_t avail_start, avail_end;	/* These are used by ps & family */
243 
244 static int ptelookup_va(vaddr_t va);
245 
246 static inline void
247 clrx(void *addr)
248 {
249 	__asm volatile("clrx [%0]" : : "r" (addr) : "memory");
250 }
251 
252 static void
253 tsb_invalidate(vaddr_t va, pmap_t pm)
254 {
255 	struct cpu_info *ci;
256 	int ctx;
257 	bool kpm = (pm == pmap_kernel());
258 	int i;
259 	int64_t tag;
260 
261 	i = ptelookup_va(va);
262 #ifdef MULTIPROCESSOR
263 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
264 		if (!CPUSET_HAS(cpus_active, ci->ci_index))
265 			continue;
266 #else
267 		ci = curcpu();
268 #endif
269 		ctx = pm->pm_ctx[ci->ci_index];
270 		if (kpm || ctx > 0) {
271 			tag = TSB_TAG(0, ctx, va);
272 			if (ci->ci_tsb_dmmu[i].tag == tag) {
273 				clrx(&ci->ci_tsb_dmmu[i].data);
274 			}
275 			if (ci->ci_tsb_immu[i].tag == tag) {
276 				clrx(&ci->ci_tsb_immu[i].data);
277 			}
278 		}
279 #ifdef MULTIPROCESSOR
280 	}
281 #endif
282 }
283 
284 struct prom_map *prom_map;
285 int prom_map_size;
286 
287 #define	PDB_CREATE		0x000001
288 #define	PDB_DESTROY		0x000002
289 #define	PDB_REMOVE		0x000004
290 #define	PDB_CHANGEPROT		0x000008
291 #define	PDB_ENTER		0x000010
292 #define	PDB_DEMAP		0x000020	/* used in locore */
293 #define	PDB_REF			0x000040
294 #define	PDB_COPY		0x000080
295 #define	PDB_MMU_ALLOC		0x000100
296 #define	PDB_MMU_STEAL		0x000200
297 #define	PDB_CTX_ALLOC		0x000400
298 #define	PDB_CTX_STEAL		0x000800
299 #define	PDB_MMUREG_ALLOC	0x001000
300 #define	PDB_MMUREG_STEAL	0x002000
301 #define	PDB_CACHESTUFF		0x004000
302 #define	PDB_ALIAS		0x008000
303 #define PDB_EXTRACT		0x010000
304 #define	PDB_BOOT		0x020000
305 #define	PDB_BOOT1		0x040000
306 #define	PDB_GROW		0x080000
307 #define	PDB_CTX_FLUSHALL	0x100000
308 #define	PDB_ACTIVATE		0x200000
309 
310 #if defined(DEBUG) && !defined(PMAP_DEBUG)
311 #define PMAP_DEBUG
312 #endif
313 
314 #ifdef PMAP_DEBUG
315 struct {
316 	int kernel;	/* entering kernel mapping */
317 	int user;	/* entering user mapping */
318 	int ptpneeded;	/* needed to allocate a PT page */
319 	int pwchange;	/* no mapping change, just wiring or protection */
320 	int wchange;	/* no mapping change, just wiring */
321 	int mchange;	/* was mapped but mapping to different page */
322 	int managed;	/* a managed page */
323 	int firstpv;	/* first mapping for this PA */
324 	int secondpv;	/* second mapping for this PA */
325 	int ci;		/* cache inhibited */
326 	int unmanaged;	/* not a managed page */
327 	int flushes;	/* cache flushes */
328 	int cachehit;	/* new entry forced valid entry out */
329 } enter_stats;
330 struct {
331 	int calls;
332 	int removes;
333 	int flushes;
334 	int tflushes;	/* TLB flushes */
335 	int pidflushes;	/* HW pid stolen */
336 	int pvfirst;
337 	int pvsearch;
338 } remove_stats;
339 #define	ENTER_STAT(x)	do { enter_stats.x ++; } while (0)
340 #define	REMOVE_STAT(x)	do { remove_stats.x ++; } while (0)
341 
342 int	pmapdebug = 0;
343 //int	pmapdebug = 0 | PDB_CTX_ALLOC | PDB_ACTIVATE;
344 /* Number of H/W pages stolen for page tables */
345 int	pmap_pages_stolen = 0;
346 
347 #define	BDPRINTF(n, f)	if (pmapdebug & (n)) prom_printf f
348 #define	DPRINTF(n, f)	if (pmapdebug & (n)) printf f
349 #else
350 #define	ENTER_STAT(x)	do { /* nothing */ } while (0)
351 #define	REMOVE_STAT(x)	do { /* nothing */ } while (0)
352 #define	BDPRINTF(n, f)
353 #define	DPRINTF(n, f)
354 #define pmapdebug 0
355 #endif
356 
357 #define pv_check()
358 
359 static int pmap_get_page(paddr_t *);
360 static void pmap_free_page(paddr_t, sparc64_cpuset_t);
361 static void pmap_free_page_noflush(paddr_t);
362 
363 /*
364  * Global pmap locks.
365  */
366 static kmutex_t pmap_lock;
367 static bool lock_available = false;
368 
369 /*
370  * Support for big page sizes.  This maps the page size to the
371  * page bits.  That is: these are the bits between 8K pages and
372  * larger page sizes that cause aliasing.
373  */
374 #define PSMAP_ENTRY(MASK, CODE)	{ .mask = MASK, .code = CODE }
375 struct page_size_map page_size_map[] = {
376 #ifdef DEBUG
377 	PSMAP_ENTRY(0, PGSZ_8K & 0),	/* Disable large pages */
378 #endif
379 	PSMAP_ENTRY((4 * 1024 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_4M),
380 	PSMAP_ENTRY((512 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_512K),
381 	PSMAP_ENTRY((64 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_64K),
382 	PSMAP_ENTRY((8 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_8K),
383 	PSMAP_ENTRY(0, 0),
384 };
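/*
 * Example of how the table above is consulted (mirrors the PROM-mapping
 * loop in pmap_bootstrap() below): a PROM translation whose va/pa are
 * 4MB aligned and whose size is at least 4MB matches the PGSZ_4M entry
 * first, since (vstart | tte) & mask == 0 and the mask is smaller than
 * vsize; an unaligned or small mapping falls through to the final
 * PGSZ_8K entry (mask 0), which is used as the default.
 */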
385 
386 /*
387  * This probably shouldn't be necessary, but it stops USIII machines from
388  * breaking in general, and not just for MULTIPROCESSOR.
389  */
390 #define USE_LOCKSAFE_PSEG_GETSET
391 #if defined(USE_LOCKSAFE_PSEG_GETSET)
392 
393 static kmutex_t pseg_lock;
394 
395 static __inline__ int64_t
396 pseg_get_locksafe(struct pmap *pm, vaddr_t va)
397 {
398 	int64_t rv;
399 	bool took_lock = lock_available /*&& pm == pmap_kernel()*/;
400 
401 	if (__predict_true(took_lock))
402 		mutex_enter(&pseg_lock);
403 	rv = pseg_get_real(pm, va);
404 	if (__predict_true(took_lock))
405 		mutex_exit(&pseg_lock);
406 	return rv;
407 }
408 
409 static __inline__ int
410 pseg_set_locksafe(struct pmap *pm, vaddr_t va, int64_t data, paddr_t ptp)
411 {
412 	int rv;
413 	bool took_lock = lock_available /*&& pm == pmap_kernel()*/;
414 
415 	if (__predict_true(took_lock))
416 		mutex_enter(&pseg_lock);
417 	rv = pseg_set_real(pm, va, data, ptp);
418 	if (__predict_true(took_lock))
419 		mutex_exit(&pseg_lock);
420 	return rv;
421 }
422 
423 #define pseg_get(pm, va)		pseg_get_locksafe(pm, va)
424 #define pseg_set(pm, va, data, ptp)	pseg_set_locksafe(pm, va, data, ptp)
425 
426 #else /* USE_LOCKSAFE_PSEG_GETSET */
427 
428 #define pseg_get(pm, va)		pseg_get_real(pm, va)
429 #define pseg_set(pm, va, data, ptp)	pseg_set_real(pm, va, data, ptp)
430 
431 #endif /* USE_LOCKSAFE_PSEG_GETSET */
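/*
 * Note on the pseg_set() return value as interpreted by its callers in
 * this file (see pmap_enter() and pmap_kenter_pa() below): bit 0 means
 * a spare page-table page is needed and the call must be retried with
 * one supplied in 'ptp'; bits 1 and 2 report that the supplied page was
 * consumed as an L2 or L3 table respectively; -2 means the VA lies in
 * the unmapped address hole.
 */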
432 
433 /*
434  * Enter a TTE into the kernel pmap only.  Don't do anything else.
435  *
436  * Use only during bootstrapping since it does no locking and
437  * can lose ref/mod info!!!!
438  *
439  */
440 static void pmap_enter_kpage(vaddr_t va, int64_t data)
441 {
442 	paddr_t newp;
443 
444 	newp = 0UL;
445 	while (pseg_set(pmap_kernel(), va, data, newp) & 1) {
446 		if (!pmap_get_page(&newp)) {
447 			prom_printf("pmap_enter_kpage: out of pages\n");
448 			panic("pmap_enter_kpage");
449 		}
450 
451 		ENTER_STAT(ptpneeded);
452 		BDPRINTF(PDB_BOOT1,
453 			 ("pseg_set: pm=%p va=%p data=%lx newp %lx\n",
454 			  pmap_kernel(), va, (long)data, (long)newp));
455 		if (pmapdebug & PDB_BOOT1)
456 		{int i; for (i=0; i<140000000; i++) ;}
457 	}
458 }
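/*
 * Typical bootstrap usage, as done for the msgbuf and lwp0 USPACE
 * mappings in pmap_bootstrap() below (sketch only):
 *
 *	data = TSB_DATA(0, PGSZ_8K, pa, 1, 1, 1, FORCE_ALIAS, 1, 0);
 *	pmap_enter_kpage(va, data);
 */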
459 
460 /*
461  * Check the bootargs to see if we need to enable bootdebug.
462  */
463 #ifdef DEBUG
464 static void pmap_bootdebug(void)
465 {
466 	const char *cp = prom_getbootargs();
467 
468 	for (;;)
469 		switch (*++cp) {
470 		case '\0':
471 			return;
472 		case 'V':
473 			pmapdebug |= PDB_BOOT|PDB_BOOT1;
474 			break;
475 		case 'D':
476 			pmapdebug |= PDB_BOOT1;
477 			break;
478 		}
479 }
480 #else
481 #define pmap_bootdebug()	/* nothing */
482 #endif
483 
484 
485 /*
486  * Calculate the correct number of page colors to use.  This should be
487  * E$ size / associativity / PAGE_SIZE.  However, different CPUs can have
488  * differently sized E$, so we take the largest such value of any CPU.
489  */
490 static int pmap_calculate_colors(void)
491 {
492 	int node;
493 	int size, assoc, color, maxcolor = 1;
494 
495 	for (node = prom_firstchild(prom_findroot()); node != 0;
496 	     node = prom_nextsibling(node)) {
497 		char *name = prom_getpropstring(node, "device_type");
498 		if (strcmp("cpu", name) != 0)
499 			continue;
500 
501 		/* Found a CPU, get the E$ info. */
502 		size = cpu_ecache_size(node);
503 		if (size == 0) {
504 			prom_printf("pmap_calculate_colors: node %x has "
505 				"no ecache-size\n", node);
506 			/* If we can't get the E$ size, skip the node */
507 			continue;
508 		}
509 
510 		assoc = cpu_ecache_associativity(node);
511 		color = size/assoc/PAGE_SIZE;
512 		if (color > maxcolor)
513 			maxcolor = color;
514 	}
515 	return (maxcolor);
516 }
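/*
 * For example, a CPU with a 4MB direct-mapped E$ and 8KB pages yields
 * 4MB / 1 / 8KB = 512 colors, while a 1MB 4-way E$ yields only 32;
 * the larger value wins above.
 */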
517 
518 static void pmap_alloc_bootargs(void)
519 {
520 	char *v;
521 
522 	v = OF_claim(NULL, 2*PAGE_SIZE, PAGE_SIZE);
523 	if ((v == NULL) || (v == (void*)-1))
524 		panic("Can't claim two pages of memory.");
525 
526 	memset(v, 0, 2*PAGE_SIZE);
527 
528 	cpu_args = (struct cpu_bootargs*)v;
529 }
530 
531 #if defined(MULTIPROCESSOR)
532 static void pmap_mp_init(void);
533 
534 static void
535 pmap_mp_init(void)
536 {
537 	pte_t *tp;
538 	char *v;
539 	int i;
540 
541 	extern void cpu_mp_startup(void);
542 
543 	if ((v = OF_claim(NULL, PAGE_SIZE, PAGE_SIZE)) == NULL) {
544 		panic("pmap_mp_init: Cannot claim a page.");
545 	}
546 
547 	memcpy(v, mp_tramp_code, mp_tramp_code_len);
548 	*(u_long *)(v + mp_tramp_dtlb_slots) = kernel_dtlb_slots;
549 	*(u_long *)(v + mp_tramp_itlb_slots) = kernel_itlb_slots;
550 	*(u_long *)(v + mp_tramp_func) = (u_long)cpu_mp_startup;
551 	*(u_long *)(v + mp_tramp_ci) = (u_long)cpu_args;
552 	tp = (pte_t *)(v + mp_tramp_code_len);
553 	for (i = 0; i < kernel_dtlb_slots; i++) {
554 		tp[i].tag  = kernel_tlbs[i].te_va;
555 		tp[i].data = TSB_DATA(0,		/* g */
556 				PGSZ_4M,		/* sz */
557 				kernel_tlbs[i].te_pa,	/* pa */
558 				1, /* priv */
559 				0, /* write */
560 				1, /* cache */
561 				1, /* aliased */
562 				1, /* valid */
563 				0 /* ie */);
564 		tp[i].data |= TLB_L | TLB_CV;
565 
566 		if (i >= kernel_itlb_slots) {
567 			tp[i].data |= TLB_W;
568 		} else {
569 			if (CPU_ISSUN4V)
570 				tp[i].data |= SUN4V_TLB_X;
571 		}
572 
573 		DPRINTF(PDB_BOOT1, ("xtlb[%d]: Tag: %" PRIx64 " Data: %"
574 				PRIx64 "\n", i, tp[i].tag, tp[i].data));
575 	}
576 
577 	for (i = 0; i < PAGE_SIZE; i += sizeof(long))
578 		flush(v + i);
579 
580 	cpu_spinup_trampoline = (vaddr_t)v;
581 }
582 #else
583 #define pmap_mp_init()	((void)0)
584 #endif
585 
586 paddr_t pmap_kextract(vaddr_t va);
587 
588 paddr_t
589 pmap_kextract(vaddr_t va)
590 {
591 	int i;
592 	paddr_t paddr = (paddr_t)-1;
593 
594 	for (i = 0; i < kernel_dtlb_slots; i++) {
595 		if ((va & ~PAGE_MASK_4M) == kernel_tlbs[i].te_va) {
596 			paddr = kernel_tlbs[i].te_pa +
597 				(paddr_t)(va & PAGE_MASK_4M);
598 			break;
599 		}
600 	}
601 
602 	if (i == kernel_dtlb_slots) {
603 		panic("pmap_kextract: Address %p is not from kernel space.\n"
604 				"Data segment is too small?\n", (void*)va);
605 	}
606 
607 	return (paddr);
608 }
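/*
 * Used below in pmap_bootstrap() to turn VAs covered by the locked 4MB
 * kernel mappings back into physical addresses, e.g.:
 *
 *	ktextp = pmap_kextract(ktext);
 */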
609 
610 /*
611  * Bootstrap kernel allocator, allocates from unused space in 4MB kernel
612  * data segment, meaning that:
613  *
614  * - Access to allocated memory will never generate a trap
615  * - Allocated chunks are never reclaimed or freed
616  * - Allocation calls do not change PROM memlists
617  */
618 static struct mem_region kdata_mem_pool;
619 
620 static void
621 kdata_alloc_init(vaddr_t va_start, vaddr_t va_end)
622 {
623 	vsize_t va_size = va_end - va_start;
624 
625 	kdata_mem_pool.start = va_start;
626 	kdata_mem_pool.size  = va_size;
627 
628 	BDPRINTF(PDB_BOOT, ("kdata_alloc_init(): %d bytes @%p.\n", va_size,
629 				va_start));
630 }
631 
632 static vaddr_t
633 kdata_alloc(vsize_t size, vsize_t align)
634 {
635 	vaddr_t va;
636 	vsize_t asize;
637 
638 	asize = roundup(kdata_mem_pool.start, align) - kdata_mem_pool.start;
639 
640 	kdata_mem_pool.start += asize;
641 	kdata_mem_pool.size  -= asize;
642 
643 	if (kdata_mem_pool.size < size) {
644 		panic("kdata_alloc(): Data segment is too small.\n");
645 	}
646 
647 	va = kdata_mem_pool.start;
648 	kdata_mem_pool.start += size;
649 	kdata_mem_pool.size  -= size;
650 
651 	BDPRINTF(PDB_BOOT, ("kdata_alloc(): Allocated %d@%p, %d free.\n",
652 				size, (void*)va, kdata_mem_pool.size));
653 
654 	return (va);
655 }
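/*
 * Typical consumer (see cpu_pmap_prepare() below): the per-CPU TSBs are
 * carved out of this pool, e.g.
 *
 *	ci->ci_tsb_dmmu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
 */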
656 
657 /*
658  * Unified routine for reading PROM properties.
659  */
660 static void
661 pmap_read_memlist(const char *device, const char *property, void **ml,
662 		  int *ml_size, vaddr_t (* ml_alloc)(vsize_t, vsize_t))
663 {
664 	void *va;
665 	int size, handle;
666 
667 	if ( (handle = prom_finddevice(device)) == 0) {
668 		prom_printf("pmap_read_memlist(): No %s device found.\n",
669 				device);
670 		prom_halt();
671 	}
672 	if ( (size = OF_getproplen(handle, property)) < 0) {
673 		prom_printf("pmap_read_memlist(): %s/%s has no length.\n",
674 				device, property);
675 		prom_halt();
676 	}
677 	if ( (va = (void*)(* ml_alloc)(size, sizeof(uint64_t))) == NULL) {
678 		prom_printf("pmap_read_memlist(): Cannot allocate memlist.\n");
679 		prom_halt();
680 	}
681 	if (OF_getprop(handle, property, va, size) <= 0) {
682 		prom_printf("pmap_read_memlist(): Cannot read %s/%s.\n",
683 				device, property);
684 		prom_halt();
685 	}
686 
687 	*ml = va;
688 	*ml_size = size;
689 }
690 
691 /*
692  * This is called during bootstrap, before the system is really initialized.
693  *
694  * It's called with the start and end virtual addresses of the kernel.  We
695  * bootstrap the pmap allocator now.  We will allocate the basic structures we
696  * need to bootstrap the VM system here: the page frame tables, the TSB, and
697  * the free memory lists.
698  *
699  * Now all this is becoming a bit obsolete.  maxctx is still important, but by
700  * separating the kernel text and data segments we really would need to
701  * provide the start and end of each segment.  But we can't.  The rodata
702  * segment is attached to the end of the kernel segment and has nothing to
703  * delimit its end.  We could still pass in the beginning of the kernel and
704  * the beginning and end of the data segment but we could also just as easily
705  * calculate that all in here.
706  *
707  * To handle the kernel text, we need to do a reverse mapping of the start of
708  * the kernel, then traverse the free memory lists to find out how big it is.
709  */
710 
711 void
712 pmap_bootstrap(u_long kernelstart, u_long kernelend)
713 {
714 #ifdef MODULAR
715 	extern vaddr_t module_start, module_end;
716 #endif
717 	extern char etext[], data_start[];	/* start of data segment */
718 	extern int msgbufmapped;
719 	struct mem_region *mp, *mp1, *avail, *orig;
720 	int i, j, pcnt, msgbufsiz;
721 	size_t s, sz;
722 	int64_t data;
723 	vaddr_t va, intstk;
724 	uint64_t phys_msgbuf;
725 	paddr_t newp = 0;
726 
727 	void *prom_memlist;
728 	int prom_memlist_size;
729 
730 	BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\n"));
731 
732 	/* XXX - incomplete spinup code for SUN4V */
733 	if (CPU_ISSUN4V)
734 		boothowto |= RB_MD1;
735 
736 	cache_setup_funcs();
737 
738 	/*
739 	 * Calculate kernel size.
740 	 */
741 	ktext   = kernelstart;
742 	ktextp  = pmap_kextract(ktext);
743 	ektext  = roundup((vaddr_t)etext, PAGE_SIZE_4M);
744 	ektextp = roundup(pmap_kextract((vaddr_t)etext), PAGE_SIZE_4M);
745 
746 	kdata   = (vaddr_t)data_start;
747 	kdatap  = pmap_kextract(kdata);
748 	ekdata  = roundup(kernelend, PAGE_SIZE_4M);
749 	ekdatap = roundup(pmap_kextract(kernelend), PAGE_SIZE_4M);
750 
751 	BDPRINTF(PDB_BOOT, ("Virtual layout: text %lx-%lx, data %lx-%lx.\n",
752 				ktext, ektext, kdata, ekdata));
753 	BDPRINTF(PDB_BOOT, ("Physical layout: text %lx-%lx, data %lx-%lx.\n",
754 				ktextp, ektextp, kdatap, ekdatap));
755 
756 	/* Initialize bootstrap allocator. */
757 	kdata_alloc_init(kernelend + 1 * 1024 * 1024, ekdata);
758 
759 	/* make sure we have access to the mdesc data on SUN4V machines */
760 	if (CPU_ISSUN4V) {
761 		vaddr_t m_va;
762 		psize_t m_len;
763 		paddr_t m_pa;
764 
765 		m_len = mdesc_get_len();
766 		m_va = kdata_alloc(m_len, 16);
767 		m_pa = kdatap + (m_va - kdata);
768 		mdesc_init(m_va, m_pa, m_len);
769 	}
770 
771 	pmap_bootdebug();
772 	pmap_alloc_bootargs();
773 	pmap_mp_init();
774 
775 	/*
776 	 * set machine page size
777 	 */
778 	uvmexp.pagesize = NBPG;
779 	uvmexp.ncolors = pmap_calculate_colors();
780 	uvm_setpagesize();
781 
782 	/*
783 	 * Get hold of the message buffer.
784 	 */
785 	msgbufp = (struct kern_msgbuf *)(vaddr_t)MSGBUF_VA;
786 	msgbufsiz = MSGBUFSIZE;
787 	BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\n",
788 			    (long)msgbufp, (long)msgbufsiz));
789 	if ((long)msgbufp !=
790 	    (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
791 		prom_printf(
792 		    "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\n",
793 		    (void *)msgbufp, (long)phys_msgbuf);
794 	phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
795 	BDPRINTF(PDB_BOOT,
796 		("We should have the memory at %lx, let's map it in\n",
797 			phys_msgbuf));
798 	if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
799 			  -1/* sunos does this */) == -1) {
800 		prom_printf("Failed to map msgbuf\n");
801 	} else {
802 		BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\n",
803 			(void *)msgbufp));
804 	}
805 	msgbufmapped = 1;	/* enable message buffer */
806 	initmsgbuf((void *)msgbufp, msgbufsiz);
807 
808 	/*
809 	 * Find out how much RAM we have installed.
810 	 */
811 	BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\n"));
812 	pmap_read_memlist("/memory", "reg", &prom_memlist, &prom_memlist_size,
813 			kdata_alloc);
814 	phys_installed = prom_memlist;
815 	phys_installed_size = prom_memlist_size / sizeof(*phys_installed);
816 
817 	if (pmapdebug & PDB_BOOT1) {
818 		/* print out mem list */
819 		prom_printf("Installed physical memory:\n");
820 		for (i = 0; i < phys_installed_size; i++) {
821 			prom_printf("memlist start %lx size %lx\n",
822 					(u_long)phys_installed[i].start,
823 					(u_long)phys_installed[i].size);
824 		}
825 	}
826 
827 	BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
828 	for (i = 0; i < phys_installed_size; i++)
829 		physmem += btoc(phys_installed[i].size);
830 	BDPRINTF(PDB_BOOT1, (" result %x or %d pages\n",
831 			     (int)physmem, (int)physmem));
832 
833 	/*
834 	 * Calculate approx TSB size.  This probably needs tweaking.
835 	 */
836 	if (physmem < btoc(64 * 1024 * 1024))
837 		tsbsize = 0;
838 	else if (physmem < btoc(512 * 1024 * 1024))
839 		tsbsize = 1;
840 	else
841 		tsbsize = 2;
842 
843 	/*
844 	 * Save the prom translations
845 	 */
846 	pmap_read_memlist("/virtual-memory", "translations", &prom_memlist,
847 			&prom_memlist_size, kdata_alloc);
848 	prom_map = prom_memlist;
849 	prom_map_size = prom_memlist_size / sizeof(struct prom_map);
850 
851 	if (pmapdebug & PDB_BOOT) {
852 		/* print out mem list */
853 		prom_printf("Prom xlations:\n");
854 		for (i = 0; i < prom_map_size; i++) {
855 			prom_printf("start %016lx size %016lx tte %016lx\n",
856 				    (u_long)prom_map[i].vstart,
857 				    (u_long)prom_map[i].vsize,
858 				    (u_long)prom_map[i].tte);
859 		}
860 		prom_printf("End of prom xlations\n");
861 	}
862 
863 	/*
864 	 * Here's a quick in-lined reverse bubble sort.  It gets rid of
865 	 * any translations inside the kernel data VA range.
866 	 */
867 	for (i = 0; i < prom_map_size; i++) {
868 		for (j = i; j < prom_map_size; j++) {
869 			if (prom_map[j].vstart > prom_map[i].vstart) {
870 				struct prom_map tmp;
871 
872 				tmp = prom_map[i];
873 				prom_map[i] = prom_map[j];
874 				prom_map[j] = tmp;
875 			}
876 		}
877 	}
878 	if (pmapdebug & PDB_BOOT) {
879 		/* print out mem list */
880 		prom_printf("Prom xlations:\n");
881 		for (i = 0; i < prom_map_size; i++) {
882 			prom_printf("start %016lx size %016lx tte %016lx\n",
883 				    (u_long)prom_map[i].vstart,
884 				    (u_long)prom_map[i].vsize,
885 				    (u_long)prom_map[i].tte);
886 		}
887 		prom_printf("End of prom xlations\n");
888 	}
889 
890 	/*
891 	 * Allocate ncpu * 64KB of pages for the cpu_info & stack structures now.
892 	 */
893 	cpu0paddr = prom_alloc_phys(8 * PAGE_SIZE * sparc_ncpus, 8 * PAGE_SIZE);
894 	if (cpu0paddr == 0) {
895 		prom_printf("Cannot allocate cpu_infos\n");
896 		prom_halt();
897 	}
898 
899 	/*
900 	 * Now that the kernel text segment is in its final location, we can
901 	 * try to find out how much memory really is free.
902 	 */
903 	pmap_read_memlist("/memory", "available", &prom_memlist,
904 			&prom_memlist_size, kdata_alloc);
905 	orig = prom_memlist;
906 	sz  = prom_memlist_size;
907 	pcnt = prom_memlist_size / sizeof(*orig);
908 
909 	BDPRINTF(PDB_BOOT1, ("Available physical memory:\n"));
910 	avail = (struct mem_region*)kdata_alloc(sz, sizeof(uint64_t));
911 	for (i = 0; i < pcnt; i++) {
912 		avail[i] = orig[i];
913 		BDPRINTF(PDB_BOOT1, ("memlist start %lx size %lx\n",
914 					(u_long)orig[i].start,
915 					(u_long)orig[i].size));
916 	}
917 	BDPRINTF(PDB_BOOT1, ("End of available physical memory\n"));
918 
919 	BDPRINTF(PDB_BOOT, ("ktext %08lx[%08lx] - %08lx[%08lx] : "
920 				"kdata %08lx[%08lx] - %08lx[%08lx]\n",
921 				(u_long)ktext, (u_long)ktextp,
922 				(u_long)ektext, (u_long)ektextp,
923 				(u_long)kdata, (u_long)kdatap,
924 				(u_long)ekdata, (u_long)ekdatap));
925 	if (pmapdebug & PDB_BOOT1) {
926 		/* print out mem list */
927 		prom_printf("Available %lx physical memory before cleanup:\n",
928 			    (u_long)avail);
929 		for (i = 0; i < pcnt; i++) {
930 			prom_printf("memlist start %lx size %lx\n",
931 				    (u_long)avail[i].start,
932 				    (u_long)avail[i].size);
933 		}
934 		prom_printf("End of available physical memory before cleanup\n");
935 		prom_printf("kernel physical text size %08lx - %08lx\n",
936 			    (u_long)ktextp, (u_long)ektextp);
937 		prom_printf("kernel physical data size %08lx - %08lx\n",
938 			    (u_long)kdatap, (u_long)ekdatap);
939 	}
940 
941 	/*
942 	 * Here's another quick in-lined bubble sort.
943 	 */
944 	for (i = 0; i < pcnt; i++) {
945 		for (j = i; j < pcnt; j++) {
946 			if (avail[j].start < avail[i].start) {
947 				struct mem_region tmp;
948 				tmp = avail[i];
949 				avail[i] = avail[j];
950 				avail[j] = tmp;
951 			}
952 		}
953 	}
954 
955 	/* Throw away page zero if we have it. */
956 	if (avail->start == 0) {
957 		avail->start += PAGE_SIZE;
958 		avail->size -= PAGE_SIZE;
959 	}
960 
961 	/*
962 	 * Now we need to remove the area we valloc'ed from the available
963 	 * memory lists.  (NB: we may have already alloc'ed the entire space).
964 	 */
965 	npgs = 0;
966 	for (mp = avail, i = 0; i < pcnt; i++, mp = &avail[i]) {
967 		/*
968 		 * Now page align the start of the region.
969 		 */
970 		s = mp->start % PAGE_SIZE;
971 		if (mp->size >= s) {
972 			mp->size -= s;
973 			mp->start += s;
974 		}
975 		/*
976 		 * And now align the size of the region.
977 		 */
978 		mp->size -= mp->size % PAGE_SIZE;
979 		/*
980 		 * Check whether some memory is left here.
981 		 */
982 		if (mp->size == 0) {
983 			memcpy(mp, mp + 1,
984 			      (pcnt - (mp - avail)) * sizeof *mp);
985 			pcnt--;
986 			mp--;
987 			continue;
988 		}
989 		s = mp->start;
990 		sz = mp->size;
991 		npgs += btoc(sz);
992 		for (mp1 = avail; mp1 < mp; mp1++)
993 			if (s < mp1->start)
994 				break;
995 		if (mp1 < mp) {
996 			memcpy(mp1 + 1, mp1, (char *)mp - (char *)mp1);
997 			mp1->start = s;
998 			mp1->size = sz;
999 		}
1000 #ifdef DEBUG
1001 /* Clear all memory we give to the VM system.  I want to make sure
1002  * the PROM isn't using it for something, so this should break the PROM.
1003  */
1004 
1005 /* Calling pmap_zero_page() at this point also hangs some machines
1006  * so don't do it at all. -- pk 26/02/2002
1007  */
1008 #if 0
1009 		{
1010 			paddr_t p;
1011 			for (p = mp->start; p < mp->start+mp->size;
1012 			     p += PAGE_SIZE)
1013 				pmap_zero_page(p);
1014 		}
1015 #endif
1016 #endif /* DEBUG */
1017 		/*
1018 		 * In future we should be able to specify both allocated
1019 		 * and free.
1020 		 */
1021 		BDPRINTF(PDB_BOOT1, ("uvm_page_physload(%lx, %lx)\n",
1022 					(long)mp->start,
1023 					(long)(mp->start + mp->size)));
1024 		uvm_page_physload(
1025 			atop(mp->start),
1026 			atop(mp->start+mp->size),
1027 			atop(mp->start),
1028 			atop(mp->start+mp->size),
1029 			VM_FREELIST_DEFAULT);
1030 	}
1031 
1032 	if (pmapdebug & PDB_BOOT) {
1033 		/* print out mem list */
1034 		prom_printf("Available physical memory after cleanup:\n");
1035 		for (i = 0; i < pcnt; i++) {
1036 			prom_printf("avail start %lx size %lx\n",
1037 				    (long)avail[i].start, (long)avail[i].size);
1038 		}
1039 		prom_printf("End of available physical memory after cleanup\n");
1040 	}
1041 
1042 	/*
1043 	 * Allocate and clear out pmap_kernel()->pm_segs[]
1044 	 */
1045 	pmap_kernel()->pm_refs = 1;
1046 	memset(&pmap_kernel()->pm_ctx, 0, sizeof(pmap_kernel()->pm_ctx));
1047 
1048 	/* Throw away page zero */
1049 	do {
1050 		pmap_get_page(&newp);
1051 	} while (!newp);
1052 	pmap_kernel()->pm_segs=(paddr_t *)(u_long)newp;
1053 	pmap_kernel()->pm_physaddr = newp;
1054 
1055 	/*
1056 	 * finish filling out kernel pmap.
1057 	 */
1058 
1059 	BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\n",
1060 	    (long)pmap_kernel()->pm_physaddr));
1061 	/*
1062 	 * Tell pmap about our mesgbuf -- Hope this works already
1063 	 */
1064 	BDPRINTF(PDB_BOOT1, ("Calling consinit()\n"));
1065 	if (pmapdebug & PDB_BOOT1)
1066 		consinit();
1067 	BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\n"));
1068 	/* it's not safe to call pmap_enter so we need to do this ourselves */
1069 	va = (vaddr_t)msgbufp;
1070 	while (msgbufsiz) {
1071 		data = TSB_DATA(0 /* global */,
1072 			PGSZ_8K,
1073 			phys_msgbuf,
1074 			1 /* priv */,
1075 			1 /* Write */,
1076 			1 /* Cacheable */,
1077 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1078 			1 /* valid */,
1079 			0 /* IE */);
1080 		pmap_enter_kpage(va, data);
1081 		va += PAGE_SIZE;
1082 		msgbufsiz -= PAGE_SIZE;
1083 		phys_msgbuf += PAGE_SIZE;
1084 	}
1085 	BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\n"));
1086 
1087 	BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\n"));
1088 	for (i = 0; i < prom_map_size; i++)
1089 		if (prom_map[i].vstart && ((prom_map[i].vstart >> 32) == 0))
1090 			for (j = 0; j < prom_map[i].vsize; j += PAGE_SIZE) {
1091 				int k;
1092 
1093 				for (k = 0; page_size_map[k].mask; k++) {
1094 					if (((prom_map[i].vstart |
1095 					      prom_map[i].tte) &
1096 					      page_size_map[k].mask) == 0 &&
1097 					      page_size_map[k].mask <
1098 					      prom_map[i].vsize)
1099 						break;
1100 				}
1101 				page_size_map[k].use++;
1102 				/* Enter PROM map into pmap_kernel() */
1103 				pmap_enter_kpage(prom_map[i].vstart + j,
1104 					(prom_map[i].tte + j) | TLB_EXEC |
1105 					page_size_map[k].code);
1106 			}
1107 	BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\n"));
1108 
1109 	/*
1110 	 * Fix up start of kernel heap.
1111 	 */
1112 	vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
1113 	/* Let's keep 1 page of redzone after the kernel */
1114 	vmmap += PAGE_SIZE;
1115 	{
1116 		extern void main(void);
1117 		vaddr_t u0va;
1118 		paddr_t pa;
1119 
1120 		u0va = vmmap;
1121 
1122 		BDPRINTF(PDB_BOOT1,
1123 			("Inserting lwp0 USPACE into pmap_kernel() at %p\n",
1124 				vmmap));
1125 
1126 		while (vmmap < u0va + 2*USPACE) {
1127 			int64_t data1;
1128 
1129 			if (!pmap_get_page(&pa))
1130 				panic("pmap_bootstrap: no pages");
1131 			prom_map_phys(pa, PAGE_SIZE, vmmap, -1);
1132 			data1 = TSB_DATA(0 /* global */,
1133 				PGSZ_8K,
1134 				pa,
1135 				1 /* priv */,
1136 				1 /* Write */,
1137 				1 /* Cacheable */,
1138 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1139 				1 /* valid */,
1140 				0 /* IE */);
1141 			pmap_enter_kpage(vmmap, data1);
1142 			vmmap += PAGE_SIZE;
1143 		}
1144 		BDPRINTF(PDB_BOOT1,
1145 			 ("Done inserting stack 0 into pmap_kernel()\n"));
1146 
1147 		/* Now map in and initialize our cpu_info structure */
1148 #ifdef DIAGNOSTIC
1149 		vmmap += PAGE_SIZE; /* redzone -- XXXX do we need one? */
1150 #endif
1151 		if ((vmmap ^ INTSTACK) & VA_ALIAS_MASK)
1152 			vmmap += PAGE_SIZE; /* Matchup virtual color for D$ */
1153 		intstk = vmmap;
1154 		cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK);
1155 
1156 		BDPRINTF(PDB_BOOT1,
1157 			("Inserting cpu_info into pmap_kernel() at %p\n",
1158 				 cpus));
1159 		/* Now map in all 8 pages of interrupt stack/cpu_info */
1160 		pa = cpu0paddr;
1161 		prom_map_phys(pa, 64*KB, vmmap, -1);
1162 
1163 		/*
1164 		 * Also map it in as the interrupt stack.
1165 		 * This lets the PROM see this if needed.
1166 		 *
1167 		 * XXXX locore.s does not flush these mappings
1168 		 * before installing the locked TTE.
1169 		 */
1170 		prom_map_phys(pa, 64*KB, INTSTACK, -1);
1171 		for (i = 0; i < 8; i++) {
1172 			int64_t data1;
1173 
1174 			data1 = TSB_DATA(0 /* global */,
1175 				PGSZ_8K,
1176 				pa,
1177 				1 /* priv */,
1178 				1 /* Write */,
1179 				1 /* Cacheable */,
1180 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1181 				1 /* valid */,
1182 				0 /* IE */);
1183 			pmap_enter_kpage(vmmap, data1);
1184 			vmmap += PAGE_SIZE;
1185 			pa += PAGE_SIZE;
1186 		}
1187 		BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\n"));
1188 
1189 		/* Initialize our cpu_info structure */
1190 		memset((void *)intstk, 0, 64 * KB);
1191 		cpus->ci_self = cpus;
1192 		cpus->ci_next = NULL;
1193 		cpus->ci_curlwp = &lwp0;
1194 		cpus->ci_flags = CPUF_PRIMARY;
1195 		cpus->ci_cpuid = cpu_myid();
1196 		cpus->ci_fplwp = NULL;
1197 		cpus->ci_eintstack = NULL;
1198 		cpus->ci_spinup = main; /* Call main when we're running. */
1199 		cpus->ci_paddr = cpu0paddr;
1200 		if (CPU_ISSUN4V) {
1201 			cpus->ci_mmfsa = cpu0paddr;
1202 			cpus->ci_tsb_desc = NULL;
1203 		}
1204 		cpus->ci_cpcb = (struct pcb *)u0va;
1205 		cpus->ci_idepth = -1;
1206 		memset(cpus->ci_intrpending, -1, sizeof(cpus->ci_intrpending));
1207 
1208 		uvm_lwp_setuarea(&lwp0, u0va);
1209 		lwp0.l_md.md_tf = (struct trapframe64*)(u0va + USPACE
1210 		    - sizeof(struct trapframe64));
1211 
1212 		cpu0paddr += 64 * KB;
1213 
1214 		CPUSET_CLEAR(cpus_active);
1215 		CPUSET_ADD(cpus_active, 0);
1216 
1217 		cpu_pmap_prepare(cpus, true);
1218 		cpu_pmap_init(cpus);
1219 
1220 		/* The rest will be done at CPU attach time. */
1221 		BDPRINTF(PDB_BOOT1,
1222 			 ("Done inserting cpu_info into pmap_kernel()\n"));
1223 	}
1224 
1225 	vmmap = (vaddr_t)reserve_dumppages((void *)(u_long)vmmap);
1226 
1227 #ifdef MODULAR
1228 	/*
1229 	 * For 32bit kernels:
1230 	 *   Reserve 16 MB of VA for module loading. Right now our full
1231 	 *   GENERIC kernel is about 13 MB, so this looks good enough.
1232 	 * For 64bit kernels:
1233 	 *   We can use all the space left before the special addresses,
1234 	 *   but leave 2 pages at vmmap alone (see pmap_virtual_space)
1235 	 *   and another red zone page.
1236 	 */
1237 #ifdef __arch64__
1238 	module_start = vmmap + 3*PAGE_SIZE;
1239 	module_end = 0x08000000;	/* keep all modules within 2GB */
1240 	KASSERT(module_end < KERNEND);	/* of kernel text */
1241 #else
1242 	module_start = vmmap;
1243 	vmmap += 16 * 1024*1024;
1244 	module_end = vmmap;
1245 #endif
1246 #endif
1247 
1248 	/*
1249 	 * Set up bounds of allocatable memory for vmstat et al.
1250 	 */
1251 	avail_start = avail->start;
1252 	for (mp = avail; mp->size; mp++)
1253 		avail_end = mp->start+mp->size;
1254 
1255 	BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\n"));
1256 
1257 	BDPRINTF(PDB_BOOT, ("left kdata: %" PRId64 " @%" PRIx64 ".\n",
1258 				kdata_mem_pool.size, kdata_mem_pool.start));
1259 }
1260 
1261 /*
1262  * Allocate TSBs for both mmus from the locked kernel data segment page.
1263  * This is run before the cpu itself is activated (or by the first cpu
1264  * itself)
1265  */
1266 void
1267 cpu_pmap_prepare(struct cpu_info *ci, bool initial)
1268 {
1269 	/* allocate our TSBs */
1270 	ci->ci_tsb_dmmu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
1271 	ci->ci_tsb_immu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
1272 	memset(ci->ci_tsb_dmmu, 0, TSBSIZE);
1273 	memset(ci->ci_tsb_immu, 0, TSBSIZE);
1274 	if (!initial) {
1275 		KASSERT(ci != curcpu());
1276 		/*
1277 		 * Initially share ctxbusy with the boot cpu; the new
1278 		 * cpu will replace it as soon as it runs (and can
1279 		 * probe the number of available contexts itself).
1280 		 * Until then only context 0 (aka kernel) will be
1281 		 * referenced anyway.
1282 		 */
1283 		ci->ci_numctx = curcpu()->ci_numctx;
1284 		ci->ci_ctxbusy = curcpu()->ci_ctxbusy;
1285 	}
1286 
1287 	if (CPU_ISSUN4V) {
1288 		ci->ci_tsb_desc = (struct tsb_desc *)kdata_alloc(
1289 			sizeof(struct tsb_desc), 16);
1290 		memset(ci->ci_tsb_desc, 0, sizeof(struct tsb_desc));
1291 		/* 8K page size used for TSB index computation */
1292 		ci->ci_tsb_desc->td_idxpgsz = 0;
1293 		ci->ci_tsb_desc->td_assoc = 1;
1294 		ci->ci_tsb_desc->td_size = TSBENTS;
1295 		ci->ci_tsb_desc->td_ctxidx = -1;
1296 		ci->ci_tsb_desc->td_pgsz = 0xf;
1297 		ci->ci_tsb_desc->td_pa = pmap_kextract((vaddr_t)ci->ci_tsb_dmmu);
1298 		BDPRINTF(PDB_BOOT1, ("cpu %d: TSB descriptor allocated at %p "
1299 		    "size %08x - td_pa at %p\n",
1300 		    ci->ci_index, ci->ci_tsb_desc, sizeof(struct tsb_desc),
1301 		    ci->ci_tsb_desc->td_pa));
1302 	}
1303 
1304 	BDPRINTF(PDB_BOOT1, ("cpu %d: TSB allocated at %p/%p size %08x\n",
1305 	    ci->ci_index, ci->ci_tsb_dmmu, ci->ci_tsb_immu, TSBSIZE));
1306 }
1307 
1308 /*
1309  * Initialize the per CPU parts for the cpu running this code.
1310  */
1311 void
1312 cpu_pmap_init(struct cpu_info *ci)
1313 {
1314 	size_t ctxsize;
1315 
1316 	/*
1317 	 * We delay initialising ci_ctx_lock here as LOCKDEBUG isn't
1318 	 * running for cpu0 yet.
1319 	 */
1320 	ci->ci_pmap_next_ctx = 1;
1321 	/* all SUN4U use 13 bit contexts - SUN4V use at least 13 bit contexts */
1322 	ci->ci_numctx = 0x2000;
1323 	ctxsize = sizeof(paddr_t)*ci->ci_numctx;
1324 	ci->ci_ctxbusy = (paddr_t *)kdata_alloc(ctxsize, sizeof(uint64_t));
1325 	memset(ci->ci_ctxbusy, 0, ctxsize);
1326 	LIST_INIT(&ci->ci_pmap_ctxlist);
1327 
1328 	/* mark kernel context as busy */
1329 	ci->ci_ctxbusy[0] = pmap_kernel()->pm_physaddr;
1330 }
1331 
1332 /*
1333  * Initialize anything else for pmap handling.
1334  * Called during vm_init().
1335  */
1336 void
1337 pmap_init(void)
1338 {
1339 	struct vm_page *pg;
1340 	struct pglist pglist;
1341 	uint64_t data;
1342 	paddr_t pa;
1343 	psize_t size;
1344 	vaddr_t va;
1345 
1346 	BDPRINTF(PDB_BOOT1, ("pmap_init()\n"));
1347 
1348 	size = sizeof(struct pv_entry) * physmem;
1349 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
1350 		(paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0)
1351 		panic("pmap_init: no memory");
1352 
1353 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY);
1354 	if (va == 0)
1355 		panic("pmap_init: no memory");
1356 
1357 	/* Map the pages */
1358 	TAILQ_FOREACH(pg, &pglist, pageq.queue) {
1359 		pa = VM_PAGE_TO_PHYS(pg);
1360 		pmap_zero_page(pa);
1361 		data = TSB_DATA(0 /* global */,
1362 			PGSZ_8K,
1363 			pa,
1364 			1 /* priv */,
1365 			1 /* Write */,
1366 			1 /* Cacheable */,
1367 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1368 			1 /* valid */,
1369 			0 /* IE */);
1370 		pmap_enter_kpage(va, data);
1371 		va += PAGE_SIZE;
1372 	}
1373 
1374 	/*
1375 	 * initialize the pmap pools.
1376 	 */
1377 	pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap),
1378 	    SPARC64_BLOCK_SIZE, 0, 0, "pmappl", NULL, IPL_NONE, NULL, NULL,
1379 	    NULL);
1380 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
1381 	    PR_LARGECACHE, "pv_entry", NULL, IPL_NONE, NULL, NULL, NULL);
1382 
1383 	vm_first_phys = avail_start;
1384 	vm_num_phys = avail_end - avail_start;
1385 
1386 	mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE);
1387 #if defined(USE_LOCKSAFE_PSEG_GETSET)
1388 	mutex_init(&pseg_lock, MUTEX_SPIN, IPL_VM);
1389 #endif
1390 	lock_available = true;
1391 }
1392 
1393 /*
1394  * How much virtual space is available to the kernel?
1395  */
1396 static vaddr_t kbreak; /* End of kernel VA */
1397 void
1398 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1399 {
1400 
1401 	/*
1402 	 * Reserve one segment for kernel virtual memory.
1403 	 */
1404 #ifdef __arch64__
1405 	/*
1406 	 * On 64 bit kernels, start it beyond the firmware, so
1407 	 * we are basically unrestricted.
1408 	 */
1409 	*start = kbreak = VM_KERNEL_MEM_VA_START;
1410 	*end = VM_MAX_KERNEL_ADDRESS;
1411 #else
1412 	/*
1413 	 * Reserve two pages for pmap_copy_page && /dev/mem, but otherwise
1414 	 * end it beyond the iospace and other special fixed addresses.
1415 	 */
1416 	*start = kbreak = (vaddr_t)(vmmap + 2*PAGE_SIZE);
1417 	*end = VM_MAX_KERNEL_ADDRESS;
1418 #endif
1419 	BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\n", *start, *end));
1420 }
1421 
1422 /*
1423  * Preallocate kernel page tables to a specified VA.
1424  * This simply loops through the first TTE for each
1425  * page table from the beginning of the kernel pmap,
1426  * reads the entry, and if the result is
1427  * zero (either invalid entry or no page table) it stores
1428  * a zero there, populating page tables in the process.
1429  * This is not the most efficient technique but I don't
1430  * expect it to be called that often.
1431  */
1432 vaddr_t
1433 pmap_growkernel(vaddr_t maxkvaddr)
1434 {
1435 	struct pmap *pm = pmap_kernel();
1436 	paddr_t pa;
1437 
1438 	if (maxkvaddr >= VM_MAX_KERNEL_ADDRESS) {
1439 		printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
1440 		       (void *)VM_MAX_KERNEL_ADDRESS, (void *)maxkvaddr);
1441 		return (kbreak);
1442 	}
1443 	DPRINTF(PDB_GROW, ("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr));
1444 	/* Align with the start of a page table */
1445 	for (kbreak &= ((~0ULL) << PDSHIFT); kbreak < maxkvaddr;
1446 	     kbreak += (1 << PDSHIFT)) {
1447 		if (pseg_get(pm, kbreak) & TLB_V)
1448 			continue;
1449 
1450 		pa = 0;
1451 		while (pseg_set(pm, kbreak, 0, pa) & 1) {
1452 			DPRINTF(PDB_GROW,
1453 			    ("pmap_growkernel: extending %lx\n", kbreak));
1454 			pa = 0;
1455 			if (!pmap_get_page(&pa))
1456 				panic("pmap_growkernel: no pages");
1457 			ENTER_STAT(ptpneeded);
1458 		}
1459 	}
1460 	return (kbreak);
1461 }
1462 
1463 /*
1464  * Create and return a physical map.
1465  */
1466 struct pmap *
1467 pmap_create(void)
1468 {
1469 	struct pmap *pm;
1470 
1471 	DPRINTF(PDB_CREATE, ("pmap_create()\n"));
1472 
1473 	pm = pool_cache_get(&pmap_cache, PR_WAITOK);
1474 	memset(pm, 0, sizeof *pm);
1475 	DPRINTF(PDB_CREATE, ("pmap_create(): created %p\n", pm));
1476 
1477 	mutex_init(&pm->pm_obj_lock, MUTEX_DEFAULT, IPL_NONE);
1478 	uvm_obj_init(&pm->pm_obj, NULL, false, 1);
1479 	uvm_obj_setlock(&pm->pm_obj, &pm->pm_obj_lock);
1480 
1481 	if (pm != pmap_kernel()) {
1482 		while (!pmap_get_page(&pm->pm_physaddr)) {
1483 			uvm_wait("pmap_create");
1484 		}
1485 		pm->pm_segs = (paddr_t *)(u_long)pm->pm_physaddr;
1486 	}
1487 	DPRINTF(PDB_CREATE, ("pmap_create(%p): ctx %d\n", pm, pmap_ctx(pm)));
1488 	return pm;
1489 }
1490 
1491 /*
1492  * Add a reference to the given pmap.
1493  */
1494 void
1495 pmap_reference(struct pmap *pm)
1496 {
1497 
1498 	atomic_inc_uint(&pm->pm_refs);
1499 }
1500 
1501 /*
1502  * Retire the given pmap from service.
1503  * Should only be called if the map contains no valid mappings.
1504  */
1505 void
1506 pmap_destroy(struct pmap *pm)
1507 {
1508 #ifdef MULTIPROCESSOR
1509 	struct cpu_info *ci;
1510 	sparc64_cpuset_t pmap_cpus_active;
1511 #else
1512 #define pmap_cpus_active 0
1513 #endif
1514 	struct vm_page *pg, *nextpg;
1515 
1516 	if ((int)atomic_dec_uint_nv(&pm->pm_refs) > 0) {
1517 		return;
1518 	}
1519 	DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm));
1520 #ifdef MULTIPROCESSOR
1521 	CPUSET_CLEAR(pmap_cpus_active);
1522 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
1523 		/* XXXMRG: Move the lock inside one or both tests? */
1524 		mutex_enter(&ci->ci_ctx_lock);
1525 		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
1526 			if (pm->pm_ctx[ci->ci_index] > 0) {
1527 				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
1528 				ctx_free(pm, ci);
1529 			}
1530 		}
1531 		mutex_exit(&ci->ci_ctx_lock);
1532 	}
1533 #else
1534 	if (pmap_ctx(pm)) {
1535 		mutex_enter(&curcpu()->ci_ctx_lock);
1536 		ctx_free(pm, curcpu());
1537 		mutex_exit(&curcpu()->ci_ctx_lock);
1538 	}
1539 #endif
1540 
1541 	/* we could be a little smarter and leave pages zeroed */
1542 	for (pg = TAILQ_FIRST(&pm->pm_obj.memq); pg != NULL; pg = nextpg) {
1543 #ifdef DIAGNOSTIC
1544 		struct vm_page_md *md = VM_PAGE_TO_MD(pg);
1545 #endif
1546 
1547 		KASSERT((pg->flags & PG_MARKER) == 0);
1548 		nextpg = TAILQ_NEXT(pg, listq.queue);
1549 		TAILQ_REMOVE(&pm->pm_obj.memq, pg, listq.queue);
1550 		KASSERT(md->mdpg_pvh.pv_pmap == NULL);
1551 		dcache_flush_page_cpuset(VM_PAGE_TO_PHYS(pg), pmap_cpus_active);
1552 		uvm_pagefree(pg);
1553 	}
1554 	pmap_free_page((paddr_t)(u_long)pm->pm_segs, pmap_cpus_active);
1555 
1556 	uvm_obj_destroy(&pm->pm_obj, false);
1557 	mutex_destroy(&pm->pm_obj_lock);
1558 	pool_cache_put(&pmap_cache, pm);
1559 }
1560 
1561 /*
1562  * Copy the range specified by src_addr/len
1563  * from the source map to the range dst_addr/len
1564  * in the destination map.
1565  *
1566  * This routine is only advisory and need not do anything.
1567  */
1568 void
1569 pmap_copy(struct pmap *dst_pmap, struct pmap *src_pmap, vaddr_t dst_addr, vsize_t len, vaddr_t src_addr)
1570 {
1571 
1572 	DPRINTF(PDB_CREATE, ("pmap_copy(%p, %p, %p, %lx, %p)\n",
1573 			     dst_pmap, src_pmap, (void *)(u_long)dst_addr,
1574 			     (u_long)len, (void *)(u_long)src_addr));
1575 }
1576 
1577 /*
1578  * Activate the address space for the specified process.  If the
1579  * process is the current process, load the new MMU context.
1580  */
1581 void
1582 pmap_activate(struct lwp *l)
1583 {
1584 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
1585 
1586 	if (pmap == pmap_kernel()) {
1587 		return;
1588 	}
1589 
1590 	/*
1591 	 * This is essentially the same thing that happens in cpu_switchto()
1592 	 * when the newly selected process is about to run, except that we
1593 	 * have to make sure to clean the register windows before we set
1594 	 * the new context.
1595 	 */
1596 
1597 	if (l != curlwp) {
1598 		return;
1599 	}
1600 	write_user_windows();
1601 	pmap_activate_pmap(pmap);
1602 }
1603 
1604 void
1605 pmap_activate_pmap(struct pmap *pmap)
1606 {
1607 
1608 	if (pmap_ctx(pmap) == 0) {
1609 		(void) ctx_alloc(pmap);
1610 	}
1611 	DPRINTF(PDB_ACTIVATE,
1612 		("%s: cpu%d activating ctx %d\n", __func__,
1613 		 cpu_number(), pmap_ctx(pmap)));
1614 	dmmu_set_secondary_context(pmap_ctx(pmap));
1615 }
1616 
1617 /*
1618  * Deactivate the address space of the specified process.
1619  */
1620 void
1621 pmap_deactivate(struct lwp *l)
1622 {
1623 
1624 	DPRINTF(PDB_ACTIVATE,
1625 		("%s: cpu%d deactivating ctx %d\n", __func__,
1626 		 cpu_number(), pmap_ctx(l->l_proc->p_vmspace->vm_map.pmap)));
1627 }
1628 
1629 /*
1630  * pmap_kenter_pa:		[ INTERFACE ]
1631  *
1632  *	Enter a va -> pa mapping into the kernel pmap without any
1633  *	physical->virtual tracking.
1634  *
1635  *	Note: no locking is necessary in this function.
1636  */
1637 void
1638 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1639 {
1640 	pte_t tte;
1641 	paddr_t ptp;
1642 	struct pmap *pm = pmap_kernel();
1643 	int i;
1644 
1645 	KASSERT(va < INTSTACK || va > EINTSTACK);
1646 	KASSERT(va < kdata || va > ekdata);
1647 
1648 	/*
1649 	 * Construct the TTE.
1650 	 */
1651 
1652 	ENTER_STAT(unmanaged);
1653 	if (pa & (PMAP_NVC|PMAP_NC)) {
1654 		ENTER_STAT(ci);
1655 	}
1656 
1657 	tte.data = TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1658 			    (VM_PROT_WRITE & prot),
1659 			    !(pa & PMAP_NC), pa & (PMAP_NVC), 1, 0);
1660 	/* We don't track mod/ref here. */
1661 	if (prot & VM_PROT_WRITE)
1662 		tte.data |= TLB_REAL_W|TLB_W;
1663 	if (prot & VM_PROT_EXECUTE)
1664 		tte.data |= TLB_EXEC;
1665 	tte.data |= TLB_TSB_LOCK;	/* wired */
1666 	ptp = 0;
1667 
1668  retry:
1669 	i = pseg_set(pm, va, tte.data, ptp);
1670 	if (i & 1) {
1671 		KASSERT((i & 4) == 0);
1672 		ptp = 0;
1673 		if (!pmap_get_page(&ptp))
1674 			panic("pmap_kenter_pa: no pages");
1675 		ENTER_STAT(ptpneeded);
1676 		goto retry;
1677 	}
1678 	if (ptp && i == 0) {
1679 		/* We allocated a spare page but didn't use it.  Free it. */
1680 		printf("pmap_kenter_pa: freeing unused page %llx\n",
1681 		       (long long)ptp);
1682 		pmap_free_page_noflush(ptp);
1683 	}
1684 #ifdef PMAP_DEBUG
1685 	i = ptelookup_va(va);
1686 	if (pmapdebug & PDB_ENTER)
1687 		prom_printf("pmap_kenter_pa: va=%08x data=%08x:%08x "
1688 			"tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32),
1689 			(int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]);
1690 	if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) {
1691 		prom_printf("pmap_kenter_pa: evicting entry tag=%x:%08x "
1692 			"data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1693 			(int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag,
1694 			(int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data,
1695 			i, &curcpu()->ci_tsb_dmmu[i]);
1696 		prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1697 			va, (int)(tte.data>>32), (int)tte.data,	i,
1698 			&curcpu()->ci_tsb_dmmu[i]);
1699 	}
1700 #endif
1701 }
1702 
1703 /*
1704  * pmap_kremove:		[ INTERFACE ]
1705  *
1706  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1707  *	for size bytes (assumed to be page rounded).
1708  */
1709 void
1710 pmap_kremove(vaddr_t va, vsize_t size)
1711 {
1712 	struct pmap *pm = pmap_kernel();
1713 	int64_t data;
1714 	paddr_t pa;
1715 	int rv;
1716 	bool flush = FALSE;
1717 
1718 	KASSERT(va < INTSTACK || va > EINTSTACK);
1719 	KASSERT(va < kdata || va > ekdata);
1720 
1721 	DPRINTF(PDB_DEMAP, ("pmap_kremove: start 0x%lx size %lx\n", va, size));
1722 	for (; size >= PAGE_SIZE; va += PAGE_SIZE, size -= PAGE_SIZE) {
1723 
1724 #ifdef DIAGNOSTIC
1725 		/*
1726 		 * Is this part of the permanent 4MB mapping?
1727 		 */
1728 		if (va >= ktext && va < roundup(ekdata, 4*MEG))
1729 			panic("pmap_kremove: va=%08x in locked TLB", (u_int)va);
1730 #endif
1731 
1732 		data = pseg_get(pm, va);
1733 		if ((data & TLB_V) == 0) {
1734 			continue;
1735 		}
1736 
1737 		flush = TRUE;
1738 		pa = data & TLB_PA_MASK;
1739 
1740 		/*
1741 		 * We need to flip the valid bit and
1742 		 * clear the access statistics.
1743 		 */
1744 
1745 		rv = pseg_set(pm, va, 0, 0);
1746 		if (rv & 1)
1747 			panic("pmap_kremove: pseg_set needs spare, rv=%d\n",
1748 			    rv);
1749 		DPRINTF(PDB_DEMAP, ("pmap_kremove: seg %x pdir %x pte %x\n",
1750 		    (int)va_to_seg(va), (int)va_to_dir(va),
1751 		    (int)va_to_pte(va)));
1752 		REMOVE_STAT(removes);
1753 
1754 		tsb_invalidate(va, pm);
1755 		REMOVE_STAT(tflushes);
1756 
1757 		/*
1758 		 * Here we assume nothing can get into the TLB
1759 		 * unless it has a PTE.
1760 		 */
1761 
1762 		tlb_flush_pte(va, pm);
1763 		dcache_flush_page_all(pa);
1764 	}
1765 	if (flush)
1766 		REMOVE_STAT(flushes);
1767 }
1768 
1769 /*
1770  * Insert physical page at pa into the given pmap at virtual address va.
1771  * Supports 64-bit pa so we can map I/O space.
1772  */
1773 
1774 int
1775 pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1776 {
1777 	pte_t tte;
1778 	int64_t data;
1779 	paddr_t opa = 0, ptp; /* XXX: gcc */
1780 	pv_entry_t pvh, npv = NULL, freepv;
1781 	struct vm_page *pg, *opg, *ptpg;
1782 	int s, i, uncached = 0, error = 0;
1783 	int size = PGSZ_8K; /* PMAP_SZ_TO_TTE(pa); */
1784 	bool wired = (flags & PMAP_WIRED) != 0;
1785 	bool wasmapped = FALSE;
1786 	bool dopv = TRUE;
1787 
1788 	/*
1789 	 * Is this part of the permanent mappings?
1790 	 */
1791 	KASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
1792 	KASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
1793 
1794 	/* Grab a spare PV. */
1795 	freepv = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
1796 	if (__predict_false(freepv == NULL)) {
1797 		if (flags & PMAP_CANFAIL)
1798 			return (ENOMEM);
1799 		panic("pmap_enter: no pv entries available");
1800 	}
1801 	freepv->pv_next = NULL;
1802 
1803 	/*
1804 	 * If a mapping at this address already exists, check if we're
1805 	 * entering the same PA again.  If it's different, remove it.
1806 	 */
1807 
1808 	mutex_enter(&pmap_lock);
1809 	data = pseg_get(pm, va);
1810 	if (data & TLB_V) {
1811 		wasmapped = TRUE;
1812 		opa = data & TLB_PA_MASK;
1813 		if (opa != pa) {
1814 			opg = PHYS_TO_VM_PAGE(opa);
1815 			if (opg != NULL) {
1816 				npv = pmap_remove_pv(pm, va, opg);
1817 			}
1818 		}
1819 	}
1820 
1821 	/*
1822 	 * Construct the TTE.
1823 	 */
1824 	pg = PHYS_TO_VM_PAGE(pa);
1825 	if (pg) {
1826 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1827 
1828 		pvh = &md->mdpg_pvh;
1829 		uncached = (pvh->pv_va & (PV_ALIAS|PV_NVC));
1830 #ifdef DIAGNOSTIC
1831 		if ((flags & VM_PROT_ALL) & ~prot)
1832 			panic("pmap_enter: access_type exceeds prot");
1833 #endif
1834 		/*
1835 		 * If we don't have the traphandler do it,
1836 		 * set the ref/mod bits now.
1837 		 */
1838 		if (flags & VM_PROT_ALL)
1839 			pvh->pv_va |= PV_REF;
1840 		if (flags & VM_PROT_WRITE)
1841 			pvh->pv_va |= PV_MOD;
1842 
1843 		/*
1844 		 * make sure we have a pv entry ready if we need one.
1845 		 */
1846 		if (pvh->pv_pmap == NULL || (wasmapped && opa == pa)) {
1847 			if (npv != NULL) {
1848 				/* free it */
1849 				npv->pv_next = freepv;
1850 				freepv = npv;
1851 				npv = NULL;
1852 			}
1853 			if (wasmapped && opa == pa) {
1854 				dopv = FALSE;
1855 			}
1856 		} else if (npv == NULL) {
1857 			/* use the pre-allocated pv */
1858 			npv = freepv;
1859 			freepv = freepv->pv_next;
1860 		}
1861 		ENTER_STAT(managed);
1862 	} else {
1863 		ENTER_STAT(unmanaged);
1864 		dopv = FALSE;
1865 		if (npv != NULL) {
1866 			/* free it */
1867 			npv->pv_next = freepv;
1868 			freepv = npv;
1869 			npv = NULL;
1870 		}
1871 	}
1872 
1873 #ifndef NO_VCACHE
1874 	if (pa & PMAP_NVC)
1875 #endif
1876 		uncached = 1;
1877 	if (uncached) {
1878 		ENTER_STAT(ci);
1879 	}
1880 	tte.data = TSB_DATA(0, size, pa, pm == pmap_kernel(),
1881 		flags & VM_PROT_WRITE, !(pa & PMAP_NC),
1882 		uncached, 1, pa & PMAP_LITTLE);
1883 #ifdef HWREF
1884 	if (prot & VM_PROT_WRITE)
1885 		tte.data |= TLB_REAL_W;
1886 	if (prot & VM_PROT_EXECUTE)
1887 		tte.data |= TLB_EXEC;
1888 #else
1889 	/* If it needs ref accounting do nothing. */
1890 	if (!(flags & VM_PROT_READ)) {
1891 		mutex_exit(&pmap_lock);
1892 		goto out;
1893 	}
1894 #endif
1895 	if (flags & VM_PROT_EXECUTE) {
1896 		if ((flags & (VM_PROT_READ|VM_PROT_WRITE)) == 0)
1897 			tte.data |= TLB_EXEC_ONLY|TLB_EXEC;
1898 		else
1899 			tte.data |= TLB_EXEC;
1900 	}
1901 	if (wired)
1902 		tte.data |= TLB_TSB_LOCK;
1903 	ptp = 0;
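	/*
	 * Summary of the pseg_set() return value as handled by the checks
	 * below (see pseg_set_real() for the authoritative encoding):
	 * -2 means the VA falls into the address-space hole; a set bit 0
	 * means a further page-table page is needed, so a spare page is
	 * allocated and the insert retried; 4 or 2 report that the spare
	 * page passed in was consumed as an L3 or L2 table respectively.
	 */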
1904 
1905  retry:
1906 	i = pseg_set(pm, va, tte.data, ptp);
1907 	if (i == -2) {
1908 		if (flags & PMAP_CANFAIL)
1909 			return (ENOMEM);
1910 		panic("pmap_enter: invalid VA (inside hole)");
1911 	}
1912 	if (i & 4) {
1913 		/* ptp used as L3 */
1914 		KASSERT(ptp != 0);
1915 		KASSERT((i & 3) == 0);
1916 		ptpg = PHYS_TO_VM_PAGE(ptp);
1917 		if (ptpg) {
1918 			ptpg->offset = (uint64_t)va & (0xfffffLL << 23);
1919 			TAILQ_INSERT_TAIL(&pm->pm_obj.memq, ptpg, listq.queue);
1920 		} else {
1921 			KASSERT(pm == pmap_kernel());
1922 		}
1923 	}
1924 	if (i & 2) {
1925 		/* ptp used as L2 */
1926 		KASSERT(ptp != 0);
1927 		KASSERT((i & 4) == 0);
1928 		ptpg = PHYS_TO_VM_PAGE(ptp);
1929 		if (ptpg) {
1930 			ptpg->offset = (((uint64_t)va >> 43) & 0x3ffLL) << 13;
1931 			TAILQ_INSERT_TAIL(&pm->pm_obj.memq, ptpg, listq.queue);
1932 		} else {
1933 			KASSERT(pm == pmap_kernel());
1934 		}
1935 	}
1936 	if (i & 1) {
1937 		KASSERT((i & 4) == 0);
1938 		ptp = 0;
1939 		if (!pmap_get_page(&ptp)) {
1940 			mutex_exit(&pmap_lock);
1941 			if (flags & PMAP_CANFAIL) {
1942 				if (npv != NULL) {
1943 					/* free it */
1944 					npv->pv_next = freepv;
1945 					freepv = npv;
1946 				}
1947 				error = ENOMEM;
1948 				goto out;
1949 			} else {
1950 				panic("pmap_enter: no pages");
1951 			}
1952 		}
1953 		ENTER_STAT(ptpneeded);
1954 		goto retry;
1955 	}
1956 	if (ptp && i == 0) {
1957 		/* We allocated a spare page but didn't use it.  Free it. */
1958 		printf("pmap_enter: freeing unused page %llx\n",
1959 		       (long long)ptp);
1960 		pmap_free_page_noflush(ptp);
1961 	}
1962 	if (dopv) {
1963 		pmap_enter_pv(pm, va, pa, pg, npv);
1964 	}
1965 
1966 	mutex_exit(&pmap_lock);
1967 #ifdef PMAP_DEBUG
1968 	i = ptelookup_va(va);
1969 	if (pmapdebug & PDB_ENTER)
1970 		prom_printf("pmap_enter: va=%08x data=%08x:%08x "
1971 			"tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32),
1972 			(int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]);
1973 	if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) {
1974 		prom_printf("pmap_enter: evicting entry tag=%x:%08x "
1975 			"data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1976 			(int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag,
1977 			(int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data, i,
1978 			&curcpu()->ci_tsb_dmmu[i]);
1979 		prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1980 			va, (int)(tte.data>>32), (int)tte.data, i,
1981 			&curcpu()->ci_tsb_dmmu[i]);
1982 	}
1983 #endif
1984 
1985 	if (flags & (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)) {
1986 
1987 		/*
1988 		 * preload the TSB with the new entry,
1989 		 * since we're going to need it immediately anyway.
1990 		 */
1991 
1992 		KASSERT(pmap_ctx(pm)>=0);
1993 		i = ptelookup_va(va);
1994 		tte.tag = TSB_TAG(0, pmap_ctx(pm), va);
1995 		s = splhigh();
1996 		if (wasmapped && pmap_is_on_mmu(pm)) {
1997 			tsb_invalidate(va, pm);
1998 		}
1999 		if (flags & (VM_PROT_READ | VM_PROT_WRITE)) {
2000 			curcpu()->ci_tsb_dmmu[i].tag = tte.tag;
2001 			__asm volatile("" : : : "memory");
2002 			curcpu()->ci_tsb_dmmu[i].data = tte.data;
2003 		}
2004 		if (flags & VM_PROT_EXECUTE) {
2005 			curcpu()->ci_tsb_immu[i].tag = tte.tag;
2006 			__asm volatile("" : : : "memory");
2007 			curcpu()->ci_tsb_immu[i].data = tte.data;
2008 		}
2009 
2010 		/*
2011 		 * it's only necessary to flush the TLB if this page was
2012 		 * previously mapped, but for some reason it's a lot faster
2013 		 * for the fork+exit microbenchmark if we always do it.
2014 		 */
2015 
2016 		KASSERT(pmap_ctx(pm)>=0);
2017 #ifdef MULTIPROCESSOR
2018 		if (wasmapped && pmap_is_on_mmu(pm))
2019 			tlb_flush_pte(va, pm);
2020 		else
2021 			sp_tlb_flush_pte(va, pmap_ctx(pm));
2022 #else
2023 		tlb_flush_pte(va, pm);
2024 #endif
2025 		splx(s);
2026 	} else if (wasmapped && pmap_is_on_mmu(pm)) {
2027 		/* Force reload -- protections may be changed */
2028 		KASSERT(pmap_ctx(pm)>=0);
2029 		tsb_invalidate(va, pm);
2030 		tlb_flush_pte(va, pm);
2031 	}
2032 
2033 	/* We will let the fast mmu miss interrupt load the new translation */
2034 	pv_check();
2035  out:
2036 	/* Catch up on deferred frees. */
2037 	for (; freepv != NULL; freepv = npv) {
2038 		npv = freepv->pv_next;
2039 		pool_cache_put(&pmap_pv_cache, freepv);
2040 	}
2041 	return error;
2042 }
2043 
2044 void
2045 pmap_remove_all(struct pmap *pm)
2046 {
2047 #ifdef MULTIPROCESSOR
2048 	struct cpu_info *ci;
2049 	sparc64_cpuset_t pmap_cpus_active;
2050 #endif
2051 
2052 	if (pm == pmap_kernel()) {
2053 		return;
2054 	}
2055 	write_user_windows();
2056 	pm->pm_refs = 0;
2057 
2058 	/*
2059 	 * XXXMRG: pmap_destroy() does exactly the same dance here.
2060 	 * surely one of them isn't necessary?
2061 	 */
2062 #ifdef MULTIPROCESSOR
2063 	CPUSET_CLEAR(pmap_cpus_active);
2064 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
2065 		/* XXXMRG: Move the lock inside one or both tests? */
2066 		mutex_enter(&ci->ci_ctx_lock);
2067 		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
2068 			if (pm->pm_ctx[ci->ci_index] > 0) {
2069 				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
2070 				ctx_free(pm, ci);
2071 			}
2072 		}
2073 		mutex_exit(&ci->ci_ctx_lock);
2074 	}
2075 #else
2076 	if (pmap_ctx(pm)) {
2077 		mutex_enter(&curcpu()->ci_ctx_lock);
2078 		ctx_free(pm, curcpu());
2079 		mutex_exit(&curcpu()->ci_ctx_lock);
2080 	}
2081 #endif
2082 
2083 	REMOVE_STAT(flushes);
2084 	/*
2085 	 * XXXMRG: couldn't we do something less severe here, and
2086 	 * only flush the right context on each CPU?
2087 	 */
2088 	blast_dcache();
2089 }
2090 
2091 /*
2092  * Remove the given range of mapping entries.
2093  */
2094 void
2095 pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva)
2096 {
2097 	int64_t data;
2098 	paddr_t pa;
2099 	struct vm_page *pg;
2100 	pv_entry_t pv, freepv = NULL;
2101 	int rv;
2102 	bool flush = FALSE;
2103 
2104 	/*
2105 	 * In here we should check each pseg and if there are no more entries,
2106 	 * free it.  It's just that linear scans of 8K pages get expensive.
2107 	 */
2108 
2109 	KASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
2110 	KASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
2111 
2112 	mutex_enter(&pmap_lock);
2113 	DPRINTF(PDB_REMOVE, ("pmap_remove(pm=%p, va=%p, endva=%p):", pm,
2114 			     (void *)(u_long)va, (void *)(u_long)endva));
2115 	REMOVE_STAT(calls);
2116 
2117 	/* Now do the real work */
2118 	for (; va < endva; va += PAGE_SIZE) {
2119 #ifdef DIAGNOSTIC
2120 		/*
2121 		 * Is this part of the permanent 4MB mapping?
2122 		 */
2123 		if (pm == pmap_kernel() && va >= ktext &&
2124 			va < roundup(ekdata, 4*MEG))
2125 			panic("pmap_remove: va=%08llx in locked TLB",
2126 			      (long long)va);
2127 #endif
2128 
2129 		data = pseg_get(pm, va);
2130 		if ((data & TLB_V) == 0) {
2131 			continue;
2132 		}
2133 
2134 		flush = TRUE;
2135 		/* First remove the pv entry, if there is one */
2136 		pa = data & TLB_PA_MASK;
2137 		pg = PHYS_TO_VM_PAGE(pa);
2138 		if (pg) {
2139 			pv = pmap_remove_pv(pm, va, pg);
2140 			if (pv != NULL) {
2141 				/* free it */
2142 				pv->pv_next = freepv;
2143 				freepv = pv;
2144 			}
2145 		}
2146 
2147 		/*
2148 		 * We need to flip the valid bit and
2149 		 * clear the access statistics.
2150 		 */
2151 
2152 		rv = pseg_set(pm, va, 0, 0);
2153 		if (rv & 1)
2154 			panic("pmap_remove: pseg_set needed spare, rv=%d!\n",
2155 			    rv);
2156 
2157 		DPRINTF(PDB_REMOVE, (" clearing seg %x pte %x\n",
2158 				     (int)va_to_seg(va), (int)va_to_pte(va)));
2159 		REMOVE_STAT(removes);
2160 
2161 		if (pm != pmap_kernel() && !pmap_has_ctx(pm))
2162 			continue;
2163 
2164 		/*
2165 		 * if the pmap is being torn down, don't bother flushing,
2166 		 * we already have done so.
2167 		 */
2168 
2169 		if (!pm->pm_refs)
2170 			continue;
2171 
2172 		/*
2173 		 * Here we assume nothing can get into the TLB
2174 		 * unless it has a PTE.
2175 		 */
2176 
2177 		KASSERT(pmap_ctx(pm)>=0);
2178 		tsb_invalidate(va, pm);
2179 		REMOVE_STAT(tflushes);
2180 		tlb_flush_pte(va, pm);
2181 		dcache_flush_page_all(pa);
2182 	}
2183 	if (flush && pm->pm_refs)
2184 		REMOVE_STAT(flushes);
2185 	DPRINTF(PDB_REMOVE, ("\n"));
2186 	pv_check();
2187 	mutex_exit(&pmap_lock);
2188 
2189 	/* Catch up on deferred frees. */
2190 	for (; freepv != NULL; freepv = pv) {
2191 		pv = freepv->pv_next;
2192 		pool_cache_put(&pmap_pv_cache, freepv);
2193 	}
2194 }
2195 
2196 /*
2197  * Change the protection on the specified range of this pmap.
2198  */
2199 void
2200 pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
2201 {
2202 	paddr_t pa;
2203 	int64_t data;
2204 	struct vm_page *pg;
2205 	pv_entry_t pv;
2206 	int rv;
2207 
2208 	KASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
2209 	KASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
2210 
2211 	if (prot == VM_PROT_NONE) {
2212 		pmap_remove(pm, sva, eva);
2213 		return;
2214 	}
2215 
2216 	sva = trunc_page(sva);
2217 	mutex_enter(&pmap_lock);
2218 	for (; sva < eva; sva += PAGE_SIZE) {
2219 #ifdef PMAP_DEBUG
2220 		/*
2221 		 * Is this part of the permanent 4MB mapping?
2222 		 */
2223 		if (pm == pmap_kernel() && sva >= ktext &&
2224 		    sva < roundup(ekdata, 4 * MEG)) {
2225 			mutex_exit(&pmap_lock);
2226 			prom_printf("pmap_protect: va=%08x in locked TLB\n",
2227 			    sva);
2228 			prom_abort();
2229 			return;
2230 		}
2231 #endif
2232 		DPRINTF(PDB_CHANGEPROT, ("pmap_protect: va %p\n",
2233 		    (void *)(u_long)sva));
2234 		data = pseg_get(pm, sva);
2235 		if ((data & TLB_V) == 0) {
2236 			continue;
2237 		}
2238 
2239 		pa = data & TLB_PA_MASK;
2240 		DPRINTF(PDB_CHANGEPROT|PDB_REF,
2241 			("pmap_protect: va=%08x data=%08llx "
2242 			 "seg=%08x pte=%08x\n",
2243 			 (u_int)sva, (long long)pa, (int)va_to_seg(sva),
2244 			 (int)va_to_pte(sva)));
2245 
2246 		pg = PHYS_TO_VM_PAGE(pa);
2247 		if (pg) {
2248 			struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2249 
2250 			/* Save REF/MOD info */
2251 			pv = &md->mdpg_pvh;
2252 			if (data & TLB_ACCESS)
2253 				pv->pv_va |= PV_REF;
2254 			if (data & TLB_MODIFY)
2255 				pv->pv_va |= PV_MOD;
2256 		}
2257 
2258 		/* Just do the pmap and TSB, not the pv_list */
2259 		if ((prot & VM_PROT_WRITE) == 0)
2260 			data &= ~(TLB_W|TLB_REAL_W);
2261 		if ((prot & VM_PROT_EXECUTE) == 0)
2262 			data &= ~(TLB_EXEC);
2263 
2264 		rv = pseg_set(pm, sva, data, 0);
2265 		if (rv & 1)
2266 			panic("pmap_protect: pseg_set needs spare! rv=%d\n",
2267 			    rv);
2268 
2269 		if (pm != pmap_kernel() && !pmap_has_ctx(pm))
2270 			continue;
2271 
2272 		KASSERT(pmap_ctx(pm)>=0);
2273 		tsb_invalidate(sva, pm);
2274 		tlb_flush_pte(sva, pm);
2275 	}
2276 	pv_check();
2277 	mutex_exit(&pmap_lock);
2278 }
2279 
2280 /*
2281  * Extract the physical page address associated
2282  * with the given map/virtual_address pair.
2283  */
2284 bool
2285 pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
2286 {
2287 	paddr_t pa;
2288 	int64_t data = 0;
2289 
2290 	if (pm == pmap_kernel() && va >= kdata && va < roundup(ekdata, 4*MEG)) {
2291 		/* Need to deal w/locked TLB entry specially. */
2292 		pa = pmap_kextract(va);
2293 		DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n",
2294 				      (u_long)va, (unsigned long long)pa));
2295 		if (pap != NULL)
2296 			*pap = pa;
2297 		return TRUE;
2298 	} else if (pm == pmap_kernel() && va >= ktext && va < ektext) {
2299 		/* Need to deal w/locked TLB entry specially. */
2300 		pa = pmap_kextract(va);
2301 		DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n",
2302 		    (u_long)va, (unsigned long long)pa));
2303 		if (pap != NULL)
2304 			*pap = pa;
2305 		return TRUE;
2306 	} else if (pm == pmap_kernel() && va >= INTSTACK && va < (INTSTACK + 64*KB)) {
2307 		pa = (paddr_t)(curcpu()->ci_paddr - INTSTACK + va);
2308 		DPRINTF(PDB_EXTRACT, ("pmap_extract (intstack): va=%lx pa=%llx\n",
2309 		    (u_long)va, (unsigned long long)pa));
2310 		if (pap != NULL)
2311 			*pap = pa;
2312 		return TRUE;
2313 	} else {
2314 		data = pseg_get(pm, va);
2315 		pa = data & TLB_PA_MASK;
2316 		if (pmapdebug & PDB_EXTRACT) {
2317 			paddr_t npa = ldxa((vaddr_t)&pm->pm_segs[va_to_seg(va)],
2318 					   ASI_PHYS_CACHED);
2319 			printf("pmap_extract: va=%p segs[%ld]=%llx",
2320 			       (void *)(u_long)va, (long)va_to_seg(va),
2321 			       (unsigned long long)npa);
2322 			if (npa) {
2323 				npa = (paddr_t)
2324 					ldxa((vaddr_t)&((paddr_t *)(u_long)npa)
2325 					     [va_to_dir(va)],
2326 					     ASI_PHYS_CACHED);
2327 				printf(" segs[%ld][%ld]=%lx",
2328 				       (long)va_to_seg(va),
2329 				       (long)va_to_dir(va), (long)npa);
2330 			}
2331 			if (npa)	{
2332 				npa = (paddr_t)
2333 					ldxa((vaddr_t)&((paddr_t *)(u_long)npa)
2334 					     [va_to_pte(va)],
2335 					     ASI_PHYS_CACHED);
2336 				printf(" segs[%ld][%ld][%ld]=%lx",
2337 				       (long)va_to_seg(va),
2338 				       (long)va_to_dir(va),
2339 				       (long)va_to_pte(va), (long)npa);
2340 			}
2341 			printf(" pseg_get: %lx\n", (long)pa);
2342 		}
2343 	}
2344 	if ((data & TLB_V) == 0)
2345 		return (FALSE);
2346 	if (pap != NULL)
2347 		*pap = pa + (va & PGOFSET);
2348 	return (TRUE);
2349 }
2350 
2351 /*
2352  * Change protection on a kernel address.
2353  * This should only be called from MD code.
2354  */
2355 void
2356 pmap_kprotect(vaddr_t va, vm_prot_t prot)
2357 {
2358 	struct pmap *pm = pmap_kernel();
2359 	int64_t data;
2360 	int rv;
2361 
2362 	data = pseg_get(pm, va);
2363 	KASSERT(data & TLB_V);
2364 	if (prot & VM_PROT_WRITE) {
2365 		data |= (TLB_W|TLB_REAL_W);
2366 	} else {
2367 		data &= ~(TLB_W|TLB_REAL_W);
2368 	}
2369 	rv = pseg_set(pm, va, data, 0);
2370 	if (rv & 1)
2371 		panic("pmap_kprotect: pseg_set needs spare! rv=%d", rv);
2372 	KASSERT(pmap_ctx(pm)>=0);
2373 	tsb_invalidate(va, pm);
2374 	tlb_flush_pte(va, pm);
2375 }
2376 
2377 /*
2378  * Return the number of bytes that pmap_dumpmmu() will dump.
2379  */
2380 int
2381 pmap_dumpsize(void)
2382 {
2383 	int	sz;
2384 
2385 	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
2386 	sz += kernel_dtlb_slots * sizeof(struct cpu_kcore_4mbseg);
2387 	sz += phys_installed_size * sizeof(phys_ram_seg_t);
2388 
2389 	return btodb(sz + DEV_BSIZE - 1);
2390 }
2391 
2392 /*
2393  * Write the mmu contents to the dump device.
2394  * This gets appended to the end of a crash dump since
2395  * there is no in-core copy of kernel memory mappings on a 4/4c machine.
2396  *
2397  * Write the core dump headers and MD data to the dump device.
2398  * We dump the following items:
2399  *
2400  *	kcore_seg_t		 (MI header defined in <sys/kcore.h>)
2401  *	cpu_kcore_hdr_t		 (MD header defined in <machine/kcore.h>)
2402  *	phys_ram_seg_t[phys_installed_size]  physical memory segments
2403  */
2404 int
2405 pmap_dumpmmu(int (*dump)(dev_t, daddr_t, void *, size_t), daddr_t blkno)
2406 {
2407 	kcore_seg_t	*kseg;
2408 	cpu_kcore_hdr_t	*kcpu;
2409 	phys_ram_seg_t	memseg;
2410 	struct cpu_kcore_4mbseg ktlb;
2411 	int	error = 0;
2412 	int	i;
2413 	int	buffer[dbtob(1) / sizeof(int)];
2414 	int	*bp, *ep;
2415 
2416 #define EXPEDITE(p,n) do {						\
2417 	int *sp = (void *)(p);						\
2418 	int sz = (n);							\
2419 	while (sz > 0) {						\
2420 		*bp++ = *sp++;						\
2421 		if (bp >= ep) {						\
2422 			error = (*dump)(dumpdev, blkno,			\
2423 					(void *)buffer, dbtob(1));	\
2424 			if (error != 0)					\
2425 				return (error);				\
2426 			++blkno;					\
2427 			bp = buffer;					\
2428 		}							\
2429 		sz -= 4;						\
2430 	}								\
2431 } while (0)
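/*
 * EXPEDITE() above streams an arbitrary structure into buffer[] one
 * 32-bit word at a time and calls (*dump)() whenever a full disk block
 * (dbtob(1) bytes) has accumulated, advancing blkno as it goes.  Any
 * partial block left in buffer[] is written out at the end of
 * pmap_dumpmmu().
 */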
2432 
2433 	/* Setup bookkeeping pointers */
2434 	bp = buffer;
2435 	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
2436 
2437 	/* Fill in MI segment header */
2438 	kseg = (kcore_seg_t *)bp;
2439 	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2440 	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
2441 
2442 	/* Fill in MD segment header (interpreted by MD part of libkvm) */
2443 	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
2444 	kcpu->cputype = cputyp;
2445 	kcpu->kernbase = (uint64_t)KERNBASE;
2446 	kcpu->cpubase = (uint64_t)CPUINFO_VA;
2447 
2448 	/* Describe the locked text segment */
2449 	kcpu->ktextbase = (uint64_t)ktext;
2450 	kcpu->ktextp = (uint64_t)ktextp;
2451 	kcpu->ktextsz = (uint64_t)ektext - ktext;
2452 	if (kcpu->ktextsz > 4*MEG)
2453 		kcpu->ktextsz = 0;	/* old version can not work */
2454 
2455 	/* Describe locked data segment */
2456 	kcpu->kdatabase = (uint64_t)kdata;
2457 	kcpu->kdatap = (uint64_t)kdatap;
2458 	kcpu->kdatasz = (uint64_t)ekdatap - kdatap;
2459 
2460 	/* new version of locked segments description */
2461 	kcpu->newmagic = SPARC64_KCORE_NEWMAGIC;
2462 	kcpu->num4mbsegs = kernel_dtlb_slots;
2463 	kcpu->off4mbsegs = ALIGN(sizeof(cpu_kcore_hdr_t));
2464 
2465 	/* description of per-cpu mappings */
2466 	kcpu->numcpuinfos = sparc_ncpus;
2467 	kcpu->percpusz = 64 * 1024;	/* used to be 128k for some time */
2468 	kcpu->thiscpu = cpu_number();	/* which cpu is doing this dump */
2469 	kcpu->cpusp = cpu0paddr - 64 * 1024 * sparc_ncpus;
2470 
2471 	/* Now the memsegs */
2472 	kcpu->nmemseg = phys_installed_size;
2473 	kcpu->memsegoffset = kcpu->off4mbsegs
2474 		+ kernel_dtlb_slots * sizeof(struct cpu_kcore_4mbseg);
2475 
2476 	/* Now we need to point this at our kernel pmap. */
2477 	kcpu->nsegmap = STSZ;
2478 	kcpu->segmapoffset = (uint64_t)pmap_kernel()->pm_physaddr;
2479 
2480 	/* Note: we have assumed everything fits in buffer[] so far... */
2481 	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
2482 
2483 	/* write locked kernel 4MB TLBs */
2484 	for (i = 0; i < kernel_dtlb_slots; i++) {
2485 		ktlb.va = kernel_tlbs[i].te_va;
2486 		ktlb.pa = kernel_tlbs[i].te_pa;
2487 		EXPEDITE(&ktlb, sizeof(ktlb));
2488 	}
2489 
2490 	/* write memsegs */
2491 	for (i = 0; i < phys_installed_size; i++) {
2492 		memseg.start = phys_installed[i].start;
2493 		memseg.size = phys_installed[i].size;
2494 		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
2495 	}
2496 
2497 	if (bp != buffer)
2498 		error = (*dump)(dumpdev, blkno++, (void *)buffer, dbtob(1));
2499 
2500 	return (error);
2501 }
2502 
2503 /*
2504  * Determine (non)existence of physical page
2505  */
2506 int
2507 pmap_pa_exists(paddr_t pa)
2508 {
2509 	int i;
2510 
2511 	/* Just go through physical memory list & see if we're there */
2512 	for (i = 0; i < phys_installed_size; i++) {
2513 		if ((phys_installed[i].start <= pa) &&
2514 				(phys_installed[i].start +
2515 				 phys_installed[i].size >= pa))
2516 			return 1;
2517 	}
2518 	return 0;
2519 }
2520 
2521 /*
2522  * Lookup the appropriate TSB entry.
2523  *
2524  * Here is the full official pseudo code:
2525  *
2526  */
2527 
2528 #ifdef NOTYET
2529 int64 GenerateTSBPointer(
2530  	int64 va,		/* Missing VA			*/
2531  	PointerType type,	/* 8K_POINTER or 16K_POINTER	*/
2532  	int64 TSBBase,		/* TSB Register[63:13] << 13	*/
2533  	Boolean split,		/* TSB Register[12]		*/
2534  	int TSBSize)		/* TSB Register[2:0]		*/
2535 {
2536  	int64 vaPortion;
2537  	int64 TSBBaseMask;
2538  	int64 splitMask;
2539 
2540 	/* TSBBaseMask marks the bits from TSB Base Reg		*/
2541 	TSBBaseMask = 0xffffffffffffe000 <<
2542 		(split? (TSBsize + 1) : TSBsize);
2543 
2544 	/* Shift va towards lsb appropriately and		*/
2545 	/* zero out the original va page offset			*/
2546 	vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
2547 		0xfffffffffffffff0;
2548 
2549 	if (split) {
2550 		/* There's only one bit in question for split	*/
2551 		splitMask = 1 << (13 + TSBsize);
2552 		if (type == 8K_POINTER)
2553 			/* Make sure we're in the lower half	*/
2554 			vaPortion &= ~splitMask;
2555 		else
2556 			/* Make sure we're in the upper half	*/
2557 			vaPortion |= splitMask;
2558 	}
2559 	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
2560 }
2561 #endif
2562 /*
2563  * Of course, since we are not using a split TSB or variable page sizes,
2564  * we can optimize this a bit.
2565  *
2566  * The following only works for a unified 8K TSB.  It will find the slot
2567  * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
2568  */
2569 int
2570 ptelookup_va(vaddr_t va)
2571 {
2572 	long tsbptr;
2573 #define TSBBASEMASK	(0xffffffffffffe000LL << tsbsize)
2574 
2575 	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK);
2576 	return (tsbptr / sizeof(pte_t));
2577 }
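/*
 * Worked example (illustrative, assuming tsbsize == 0, i.e. a 512-entry
 * TSB, and that pte_t is a 16-byte tag/data pair): TSBBASEMASK is then
 * 0xffffffffffffe000, so the expression above keeps bits <12:4> of
 * (va >> 9), which are VA bits <21:13>.  Dividing by sizeof(pte_t)
 * yields (va >> 13) & 0x1ff -- the low 9 bits of the 8K virtual page
 * number select one of the 512 slots, so VAs 4MB apart share a slot.
 */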
2578 
2579 /*
2580  * Do whatever is needed to sync the MOD/REF flags
2581  */
2582 
2583 bool
2584 pmap_clear_modify(struct vm_page *pg)
2585 {
2586 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2587 	pv_entry_t pv;
2588 	int rv;
2589 	int changed = 0;
2590 #ifdef DEBUG
2591 	int modified = 0;
2592 
2593 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify(%p)\n", pg));
2594 
2595 	modified = pmap_is_modified(pg);
2596 #endif
2597 	mutex_enter(&pmap_lock);
2598 	/* Clear all mappings */
2599 	pv = &md->mdpg_pvh;
2600 #ifdef DEBUG
2601 	if (pv->pv_va & PV_MOD)
2602 		pv->pv_va |= PV_WE;	/* Remember this was modified */
2603 #endif
2604 	if (pv->pv_va & PV_MOD) {
2605 		changed |= 1;
2606 		pv->pv_va &= ~PV_MOD;
2607 	}
2608 #ifdef DEBUG
2609 	if (pv->pv_next && !pv->pv_pmap) {
2610 		printf("pmap_clear_modify: npv but no pmap for pv %p\n", pv);
2611 		Debugger();
2612 	}
2613 #endif
2614 	if (pv->pv_pmap != NULL) {
2615 		for (; pv; pv = pv->pv_next) {
2616 			int64_t data;
2617 			struct pmap *pmap = pv->pv_pmap;
2618 			vaddr_t va = pv->pv_va & PV_VAMASK;
2619 
2620 			/* First clear the mod bit in the PTE and make it R/O */
2621 			data = pseg_get(pmap, va);
2622 			KASSERT(data & TLB_V);
2623 			/* Need to both clear the modify and write bits */
2624 			if (data & TLB_MODIFY)
2625 				changed |= 1;
2626 #ifdef HWREF
2627 			data &= ~(TLB_MODIFY|TLB_W);
2628 #else
2629 			data &= ~(TLB_MODIFY|TLB_W|TLB_REAL_W);
2630 #endif
2631 			rv = pseg_set(pmap, va, data, 0);
2632 			if (rv & 1)
2633 				printf("pmap_clear_modify: pseg_set needs"
2634 				    " spare! rv=%d\n", rv);
2635 			if (pmap_is_on_mmu(pmap)) {
2636 				KASSERT(pmap_ctx(pmap)>=0);
2637 				tsb_invalidate(va, pmap);
2638 				tlb_flush_pte(va, pmap);
2639 			}
2640 			/* Then clear the mod bit in the pv */
2641 			if (pv->pv_va & PV_MOD) {
2642 				changed |= 1;
2643 				pv->pv_va &= ~PV_MOD;
2644 			}
2645 		}
2646 	}
2647 	pv_check();
2648 	mutex_exit(&pmap_lock);
2649 #ifdef DEBUG
2650 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify: pg %p %s\n", pg,
2651 	    (changed ? "was modified" : "was not modified")));
2652 	if (modified && modified != changed) {
2653 		printf("pmap_clear_modify: modified %d changed %d\n",
2654 		       modified, changed);
2655 		Debugger();
2656 	}
2657 #endif
2658 	return (changed);
2659 }
2660 
2661 bool
2662 pmap_clear_reference(struct vm_page *pg)
2663 {
2664 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2665 	pv_entry_t pv;
2666 	int rv;
2667 	int changed = 0;
2668 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
2669 	int referenced = 0;
2670 #endif
2671 
2672 	mutex_enter(&pmap_lock);
2673 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
2674 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_reference(%p)\n", pg));
2675 	referenced = pmap_is_referenced_locked(pg);
2676 #endif
2677 	/* Clear all references */
2678 	pv = &md->mdpg_pvh;
2679 	if (pv->pv_va & PV_REF) {
2680 		changed |= 1;
2681 		pv->pv_va &= ~PV_REF;
2682 	}
2683 #ifdef DEBUG
2684 	if (pv->pv_next && !pv->pv_pmap) {
2685 		printf("pmap_clear_reference: npv but no pmap for pv %p\n", pv);
2686 		Debugger();
2687 	}
2688 #endif
2689 	if (pv->pv_pmap != NULL) {
2690 		for (; pv; pv = pv->pv_next) {
2691 			int64_t data;
2692 			struct pmap *pmap = pv->pv_pmap;
2693 			vaddr_t va = pv->pv_va & PV_VAMASK;
2694 
2695 			data = pseg_get(pmap, va);
2696 			KASSERT(data & TLB_V);
2697 			DPRINTF(PDB_CHANGEPROT,
2698 			    ("clearing ref pm:%p va:%p ctx:%lx data:%llx\n",
2699 			     pmap, (void *)(u_long)va,
2700 			     (u_long)pmap_ctx(pmap),
2701 			     (long long)data));
2702 #ifdef HWREF
2703 			if (data & TLB_ACCESS) {
2704 				changed |= 1;
2705 				data &= ~TLB_ACCESS;
2706 			}
2707 #else
2708 			if (data < 0)
2709 				changed |= 1;
2710 			data = 0;
2711 #endif
2712 			rv = pseg_set(pmap, va, data, 0);
2713 			if (rv & 1)
2714 				panic("pmap_clear_reference: pseg_set needs"
2715 				    " spare! rv=%d\n", rv);
2716 			if (pmap_is_on_mmu(pmap)) {
2717 				KASSERT(pmap_ctx(pmap)>=0);
2718 				tsb_invalidate(va, pmap);
2719 				tlb_flush_pte(va, pmap);
2720 			}
2721 			if (pv->pv_va & PV_REF) {
2722 				changed |= 1;
2723 				pv->pv_va &= ~PV_REF;
2724 			}
2725 		}
2726 	}
2727 	dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
2728 	pv_check();
2729 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
2730 	if (pmap_is_referenced_locked(pg)) {
2731 		pv = &md->mdpg_pvh;
2732 		printf("pmap_clear_reference(): %p still referenced "
2733 			"(pmap = %p, ctx = %d)\n", pg, pv->pv_pmap,
2734 			pv->pv_pmap ? pmap_ctx(pv->pv_pmap) : 0);
2735 		Debugger();
2736 	}
2737 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2738 	    ("pmap_clear_reference: pg %p %s\n", pg,
2739 	     (changed ? "was referenced" : "was not referenced")));
2740 	if (referenced != changed) {
2741 		printf("pmap_clear_reference: referenced %d changed %d\n",
2742 		       referenced, changed);
2743 		Debugger();
2744 	} else {
2745 		mutex_exit(&pmap_lock);
2746 		return (referenced);
2747 	}
2748 #endif
2749 	mutex_exit(&pmap_lock);
2750 	return (changed);
2751 }
2752 
2753 bool
2754 pmap_is_modified(struct vm_page *pg)
2755 {
2756 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2757 	pv_entry_t pv, npv;
2758 	bool res = false;
2759 
2760 	/* Check if any mapping has been modified */
2761 	pv = &md->mdpg_pvh;
2762 	if (pv->pv_va & PV_MOD)
2763 		res = true;
2764 #ifdef HWREF
2765 #ifdef DEBUG
2766 	if (pv->pv_next && !pv->pv_pmap) {
2767 		printf("pmap_is_modified: npv but no pmap for pv %p\n", pv);
2768 		Debugger();
2769 	}
2770 #endif
2771 	if (!res && pv->pv_pmap != NULL) {
2772 		mutex_enter(&pmap_lock);
2773 		for (npv = pv; !res && npv && npv->pv_pmap;
2774 		     npv = npv->pv_next) {
2775 			int64_t data;
2776 
2777 			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2778 			KASSERT(data & TLB_V);
2779 			if (data & TLB_MODIFY)
2780 				res = true;
2781 
2782 			/* Migrate modify info to head pv */
2783 			if (npv->pv_va & PV_MOD) {
2784 				res = true;
2785 				npv->pv_va &= ~PV_MOD;
2786 			}
2787 		}
2788 		/* Save modify info */
2789 		if (res)
2790 			pv->pv_va |= PV_MOD;
2791 #ifdef DEBUG
2792 		if (res)
2793 			pv->pv_va |= PV_WE;
2794 #endif
2795 		mutex_exit(&pmap_lock);
2796 	}
2797 #endif
2798 
2799 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_is_modified(%p) = %d\n", pg,
2800 	    res));
2801 	pv_check();
2802 	return res;
2803 }
2804 
2805 /*
2806  * Variant of pmap_is_referenced() where the caller already holds pmap_lock
2807  */
2808 static bool
2809 pmap_is_referenced_locked(struct vm_page *pg)
2810 {
2811 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2812 	pv_entry_t pv, npv;
2813 	bool res = false;
2814 
2815 	KASSERT(mutex_owned(&pmap_lock));
2816 
2817 	/* Check if any mapping has been referenced */
2818 	pv = &md->mdpg_pvh;
2819 	if (pv->pv_va & PV_REF)
2820 		return true;
2821 
2822 #ifdef HWREF
2823 #ifdef DEBUG
2824 	if (pv->pv_next && !pv->pv_pmap) {
2825 		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
2826 		Debugger();
2827 	}
2828 #endif
2829 	if (pv->pv_pmap == NULL)
2830 		return false;
2831 
2832 	for (npv = pv; npv; npv = npv->pv_next) {
2833 		int64_t data;
2834 
2835 		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2836 		KASSERT(data & TLB_V);
2837 		if (data & TLB_ACCESS)
2838 			res = true;
2839 
2840 		/* Migrate ref info to head pv */
2841 		if (npv->pv_va & PV_REF) {
2842 			res = true;
2843 			npv->pv_va &= ~PV_REF;
2844 		}
2845 	}
2846 	/* Save ref info */
2847 	if (res)
2848 		pv->pv_va |= PV_REF;
2849 #endif
2850 
2851 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2852 		("pmap_is_referenced(%p) = %d\n", pg, res));
2853 	pv_check();
2854 	return res;
2855 }
2856 
2857 bool
2858 pmap_is_referenced(struct vm_page *pg)
2859 {
2860 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2861 	pv_entry_t pv;
2862 	bool res = false;
2863 
2864 	/* Check if any mapping has been referenced */
2865 	pv = &md->mdpg_pvh;
2866 	if (pv->pv_va & PV_REF)
2867 		return true;
2868 
2869 #ifdef HWREF
2870 #ifdef DEBUG
2871 	if (pv->pv_next && !pv->pv_pmap) {
2872 		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
2873 		Debugger();
2874 	}
2875 #endif
2876 	if (pv->pv_pmap != NULL) {
2877 		mutex_enter(&pmap_lock);
2878 		res = pmap_is_referenced_locked(pg);
2879 		mutex_exit(&pmap_lock);
2880 	}
2881 #endif
2882 
2883 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2884 		("pmap_is_referenced(%p) = %d\n", pg, res));
2885 	pv_check();
2886 	return res;
2887 }
2888 
2889 
2890 
2891 /*
2892  *	Routine:	pmap_unwire
2893  *	Function:	Clear the wired attribute for a map/virtual-address
2894  *			pair.
2895  *	In/out conditions:
2896  *			The mapping must already exist in the pmap.
2897  */
2898 void
2899 pmap_unwire(pmap_t pmap, vaddr_t va)
2900 {
2901 	int64_t data;
2902 	int rv;
2903 
2904 	DPRINTF(PDB_MMU_STEAL, ("pmap_unwire(%p, %lx)\n", pmap, va));
2905 
2906 #ifdef DEBUG
2907 	/*
2908 	 * Is this part of the permanent 4MB mapping?
2909 	 */
2910 	if (pmap == pmap_kernel() && va >= ktext &&
2911 		va < roundup(ekdata, 4*MEG)) {
2912 		prom_printf("pmap_unwire: va=%08x in locked TLB\n", va);
2913 		prom_abort();
2914 		return;
2915 	}
2916 #endif
2917 	data = pseg_get(pmap, va & PV_VAMASK);
2918 	KASSERT(data & TLB_V);
2919 	data &= ~TLB_TSB_LOCK;
2920 	rv = pseg_set(pmap, va & PV_VAMASK, data, 0);
2921 	if (rv & 1)
2922 		panic("pmap_unwire: pseg_set needs spare! rv=%d\n", rv);
2923 	pv_check();
2924 }
2925 
2926 /*
2927  * Lower the protection on the specified physical page.
2928  *
2929  * Never enable writing as it will break COW
2930  */
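
/*
 * Two cases below (describing the existing code): if prot still allows
 * READ or EXECUTE, every mapping of the page is downgraded in place by
 * clearing TLB_W/TLB_REAL_W (and TLB_EXEC when execute is not allowed);
 * if prot is VM_PROT_NONE, every mapping is removed, ref/mod bits are
 * saved in the head pv, and the spare pv entries are freed only after
 * pmap_lock is dropped.
 */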
2931 
2932 void
2933 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2934 {
2935 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2936 	int64_t clear, set;
2937 	int64_t data = 0;
2938 	int rv;
2939 	pv_entry_t pv, npv, freepv = NULL;
2940 	struct pmap *pmap;
2941 	vaddr_t va;
2942 	bool needflush = FALSE;
2943 
2944 	DPRINTF(PDB_CHANGEPROT,
2945 	    ("pmap_page_protect: pg %p prot %x\n", pg, prot));
2946 
2947 	mutex_enter(&pmap_lock);
2948 	pv = &md->mdpg_pvh;
2949 	if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
2950 		/* copy_on_write */
2951 
2952 		set = TLB_V;
2953 		clear = TLB_REAL_W|TLB_W;
2954 		if (VM_PROT_EXECUTE & prot)
2955 			set |= TLB_EXEC;
2956 		else
2957 			clear |= TLB_EXEC;
2958 		if (VM_PROT_EXECUTE == prot)
2959 			set |= TLB_EXEC_ONLY;
2960 
2961 #ifdef DEBUG
2962 		if (pv->pv_next && !pv->pv_pmap) {
2963 			printf("pmap_page_protect: no pmap for pv %p\n", pv);
2964 			Debugger();
2965 		}
2966 #endif
2967 		if (pv->pv_pmap != NULL) {
2968 			for (; pv; pv = pv->pv_next) {
2969 				pmap = pv->pv_pmap;
2970 				va = pv->pv_va & PV_VAMASK;
2971 
2972 				DPRINTF(PDB_CHANGEPROT | PDB_REF,
2973 					("pmap_page_protect: "
2974 					 "RO va %p of pg %p...\n",
2975 					 (void *)(u_long)pv->pv_va, pg));
2976 				data = pseg_get(pmap, va);
2977 				KASSERT(data & TLB_V);
2978 
2979 				/* Save REF/MOD info */
2980 				if (data & TLB_ACCESS)
2981 					pv->pv_va |= PV_REF;
2982 				if (data & TLB_MODIFY)
2983 					pv->pv_va |= PV_MOD;
2984 
2985 				data &= ~clear;
2986 				data |= set;
2987 				rv = pseg_set(pmap, va, data, 0);
2988 				if (rv & 1)
2989 					panic("pmap_page_protect: "
2990 					       "pseg_set needs spare! rv=%d\n",
2991 					       rv);
2992 				if (pmap_is_on_mmu(pmap)) {
2993 					KASSERT(pmap_ctx(pmap)>=0);
2994 					tsb_invalidate(va, pmap);
2995 					tlb_flush_pte(va, pmap);
2996 				}
2997 			}
2998 		}
2999 	} else {
3000 		/* remove mappings */
3001 		DPRINTF(PDB_REMOVE,
3002 			("pmap_page_protect: demapping pg %p\n", pg));
3003 
3004 		/* First remove the entire list of continuation pv's */
3005 		for (npv = pv->pv_next; npv; npv = pv->pv_next) {
3006 			pmap = npv->pv_pmap;
3007 			va = npv->pv_va & PV_VAMASK;
3008 
3009 			/* We're removing npv from pv->pv_next */
3010 			DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE,
3011 				("pmap_page_protect: "
3012 				 "demap va %p of pg %p in pmap %p...\n",
3013 				 (void *)(u_long)va, pg, pmap));
3014 
3015 			/* clear the entry in the page table */
3016 			data = pseg_get(pmap, va);
3017 			KASSERT(data & TLB_V);
3018 
3019 			/* Save ref/mod info */
3020 			if (data & TLB_ACCESS)
3021 				pv->pv_va |= PV_REF;
3022 			if (data & TLB_MODIFY)
3023 				pv->pv_va |= PV_MOD;
3024 			/* Clear mapping */
3025 			rv = pseg_set(pmap, va, 0, 0);
3026 			if (rv & 1)
3027 				panic("pmap_page_protect: pseg_set needs"
3028 				     " spare! rv=%d\n", rv);
3029 			if (pmap_is_on_mmu(pmap)) {
3030 				KASSERT(pmap_ctx(pmap)>=0);
3031 				tsb_invalidate(va, pmap);
3032 				tlb_flush_pte(va, pmap);
3033 			}
3034 			if (pmap->pm_refs > 0) {
3035 				needflush = TRUE;
3036 			}
3037 
3038 			/* free the pv */
3039 			pv->pv_next = npv->pv_next;
3040 			npv->pv_next = freepv;
3041 			freepv = npv;
3042 		}
3043 
3044 		/* Then remove the primary pv */
3045 #ifdef DEBUG
3046 		if (pv->pv_next && !pv->pv_pmap) {
3047 			printf("pmap_page_protect: no pmap for pv %p\n", pv);
3048 			Debugger();
3049 		}
3050 #endif
3051 		if (pv->pv_pmap != NULL) {
3052 			pmap = pv->pv_pmap;
3053 			va = pv->pv_va & PV_VAMASK;
3054 
3055 			DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE,
3056 				("pmap_page_protect: "
3057 				 "demap va %p of pg %p from pm %p...\n",
3058 				 (void *)(u_long)va, pg, pmap));
3059 
3060 			data = pseg_get(pmap, va);
3061 			KASSERT(data & TLB_V);
3062 			/* Save ref/mod info */
3063 			if (data & TLB_ACCESS)
3064 				pv->pv_va |= PV_REF;
3065 			if (data & TLB_MODIFY)
3066 				pv->pv_va |= PV_MOD;
3067 			rv = pseg_set(pmap, va, 0, 0);
3068 			if (rv & 1)
3069 				panic("pmap_page_protect: pseg_set needs"
3070 				    " spare! rv=%d\n", rv);
3071 			if (pmap_is_on_mmu(pmap)) {
3072 			    	KASSERT(pmap_ctx(pmap)>=0);
3073 				tsb_invalidate(va, pmap);
3074 				tlb_flush_pte(va, pmap);
3075 			}
3076 			if (pmap->pm_refs > 0) {
3077 				needflush = TRUE;
3078 			}
3079 			npv = pv->pv_next;
3080 			/* dump the first pv */
3081 			if (npv) {
3082 				/* First save mod/ref bits */
3083 				pv->pv_pmap = npv->pv_pmap;
3084 				pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va;
3085 				pv->pv_next = npv->pv_next;
3086 				npv->pv_next = freepv;
3087 				freepv = npv;
3088 			} else {
3089 				pv->pv_pmap = NULL;
3090 				pv->pv_next = NULL;
3091 			}
3092 		}
3093 		if (needflush)
3094 			dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
3095 	}
3096 	/* We should really only flush the pages we demapped. */
3097 	pv_check();
3098 	mutex_exit(&pmap_lock);
3099 
3100 	/* Catch up on deferred frees. */
3101 	for (; freepv != NULL; freepv = npv) {
3102 		npv = freepv->pv_next;
3103 		pool_cache_put(&pmap_pv_cache, freepv);
3104 	}
3105 }
3106 
3107 #ifdef PMAP_COUNT_DEBUG
3108 /*
3109  * count pages in pmap -- this can be slow.
3110  */
3111 int
3112 pmap_count_res(struct pmap *pm)
3113 {
3114 	int64_t data;
3115 	paddr_t *pdir, *ptbl;
3116 	int i, j, k, n;
3117 
3118 	/* Don't want one of these pages reused while we're reading it. */
3119 	mutex_enter(&pmap_lock);
3120 	n = 0;
3121 	for (i = 0; i < STSZ; i++) {
3122 		pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i],
3123 					       ASI_PHYS_CACHED);
3124 		if (pdir == NULL) {
3125 			continue;
3126 		}
3127 		for (k = 0; k < PDSZ; k++) {
3128 			ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k],
3129 						       ASI_PHYS_CACHED);
3130 			if (ptbl == NULL) {
3131 				continue;
3132 			}
3133 			for (j = 0; j < PTSZ; j++) {
3134 				data = (int64_t)ldxa((vaddr_t)&ptbl[j],
3135 						     ASI_PHYS_CACHED);
3136 				if (data & TLB_V)
3137 					n++;
3138 			}
3139 		}
3140 	}
3141 	mutex_exit(&pmap_lock);
3142 
3143 	if (pm->pm_stats.resident_count != n)
3144 		printf("pmap_count_resident: pm_stats = %ld, counted: %d\n",
3145 		    pm->pm_stats.resident_count, n);
3146 
3147 	return n;
3148 }
3149 
3150 /*
3151  * count wired pages in pmap -- this can be slow.
3152  */
3153 int
3154 pmap_count_wired(struct pmap *pm)
3155 {
3156 	int64_t data;
3157 	paddr_t *pdir, *ptbl;
3158 	int i, j, k, n;
3159 
3160 	/* Don't want one of these pages reused while we're reading it. */
3161 	mutex_enter(&pmap_lock);	/* XXX uvmplock */
3162 	n = 0;
3163 	for (i = 0; i < STSZ; i++) {
3164 		pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i],
3165 					       ASI_PHYS_CACHED);
3166 		if (pdir == NULL) {
3167 			continue;
3168 		}
3169 		for (k = 0; k < PDSZ; k++) {
3170 			ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k],
3171 						       ASI_PHYS_CACHED);
3172 			if (ptbl == NULL) {
3173 				continue;
3174 			}
3175 			for (j = 0; j < PTSZ; j++) {
3176 				data = (int64_t)ldxa((vaddr_t)&ptbl[j],
3177 						     ASI_PHYS_CACHED);
3178 				if (data & TLB_TSB_LOCK)
3179 					n++;
3180 			}
3181 		}
3182 	}
3183 	mutex_exit(&pmap_lock);	/* XXX uvmplock */
3184 
3185 	if (pm->pm_stats.wired_count != n)
3186 		printf("pmap_count_wired: pm_stats = %ld, counted: %d\n",
3187 		    pm->pm_stats.wired_count, n);
3188 
3189 	return n;
3190 }
3191 #endif	/* PMAP_COUNT_DEBUG */
3192 
3193 void
3194 pmap_procwr(struct proc *p, vaddr_t va, size_t len)
3195 {
3196 
3197 	blast_icache();
3198 }
3199 
3200 /*
3201  * Allocate a hardware context to the given pmap.
3202  */
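/*
 * Context numbering (as implemented below): context 0 is reserved for
 * the kernel, so user contexts are handed out starting at 1 from
 * ci_pmap_next_ctx.  When the per-CPU supply is exhausted, every user
 * pmap on this CPU is stripped of its context, the user TSB entries
 * are cleared and the TLB flushed, and numbering restarts at 1.
 */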
3203 static int
3204 ctx_alloc(struct pmap *pm)
3205 {
3206 	int i, ctx;
3207 
3208 	KASSERT(pm != pmap_kernel());
3209 	KASSERT(pm == curproc->p_vmspace->vm_map.pmap);
3210 	mutex_enter(&curcpu()->ci_ctx_lock);
3211 	ctx = curcpu()->ci_pmap_next_ctx++;
3212 
3213 	/*
3214 	 * if we have run out of contexts, remove all user entries from
3215 	 * the TSB, TLB and dcache and start over with context 1 again.
3216 	 */
3217 
3218 	if (ctx == curcpu()->ci_numctx) {
3219 		DPRINTF(PDB_CTX_ALLOC|PDB_CTX_FLUSHALL,
3220 			("ctx_alloc: cpu%d run out of contexts %d\n",
3221 			 cpu_number(), curcpu()->ci_numctx));
3222 		write_user_windows();
3223 		while (!LIST_EMPTY(&curcpu()->ci_pmap_ctxlist)) {
3224 #ifdef MULTIPROCESSOR
3225 			KASSERT(pmap_ctx(LIST_FIRST(&curcpu()->ci_pmap_ctxlist)) != 0);
3226 #endif
3227 			ctx_free(LIST_FIRST(&curcpu()->ci_pmap_ctxlist),
3228 				 curcpu());
3229 		}
3230 		for (i = TSBENTS - 1; i >= 0; i--) {
3231 			if (TSB_TAG_CTX(curcpu()->ci_tsb_dmmu[i].tag) != 0) {
3232 				clrx(&curcpu()->ci_tsb_dmmu[i].data);
3233 			}
3234 			if (TSB_TAG_CTX(curcpu()->ci_tsb_immu[i].tag) != 0) {
3235 				clrx(&curcpu()->ci_tsb_immu[i].data);
3236 			}
3237 		}
3238 		sp_tlb_flush_all();
3239 		ctx = 1;
3240 		curcpu()->ci_pmap_next_ctx = 2;
3241 	}
3242 	curcpu()->ci_ctxbusy[ctx] = pm->pm_physaddr;
3243 	LIST_INSERT_HEAD(&curcpu()->ci_pmap_ctxlist, pm, pm_list[cpu_number()]);
3244 	pmap_ctx(pm) = ctx;
3245 	mutex_exit(&curcpu()->ci_ctx_lock);
3246 	DPRINTF(PDB_CTX_ALLOC, ("ctx_alloc: cpu%d allocated ctx %d\n",
3247 		cpu_number(), ctx));
3248 	return ctx;
3249 }
3250 
3251 /*
3252  * Give away a context.
3253  */
3254 static void
3255 ctx_free(struct pmap *pm, struct cpu_info *ci)
3256 {
3257 	int oldctx;
3258 	int cpunum;
3259 
3260 	KASSERT(mutex_owned(&ci->ci_ctx_lock));
3261 
3262 #ifdef MULTIPROCESSOR
3263 	cpunum = ci->ci_index;
3264 #else
3265 	/* Give the compiler a hint.. */
3266 	cpunum = 0;
3267 #endif
3268 
3269 	oldctx = pm->pm_ctx[cpunum];
3270 	if (oldctx == 0)
3271 		return;
3272 
3273 #ifdef DIAGNOSTIC
3274 	if (pm == pmap_kernel())
3275 		panic("ctx_free: freeing kernel context");
3276 	if (ci->ci_ctxbusy[oldctx] == 0)
3277 		printf("ctx_free: freeing free context %d\n", oldctx);
3278 	if (ci->ci_ctxbusy[oldctx] != pm->pm_physaddr) {
3279 		printf("ctx_free: freeing someone else's context\n "
3280 		       "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
3281 		       oldctx, (void *)(u_long)ci->ci_ctxbusy[oldctx], pm,
3282 		       (void *)(u_long)pm->pm_physaddr);
3283 		Debugger();
3284 	}
3285 #endif
3286 	/* We should verify it has not been stolen and reallocated... */
3287 	DPRINTF(PDB_CTX_ALLOC, ("ctx_free: cpu%d freeing ctx %d\n",
3288 		cpu_number(), oldctx));
3289 	ci->ci_ctxbusy[oldctx] = 0UL;
3290 	pm->pm_ctx[cpunum] = 0;
3291 	LIST_REMOVE(pm, pm_list[cpunum]);
3292 }
3293 
3294 /*
3295  * Enter the pmap and virtual address into the
3296  * physical to virtual map table.
3297  *
3298  * We enter here with the pmap locked.
3299  */
3300 
3301 void
3302 pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa, struct vm_page *pg,
3303 	      pv_entry_t npv)
3304 {
3305 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
3306 	pv_entry_t pvh;
3307 
3308 	KASSERT(mutex_owned(&pmap_lock));
3309 
3310 	pvh = &md->mdpg_pvh;
3311 	DPRINTF(PDB_ENTER, ("pmap_enter: pvh %p: was %lx/%p/%p\n",
3312 	    pvh, pvh->pv_va, pvh->pv_pmap, pvh->pv_next));
3313 	if (pvh->pv_pmap == NULL) {
3314 
3315 		/*
3316 		 * No entries yet, use header as the first entry
3317 		 */
3318 		DPRINTF(PDB_ENTER, ("pmap_enter: first pv: pmap %p va %lx\n",
3319 		    pmap, va));
3320 		ENTER_STAT(firstpv);
3321 		PV_SETVA(pvh, va);
3322 		pvh->pv_pmap = pmap;
3323 		pvh->pv_next = NULL;
3324 		KASSERT(npv == NULL);
3325 	} else {
3326 		if (pg->loan_count == 0 && !(pvh->pv_va & PV_ALIAS)) {
3327 
3328 			/*
3329 			 * There is at least one other VA mapping this page.
3330 			 * Check if they are cache index compatible. If not
3331 			 * remove all mappings, flush the cache and set page
3332 			 * to be mapped uncached. Caching will be restored
3333 			 * when pages are mapped compatible again.
3334 			 */
3335 			if ((pvh->pv_va ^ va) & VA_ALIAS_MASK) {
3336 				pvh->pv_va |= PV_ALIAS;
3337 				pmap_page_cache(pmap, pa, 0);
3338 				ENTER_STAT(ci);
3339 			}
3340 		}
3341 
3342 		/*
3343 		 * There is at least one other VA mapping this page.
3344 		 * Place this entry after the header.
3345 		 */
3346 
3347 		DPRINTF(PDB_ENTER, ("pmap_enter: new pv: pmap %p va %lx\n",
3348 		    pmap, va));
3349 		npv->pv_pmap = pmap;
3350 		npv->pv_va = va & PV_VAMASK;
3351 		npv->pv_next = pvh->pv_next;
3352 		pvh->pv_next = npv;
3353 
3354 		if (!npv->pv_next) {
3355 			ENTER_STAT(secondpv);
3356 		}
3357 	}
3358 }
3359 
3360 /*
3361  * Remove a physical to virtual address translation.
3362  */
3363 
3364 pv_entry_t
3365 pmap_remove_pv(struct pmap *pmap, vaddr_t va, struct vm_page *pg)
3366 {
3367 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
3368 	pv_entry_t pvh, npv, pv;
3369 	int64_t data = 0;
3370 
3371 	KASSERT(mutex_owned(&pmap_lock));
3372 
3373 	pvh = &md->mdpg_pvh;
3374 
3375 	DPRINTF(PDB_REMOVE, ("pmap_remove_pv(pm=%p, va=%p, pg=%p)\n", pmap,
3376 	    (void *)(u_long)va, pg));
3377 	pv_check();
3378 
3379 	/*
3380 	 * Remove page from the PV table.
3381 	 * If it is the first entry on the list, it is actually
3382 	 * in the header and we must copy the following entry up
3383 	 * to the header.  Otherwise we must search the list for
3384 	 * the entry.  In either case we free the now unused entry.
3385 	 */
3386 	if (pmap == pvh->pv_pmap && PV_MATCH(pvh, va)) {
3387 		data = pseg_get(pvh->pv_pmap, pvh->pv_va & PV_VAMASK);
3388 		KASSERT(data & TLB_V);
3389 		npv = pvh->pv_next;
3390 		if (npv) {
3391 			/* First save mod/ref bits */
3392 			pvh->pv_va = (pvh->pv_va & PV_MASK) | npv->pv_va;
3393 			pvh->pv_next = npv->pv_next;
3394 			pvh->pv_pmap = npv->pv_pmap;
3395 		} else {
3396 			pvh->pv_pmap = NULL;
3397 			pvh->pv_next = NULL;
3398 			pvh->pv_va &= (PV_REF|PV_MOD);
3399 		}
3400 		REMOVE_STAT(pvfirst);
3401 	} else {
3402 		for (pv = pvh, npv = pvh->pv_next; npv;
3403 		     pv = npv, npv = npv->pv_next) {
3404 			REMOVE_STAT(pvsearch);
3405 			if (pmap == npv->pv_pmap && PV_MATCH(npv, va))
3406 				break;
3407 		}
3408 		pv->pv_next = npv->pv_next;
3409 		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
3410 		KASSERT(data & TLB_V);
3411 	}
3412 
3413 	/* Save ref/mod info */
3414 	if (data & TLB_ACCESS)
3415 		pvh->pv_va |= PV_REF;
3416 	if (data & TLB_MODIFY)
3417 		pvh->pv_va |= PV_MOD;
3418 
3419 	/* Check to see if the alias went away */
3420 	if (pvh->pv_va & PV_ALIAS) {
3421 		pvh->pv_va &= ~PV_ALIAS;
3422 		for (pv = pvh; pv; pv = pv->pv_next) {
3423 			if ((pv->pv_va ^ pvh->pv_va) & VA_ALIAS_MASK) {
3424 				pvh->pv_va |= PV_ALIAS;
3425 				break;
3426 			}
3427 		}
3428 		if (!(pvh->pv_va & PV_ALIAS))
3429 			pmap_page_cache(pmap, VM_PAGE_TO_PHYS(pg), 1);
3430 	}
3431 	pv_check();
3432 	return npv;
3433 }
3434 
3435 /*
3436  *	pmap_page_cache:
3437  *
3438  *	Change all mappings of a page to cached/uncached.
3439  */
3440 void
3441 pmap_page_cache(struct pmap *pm, paddr_t pa, int mode)
3442 {
3443 	struct vm_page *pg;
3444 	struct vm_page_md *md;
3445 	pv_entry_t pv;
3446 	vaddr_t va;
3447 	int rv;
3448 
3449 #if 0
3450 	/*
3451 	 * Why is this?
3452 	 */
3453 	if (CPU_ISSUN4US || CPU_ISSUN4V)
3454 		return;
3455 #endif
3456 
3457 	KASSERT(mutex_owned(&pmap_lock));
3458 
3459 	DPRINTF(PDB_ENTER, ("pmap_page_cache(%llx)\n",
3460 	    (unsigned long long)pa));
3461 	pg = PHYS_TO_VM_PAGE(pa);
3462 	md = VM_PAGE_TO_MD(pg);
3463 	pv = &md->mdpg_pvh;
3464 	while (pv) {
3465 		va = pv->pv_va & PV_VAMASK;
3466 		if (pv->pv_va & PV_NC) {
3467 			int64_t data;
3468 
3469 			/* Non-cached -- I/O mapping */
3470 			data = pseg_get(pv->pv_pmap, va);
3471 			KASSERT(data & TLB_V);
3472 			rv = pseg_set(pv->pv_pmap, va,
3473 				     data & ~(TLB_CV|TLB_CP), 0);
3474 			if (rv & 1)
3475 				panic("pmap_page_cache: pseg_set needs"
3476 				     " spare! rv=%d\n", rv);
3477 		} else if (mode && (!(pv->pv_va & PV_NVC))) {
3478 			int64_t data;
3479 
3480 			/* Enable caching */
3481 			data = pseg_get(pv->pv_pmap, va);
3482 			KASSERT(data & TLB_V);
3483 			rv = pseg_set(pv->pv_pmap, va, data | TLB_CV, 0);
3484 			if (rv & 1)
3485 				panic("pmap_page_cache: pseg_set needs"
3486 				    " spare! rv=%d\n", rv);
3487 		} else {
3488 			int64_t data;
3489 
3490 			/* Disable caching */
3491 			data = pseg_get(pv->pv_pmap, va);
3492 			KASSERT(data & TLB_V);
3493 			rv = pseg_set(pv->pv_pmap, va, data & ~TLB_CV, 0);
3494 			if (rv & 1)
3495 				panic("pmap_page_cache: pseg_set needs"
3496 				    " spare! rv=%d\n", rv);
3497 		}
3498 		if (pmap_is_on_mmu(pv->pv_pmap)) {
3499 			/* Force reload -- cache bits have changed */
3500 			KASSERT(pmap_ctx(pv->pv_pmap)>=0);
3501 			tsb_invalidate(va, pv->pv_pmap);
3502 			tlb_flush_pte(va, pv->pv_pmap);
3503 		}
3504 		pv = pv->pv_next;
3505 	}
3506 }
3507 
3508 /*
3509  * Some routines to allocate and free PTPs.
3510  */
3511 static int
3512 pmap_get_page(paddr_t *p)
3513 {
3514 	struct vm_page *pg;
3515 	paddr_t pa;
3516 
3517 	if (uvm.page_init_done) {
3518 		pg = uvm_pagealloc(NULL, 0, NULL,
3519 		    UVM_PGA_ZERO | UVM_PGA_USERESERVE);
3520 		if (pg == NULL)
3521 			return (0);
3522 		pa = VM_PAGE_TO_PHYS(pg);
3523 	} else {
3524 		if (!uvm_page_physget(&pa))
3525 			return (0);
3526 		pmap_zero_page(pa);
3527 	}
3528 	*p = pa;
3529 	return (1);
3530 }
3531 
3532 static void
3533 pmap_free_page(paddr_t pa, sparc64_cpuset_t cs)
3534 {
3535 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
3536 
3537 	dcache_flush_page_cpuset(pa, cs);
3538 	uvm_pagefree(pg);
3539 }
3540 
3541 static void
3542 pmap_free_page_noflush(paddr_t pa)
3543 {
3544 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
3545 
3546 	uvm_pagefree(pg);
3547 }
3548 
3549 #ifdef DDB
3550 
3551 void db_dump_pv(db_expr_t, int, db_expr_t, const char *);
3552 void
3553 db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, const char *modif)
3554 {
3555 	struct vm_page *pg;
3556 	struct vm_page_md *md;
3557 	struct pv_entry *pv;
3558 
3559 	if (!have_addr) {
3560 		db_printf("Need addr for pv\n");
3561 		return;
3562 	}
3563 
3564 	pg = PHYS_TO_VM_PAGE((paddr_t)addr);
3565 	if (pg == NULL) {
3566 		db_printf("page is not managed\n");
3567 		return;
3568 	}
3569 	md = VM_PAGE_TO_MD(pg);
3570 	for (pv = &md->mdpg_pvh; pv; pv = pv->pv_next)
3571 		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
3572 			  pv, pv->pv_next, pv->pv_pmap,
3573 			  (unsigned long long)pv->pv_va);
3574 }
3575 
3576 #endif
3577 
3578 #ifdef DEBUG
3579 /*
3580  * Test ref/modify handling.  */
3581 void pmap_testout(void);
3582 void
3583 pmap_testout(void)
3584 {
3585 	vaddr_t va;
3586 	volatile int *loc;
3587 	int val = 0;
3588 	paddr_t pa;
3589 	struct vm_page *pg;
3590 	int ref, mod;
3591 
3592 	/* Allocate a page */
3593 	va = (vaddr_t)(vmmap - PAGE_SIZE);
3594 	KASSERT(va != 0);
3595 	loc = (int*)va;
3596 
3597 	pmap_get_page(&pa);
3598 	pg = PHYS_TO_VM_PAGE(pa);
3599 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3600 	pmap_update(pmap_kernel());
3601 
3602 	/* Now clear reference and modify */
3603 	ref = pmap_clear_reference(pg);
3604 	mod = pmap_clear_modify(pg);
3605 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3606 	       (void *)(u_long)va, (long)pa,
3607 	       ref, mod);
3608 
3609 	/* Check it's properly cleared */
3610 	ref = pmap_is_referenced(pg);
3611 	mod = pmap_is_modified(pg);
3612 	printf("Checking cleared page: ref %d, mod %d\n",
3613 	       ref, mod);
3614 
3615 	/* Reference page */
3616 	val = *loc;
3617 
3618 	ref = pmap_is_referenced(pg);
3619 	mod = pmap_is_modified(pg);
3620 	printf("Referenced page: ref %d, mod %d val %x\n",
3621 	       ref, mod, val);
3622 
3623 	/* Now clear reference and modify */
3624 	ref = pmap_clear_reference(pg);
3625 	mod = pmap_clear_modify(pg);
3626 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3627 	       (void *)(u_long)va, (long)pa,
3628 	       ref, mod);
3629 
3630 	/* Modify page */
3631 	*loc = 1;
3632 
3633 	ref = pmap_is_referenced(pg);
3634 	mod = pmap_is_modified(pg);
3635 	printf("Modified page: ref %d, mod %d\n",
3636 	       ref, mod);
3637 
3638 	/* Now clear reference and modify */
3639 	ref = pmap_clear_reference(pg);
3640 	mod = pmap_clear_modify(pg);
3641 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3642 	       (void *)(u_long)va, (long)pa,
3643 	       ref, mod);
3644 
3645 	/* Check it's properly cleared */
3646 	ref = pmap_is_referenced(pg);
3647 	mod = pmap_is_modified(pg);
3648 	printf("Checking cleared page: ref %d, mod %d\n",
3649 	       ref, mod);
3650 
3651 	/* Modify page */
3652 	*loc = 1;
3653 
3654 	ref = pmap_is_referenced(pg);
3655 	mod = pmap_is_modified(pg);
3656 	printf("Modified page: ref %d, mod %d\n",
3657 	       ref, mod);
3658 
3659 	/* Check pmap_protect() */
3660 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_READ);
3661 	pmap_update(pmap_kernel());
3662 	ref = pmap_is_referenced(pg);
3663 	mod = pmap_is_modified(pg);
3664 	printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n",
3665 	       ref, mod);
3666 
3667 	/* Now clear reference and modify */
3668 	ref = pmap_clear_reference(pg);
3669 	mod = pmap_clear_modify(pg);
3670 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3671 	       (void *)(u_long)va, (long)pa,
3672 	       ref, mod);
3673 
3674 	/* Modify page */
3675 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3676 	pmap_update(pmap_kernel());
3677 	*loc = 1;
3678 
3679 	ref = pmap_is_referenced(pg);
3680 	mod = pmap_is_modified(pg);
3681 	printf("Modified page: ref %d, mod %d\n",
3682 	       ref, mod);
3683 
3684 	/* Check pmap_protect() */
3685 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_NONE);
3686 	pmap_update(pmap_kernel());
3687 	ref = pmap_is_referenced(pg);
3688 	mod = pmap_is_modified(pg);
3689 	printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n",
3690 	       ref, mod);
3691 
3692 	/* Now clear reference and modify */
3693 	ref = pmap_clear_reference(pg);
3694 	mod = pmap_clear_modify(pg);
3695 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3696 	       (void *)(u_long)va, (long)pa,
3697 	       ref, mod);
3698 
3699 	/* Modify page */
3700 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3701 	pmap_update(pmap_kernel());
3702 	*loc = 1;
3703 
3704 	ref = pmap_is_referenced(pg);
3705 	mod = pmap_is_modified(pg);
3706 	printf("Modified page: ref %d, mod %d\n",
3707 	       ref, mod);
3708 
3709 	/* Check pmap_page_protect() */
3710 	pmap_page_protect(pg, VM_PROT_READ);
3711 	ref = pmap_is_referenced(pg);
3712 	mod = pmap_is_modified(pg);
3713 	printf("pmap_protect(): ref %d, mod %d\n",
3714 	       ref, mod);
3715 
3716 	/* Now clear reference and modify */
3717 	ref = pmap_clear_reference(pg);
3718 	mod = pmap_clear_modify(pg);
3719 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3720 	       (void *)(u_long)va, (long)pa,
3721 	       ref, mod);
3722 
3723 
3724 	/* Modify page */
3725 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3726 	pmap_update(pmap_kernel());
3727 	*loc = 1;
3728 
3729 	ref = pmap_is_referenced(pg);
3730 	mod = pmap_is_modified(pg);
3731 	printf("Modified page: ref %d, mod %d\n",
3732 	       ref, mod);
3733 
3734 	/* Check pmap_page_protect() */
3735 	pmap_page_protect(pg, VM_PROT_NONE);
3736 	ref = pmap_is_referenced(pg);
3737 	mod = pmap_is_modified(pg);
3738 	printf("pmap_protect(): ref %d, mod %d\n",
3739 	       ref, mod);
3740 
	/* Now clear reference and modify */
	ref = pmap_clear_reference(pg);
	mod = pmap_clear_modify(pg);
	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
	       (void *)(u_long)va, (long)pa,
	       ref, mod);

	/* Unmap page */
	pmap_remove(pmap_kernel(), va, va+1);
	pmap_update(pmap_kernel());
	ref = pmap_is_referenced(pg);
	mod = pmap_is_modified(pg);
	printf("Unmapped page: ref %d, mod %d\n", ref, mod);

	/* Now clear reference and modify */
	ref = pmap_clear_reference(pg);
	mod = pmap_clear_modify(pg);
	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
	       (void *)(u_long)va, (long)pa, ref, mod);

	/* Check it's properly cleared */
	ref = pmap_is_referenced(pg);
	mod = pmap_is_modified(pg);
	printf("Checking cleared page: ref %d, mod %d\n",
	       ref, mod);

	pmap_remove(pmap_kernel(), va, va+1);
	pmap_update(pmap_kernel());
	pmap_free_page(pa, cpus_active);
}
#endif

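/*
 * pmap_update(): process deferred pmap operations.
 *
 * If the pmap is still marked active (pm_refs > 0) there is nothing to
 * do; otherwise mark it active again and reinstall it in the MMU via
 * pmap_activate_pmap() so that pending updates take effect.
 */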
void
pmap_update(struct pmap *pmap)
{

	if (pmap->pm_refs > 0) {
		return;
	}
	pmap->pm_refs = 1;
	pmap_activate_pmap(pmap);
}

/*
 * pmap_copy_page()/pmap_zero_page()
 *
 * we make sure that the destination page is flushed from all D$'s
 * before we perform the copy/zero.
 */
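/*
 * Illustrative sketch only (not part of this file's API): a caller
 * holding two managed pages, e.g. from uvm_pagealloc(), would copy one
 * onto the other with
 *
 *	pmap_copy_page(VM_PAGE_TO_PHYS(srcpg), VM_PAGE_TO_PHYS(dstpg));
 *
 * and rely on the D$ flush below to discard any stale, virtually
 * aliased lines for the destination page before it is rewritten.
 */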
extern int cold;
void
pmap_copy_page(paddr_t src, paddr_t dst)
{

	if (!cold)
		dcache_flush_page_all(dst);
	pmap_copy_page_phys(src, dst);
}

void
pmap_zero_page(paddr_t pa)
{

	if (!cold)
		dcache_flush_page_all(pa);
	pmap_zero_page_phys(pa);
}

#ifdef _LP64
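/*
 * sparc64_mmap_range_test(): validate a mapping request against the
 * 64-bit VA hole.
 *
 * Older UltraSPARC CPUs implement only 44 bits of virtual address, so
 * the region between hole_start and hole_end cannot be mapped.  A
 * request is accepted (0) only when it lies entirely below the hole
 * (eaddr <= hole_start) or entirely above it (addr >= hole_end);
 * anything overlapping the hole is rejected with EINVAL.
 */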
int
sparc64_mmap_range_test(vaddr_t addr, vaddr_t eaddr)
{
	const vaddr_t hole_start = 0x000007ffffffffff;
	const vaddr_t hole_end   = 0xfffff80000000000;

	if (addr >= hole_end)
		return 0;
	if (eaddr <= hole_start)
		return 0;

	return EINVAL;
}
#endif

#ifdef SUN4V
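/*
 * pmap_setup_intstack_sun4v(): enter a permanent 64KB DTLB mapping for
 * the interrupt stack through the sun4v hypervisor.
 *
 * The TTE is privileged, writable and cacheable, with FORCE_ALIAS set
 * to keep the D$ out of the picture for this page.  Since the mapping
 * is installed with hv_mmu_map_perm_addr() it can never be demapped;
 * failure of the hypervisor call is fatal.
 */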
void
pmap_setup_intstack_sun4v(paddr_t pa)
{
	int64_t hv_rc;
	int64_t data;
	data = SUN4V_TSB_DATA(
	    0 /* global */,
	    PGSZ_64K,
	    pa,
	    1 /* priv */,
	    1 /* Write */,
	    1 /* Cacheable */,
	    FORCE_ALIAS /* ALIAS -- Disable D$ */,
	    1 /* valid */,
	    0 /* IE */);
	hv_rc = hv_mmu_map_perm_addr(INTSTACK, data, MAP_DTLB);
	if (hv_rc != H_EOK) {
		panic("hv_mmu_map_perm_addr() failed - rc = %" PRId64 "\n",
		    hv_rc);
	}
}

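/*
 * pmap_setup_tsb_sun4v(): register the TSB descriptor with the sun4v
 * hypervisor for context 0 (kernel) and for all non-zero (user)
 * contexts.
 *
 * The hypervisor takes the physical address of the descriptor, hence
 * the pmap_kextract() call; any failure here is fatal.
 */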
void
pmap_setup_tsb_sun4v(struct tsb_desc *tsb_desc)
{
	int err;
	paddr_t tsb_desc_p;
	tsb_desc_p = pmap_kextract((vaddr_t)tsb_desc);
	if (!tsb_desc_p) {
		panic("pmap_setup_tsb_sun4v() pmap_kextract() failed");
	}
	err = hv_mmu_tsb_ctx0(1, tsb_desc_p);
	if (err != H_EOK) {
		prom_printf("hv_mmu_tsb_ctx0() err: %d\n", err);
		panic("pmap_setup_tsb_sun4v() hv_mmu_tsb_ctx0() failed");
	}
	err = hv_mmu_tsb_ctxnon0(1, tsb_desc_p);
	if (err != H_EOK) {
		prom_printf("hv_mmu_tsb_ctxnon0() err: %d\n", err);
		panic("pmap_setup_tsb_sun4v() hv_mmu_tsb_ctxnon0() failed");
	}
}

#endif