xref: /netbsd/sys/arch/sparc64/sparc64/pmap.c (revision 7d0e1a68)
1 /*	$NetBSD: pmap.c,v 1.317 2023/06/02 08:51:47 andvar Exp $	*/
2 /*
3  *
4  * Copyright (C) 1996-1999 Eduardo Horvath.
5  * All rights reserved.
6  *
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR  ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR  BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 
28 #include <sys/cdefs.h>
29 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.317 2023/06/02 08:51:47 andvar Exp $");
30 
31 #undef	NO_VCACHE /* Don't forget the locked TLB in dostart */
32 #define	HWREF
33 
34 #include "opt_ddb.h"
35 #include "opt_multiprocessor.h"
36 #include "opt_modular.h"
37 
38 #include <sys/param.h>
39 #include <sys/malloc.h>
40 #include <sys/queue.h>
41 #include <sys/systm.h>
42 #include <sys/msgbuf.h>
43 #include <sys/pool.h>
44 #include <sys/exec.h>
45 #include <sys/core.h>
46 #include <sys/kcore.h>
47 #include <sys/proc.h>
48 #include <sys/atomic.h>
49 #include <sys/cpu.h>
50 
51 #include <sys/exec_aout.h>	/* for MID_* */
52 #include <sys/reboot.h>
53 
54 #include <uvm/uvm.h>
55 
56 #include <machine/pcb.h>
57 #include <machine/sparc64.h>
58 #include <machine/ctlreg.h>
59 #include <machine/promlib.h>
60 #include <machine/kcore.h>
61 #include <machine/bootinfo.h>
62 #ifdef SUN4V
63 #include <machine/hypervisor.h>
64 #endif
65 #include <machine/mdesc.h>
66 
67 #include <sparc64/sparc64/cache.h>
68 
69 #ifdef DDB
70 #include <machine/db_machdep.h>
71 #include <ddb/db_command.h>
72 #include <ddb/db_sym.h>
73 #include <ddb/db_variables.h>
74 #include <ddb/db_extern.h>
75 #include <ddb/db_access.h>
76 #include <ddb/db_output.h>
77 #else
78 #define Debugger()
79 #define db_printf	printf
80 #endif
81 
82 #define	MEG		(1<<20) /* 1MB */
83 #define	KB		(1<<10)	/* 1KB */
84 
85 paddr_t cpu0paddr;		/* contiguous phys memory preallocated for cpus */
86 
87 /* These routines are in assembly to allow access thru physical mappings */
88 extern int64_t pseg_get_real(struct pmap *, vaddr_t);
89 extern int pseg_set_real(struct pmap *, vaddr_t, int64_t, paddr_t);
90 
91 /*
92  * Diatribe on ref/mod counting:
93  *
94  * First of all, ref/mod info must be non-volatile.  Hence we need to keep it
95  * in the pv_entry structure for each page.  (We could bypass this for the
96  * vm_page, but that's a long story....)
97  *
98  * This architecture has nice, fast traps with lots of space for software bits
99  * in the TTE.  To accelerate ref/mod counts we make use of these features.
100  *
101  * When we map a page initially, we place a TTE in the page table.  It's
102  * inserted with the TLB_W and TLB_ACCESS bits cleared.  If a page is really
103  * writable we set the TLB_REAL_W bit for the trap handler.
104  *
105  * Whenever we take a TLB miss trap, the trap handler will set the TLB_ACCESS
106  * bit in the appropriate TTE in the page table.  Whenever we take a protection
107  * fault, if the TLB_REAL_W bit is set then we flip both the TLB_W and TLB_MOD
108  * bits to enable writing and mark the page as modified.
109  *
110  * This means that we may have ref/mod information all over the place.  The
111  * pmap routines must traverse the page tables of all pmaps with a given page
112  * and collect/clear all the ref/mod information and copy it into the pv_entry.
113  */
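/*
 * A minimal sketch of the sequence described above, using the TLB_* /
 * TSB_DATA() bit names already used in this file (illustration only; the
 * real transitions happen in pmap_enter() and the trap handlers):
 *
 *	data = TSB_DATA(...);		   TLB_W and TLB_ACCESS start clear
 *	if (writable)
 *		data |= TLB_REAL_W;	   let the fault handler upgrade it
 *
 *	TLB miss trap:	   data |= TLB_ACCESS;		     page referenced
 *	protection fault:  if (data & TLB_REAL_W)
 *				data |= TLB_W | TLB_MOD;     page modified
 */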
114 
115 #ifdef	NO_VCACHE
116 #define	FORCE_ALIAS	1
117 #else
118 #define FORCE_ALIAS	0
119 #endif
120 
121 #define	PV_ALIAS	0x1LL
122 #define PV_REF		0x2LL
123 #define PV_MOD		0x4LL
124 #define PV_NVC		0x8LL
125 #define PV_NC		0x10LL
126 #define PV_WE		0x20LL	/* Debug -- this page was writable sometime */
127 #define PV_MASK		(0x03fLL)
128 #define PV_VAMASK	(~(PAGE_SIZE - 1))
129 #define PV_MATCH(pv,va)	(!(((pv)->pv_va ^ (va)) & PV_VAMASK))
130 #define PV_SETVA(pv,va) ((pv)->pv_va = (((va) & PV_VAMASK) | \
131 					(((pv)->pv_va) & PV_MASK)))
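/*
 * A small worked example of how the flag bits above share pv_va with the
 * page-aligned virtual address (the VA value is made up for illustration):
 *
 *	pv->pv_va = 0x120002000 | PV_REF;	8K-aligned VA plus one flag
 *	PV_MATCH(pv, 0x120002000)		true: PV_VAMASK hides the flags
 *	PV_SETVA(pv, 0x120004000);		new VA, PV_REF is preserved
 */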
132 
133 struct pool_cache pmap_cache;
134 struct pool_cache pmap_pv_cache;
135 
136 pv_entry_t	pmap_remove_pv(struct pmap *, vaddr_t, struct vm_page *);
137 void	pmap_enter_pv(struct pmap *, vaddr_t, paddr_t, struct vm_page *,
138 			   pv_entry_t *);
139 void	pmap_page_cache(struct pmap *, paddr_t, int);
140 
141 /*
142  * First and last managed physical addresses.
143  * XXX only used for dumping the system.
144  */
145 paddr_t	vm_first_phys, vm_num_phys;
146 
147 /*
148  * Here's the CPU TSB stuff.  It's allocated in pmap_bootstrap.
149  */
150 int tsbsize;		/* tsbents = 512 * 2^tsbsize */
151 #define TSBENTS (512<<tsbsize)
152 #define	TSBSIZE	(TSBENTS * 16)
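/*
 * For reference, the sizes these macros produce (each TSB entry is a
 * 16-byte tag/data pair):
 *
 *	tsbsize 0 ->  512 entries ->  8KB per TSB
 *	tsbsize 1 -> 1024 entries -> 16KB per TSB
 *	tsbsize 2 -> 2048 entries -> 32KB per TSB
 *
 * pmap_bootstrap() picks tsbsize from the installed memory size and
 * cpu_pmap_prepare() allocates one such TSB each for the D-MMU and I-MMU.
 */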
153 
154 static struct pmap kernel_pmap_;
155 struct pmap *const kernel_pmap_ptr = &kernel_pmap_;
156 
157 static int ctx_alloc(struct pmap *);
158 static bool pmap_is_referenced_locked(struct vm_page *);
159 
160 static void ctx_free(struct pmap *, struct cpu_info *);
161 
162 /* set dmmu secondary context */
163 static __inline void
164 dmmu_set_secondary_context(uint ctx)
165 {
166 
167 	if (!CPU_ISSUN4V)
168 		__asm volatile(
169 			"stxa %0,[%1]%2;	"
170 			"membar #Sync		"
171 			: : "r" (ctx), "r" (CTX_SECONDARY), "n" (ASI_DMMU)
172 			: "memory");
173 	else
174 		__asm volatile(
175 			"stxa %0,[%1]%2;	"
176 			"membar #Sync		"
177 			: : "r" (ctx), "r" (CTX_SECONDARY), "n" (ASI_MMU_CONTEXTID)
178 			: "memory");
179 }
180 
181 /*
182  * Check if any MMU has a non-zero context
183  */
184 static inline bool
185 pmap_has_ctx(struct pmap *p)
186 {
187 	int i;
188 
189 	/* any context on any cpu? */
190 	for (i = 0; i < sparc_ncpus; i++)
191 		if (p->pm_ctx[i] > 0)
192 			return true;
193 
194 	return false;
195 }
196 
197 /*
198  * Check if this pmap has a live mapping on some MMU.
199  */
200 static inline bool
201 pmap_is_on_mmu(struct pmap *p)
202 {
203 	/* The kernel pmap is always on all MMUs */
204 	if (p == pmap_kernel())
205 		return true;
206 
207 	return pmap_has_ctx(p);
208 }
209 
210 /*
211  * Virtual and physical addresses of the start and end of kernel text
212  * and data segments.
213  */
214 vaddr_t ktext;
215 paddr_t ktextp;
216 vaddr_t ektext;
217 paddr_t ektextp;
218 vaddr_t kdata;
219 paddr_t kdatap;
220 vaddr_t ekdata;
221 paddr_t ekdatap;
222 
223 /*
224  * Kernel 4MB pages.
225  */
226 extern struct tlb_entry *kernel_tlbs;
227 extern int kernel_dtlb_slots, kernel_itlb_slots;
228 
229 static int npgs;
230 
231 vaddr_t	vmmap;			/* one reserved MI vpage for /dev/mem */
232 
233 int phys_installed_size;		/* Installed physical memory */
234 struct mem_region *phys_installed;
235 
236 paddr_t avail_start, avail_end;	/* These are used by ps & family */
237 
238 static int ptelookup_va(vaddr_t va);
239 
240 static inline void
241 clrx(void *addr)
242 {
243 	__asm volatile("clrx [%0]" : : "r" (addr) : "memory");
244 }
245 
246 static void
247 tsb_invalidate(vaddr_t va, pmap_t pm)
248 {
249 	struct cpu_info *ci;
250 	int ctx;
251 	bool kpm = (pm == pmap_kernel());
252 	int i;
253 	int64_t tag;
254 
255 	i = ptelookup_va(va);
256 #ifdef MULTIPROCESSOR
257 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
258 		if (!CPUSET_HAS(cpus_active, ci->ci_index))
259 			continue;
260 #else
261 		ci = curcpu();
262 #endif
263 		ctx = pm->pm_ctx[ci->ci_index];
264 		if (kpm || ctx > 0) {
265 			tag = TSB_TAG(0, ctx, va);
266 			if (ci->ci_tsb_dmmu[i].tag == tag) {
267 				clrx(&ci->ci_tsb_dmmu[i].data);
268 			}
269 			if (ci->ci_tsb_immu[i].tag == tag) {
270 				clrx(&ci->ci_tsb_immu[i].data);
271 			}
272 		}
273 #ifdef MULTIPROCESSOR
274 	}
275 #endif
276 }
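/*
 * Typical use, as in pmap_kremove() below: once a PTE has been changed
 * with pseg_set() (whose return value must still be checked), the cached
 * TSB entry and the hardware TLB entry for that VA are both stale and
 * get thrown away:
 *
 *	rv = pseg_set(pm, va, 0, 0);	   drop the mapping
 *	tsb_invalidate(va, pm);		   clear any cached TSB entry
 *	tlb_flush_pte(va, pm);		   flush the hardware TLB entry
 */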
277 
278 struct prom_map *prom_map;
279 int prom_map_size;
280 
281 #define	PDB_CREATE		0x000001
282 #define	PDB_DESTROY		0x000002
283 #define	PDB_REMOVE		0x000004
284 #define	PDB_CHANGEPROT		0x000008
285 #define	PDB_ENTER		0x000010
286 #define	PDB_DEMAP		0x000020	/* used in locore */
287 #define	PDB_REF			0x000040
288 #define	PDB_COPY		0x000080
289 #define	PDB_MMU_ALLOC		0x000100
290 #define	PDB_MMU_STEAL		0x000200
291 #define	PDB_CTX_ALLOC		0x000400
292 #define	PDB_CTX_STEAL		0x000800
293 #define	PDB_MMUREG_ALLOC	0x001000
294 #define	PDB_MMUREG_STEAL	0x002000
295 #define	PDB_CACHESTUFF		0x004000
296 #define	PDB_ALIAS		0x008000
297 #define PDB_EXTRACT		0x010000
298 #define	PDB_BOOT		0x020000
299 #define	PDB_BOOT1		0x040000
300 #define	PDB_GROW		0x080000
301 #define	PDB_CTX_FLUSHALL	0x100000
302 #define	PDB_ACTIVATE		0x200000
303 
304 #if defined(DEBUG) && !defined(PMAP_DEBUG)
305 #define PMAP_DEBUG
306 #endif
307 
308 #ifdef PMAP_DEBUG
309 struct {
310 	int kernel;	/* entering kernel mapping */
311 	int user;	/* entering user mapping */
312 	int ptpneeded;	/* needed to allocate a PT page */
313 	int pwchange;	/* no mapping change, just wiring or protection */
314 	int wchange;	/* no mapping change, just wiring */
315 	int mchange;	/* was mapped but mapping to different page */
316 	int managed;	/* a managed page */
317 	int firstpv;	/* first mapping for this PA */
318 	int secondpv;	/* second mapping for this PA */
319 	int ci;		/* cache inhibited */
320 	int unmanaged;	/* not a managed page */
321 	int flushes;	/* cache flushes */
322 	int cachehit;	/* new entry forced valid entry out */
323 } enter_stats;
324 struct {
325 	int calls;
326 	int removes;
327 	int flushes;
328 	int tflushes;	/* TLB flushes */
329 	int pidflushes;	/* HW pid stolen */
330 	int pvfirst;
331 	int pvsearch;
332 } remove_stats;
333 #define	ENTER_STAT(x)	do { enter_stats.x ++; } while (0)
334 #define	REMOVE_STAT(x)	do { remove_stats.x ++; } while (0)
335 
336 int	pmapdebug = 0;
337 //int	pmapdebug = 0 | PDB_CTX_ALLOC | PDB_ACTIVATE;
338 /* Number of H/W pages stolen for page tables */
339 int	pmap_pages_stolen = 0;
340 
341 #define	BDPRINTF(n, f)	if (pmapdebug & (n)) prom_printf f
342 #define	DPRINTF(n, f)	if (pmapdebug & (n)) printf f
343 #else
344 #define	ENTER_STAT(x)	do { /* nothing */ } while (0)
345 #define	REMOVE_STAT(x)	do { /* nothing */ } while (0)
346 #define	BDPRINTF(n, f)
347 #define	DPRINTF(n, f)
348 #define pmapdebug 0
349 #endif
350 
351 #define pv_check()
352 
353 static int pmap_get_page(paddr_t *);
354 static void pmap_free_page(paddr_t, sparc64_cpuset_t);
355 static void pmap_free_page_noflush(paddr_t);
356 
357 /*
358  * Global pmap locks.
359  */
360 static kmutex_t pmap_lock;
361 static bool lock_available = false;
362 
363 /*
364  * Support for big page sizes.  This maps the page size to the
365  * page bits.  That is: these are the bits between 8K pages and
366  * larger page sizes that cause aliasing.
367  */
368 #define PSMAP_ENTRY(MASK, CODE)	{ .mask = MASK, .code = CODE }
369 struct page_size_map page_size_map[] = {
370 #ifdef DEBUG
371 	PSMAP_ENTRY(0, PGSZ_8K & 0),	/* Disable large pages */
372 #endif
373 	PSMAP_ENTRY((4 * 1024 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_4M),
374 	PSMAP_ENTRY((512 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_512K),
375 	PSMAP_ENTRY((64 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_64K),
376 	PSMAP_ENTRY((8 * 1024 - 1) & ~(8 * 1024 - 1), PGSZ_8K),
377 	PSMAP_ENTRY(0, 0),
378 };
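/*
 * A sketch of how this table is consumed (this is the shape of the loop
 * used for the PROM mappings in pmap_bootstrap()): walk the entries from
 * largest to smallest and take the first page size whose alias bits are
 * clear for both the VA and the TTE, and which fits inside the mapping:
 *
 *	for (k = 0; page_size_map[k].mask; k++)
 *		if (((va | tte) & page_size_map[k].mask) == 0 &&
 *		    page_size_map[k].mask < size)
 *			break;
 *	data |= page_size_map[k].code;
 */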
379 
380 /*
381  * This probably shouldn't be necessary, but it stops USIII machines from
382  * breaking in general, and not just for MULTIPROCESSOR.
383  */
384 #define USE_LOCKSAFE_PSEG_GETSET
385 #if defined(USE_LOCKSAFE_PSEG_GETSET)
386 
387 static kmutex_t pseg_lock;
388 
389 static __inline__ int64_t
390 pseg_get_locksafe(struct pmap *pm, vaddr_t va)
391 {
392 	int64_t rv;
393 	bool took_lock = lock_available /*&& pm == pmap_kernel()*/;
394 
395 	if (__predict_true(took_lock))
396 		mutex_enter(&pseg_lock);
397 	rv = pseg_get_real(pm, va);
398 	if (__predict_true(took_lock))
399 		mutex_exit(&pseg_lock);
400 	return rv;
401 }
402 
403 static __inline__ int
404 pseg_set_locksafe(struct pmap *pm, vaddr_t va, int64_t data, paddr_t ptp)
405 {
406 	int rv;
407 	bool took_lock = lock_available /*&& pm == pmap_kernel()*/;
408 
409 	if (__predict_true(took_lock))
410 		mutex_enter(&pseg_lock);
411 	rv = pseg_set_real(pm, va, data, ptp);
412 	if (__predict_true(took_lock))
413 		mutex_exit(&pseg_lock);
414 	return rv;
415 }
416 
417 #define pseg_get(pm, va)		pseg_get_locksafe(pm, va)
418 #define pseg_set(pm, va, data, ptp)	pseg_set_locksafe(pm, va, data, ptp)
419 
420 #else /* USE_LOCKSAFE_PSEG_GETSET */
421 
422 #define pseg_get(pm, va)		pseg_get_real(pm, va)
423 #define pseg_set(pm, va, data, ptp)	pseg_set_real(pm, va, data, ptp)
424 
425 #endif /* USE_LOCKSAFE_PSEG_GETSET */
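/*
 * Whichever variant is selected above, the calling convention is the same
 * throughout this file: pseg_get() returns the 64-bit TTE data (check
 * TLB_V for validity) and pseg_set() returns a status whose low bits say
 * whether the spare page table page was consumed or another one is still
 * needed.  A minimal sketch of the read side, as used in pmap_kremove():
 *
 *	data = pseg_get(pm, va);
 *	if (data & TLB_V) {
 *		pa = data & TLB_PA_MASK;
 *		...
 *	}
 */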
426 
427 /*
428  * Enter a TTE into the kernel pmap only.  Don't do anything else.
429  *
430  * Use only during bootstrapping since it does no locking and
431  * can lose ref/mod info!!!!
432  *
433  */
434 static void pmap_enter_kpage(vaddr_t va, int64_t data)
435 {
436 	paddr_t newp;
437 
438 	newp = 0UL;
439 	while (pseg_set(pmap_kernel(), va, data, newp) & 1) {
440 		if (!pmap_get_page(&newp)) {
441 			prom_printf("pmap_enter_kpage: out of pages\n");
442 			panic("pmap_enter_kpage");
443 		}
444 
445 		ENTER_STAT(ptpneeded);
446 		BDPRINTF(PDB_BOOT1,
447 			 ("pseg_set: pm=%p va=%p data=%lx newp %lx\n",
448 			  pmap_kernel(), va, (long)data, (long)newp));
449 		if (pmapdebug & PDB_BOOT1)
450 		{int i; for (i=0; i<140000000; i++) ;}
451 	}
452 }
453 
454 /*
455  * Check the bootargs to see if we need to enable bootdebug.
456  */
457 #ifdef DEBUG
458 static void pmap_bootdebug(void)
459 {
460 	const char *cp = prom_getbootargs();
461 
462 	for (;;)
463 		switch (*++cp) {
464 		case '\0':
465 			return;
466 		case 'V':
467 			pmapdebug |= PDB_BOOT|PDB_BOOT1;
468 			break;
469 		case 'D':
470 			pmapdebug |= PDB_BOOT1;
471 			break;
472 		}
473 }
474 #else
475 #define pmap_bootdebug()	/* nothing */
476 #endif
477 
478 
479 /*
480  * Calculate the correct number of page colors to use.  For each CPU this
481  * is the E$ size per way divided by PAGE_SIZE.  Different CPUs can have
482  * different sized E$, so we use the largest color count any of them needs.
483  */
484 static int pmap_calculate_colors(void)
485 {
486 	int node;
487 	int size, assoc, color, maxcolor = 1;
488 
489 	for (node = prom_firstchild(prom_findroot()); node != 0;
490 	     node = prom_nextsibling(node)) {
491 		char *name = prom_getpropstring(node, "device_type");
492 		if (strcmp("cpu", name) != 0)
493 			continue;
494 
495 		/* Found a CPU, get the E$ info. */
496 		size = cpu_ecache_size(node);
497 		if (size == 0) {
498 			prom_printf("pmap_calculate_colors: node %x has "
499 				"no ecache-size\n", node);
500 			/* If we can't get the E$ size, skip the node */
501 			continue;
502 		}
503 
504 		assoc = cpu_ecache_associativity(node);
505 		color = size/assoc/PAGE_SIZE;
506 		if (color > maxcolor)
507 			maxcolor = color;
508 	}
509 	return (maxcolor);
510 }
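/*
 * Worked example for the loop above, with assumed (but typical) numbers:
 * a CPU with a 4MB, 4-way associative E$ and 8KB pages yields
 * 4MB / 4 / 8KB = 128 colors; a 512KB direct-mapped E$ yields 64.  The
 * largest value found among all CPUs becomes uvmexp.ncolors in
 * pmap_bootstrap().
 */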
511 
512 static void pmap_alloc_bootargs(void)
513 {
514 	char *v;
515 
516 	v = OF_claim(NULL, 2*PAGE_SIZE, PAGE_SIZE);
517 	if ((v == NULL) || (v == (void*)-1))
518 		panic("Can't claim two pages of memory.");
519 
520 	memset(v, 0, 2*PAGE_SIZE);
521 
522 	cpu_args = (struct cpu_bootargs*)v;
523 }
524 
525 #if defined(MULTIPROCESSOR)
526 static void pmap_mp_init(void);
527 
528 static void
529 pmap_mp_init(void)
530 {
531 	pte_t *tp;
532 	char *v;
533 	int i;
534 
535 	extern void cpu_mp_startup(void);
536 
537 	if ((v = OF_claim(NULL, PAGE_SIZE, PAGE_SIZE)) == NULL) {
538 		panic("pmap_mp_init: Cannot claim a page.");
539 	}
540 
541 	memcpy(v, mp_tramp_code, mp_tramp_code_len);
542 	*(u_long *)(v + mp_tramp_dtlb_slots) = kernel_dtlb_slots;
543 	*(u_long *)(v + mp_tramp_itlb_slots) = kernel_itlb_slots;
544 	*(u_long *)(v + mp_tramp_func) = (u_long)cpu_mp_startup;
545 	*(u_long *)(v + mp_tramp_ci) = (u_long)cpu_args;
546 	tp = (pte_t *)(v + mp_tramp_code_len);
547 	for (i = 0; i < kernel_dtlb_slots; i++) {
548 		tp[i].tag  = kernel_tlbs[i].te_va;
549 		tp[i].data = TSB_DATA(0,		/* g */
550 				PGSZ_4M,		/* sz */
551 				kernel_tlbs[i].te_pa,	/* pa */
552 				1, /* priv */
553 				0, /* write */
554 				1, /* cache */
555 				1, /* aliased */
556 				1, /* valid */
557 				0, /* ie */
558 				0  /* wc */);
559 		tp[i].data |= TLB_L | TLB_CV;
560 
561 		if (i >= kernel_itlb_slots) {
562 			tp[i].data |= TLB_W;
563 		} else {
564 			if (CPU_ISSUN4V)
565 				tp[i].data |= SUN4V_TLB_X;
566 		}
567 
568 		DPRINTF(PDB_BOOT1, ("xtlb[%d]: Tag: %" PRIx64 " Data: %"
569 				PRIx64 "\n", i, tp[i].tag, tp[i].data));
570 	}
571 
572 	for (i = 0; i < PAGE_SIZE; i += sizeof(long))
573 		sparc_flush_icache(v + i);
574 
575 	cpu_spinup_trampoline = (vaddr_t)v;
576 }
577 #else
578 #define pmap_mp_init()	((void)0)
579 #endif
580 
581 paddr_t pmap_kextract(vaddr_t va);
582 
583 paddr_t
584 pmap_kextract(vaddr_t va)
585 {
586 	int i;
587 	paddr_t paddr = (paddr_t)-1;
588 
589 	for (i = 0; i < kernel_dtlb_slots; i++) {
590 		if ((va & ~PAGE_MASK_4M) == kernel_tlbs[i].te_va) {
591 			paddr = kernel_tlbs[i].te_pa +
592 				(paddr_t)(va & PAGE_MASK_4M);
593 			break;
594 		}
595 	}
596 
597 	if (i == kernel_dtlb_slots) {
598 		panic("pmap_kextract: Address %p is not from kernel space.\n"
599 				"Data segment is too small?\n", (void*)va);
600 	}
601 
602 	return (paddr);
603 }
604 
605 /*
606  * Bootstrap kernel allocator; it allocates from unused space in the 4MB
607  * kernel data segment, meaning that:
608  *
609  * - Access to allocated memory will never generate a trap
610  * - Allocated chunks are never reclaimed or freed
611  * - Allocation calls do not change PROM memlists
612  */
613 static struct mem_region kdata_mem_pool;
614 
615 static void
616 kdata_alloc_init(vaddr_t va_start, vaddr_t va_end)
617 {
618 	vsize_t va_size = va_end - va_start;
619 
620 	kdata_mem_pool.start = va_start;
621 	kdata_mem_pool.size  = va_size;
622 
623 	BDPRINTF(PDB_BOOT, ("kdata_alloc_init(): %d bytes @%p.\n", va_size,
624 				va_start));
625 }
626 
627 static vaddr_t
628 kdata_alloc(vsize_t size, vsize_t align)
629 {
630 	vaddr_t va;
631 	vsize_t asize;
632 
633 	asize = roundup(kdata_mem_pool.start, align) - kdata_mem_pool.start;
634 
635 	kdata_mem_pool.start += asize;
636 	kdata_mem_pool.size  -= asize;
637 
638 	if (kdata_mem_pool.size < size) {
639 		panic("kdata_alloc(): Data segment is too small.\n");
640 	}
641 
642 	va = kdata_mem_pool.start;
643 	kdata_mem_pool.start += size;
644 	kdata_mem_pool.size  -= size;
645 
646 	BDPRINTF(PDB_BOOT, ("kdata_alloc(): Allocated %d@%p, %d free.\n",
647 				size, (void*)va, kdata_mem_pool.size));
648 
649 	return (va);
650 }
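/*
 * A short usage sketch, matching the calls made later in this file:
 *
 *	kdata_alloc_init(kernelend + 1 * 1024 * 1024, ekdata);
 *	...
 *	ci->ci_tsb_dmmu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
 *
 * Note that the alignment is applied by advancing the pool start, so a
 * large "align" (such as TSBSIZE above) can consume a sizeable chunk of
 * the remaining data segment space.
 */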
651 
652 /*
653  * Unified routine for reading PROM properties.
654  */
655 static void
656 pmap_read_memlist(const char *device, const char *property, void **ml,
657 		  int *ml_size, vaddr_t (* ml_alloc)(vsize_t, vsize_t))
658 {
659 	void *va;
660 	int size, handle;
661 
662 	if ( (handle = prom_finddevice(device)) == 0) {
663 		prom_printf("pmap_read_memlist(): No %s device found.\n",
664 				device);
665 		prom_halt();
666 	}
667 	if ( (size = OF_getproplen(handle, property)) < 0) {
668 		prom_printf("pmap_read_memlist(): %s/%s has no length.\n",
669 				device, property);
670 		prom_halt();
671 	}
672 	if ( (va = (void*)(* ml_alloc)(size, sizeof(uint64_t))) == NULL) {
673 		prom_printf("pmap_read_memlist(): Cannot allocate memlist.\n");
674 		prom_halt();
675 	}
676 	if (OF_getprop(handle, property, va, size) <= 0) {
677 		prom_printf("pmap_read_memlist(): Cannot read %s/%s.\n",
678 				device, property);
679 		prom_halt();
680 	}
681 
682 	*ml = va;
683 	*ml_size = size;
684 }
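/*
 * Usage, as in pmap_bootstrap() below; the allocator argument lets the
 * caller decide where the property buffer lives:
 *
 *	pmap_read_memlist("/memory", "reg", &prom_memlist,
 *			&prom_memlist_size, kdata_alloc);
 *	phys_installed = prom_memlist;
 *	phys_installed_size = prom_memlist_size / sizeof(*phys_installed);
 */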
685 
686 /*
687  * This is called during bootstrap, before the system is really initialized.
688  *
689  * It's called with the start and end virtual addresses of the kernel.  We
690  * bootstrap the pmap allocator now.  We will allocate the basic structures we
691  * need to bootstrap the VM system here: the page frame tables, the TSB, and
692  * the free memory lists.
693  *
694  * Now all this is becoming a bit obsolete.  maxctx is still important, but by
695  * separating the kernel text and data segments we really would need to
696  * provide the start and end of each segment.  But we can't.  The rodata
697  * segment is attached to the end of the kernel segment and has nothing to
698  * delimit its end.  We could still pass in the beginning of the kernel and
699  * the beginning and end of the data segment but we could also just as easily
700  * calculate that all in here.
701  *
702  * To handle the kernel text, we need to do a reverse mapping of the start of
703  * the kernel, then traverse the free memory lists to find out how big it is.
704  */
705 
706 void
707 pmap_bootstrap(u_long kernelstart, u_long kernelend)
708 {
709 #ifdef MODULAR
710 	extern vaddr_t module_start, module_end;
711 #endif
712 	extern char etext[], data_start[];	/* start of data segment */
713 	extern int msgbufmapped;
714 	struct mem_region *mp, *mp1, *avail, *orig;
715 	int i, j, pcnt, msgbufsiz;
716 	size_t s, sz;
717 	int64_t data;
718 	vaddr_t va, intstk;
719 	uint64_t phys_msgbuf;
720 	paddr_t newp = 0;
721 
722 	void *prom_memlist;
723 	int prom_memlist_size;
724 
725 	BDPRINTF(PDB_BOOT, ("Entered pmap_bootstrap.\n"));
726 
727 	/* XXX - incomplete spinup code for SUN4V */
728 	if (CPU_ISSUN4V)
729 		boothowto |= RB_MD1;
730 
731 	cache_setup_funcs();
732 
733 	/*
734 	 * Calculate kernel size.
735 	 */
736 	ktext   = kernelstart;
737 	ktextp  = pmap_kextract(ktext);
738 	ektext  = roundup((vaddr_t)etext, PAGE_SIZE_4M);
739 	ektextp = roundup(pmap_kextract((vaddr_t)etext), PAGE_SIZE_4M);
740 
741 	kdata   = (vaddr_t)data_start;
742 	kdatap  = pmap_kextract(kdata);
743 	ekdata  = roundup(kernelend, PAGE_SIZE_4M);
744 	ekdatap = roundup(pmap_kextract(kernelend), PAGE_SIZE_4M);
745 
746 	BDPRINTF(PDB_BOOT, ("Virtual layout: text %lx-%lx, data %lx-%lx.\n",
747 				ktext, ektext, kdata, ekdata));
748 	BDPRINTF(PDB_BOOT, ("Physical layout: text %lx-%lx, data %lx-%lx.\n",
749 				ktextp, ektextp, kdatap, ekdatap));
750 
751 	/* Initialize bootstrap allocator. */
752 	kdata_alloc_init(kernelend + 1 * 1024 * 1024, ekdata);
753 
754 	/* make sure we have access to the mdesc data on SUN4V machines */
755 	if (CPU_ISSUN4V) {
756 		vaddr_t m_va;
757 		psize_t m_len;
758 		paddr_t m_pa;
759 
760 		m_len = mdesc_get_len();
761 		m_va = kdata_alloc(m_len, 16);
762 		m_pa = pmap_kextract(m_va);
763 		mdesc_init(m_va, m_pa, m_len);
764 	}
765 
766 	pmap_bootdebug();
767 	pmap_alloc_bootargs();
768 	pmap_mp_init();
769 
770 	/*
771 	 * set machine page size
772 	 */
773 	uvmexp.pagesize = NBPG;
774 	uvmexp.ncolors = pmap_calculate_colors();
775 	uvm_md_init();
776 
777 	/*
778 	 * Get hold of the message buffer.
779 	 */
780 	msgbufp = (struct kern_msgbuf *)(vaddr_t)MSGBUF_VA;
781 	msgbufsiz = MSGBUFSIZE;
782 	BDPRINTF(PDB_BOOT, ("Trying to allocate msgbuf at %lx, size %lx\n",
783 			    (long)msgbufp, (long)msgbufsiz));
784 	if ((long)msgbufp !=
785 	    (long)(phys_msgbuf = prom_claim_virt((vaddr_t)msgbufp, msgbufsiz)))
786 		prom_printf(
787 		    "cannot get msgbuf VA, msgbufp=%p, phys_msgbuf=%lx\n",
788 		    (void *)msgbufp, (long)phys_msgbuf);
789 	phys_msgbuf = prom_get_msgbuf(msgbufsiz, MMU_PAGE_ALIGN);
790 	BDPRINTF(PDB_BOOT,
791 		("We should have the memory at %lx, let's map it in\n",
792 			phys_msgbuf));
793 	if (prom_map_phys(phys_msgbuf, msgbufsiz, (vaddr_t)msgbufp,
794 			  -1/* sunos does this */) == -1) {
795 		prom_printf("Failed to map msgbuf\n");
796 	} else {
797 		BDPRINTF(PDB_BOOT, ("msgbuf mapped at %p\n",
798 			(void *)msgbufp));
799 	}
800 	msgbufmapped = 1;	/* enable message buffer */
801 	initmsgbuf((void *)msgbufp, msgbufsiz);
802 
803 	/*
804 	 * Find out how much RAM we have installed.
805 	 */
806 	BDPRINTF(PDB_BOOT, ("pmap_bootstrap: getting phys installed\n"));
807 	pmap_read_memlist("/memory", "reg", &prom_memlist, &prom_memlist_size,
808 			kdata_alloc);
809 	phys_installed = prom_memlist;
810 	phys_installed_size = prom_memlist_size / sizeof(*phys_installed);
811 
812 	if (pmapdebug & PDB_BOOT1) {
813 		/* print out mem list */
814 		prom_printf("Installed physical memory:\n");
815 		for (i = 0; i < phys_installed_size; i++) {
816 			prom_printf("memlist start %lx size %lx\n",
817 					(u_long)phys_installed[i].start,
818 					(u_long)phys_installed[i].size);
819 		}
820 	}
821 
822 	BDPRINTF(PDB_BOOT1, ("Calculating physmem:"));
823 	for (i = 0; i < phys_installed_size; i++)
824 		physmem += btoc(phys_installed[i].size);
825 	BDPRINTF(PDB_BOOT1, (" result %x or %d pages\n",
826 			     (int)physmem, (int)physmem));
827 
828 	/*
829 	 * Calculate approx TSB size.  This probably needs tweaking.
830 	 */
831 	if (physmem < btoc(64 * 1024 * 1024))
832 		tsbsize = 0;
833 	else if (physmem < btoc(512 * 1024 * 1024))
834 		tsbsize = 1;
835 	else
836 		tsbsize = 2;
837 
838 	/*
839 	 * Save the prom translations
840 	 */
841 	pmap_read_memlist("/virtual-memory", "translations", &prom_memlist,
842 			&prom_memlist_size, kdata_alloc);
843 	prom_map = prom_memlist;
844 	prom_map_size = prom_memlist_size / sizeof(struct prom_map);
845 
846 	if (pmapdebug & PDB_BOOT) {
847 		/* print out mem list */
848 		prom_printf("Prom xlations:\n");
849 		for (i = 0; i < prom_map_size; i++) {
850 			prom_printf("start %016lx size %016lx tte %016lx\n",
851 				    (u_long)prom_map[i].vstart,
852 				    (u_long)prom_map[i].vsize,
853 				    (u_long)prom_map[i].tte);
854 		}
855 		prom_printf("End of prom xlations\n");
856 	}
857 
858 	/*
859 	 * Here's a quick in-lined reverse bubble sort.  It gets rid of
860 	 * any translations inside the kernel data VA range.
861 	 */
862 	for (i = 0; i < prom_map_size; i++) {
863 		for (j = i; j < prom_map_size; j++) {
864 			if (prom_map[j].vstart > prom_map[i].vstart) {
865 				struct prom_map tmp;
866 
867 				tmp = prom_map[i];
868 				prom_map[i] = prom_map[j];
869 				prom_map[j] = tmp;
870 			}
871 		}
872 	}
873 	if (pmapdebug & PDB_BOOT) {
874 		/* print out mem list */
875 		prom_printf("Prom xlations:\n");
876 		for (i = 0; i < prom_map_size; i++) {
877 			prom_printf("start %016lx size %016lx tte %016lx\n",
878 				    (u_long)prom_map[i].vstart,
879 				    (u_long)prom_map[i].vsize,
880 				    (u_long)prom_map[i].tte);
881 		}
882 		prom_printf("End of prom xlations\n");
883 	}
884 
885 	/*
886 	 * Allocate ncpu*64KB of pages for the cpu_info & stack structures now.
887 	 */
888 	cpu0paddr = prom_alloc_phys(8 * PAGE_SIZE * sparc_ncpus, 8 * PAGE_SIZE);
889 	if (cpu0paddr == 0) {
890 		prom_printf("Cannot allocate cpu_infos\n");
891 		prom_halt();
892 	}
893 
894 	/*
895 	 * Now the kernel text segment is in its final location we can try to
896 	 * find out how much memory really is free.
897 	 */
898 	pmap_read_memlist("/memory", "available", &prom_memlist,
899 			&prom_memlist_size, kdata_alloc);
900 	orig = prom_memlist;
901 	sz  = prom_memlist_size;
902 	pcnt = prom_memlist_size / sizeof(*orig);
903 
904 	BDPRINTF(PDB_BOOT1, ("Available physical memory:\n"));
905 	avail = (struct mem_region*)kdata_alloc(sz, sizeof(uint64_t));
906 	for (i = 0; i < pcnt; i++) {
907 		avail[i] = orig[i];
908 		BDPRINTF(PDB_BOOT1, ("memlist start %lx size %lx\n",
909 					(u_long)orig[i].start,
910 					(u_long)orig[i].size));
911 	}
912 	BDPRINTF(PDB_BOOT1, ("End of available physical memory\n"));
913 
914 	BDPRINTF(PDB_BOOT, ("ktext %08lx[%08lx] - %08lx[%08lx] : "
915 				"kdata %08lx[%08lx] - %08lx[%08lx]\n",
916 				(u_long)ktext, (u_long)ktextp,
917 				(u_long)ektext, (u_long)ektextp,
918 				(u_long)kdata, (u_long)kdatap,
919 				(u_long)ekdata, (u_long)ekdatap));
920 	if (pmapdebug & PDB_BOOT1) {
921 		/* print out mem list */
922 		prom_printf("Available %lx physical memory before cleanup:\n",
923 			    (u_long)avail);
924 		for (i = 0; i < pcnt; i++) {
925 			prom_printf("memlist start %lx size %lx\n",
926 				    (u_long)avail[i].start,
927 				    (u_long)avail[i].size);
928 		}
929 		prom_printf("End of available physical memory before cleanup\n");
930 		prom_printf("kernel physical text size %08lx - %08lx\n",
931 			    (u_long)ktextp, (u_long)ektextp);
932 		prom_printf("kernel physical data size %08lx - %08lx\n",
933 			    (u_long)kdatap, (u_long)ekdatap);
934 	}
935 
936 	/*
937 	 * Here's another quick in-lined bubble sort.
938 	 */
939 	for (i = 0; i < pcnt; i++) {
940 		for (j = i; j < pcnt; j++) {
941 			if (avail[j].start < avail[i].start) {
942 				struct mem_region tmp;
943 				tmp = avail[i];
944 				avail[i] = avail[j];
945 				avail[j] = tmp;
946 			}
947 		}
948 	}
949 
950 	/* Throw away page zero if we have it. */
951 	if (avail->start == 0) {
952 		avail->start += PAGE_SIZE;
953 		avail->size -= PAGE_SIZE;
954 	}
955 
956 	/*
957 	 * Now we need to remove the area we valloc'ed from the available
958 	 * memory lists.  (NB: we may have already alloc'ed the entire space).
959 	 */
960 	npgs = 0;
961 	for (mp = avail, i = 0; i < pcnt; i++, mp = &avail[i]) {
962 		/*
963 		 * Now page align the start of the region.
964 		 */
965 		s = mp->start % PAGE_SIZE;
966 		if (mp->size >= s) {
967 			mp->size -= s;
968 			mp->start += s;
969 		}
970 		/*
971 		 * And now align the size of the region.
972 		 */
973 		mp->size -= mp->size % PAGE_SIZE;
974 		/*
975 		 * Check whether some memory is left here.
976 		 */
977 		if (mp->size == 0) {
978 			memcpy(mp, mp + 1,
979 			      (pcnt - (mp - avail)) * sizeof *mp);
980 			pcnt--;
981 			mp--;
982 			continue;
983 		}
984 		s = mp->start;
985 		sz = mp->size;
986 		npgs += btoc(sz);
987 		for (mp1 = avail; mp1 < mp; mp1++)
988 			if (s < mp1->start)
989 				break;
990 		if (mp1 < mp) {
991 			memcpy(mp1 + 1, mp1, (char *)mp - (char *)mp1);
992 			mp1->start = s;
993 			mp1->size = sz;
994 		}
995 #ifdef DEBUG
996 /* Clear all memory we give to the VM system.  I want to make sure
997  * the PROM isn't using it for something, so this should break the PROM.
998  */
999 
1000 /* Calling pmap_zero_page() at this point also hangs some machines
1001  * so don't do it at all. -- pk 26/02/2002
1002  */
1003 #if 0
1004 		{
1005 			paddr_t p;
1006 			for (p = mp->start; p < mp->start+mp->size;
1007 			     p += PAGE_SIZE)
1008 				pmap_zero_page(p);
1009 		}
1010 #endif
1011 #endif /* DEBUG */
1012 		/*
1013 		 * In future we should be able to specify both allocated
1014 		 * and free.
1015 		 */
1016 		BDPRINTF(PDB_BOOT1, ("uvm_page_physload(%lx, %lx)\n",
1017 					(long)mp->start,
1018 					(long)(mp->start + mp->size)));
1019 		uvm_page_physload(
1020 			atop(mp->start),
1021 			atop(mp->start+mp->size),
1022 			atop(mp->start),
1023 			atop(mp->start+mp->size),
1024 			VM_FREELIST_DEFAULT);
1025 	}
1026 
1027 	if (pmapdebug & PDB_BOOT) {
1028 		/* print out mem list */
1029 		prom_printf("Available physical memory after cleanup:\n");
1030 		for (i = 0; i < pcnt; i++) {
1031 			prom_printf("avail start %lx size %lx\n",
1032 				    (long)avail[i].start, (long)avail[i].size);
1033 		}
1034 		prom_printf("End of available physical memory after cleanup\n");
1035 	}
1036 
1037 	/*
1038 	 * Allocate and clear out pmap_kernel()->pm_segs[]
1039 	 */
1040 	pmap_kernel()->pm_refs = 1;
1041 	memset(&pmap_kernel()->pm_ctx, 0, sizeof(pmap_kernel()->pm_ctx));
1042 
1043 	/* Throw away page zero */
1044 	do {
1045 		pmap_get_page(&newp);
1046 	} while (!newp);
1047 	pmap_kernel()->pm_segs=(paddr_t *)(u_long)newp;
1048 	pmap_kernel()->pm_physaddr = newp;
1049 
1050 	/*
1051 	 * finish filling out kernel pmap.
1052 	 */
1053 
1054 	BDPRINTF(PDB_BOOT, ("pmap_kernel()->pm_physaddr = %lx\n",
1055 	    (long)pmap_kernel()->pm_physaddr));
1056 	/*
1057 	 * Tell pmap about our mesgbuf -- Hope this works already
1058 	 */
1059 	BDPRINTF(PDB_BOOT1, ("Calling consinit()\n"));
1060 	if (pmapdebug & PDB_BOOT1)
1061 		consinit();
1062 	BDPRINTF(PDB_BOOT1, ("Inserting mesgbuf into pmap_kernel()\n"));
1063 	/* it's not safe to call pmap_enter so we need to do this ourselves */
1064 	va = (vaddr_t)msgbufp;
1065 	while (msgbufsiz) {
1066 		data = TSB_DATA(0 /* global */,
1067 			PGSZ_8K,
1068 			phys_msgbuf,
1069 			1 /* priv */,
1070 			1 /* Write */,
1071 			1 /* Cacheable */,
1072 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1073 			1 /* valid */,
1074 			0 /* IE */,
1075 			0 /* wc */);
1076 		pmap_enter_kpage(va, data);
1077 		va += PAGE_SIZE;
1078 		msgbufsiz -= PAGE_SIZE;
1079 		phys_msgbuf += PAGE_SIZE;
1080 	}
1081 	BDPRINTF(PDB_BOOT1, ("Done inserting mesgbuf into pmap_kernel()\n"));
1082 
1083 	BDPRINTF(PDB_BOOT1, ("Inserting PROM mappings into pmap_kernel()\n"));
1084 	for (i = 0; i < prom_map_size; i++)
1085 		if (prom_map[i].vstart && ((prom_map[i].vstart >> 32) == 0))
1086 			for (j = 0; j < prom_map[i].vsize; j += PAGE_SIZE) {
1087 				int k;
1088 
1089 				for (k = 0; page_size_map[k].mask; k++) {
1090 					if (((prom_map[i].vstart |
1091 					      prom_map[i].tte) &
1092 					      page_size_map[k].mask) == 0 &&
1093 					      page_size_map[k].mask <
1094 					      prom_map[i].vsize)
1095 						break;
1096 				}
1097 				page_size_map[k].use++;
1098 				/* Enter PROM map into pmap_kernel() */
1099 				pmap_enter_kpage(prom_map[i].vstart + j,
1100 					(prom_map[i].tte + j) | TLB_EXEC |
1101 					page_size_map[k].code);
1102 			}
1103 	BDPRINTF(PDB_BOOT1, ("Done inserting PROM mappings into pmap_kernel()\n"));
1104 
1105 	/*
1106 	 * Fix up start of kernel heap.
1107 	 */
1108 	vmmap = (vaddr_t)roundup(ekdata, 4*MEG);
1109 	/* Let's keep 1 page of redzone after the kernel */
1110 	vmmap += PAGE_SIZE;
1111 	{
1112 		extern void main(void);
1113 		vaddr_t u0va;
1114 		paddr_t pa;
1115 
1116 		u0va = vmmap;
1117 
1118 		BDPRINTF(PDB_BOOT1,
1119 			("Inserting lwp0 USPACE into pmap_kernel() at %p\n",
1120 				vmmap));
1121 
1122 		while (vmmap < u0va + 2*USPACE) {
1123 			int64_t data1;
1124 
1125 			if (!pmap_get_page(&pa))
1126 				panic("pmap_bootstrap: no pages");
1127 			prom_map_phys(pa, PAGE_SIZE, vmmap, -1);
1128 			data1 = TSB_DATA(0 /* global */,
1129 				PGSZ_8K,
1130 				pa,
1131 				1 /* priv */,
1132 				1 /* Write */,
1133 				1 /* Cacheable */,
1134 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1135 				1 /* valid */,
1136 				0 /* ei */,
1137 				0 /* WC */);
1138 			pmap_enter_kpage(vmmap, data1);
1139 			vmmap += PAGE_SIZE;
1140 		}
1141 		BDPRINTF(PDB_BOOT1,
1142 			 ("Done inserting stack 0 into pmap_kernel()\n"));
1143 
1144 		/* Now map in and initialize our cpu_info structure */
1145 #ifdef DIAGNOSTIC
1146 		vmmap += PAGE_SIZE; /* redzone -- XXXX do we need one? */
1147 #endif
1148 		if ((vmmap ^ INTSTACK) & VA_ALIAS_MASK)
1149 			vmmap += PAGE_SIZE; /* Matchup virtual color for D$ */
1150 		intstk = vmmap;
1151 		cpus = (struct cpu_info *)(intstk + CPUINFO_VA - INTSTACK);
1152 
1153 		BDPRINTF(PDB_BOOT1,
1154 			("Inserting cpu_info into pmap_kernel() at %p\n",
1155 				 cpus));
1156 		/* Now map in all 8 pages of interrupt stack/cpu_info */
1157 		pa = cpu0paddr;
1158 		prom_map_phys(pa, 64*KB, vmmap, -1);
1159 
1160 		/*
1161 		 * Also map it in as the interrupt stack.
1162 		 * This lets the PROM see this if needed.
1163 		 *
1164 		 * XXXX locore.s does not flush these mappings
1165 		 * before installing the locked TTE.
1166 		 */
1167 		prom_map_phys(pa, 64*KB, INTSTACK, -1);
1168 		for (i = 0; i < 8; i++) {
1169 			int64_t data1;
1170 
1171 			data1 = TSB_DATA(0 /* global */,
1172 				PGSZ_8K,
1173 				pa,
1174 				1 /* priv */,
1175 				1 /* Write */,
1176 				1 /* Cacheable */,
1177 				FORCE_ALIAS /* ALIAS -- Disable D$ */,
1178 				1 /* valid */,
1179 				0 /* IE */,
1180 				0 /* wc */);
1181 			pmap_enter_kpage(vmmap, data1);
1182 			vmmap += PAGE_SIZE;
1183 			pa += PAGE_SIZE;
1184 		}
1185 		BDPRINTF(PDB_BOOT1, ("Initializing cpu_info\n"));
1186 
1187 		/* Initialize our cpu_info structure */
1188 		memset((void *)intstk, 0, 64 * KB);
1189 		cpus->ci_self = cpus;
1190 		cpus->ci_next = NULL;
1191 		cpus->ci_curlwp = &lwp0;
1192 		cpus->ci_flags = CPUF_PRIMARY;
1193 		cpus->ci_cpuid = cpu_myid();
1194 		cpus->ci_fplwp = NULL;
1195 		cpus->ci_eintstack = NULL;
1196 		cpus->ci_spinup = main; /* Call main when we're running. */
1197 		cpus->ci_paddr = cpu0paddr;
1198 		if (CPU_ISSUN4V) {
1199 			cpus->ci_mmufsa = cpu0paddr;
1200 			cpus->ci_tsb_desc = NULL;
1201 		}
1202 		cpus->ci_cpcb = (struct pcb *)u0va;
1203 		cpus->ci_idepth = -1;
1204 		memset(cpus->ci_intrpending, -1, sizeof(cpus->ci_intrpending));
1205 
1206 		uvm_lwp_setuarea(&lwp0, u0va);
1207 		lwp0.l_md.md_tf = (struct trapframe64*)(u0va + USPACE
1208 		    - sizeof(struct trapframe64));
1209 
1210 		cpu0paddr += 64 * KB;
1211 
1212 		CPUSET_CLEAR(cpus_active);
1213 		CPUSET_ADD(cpus_active, 0);
1214 
1215 		cpu_pmap_prepare(cpus, true);
1216 		cpu_pmap_init(cpus);
1217 
1218 		/* The rest will be done at CPU attach time. */
1219 		BDPRINTF(PDB_BOOT1,
1220 			 ("Done inserting cpu_info into pmap_kernel()\n"));
1221 	}
1222 
1223 	vmmap = (vaddr_t)reserve_dumppages((void *)(u_long)vmmap);
1224 
1225 #ifdef MODULAR
1226 	/*
1227 	 * For 32bit kernels:
1228 	 *   Reserve 16 MB of VA for module loading. Right now our full
1229 	 *   GENERIC kernel is about 13 MB, so this looks good enough.
1230 	 * For 64bit kernels:
1231 	 *   We can use all the space left before the special addresses,
1232 	 *   but leave 2 pages at vmmap alone (see pmap_virtual_space)
1233 	 *   and another red zone page.
1234 	 */
1235 #ifdef __arch64__
1236 	module_start = vmmap + 3*PAGE_SIZE;
1237 	module_end = 0x08000000;	/* keep all modules within 2GB */
1238 	KASSERT(module_end < KERNEND);	/* of kernel text */
1239 #else
1240 	module_start = vmmap;
1241 	vmmap += 16 * 1024*1024;
1242 	module_end = vmmap;
1243 #endif
1244 #endif
1245 
1246 	/*
1247 	 * Set up bounds of allocatable memory for vmstat et al.
1248 	 */
1249 	avail_start = avail->start;
1250 	for (mp = avail; mp->size; mp++)
1251 		avail_end = mp->start+mp->size;
1252 
1253 	BDPRINTF(PDB_BOOT1, ("Finished pmap_bootstrap()\n"));
1254 
1255 	BDPRINTF(PDB_BOOT, ("left kdata: %" PRId64 " @%" PRIx64 ".\n",
1256 				kdata_mem_pool.size, kdata_mem_pool.start));
1257 }
1258 
1259 /*
1260  * Allocate TSBs for both mmus from the locked kernel data segment page.
1261  * This is run before the cpu itself is activated (or by the first cpu
1262  * itself)
1263  */
1264 void
1265 cpu_pmap_prepare(struct cpu_info *ci, bool initial)
1266 {
1267 	/* allocate our TSBs */
1268 	ci->ci_tsb_dmmu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
1269 	ci->ci_tsb_immu = (pte_t *)kdata_alloc(TSBSIZE, TSBSIZE);
1270 	memset(ci->ci_tsb_dmmu, 0, TSBSIZE);
1271 	memset(ci->ci_tsb_immu, 0, TSBSIZE);
1272 	if (!initial) {
1273 		KASSERT(ci != curcpu());
1274 		/*
1275 		 * Initially share ctxbusy with the boot cpu, the
1276 		 * cpu will replace it as soon as it runs (and can
1277 		 * probe the number of available contexts itself).
1278 		 * Until then only context 0 (aka kernel) will be
1279 		 * referenced anyway.
1280 		 */
1281 		ci->ci_numctx = curcpu()->ci_numctx;
1282 		ci->ci_ctxbusy = curcpu()->ci_ctxbusy;
1283 	}
1284 
1285 	if (CPU_ISSUN4V) {
1286 		ci->ci_tsb_desc = (struct tsb_desc *)kdata_alloc(
1287 			sizeof(struct tsb_desc), 16);
1288 		memset(ci->ci_tsb_desc, 0, sizeof(struct tsb_desc));
1289 		/* 8K page size used for TSB index computation */
1290 		ci->ci_tsb_desc->td_idxpgsz = 0;
1291 		ci->ci_tsb_desc->td_assoc = 1;
1292 		ci->ci_tsb_desc->td_size = TSBENTS;
1293 		ci->ci_tsb_desc->td_ctxidx = -1;
1294 		ci->ci_tsb_desc->td_pgsz = 0xf;
1295 		ci->ci_tsb_desc->td_pa = pmap_kextract((vaddr_t)ci->ci_tsb_dmmu);
1296 		BDPRINTF(PDB_BOOT1, ("cpu %d: TSB descriptor allocated at %p "
1297 		    "size %08x - td_pa at %p\n",
1298 		    ci->ci_index, ci->ci_tsb_desc, sizeof(struct tsb_desc),
1299 		    ci->ci_tsb_desc->td_pa));
1300 	}
1301 
1302 	BDPRINTF(PDB_BOOT1, ("cpu %d: TSB allocated at %p/%p size %08x\n",
1303 	    ci->ci_index, ci->ci_tsb_dmmu, ci->ci_tsb_immu, TSBSIZE));
1304 }
1305 
1306 /*
1307  * Initialize the per CPU parts for the cpu running this code.
1308  */
1309 void
1310 cpu_pmap_init(struct cpu_info *ci)
1311 {
1312 	size_t ctxsize;
1313 
1314 	/*
1315 	 * We delay initialising ci_ctx_lock here as LOCKDEBUG isn't
1316 	 * running for cpu0 yet.
1317 	 */
1318 	ci->ci_pmap_next_ctx = 1;
1319 	/* all SUN4U use 13 bit contexts - SUN4V use at least 13 bit contexts */
1320 	ci->ci_numctx = 0x2000;
1321 	ctxsize = sizeof(paddr_t)*ci->ci_numctx;
1322 	ci->ci_ctxbusy = (paddr_t *)kdata_alloc(ctxsize, sizeof(uint64_t));
1323 	memset(ci->ci_ctxbusy, 0, ctxsize);
1324 	LIST_INIT(&ci->ci_pmap_ctxlist);
1325 
1326 	/* mark kernel context as busy */
1327 	ci->ci_ctxbusy[0] = pmap_kernel()->pm_physaddr;
1328 }
1329 
1330 /*
1331  * Initialize anything else for pmap handling.
1332  * Called during vm_init().
1333  */
1334 void
1335 pmap_init(void)
1336 {
1337 	struct vm_page *pg;
1338 	struct pglist pglist;
1339 	uint64_t data;
1340 	paddr_t pa;
1341 	psize_t size;
1342 	vaddr_t va;
1343 
1344 	BDPRINTF(PDB_BOOT1, ("pmap_init()\n"));
1345 
1346 	size = sizeof(struct pv_entry) * physmem;
1347 	if (uvm_pglistalloc((psize_t)size, (paddr_t)0, (paddr_t)-1,
1348 		(paddr_t)PAGE_SIZE, (paddr_t)0, &pglist, 1, 0) != 0)
1349 		panic("pmap_init: no memory");
1350 
1351 	va = uvm_km_alloc(kernel_map, size, 0, UVM_KMF_VAONLY);
1352 	if (va == 0)
1353 		panic("pmap_init: no memory");
1354 
1355 	/* Map the pages */
1356 	TAILQ_FOREACH(pg, &pglist, pageq.queue) {
1357 		pa = VM_PAGE_TO_PHYS(pg);
1358 		pmap_zero_page(pa);
1359 		data = TSB_DATA(0 /* global */,
1360 			PGSZ_8K,
1361 			pa,
1362 			1 /* priv */,
1363 			1 /* Write */,
1364 			1 /* Cacheable */,
1365 			FORCE_ALIAS /* ALIAS -- Disable D$ */,
1366 			1 /* valid */,
1367 			0 /* IE */,
1368 			0 /* wc */);
1369 		pmap_enter_kpage(va, data);
1370 		va += PAGE_SIZE;
1371 	}
1372 
1373 	/*
1374 	 * initialize the pmap pools.
1375 	 */
1376 	pool_cache_bootstrap(&pmap_cache, sizeof(struct pmap),
1377 	    SPARC64_BLOCK_SIZE, 0, 0, "pmappl", NULL, IPL_NONE, NULL, NULL,
1378 	    NULL);
1379 	pool_cache_bootstrap(&pmap_pv_cache, sizeof(struct pv_entry), 0, 0,
1380 	    PR_LARGECACHE, "pv_entry", NULL, IPL_NONE, NULL, NULL, NULL);
1381 
1382 	vm_first_phys = avail_start;
1383 	vm_num_phys = avail_end - avail_start;
1384 
1385 	mutex_init(&pmap_lock, MUTEX_DEFAULT, IPL_NONE);
1386 #if defined(USE_LOCKSAFE_PSEG_GETSET)
1387 	mutex_init(&pseg_lock, MUTEX_SPIN, IPL_VM);
1388 #endif
1389 	lock_available = true;
1390 }
1391 
1392 /*
1393  * How much virtual space is available to the kernel?
1394  */
1395 static vaddr_t kbreak; /* End of kernel VA */
1396 void
1397 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1398 {
1399 
1400 	/*
1401 	 * Reserve one segment for kernel virtual memory.
1402 	 */
1403 #ifdef __arch64__
1404 	/*
1405 	 * On 64 bit kernels, start it beyond firmware, so
1406 	 * we are basically unrestricted.
1407 	 */
1408 	*start = kbreak = VM_KERNEL_MEM_VA_START;
1409 	*end = VM_MAX_KERNEL_ADDRESS;
1410 #else
1411 	/*
1412 	 * Reserve two pages for pmap_copy_page && /dev/mem, but otherwise
1413 	 * end it beyond the iospace and other special fixed addresses.
1414 	 */
1415 	*start = kbreak = (vaddr_t)(vmmap + 2*PAGE_SIZE);
1416 	*end = VM_MAX_KERNEL_ADDRESS;
1417 #endif
1418 	BDPRINTF(PDB_BOOT1, ("pmap_virtual_space: %x-%x\n", *start, *end));
1419 }
1420 
1421 /*
1422  * Preallocate kernel page tables to a specified VA.
1423  * This simply loops through the first TTE for each
1424  * page table from the beginning of the kernel pmap,
1425  * reads the entry, and if the result is
1426  * zero (either invalid entry or no page table) it stores
1427  * a zero there, populating page tables in the process.
1428  * This is not the most efficient technique but I don't
1429  * expect it to be called that often.
1430  */
1431 vaddr_t
1432 pmap_growkernel(vaddr_t maxkvaddr)
1433 {
1434 	struct pmap *pm = pmap_kernel();
1435 	paddr_t pa;
1436 
1437 	if (maxkvaddr >= VM_MAX_KERNEL_ADDRESS) {
1438 		printf("WARNING: cannot extend kernel pmap beyond %p to %p\n",
1439 		       (void *)VM_MAX_KERNEL_ADDRESS, (void *)maxkvaddr);
1440 		return (kbreak);
1441 	}
1442 	DPRINTF(PDB_GROW, ("pmap_growkernel(%lx...%lx)\n", kbreak, maxkvaddr));
1443 	/* Align with the start of a page table */
1444 	for (kbreak &= ((~0ULL) << PDSHIFT); kbreak < maxkvaddr;
1445 	     kbreak += (1 << PDSHIFT)) {
1446 		if (pseg_get(pm, kbreak) & TLB_V)
1447 			continue;
1448 
1449 		pa = 0;
1450 		while (pseg_set(pm, kbreak, 0, pa) & 1) {
1451 			DPRINTF(PDB_GROW,
1452 			    ("pmap_growkernel: extending %lx\n", kbreak));
1453 			pa = 0;
1454 			if (!pmap_get_page(&pa))
1455 				panic("pmap_growkernel: no pages");
1456 			ENTER_STAT(ptpneeded);
1457 		}
1458 	}
1459 	return (kbreak);
1460 }
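/*
 * A hedged sketch of the contract as implemented above, for callers in
 * the MI VM code that are about to use kernel VA beyond the current
 * kbreak:
 *
 *	new_kbreak = pmap_growkernel(maxkvaddr);
 *	   - returns the (possibly unchanged) end of prepared kernel VA,
 *	   - never shrinks, and refuses to go past VM_MAX_KERNEL_ADDRESS,
 *	   - guarantees page tables exist for [old kbreak, maxkvaddr).
 */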
1461 
1462 /*
1463  * Create and return a physical map.
1464  */
1465 struct pmap *
1466 pmap_create(void)
1467 {
1468 	struct pmap *pm;
1469 
1470 	DPRINTF(PDB_CREATE, ("pmap_create()\n"));
1471 
1472 	pm = pool_cache_get(&pmap_cache, PR_WAITOK);
1473 	memset(pm, 0, sizeof *pm);
1474 	DPRINTF(PDB_CREATE, ("pmap_create(): created %p\n", pm));
1475 
1476 	pm->pm_refs = 1;
1477 	TAILQ_INIT(&pm->pm_ptps);
1478 	if (pm != pmap_kernel()) {
1479 		while (!pmap_get_page(&pm->pm_physaddr)) {
1480 			uvm_wait("pmap_create");
1481 		}
1482 		pm->pm_segs = (paddr_t *)(u_long)pm->pm_physaddr;
1483 	}
1484 	DPRINTF(PDB_CREATE, ("pmap_create(%p): ctx %d\n", pm, pmap_ctx(pm)));
1485 	return pm;
1486 }
1487 
1488 /*
1489  * Add a reference to the given pmap.
1490  */
1491 void
1492 pmap_reference(struct pmap *pm)
1493 {
1494 
1495 	atomic_inc_uint(&pm->pm_refs);
1496 }
1497 
1498 /*
1499  * Retire the given pmap from service.
1500  * Should only be called if the map contains no valid mappings.
1501  */
1502 void
1503 pmap_destroy(struct pmap *pm)
1504 {
1505 #ifdef MULTIPROCESSOR
1506 	struct cpu_info *ci;
1507 	sparc64_cpuset_t pmap_cpus_active;
1508 #else
1509 #define pmap_cpus_active 0
1510 #endif
1511 	struct vm_page *pg;
1512 
1513 	membar_release();
1514 	if ((int)atomic_dec_uint_nv(&pm->pm_refs) > 0) {
1515 		return;
1516 	}
1517 	membar_acquire();
1518 	DPRINTF(PDB_DESTROY, ("pmap_destroy: freeing pmap %p\n", pm));
1519 #ifdef MULTIPROCESSOR
1520 	CPUSET_CLEAR(pmap_cpus_active);
1521 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
1522 		/* XXXMRG: Move the lock inside one or both tests? */
1523 		mutex_enter(&ci->ci_ctx_lock);
1524 		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
1525 			if (pm->pm_ctx[ci->ci_index] > 0) {
1526 				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
1527 				ctx_free(pm, ci);
1528 			}
1529 		}
1530 		mutex_exit(&ci->ci_ctx_lock);
1531 	}
1532 #else
1533 	if (pmap_ctx(pm)) {
1534 		mutex_enter(&curcpu()->ci_ctx_lock);
1535 		ctx_free(pm, curcpu());
1536 		mutex_exit(&curcpu()->ci_ctx_lock);
1537 	}
1538 #endif
1539 
1540 	/* we could be a little smarter and leave pages zeroed */
1541 	while ((pg = TAILQ_FIRST(&pm->pm_ptps)) != NULL) {
1542 		struct vm_page_md *md = VM_PAGE_TO_MD(pg);
1543 
1544 		TAILQ_REMOVE(&pm->pm_ptps, pg, pageq.queue);
1545 		KASSERT(md->mdpg_pvh.pv_pmap == NULL);
1546 		dcache_flush_page_cpuset(VM_PAGE_TO_PHYS(pg), pmap_cpus_active);
1547 		uvm_pagefree(pg);
1548 	}
1549 	pmap_free_page((paddr_t)(u_long)pm->pm_segs, pmap_cpus_active);
1550 
1551 	pool_cache_put(&pmap_cache, pm);
1552 }
1553 
1554 /*
1555  * Copy the range specified by src_addr/len
1556  * from the source map to the range dst_addr/len
1557  * in the destination map.
1558  *
1559  * This routine is only advisory and need not do anything.
1560  */
1561 void
1562 pmap_copy(struct pmap *dst_pmap, struct pmap *src_pmap, vaddr_t dst_addr, vsize_t len, vaddr_t src_addr)
1563 {
1564 
1565 	DPRINTF(PDB_CREATE, ("pmap_copy(%p, %p, %p, %lx, %p)\n",
1566 			     dst_pmap, src_pmap, (void *)(u_long)dst_addr,
1567 			     (u_long)len, (void *)(u_long)src_addr));
1568 }
1569 
1570 /*
1571  * Activate the address space for the specified process.  If the
1572  * process is the current process, load the new MMU context.
1573  */
1574 void
1575 pmap_activate(struct lwp *l)
1576 {
1577 	struct pmap *pmap = l->l_proc->p_vmspace->vm_map.pmap;
1578 
1579 	if (pmap == pmap_kernel()) {
1580 		return;
1581 	}
1582 
1583 	/*
1584 	 * This is essentially the same thing that happens in cpu_switchto()
1585 	 * when the newly selected process is about to run, except that we
1586 	 * have to make sure to clean the register windows before we set
1587 	 * the new context.
1588 	 */
1589 
1590 	if (l != curlwp) {
1591 		return;
1592 	}
1593 	write_user_windows();
1594 	pmap_activate_pmap(pmap);
1595 }
1596 
1597 void
1598 pmap_activate_pmap(struct pmap *pmap)
1599 {
1600 
1601 	if (pmap_ctx(pmap) == 0) {
1602 		(void) ctx_alloc(pmap);
1603 	}
1604 	DPRINTF(PDB_ACTIVATE,
1605 		("%s: cpu%d activating ctx %d\n", __func__,
1606 		 cpu_number(), pmap_ctx(pmap)));
1607 	dmmu_set_secondary_context(pmap_ctx(pmap));
1608 }
1609 
1610 /*
1611  * Deactivate the address space of the specified process.
1612  */
1613 void
1614 pmap_deactivate(struct lwp *l)
1615 {
1616 
1617 	DPRINTF(PDB_ACTIVATE,
1618 		("%s: cpu%d deactivating ctx %d\n", __func__,
1619 		 cpu_number(), pmap_ctx(l->l_proc->p_vmspace->vm_map.pmap)));
1620 }
1621 
1622 /*
1623  * pmap_kenter_pa:		[ INTERFACE ]
1624  *
1625  *	Enter a va -> pa mapping into the kernel pmap without any
1626  *	physical->virtual tracking.
1627  *
1628  *	Note: no locking is necessary in this function.
1629  */
1630 void
1631 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1632 {
1633 	pte_t tte;
1634 	paddr_t ptp;
1635 	struct pmap *pm = pmap_kernel();
1636 	int i;
1637 
1638 	KASSERT(va < INTSTACK || va > EINTSTACK);
1639 	KASSERT(va < kdata || va > ekdata);
1640 
1641 	/*
1642 	 * Construct the TTE.
1643 	 */
1644 
1645 	ENTER_STAT(unmanaged);
1646 	if (pa & (PMAP_NVC|PMAP_NC)) {
1647 		ENTER_STAT(ci);
1648 	}
1649 
1650 	tte.data = TSB_DATA(0, PGSZ_8K, pa, 1 /* Privileged */,
1651 			    (VM_PROT_WRITE & prot),
1652 			    !(pa & PMAP_NC), pa & (PMAP_NVC), 1,
1653 			    pa & (PMAP_LITTLE), pa & PMAP_WC);
1654 	/* We don't track mod/ref here. */
1655 	if (prot & VM_PROT_WRITE)
1656 		tte.data |= TLB_REAL_W|TLB_W;
1657 	if (prot & VM_PROT_EXECUTE)
1658 		tte.data |= TLB_EXEC;
1659 	tte.data |= TLB_TSB_LOCK;	/* wired */
1660 	ptp = 0;
1661 
1662  retry:
1663 	i = pseg_set(pm, va, tte.data, ptp);
1664 	if (i & 1) {
1665 		KASSERT((i & 4) == 0);
1666 		ptp = 0;
1667 		if (!pmap_get_page(&ptp))
1668 			panic("pmap_kenter_pa: no pages");
1669 		ENTER_STAT(ptpneeded);
1670 		goto retry;
1671 	}
1672 	if (ptp && i == 0) {
1673 		/* We allocated a spare page but didn't use it.  Free it. */
1674 		printf("pmap_kenter_pa: freeing unused page %llx\n",
1675 		       (long long)ptp);
1676 		pmap_free_page_noflush(ptp);
1677 	}
1678 #ifdef PMAP_DEBUG
1679 	i = ptelookup_va(va);
1680 	if (pmapdebug & PDB_ENTER)
1681 		prom_printf("pmap_kenter_pa: va=%08x data=%08x:%08x "
1682 			"tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32),
1683 			(int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]);
1684 	if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) {
1685 		prom_printf("pmap_kenter_pa: evicting entry tag=%x:%08x "
1686 			"data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1687 			(int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag,
1688 			(int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data,
1689 			i, &curcpu()->ci_tsb_dmmu[i]);
1690 		prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1691 			va, (int)(tte.data>>32), (int)tte.data,	i,
1692 			&curcpu()->ci_tsb_dmmu[i]);
1693 	}
1694 #endif
1695 }
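/*
 * A minimal usage sketch of the pmap_kenter_pa()/pmap_kremove() pair
 * (the standard MI contract: no pv tracking, mappings are wired, and the
 * caller supplies page-aligned addresses):
 *
 *	pmap_kenter_pa(va, pa, VM_PROT_READ | VM_PROT_WRITE, 0);
 *	... use the mapping ...
 *	pmap_kremove(va, PAGE_SIZE);
 *	pmap_update(pmap_kernel());
 */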
1696 
1697 /*
1698  * pmap_kremove:		[ INTERFACE ]
1699  *
1700  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1701  *	for size bytes (assumed to be page rounded).
1702  */
1703 void
1704 pmap_kremove(vaddr_t va, vsize_t size)
1705 {
1706 	struct pmap *pm = pmap_kernel();
1707 	int64_t data;
1708 	paddr_t pa;
1709 	int rv;
1710 	bool flush = FALSE;
1711 
1712 	KASSERT(va < INTSTACK || va > EINTSTACK);
1713 	KASSERT(va < kdata || va > ekdata);
1714 
1715 	DPRINTF(PDB_DEMAP, ("pmap_kremove: start 0x%lx size %lx\n", va, size));
1716 	for (; size >= PAGE_SIZE; va += PAGE_SIZE, size -= PAGE_SIZE) {
1717 
1718 #ifdef DIAGNOSTIC
1719 		/*
1720 		 * Is this part of the permanent 4MB mapping?
1721 		 */
1722 		if (va >= ktext && va < roundup(ekdata, 4*MEG))
1723 			panic("pmap_kremove: va=%08x in locked TLB", (u_int)va);
1724 #endif
1725 
1726 		data = pseg_get(pm, va);
1727 		if ((data & TLB_V) == 0) {
1728 			continue;
1729 		}
1730 
1731 		flush = TRUE;
1732 		pa = data & TLB_PA_MASK;
1733 
1734 		/*
1735 		 * We need to flip the valid bit and
1736 		 * clear the access statistics.
1737 		 */
1738 
1739 		rv = pseg_set(pm, va, 0, 0);
1740 		if (rv & 1)
1741 			panic("pmap_kremove: pseg_set needs spare, rv=%d\n",
1742 			    rv);
1743 		DPRINTF(PDB_DEMAP, ("pmap_kremove: seg %x pdir %x pte %x\n",
1744 		    (int)va_to_seg(va), (int)va_to_dir(va),
1745 		    (int)va_to_pte(va)));
1746 		REMOVE_STAT(removes);
1747 
1748 		tsb_invalidate(va, pm);
1749 		REMOVE_STAT(tflushes);
1750 
1751 		/*
1752 		 * Here we assume nothing can get into the TLB
1753 		 * unless it has a PTE.
1754 		 */
1755 
1756 		tlb_flush_pte(va, pm);
1757 		dcache_flush_page_all(pa);
1758 	}
1759 	if (flush)
1760 		REMOVE_STAT(flushes);
1761 }
1762 
1763 /*
1764  * Insert physical page at pa into the given pmap at virtual address va.
1765  * Supports 64-bit pa so we can map I/O space.
1766  */
1767 
1768 int
1769 pmap_enter(struct pmap *pm, vaddr_t va, paddr_t pa, vm_prot_t prot, u_int flags)
1770 {
1771 	pte_t tte;
1772 	int64_t data;
1773 	paddr_t opa = 0, ptp; /* XXX: gcc */
1774 	pv_entry_t pvh, opv = NULL, npv;
1775 	struct vm_page *pg, *opg, *ptpg;
1776 	int s, i, uncached = 0, error = 0;
1777 	int size = PGSZ_8K; /* PMAP_SZ_TO_TTE(pa); */
1778 	bool wired = (flags & PMAP_WIRED) != 0;
1779 	bool wasmapped = false;
1780 	bool dopv = true;
1781 
1782 	/*
1783 	 * Is this part of the permanent mappings?
1784 	 */
1785 	KASSERT(pm != pmap_kernel() || va < INTSTACK || va > EINTSTACK);
1786 	KASSERT(pm != pmap_kernel() || va < kdata || va > ekdata);
1787 
1788 	/*
1789 	 * Grab a spare PV.  Keep going even if this fails since we don't
1790 	 * yet know if we will need it.
1791 	 */
1792 
1793 	npv = pool_cache_get(&pmap_pv_cache, PR_NOWAIT);
1794 
1795 	/*
1796 	 * If a mapping at this address already exists, check if we're
1797 	 * entering the same PA again.  If it's different, remove it.
1798 	 */
1799 
1800 	mutex_enter(&pmap_lock);
1801 	data = pseg_get(pm, va);
1802 	if (data & TLB_V) {
1803 		wasmapped = TRUE;
1804 		opa = data & TLB_PA_MASK;
1805 		if (opa != pa) {
1806 			opg = PHYS_TO_VM_PAGE(opa);
1807 			if (opg != NULL) {
1808 				opv = pmap_remove_pv(pm, va, opg);
1809 			}
1810 		}
1811 	}
1812 
1813 	/*
1814 	 * Construct the TTE.
1815 	 */
1816 	pg = PHYS_TO_VM_PAGE(pa);
1817 	if (pg) {
1818 		struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
1819 
1820 		pvh = &md->mdpg_pvh;
1821 		uncached = (pvh->pv_va & (PV_ALIAS|PV_NVC));
1822 #ifdef DIAGNOSTIC
1823 		if ((flags & VM_PROT_ALL) & ~prot)
1824 			panic("pmap_enter: access_type exceeds prot");
1825 #endif
1826 		/*
1827 		 * If we don't have the traphandler do it,
1828 		 * set the ref/mod bits now.
1829 		 */
1830 		if (flags & VM_PROT_ALL)
1831 			pvh->pv_va |= PV_REF;
1832 		if (flags & VM_PROT_WRITE)
1833 			pvh->pv_va |= PV_MOD;
1834 
1835 		/*
1836 		 * make sure we have a pv entry ready if we need one.
1837 		 */
1838 		if (wasmapped && opa == pa) {
1839 			dopv = false;
1840 		} else if (npv == NULL) {
1841 			npv = opv;
1842 			opv = NULL;
1843 			if (npv == NULL) {
1844 				mutex_exit(&pmap_lock);
1845 				error = ENOMEM;
1846 				goto out;
1847 			}
1848 		}
1849 		ENTER_STAT(managed);
1850 	} else {
1851 		ENTER_STAT(unmanaged);
1852 		dopv = false;
1853 	}
1854 
1855 #ifndef NO_VCACHE
1856 	if (pa & PMAP_NVC)
1857 #endif
1858 		uncached = 1;
1859 	if (uncached) {
1860 		ENTER_STAT(ci);
1861 	}
1862 	tte.data = TSB_DATA(0, size, pa, pm == pmap_kernel(),
1863 		flags & VM_PROT_WRITE, !(pa & PMAP_NC),
1864 		uncached, 1, pa & PMAP_LITTLE, pa & PMAP_WC);
1865 #ifdef HWREF
1866 	if (prot & VM_PROT_WRITE)
1867 		tte.data |= TLB_REAL_W;
1868 	if (prot & VM_PROT_EXECUTE)
1869 		tte.data |= TLB_EXEC;
1870 #else
1871 	/* If it needs ref accounting do nothing. */
1872 	if (!(flags & VM_PROT_READ)) {
1873 		mutex_exit(&pmap_lock);
1874 		goto out;
1875 	}
1876 #endif
1877 	if (flags & VM_PROT_EXECUTE) {
1878 		if ((flags & (VM_PROT_READ|VM_PROT_WRITE)) == 0)
1879 			tte.data |= TLB_EXEC_ONLY|TLB_EXEC;
1880 		else
1881 			tte.data |= TLB_EXEC;
1882 	}
1883 	if (wired)
1884 		tte.data |= TLB_TSB_LOCK;
1885 	ptp = 0;
1886 
1887  retry:
1888 	i = pseg_set(pm, va, tte.data, ptp);
1889 	if (i == -2) {
1890 		if (flags & PMAP_CANFAIL)
1891 			return (ENOMEM);
1892 		panic("pmap_enter: invalid VA (inside hole)");
1893 	}
1894 	if (i & 4) {
1895 		/* ptp used as L3 */
1896 		KASSERT(ptp != 0);
1897 		KASSERT((i & 3) == 0);
1898 		ptpg = PHYS_TO_VM_PAGE(ptp);
1899 		if (ptpg) {
1900 			ptpg->offset = (uint64_t)va & (0xfffffLL << 23);
1901 			TAILQ_INSERT_TAIL(&pm->pm_ptps, ptpg, pageq.queue);
1902 		} else {
1903 			KASSERT(pm == pmap_kernel());
1904 		}
1905 	}
1906 	if (i & 2) {
1907 		/* ptp used as L2 */
1908 		KASSERT(ptp != 0);
1909 		KASSERT((i & 4) == 0);
1910 		ptpg = PHYS_TO_VM_PAGE(ptp);
1911 		if (ptpg) {
1912 			ptpg->offset = (((uint64_t)va >> 43) & 0x3ffLL) << 13;
1913 			TAILQ_INSERT_TAIL(&pm->pm_ptps, ptpg, pageq.queue);
1914 		} else {
1915 			KASSERT(pm == pmap_kernel());
1916 		}
1917 	}
1918 	if (i & 1) {
1919 		KASSERT((i & 4) == 0);
1920 		ptp = 0;
1921 		if (!pmap_get_page(&ptp)) {
1922 			mutex_exit(&pmap_lock);
1923 			if (flags & PMAP_CANFAIL) {
1924 				error = ENOMEM;
1925 				goto out;
1926 			} else {
1927 				panic("pmap_enter: no pages");
1928 			}
1929 		}
1930 		ENTER_STAT(ptpneeded);
1931 		goto retry;
1932 	}
1933 	if (ptp && i == 0) {
1934 		/* We allocated a spare page but didn't use it.  Free it. */
1935 		printf("pmap_enter: freeing unused page %llx\n",
1936 		       (long long)ptp);
1937 		pmap_free_page_noflush(ptp);
1938 	}
1939 	if (dopv) {
1940 		pmap_enter_pv(pm, va, pa, pg, &npv);
1941 	}
1942 
1943 	mutex_exit(&pmap_lock);
1944 #ifdef PMAP_DEBUG
1945 	i = ptelookup_va(va);
1946 	if (pmapdebug & PDB_ENTER)
1947 		prom_printf("pmap_enter: va=%08x data=%08x:%08x "
1948 			"tsb_dmmu[%d]=%08x\n", va, (int)(tte.data>>32),
1949 			(int)tte.data, i, &curcpu()->ci_tsb_dmmu[i]);
1950 	if (pmapdebug & PDB_MMU_STEAL && curcpu()->ci_tsb_dmmu[i].data) {
1951 		prom_printf("pmap_enter: evicting entry tag=%x:%08x "
1952 			"data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1953 			(int)(curcpu()->ci_tsb_dmmu[i].tag>>32), (int)curcpu()->ci_tsb_dmmu[i].tag,
1954 			(int)(curcpu()->ci_tsb_dmmu[i].data>>32), (int)curcpu()->ci_tsb_dmmu[i].data, i,
1955 			&curcpu()->ci_tsb_dmmu[i]);
1956 		prom_printf("with va=%08x data=%08x:%08x tsb_dmmu[%d]=%08x\n",
1957 			va, (int)(tte.data>>32), (int)tte.data, i,
1958 			&curcpu()->ci_tsb_dmmu[i]);
1959 	}
1960 #endif
1961 
1962 	if (flags & (VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE)) {
1963 
1964 		/*
1965 		 * preload the TSB with the new entry,
1966 		 * since we're going to need it immediately anyway.
1967 		 */
1968 
1969 		KASSERT(pmap_ctx(pm)>=0);
1970 		i = ptelookup_va(va);
1971 		tte.tag = TSB_TAG(0, pmap_ctx(pm), va);
1972 		s = splhigh();
1973 		if (wasmapped && pmap_is_on_mmu(pm)) {
1974 			tsb_invalidate(va, pm);
1975 		}
1976 		if (flags & (VM_PROT_READ | VM_PROT_WRITE)) {
1977 			curcpu()->ci_tsb_dmmu[i].tag = tte.tag;
1978 			__asm volatile("" : : : "memory");
1979 			curcpu()->ci_tsb_dmmu[i].data = tte.data;
1980 		}
1981 		if (flags & VM_PROT_EXECUTE) {
1982 			curcpu()->ci_tsb_immu[i].tag = tte.tag;
1983 			__asm volatile("" : : : "memory");
1984 			curcpu()->ci_tsb_immu[i].data = tte.data;
1985 		}
1986 
1987 		/*
1988 		 * it's only necessary to flush the TLB if this page was
1989 		 * previously mapped, but for some reason it's a lot faster
1990 		 * for the fork+exit microbenchmark if we always do it.
1991 		 */
1992 
1993 		KASSERT(pmap_ctx(pm)>=0);
1994 #ifdef MULTIPROCESSOR
1995 		if (wasmapped && pmap_is_on_mmu(pm))
1996 			tlb_flush_pte(va, pm);
1997 		else
1998 			sp_tlb_flush_pte(va, pmap_ctx(pm));
1999 #else
2000 		tlb_flush_pte(va, pm);
2001 #endif
2002 		splx(s);
2003 	} else if (wasmapped && pmap_is_on_mmu(pm)) {
2004 		/* Force reload -- protections may be changed */
2005 		KASSERT(pmap_ctx(pm)>=0);
2006 		tsb_invalidate(va, pm);
2007 		tlb_flush_pte(va, pm);
2008 	}
2009 
2010 	/* We will let the fast mmu miss interrupt load the new translation */
2011 	pv_check();
2012  out:
2013 	if (opv)
2014 		pool_cache_put(&pmap_pv_cache, opv);
2015 	if (npv)
2016 		pool_cache_put(&pmap_pv_cache, npv);
2017 
2018 	return error;
2019 }
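
/*
 * Illustrative sketch, not compiled: a typical pmap_enter() caller.
 * With PMAP_CANFAIL in the flags the ENOMEM return must be handled by
 * the caller; without it the function panics on resource shortage.
 * pmap_update() completes the batch of updates.  The helper
 * "example_enter_one" is hypothetical.
 */
#if 0	/* example only */
static int
example_enter_one(struct pmap *pm, vaddr_t va, paddr_t pa)
{
	int error;

	error = pmap_enter(pm, va, pa, VM_PROT_READ | VM_PROT_WRITE,
	    VM_PROT_READ | VM_PROT_WRITE | PMAP_CANFAIL);
	if (error != 0)
		return error;		/* e.g. wait for memory and retry */
	pmap_update(pm);
	return 0;
}
#endif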
2020 
2021 bool
2022 pmap_remove_all(struct pmap *pm)
2023 {
2024 #ifdef MULTIPROCESSOR
2025 	struct cpu_info *ci;
2026 	sparc64_cpuset_t pmap_cpus_active;
2027 #endif
2028 
2029 	if (pm == pmap_kernel()) {
2030 		return false;
2031 	}
2032 	write_user_windows();
2033 	pm->pm_refs = 0;
2034 
2035 	/*
2036 	 * XXXMRG: pmap_destroy() does exactly the same dance here.
2037 	 * surely one of them isn't necessary?
2038 	 */
2039 #ifdef MULTIPROCESSOR
2040 	CPUSET_CLEAR(pmap_cpus_active);
2041 	for (ci = cpus; ci != NULL; ci = ci->ci_next) {
2042 		/* XXXMRG: Move the lock inside one or both tests? */
2043 		mutex_enter(&ci->ci_ctx_lock);
2044 		if (CPUSET_HAS(cpus_active, ci->ci_index)) {
2045 			if (pm->pm_ctx[ci->ci_index] > 0) {
2046 				CPUSET_ADD(pmap_cpus_active, ci->ci_index);
2047 				ctx_free(pm, ci);
2048 			}
2049 		}
2050 		mutex_exit(&ci->ci_ctx_lock);
2051 	}
2052 #else
2053 	if (pmap_ctx(pm)) {
2054 		mutex_enter(&curcpu()->ci_ctx_lock);
2055 		ctx_free(pm, curcpu());
2056 		mutex_exit(&curcpu()->ci_ctx_lock);
2057 	}
2058 #endif
2059 
2060 	REMOVE_STAT(flushes);
2061 	/*
2062 	 * XXXMRG: couldn't we do something less severe here, and
2063 	 * only flush the right context on each CPU?
2064 	 */
2065 	blast_dcache();
2066 	return false;
2067 }
2068 
2069 /*
2070  * Remove the given range of mapping entries.
2071  */
2072 void
2073 pmap_remove(struct pmap *pm, vaddr_t va, vaddr_t endva)
2074 {
2075 	int64_t data;
2076 	paddr_t pa;
2077 	struct vm_page *pg;
2078 	pv_entry_t pv, freepv = NULL;
2079 	int rv;
2080 	bool flush = FALSE;
2081 
2082 	/*
2083 	 * In here we should check each pseg and if there are no more entries,
2084 	 * free it.  It's just that linear scans of 8K pages get expensive.
2085 	 */
2086 
2087 	KASSERT(pm != pmap_kernel() || endva < INTSTACK || va > EINTSTACK);
2088 	KASSERT(pm != pmap_kernel() || endva < kdata || va > ekdata);
2089 
2090 	mutex_enter(&pmap_lock);
2091 	DPRINTF(PDB_REMOVE, ("pmap_remove(pm=%p, va=%p, endva=%p):", pm,
2092 			     (void *)(u_long)va, (void *)(u_long)endva));
2093 	REMOVE_STAT(calls);
2094 
2095 	/* Now do the real work */
2096 	for (; va < endva; va += PAGE_SIZE) {
2097 #ifdef DIAGNOSTIC
2098 		/*
2099 		 * Is this part of the permanent 4MB mapping?
2100 		 */
2101 		if (pm == pmap_kernel() && va >= ktext &&
2102 			va < roundup(ekdata, 4*MEG))
2103 			panic("pmap_remove: va=%08llx in locked TLB",
2104 			      (long long)va);
2105 #endif
2106 
2107 		data = pseg_get(pm, va);
2108 		if ((data & TLB_V) == 0) {
2109 			continue;
2110 		}
2111 
2112 		flush = TRUE;
2113 		/* First remove the pv entry, if there is one */
2114 		pa = data & TLB_PA_MASK;
2115 		pg = PHYS_TO_VM_PAGE(pa);
2116 		if (pg) {
2117 			pv = pmap_remove_pv(pm, va, pg);
2118 			if (pv != NULL) {
2119 				/* free it */
2120 				pv->pv_next = freepv;
2121 				freepv = pv;
2122 			}
2123 		}
2124 
2125 		/*
2126 		 * We need to flip the valid bit and
2127 		 * clear the access statistics.
2128 		 */
2129 
2130 		rv = pseg_set(pm, va, 0, 0);
2131 		if (rv & 1)
2132 			panic("pmap_remove: pseg_set needed spare, rv=%d!\n",
2133 			    rv);
2134 
2135 		DPRINTF(PDB_REMOVE, (" clearing seg %x pte %x\n",
2136 				     (int)va_to_seg(va), (int)va_to_pte(va)));
2137 		REMOVE_STAT(removes);
2138 
2139 		if (pm != pmap_kernel() && !pmap_has_ctx(pm))
2140 			continue;
2141 
2142 		/*
2143 		 * if the pmap is being torn down, don't bother flushing,
2144 		 * we already have done so.
2145 		 */
2146 
2147 		if (!pm->pm_refs)
2148 			continue;
2149 
2150 		/*
2151 		 * Here we assume nothing can get into the TLB
2152 		 * unless it has a PTE.
2153 		 */
2154 
2155 		KASSERT(pmap_ctx(pm)>=0);
2156 		tsb_invalidate(va, pm);
2157 		REMOVE_STAT(tflushes);
2158 		tlb_flush_pte(va, pm);
2159 		dcache_flush_page_all(pa);
2160 	}
2161 	if (flush && pm->pm_refs)
2162 		REMOVE_STAT(flushes);
2163 	DPRINTF(PDB_REMOVE, ("\n"));
2164 	pv_check();
2165 	mutex_exit(&pmap_lock);
2166 
2167 	/* Catch up on deferred frees. */
2168 	for (; freepv != NULL; freepv = pv) {
2169 		pv = freepv->pv_next;
2170 		pool_cache_put(&pmap_pv_cache, freepv);
2171 	}
2172 }
2173 
2174 /*
2175  * Change the protection on the specified range of this pmap.
2176  */
2177 void
2178 pmap_protect(struct pmap *pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
2179 {
2180 	paddr_t pa;
2181 	int64_t data;
2182 	struct vm_page *pg;
2183 	pv_entry_t pv;
2184 	int rv;
2185 
2186 	KASSERT(pm != pmap_kernel() || eva < INTSTACK || sva > EINTSTACK);
2187 	KASSERT(pm != pmap_kernel() || eva < kdata || sva > ekdata);
2188 
2189 	if (prot == VM_PROT_NONE) {
2190 		pmap_remove(pm, sva, eva);
2191 		return;
2192 	}
2193 
2194 	sva = trunc_page(sva);
2195 	mutex_enter(&pmap_lock);
2196 	for (; sva < eva; sva += PAGE_SIZE) {
2197 #ifdef PMAP_DEBUG
2198 		/*
2199 		 * Is this part of the permanent 4MB mapping?
2200 		 */
2201 		if (pm == pmap_kernel() && sva >= ktext &&
2202 		    sva < roundup(ekdata, 4 * MEG)) {
2203 			mutex_exit(&pmap_lock);
2204 			prom_printf("pmap_protect: va=%08x in locked TLB\n",
2205 			    sva);
2206 			prom_abort();
2207 			return;
2208 		}
2209 #endif
2210 		DPRINTF(PDB_CHANGEPROT, ("pmap_protect: va %p\n",
2211 		    (void *)(u_long)sva));
2212 		data = pseg_get(pm, sva);
2213 		if ((data & TLB_V) == 0) {
2214 			continue;
2215 		}
2216 
2217 		pa = data & TLB_PA_MASK;
2218 		DPRINTF(PDB_CHANGEPROT|PDB_REF,
2219 			("pmap_protect: va=%08x data=%08llx "
2220 			 "seg=%08x pte=%08x\n",
2221 			 (u_int)sva, (long long)pa, (int)va_to_seg(sva),
2222 			 (int)va_to_pte(sva)));
2223 
2224 		pg = PHYS_TO_VM_PAGE(pa);
2225 		if (pg) {
2226 			struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2227 
2228 			/* Save REF/MOD info */
2229 			pv = &md->mdpg_pvh;
2230 			if (data & TLB_ACCESS)
2231 				pv->pv_va |= PV_REF;
2232 			if (data & TLB_MODIFY)
2233 				pv->pv_va |= PV_MOD;
2234 		}
2235 
2236 		/* Just do the pmap and TSB, not the pv_list */
2237 		if ((prot & VM_PROT_WRITE) == 0)
2238 			data &= ~(TLB_W|TLB_REAL_W);
2239 		if ((prot & VM_PROT_EXECUTE) == 0)
2240 			data &= ~(TLB_EXEC);
2241 
2242 		rv = pseg_set(pm, sva, data, 0);
2243 		if (rv & 1)
2244 			panic("pmap_protect: pseg_set needs spare! rv=%d\n",
2245 			    rv);
2246 
2247 		if (pm != pmap_kernel() && !pmap_has_ctx(pm))
2248 			continue;
2249 
2250 		KASSERT(pmap_ctx(pm)>=0);
2251 		tsb_invalidate(sva, pm);
2252 		tlb_flush_pte(sva, pm);
2253 	}
2254 	pv_check();
2255 	mutex_exit(&pmap_lock);
2256 }
2257 
2258 /*
2259  * Extract the physical page address associated
2260  * with the given map/virtual_address pair.
2261  */
2262 bool
2263 pmap_extract(struct pmap *pm, vaddr_t va, paddr_t *pap)
2264 {
2265 	paddr_t pa;
2266 	int64_t data = 0;
2267 
2268 	if (pm == pmap_kernel() && va >= kdata && va < roundup(ekdata, 4*MEG)) {
2269 		/* Need to deal w/locked TLB entry specially. */
2270 		pa = pmap_kextract(va);
2271 		DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n",
2272 				      (u_long)va, (unsigned long long)pa));
2273 		if (pap != NULL)
2274 			*pap = pa;
2275 		return TRUE;
2276 	} else if (pm == pmap_kernel() && va >= ktext && va < ektext) {
2277 		/* Need to deal w/locked TLB entry specially. */
2278 		pa = pmap_kextract(va);
2279 		DPRINTF(PDB_EXTRACT, ("pmap_extract: va=%lx pa=%llx\n",
2280 		    (u_long)va, (unsigned long long)pa));
2281 		if (pap != NULL)
2282 			*pap = pa;
2283 		return TRUE;
2284 	} else if (pm == pmap_kernel() && va >= INTSTACK && va < (INTSTACK + 64*KB)) {
2285 		pa = (paddr_t)(curcpu()->ci_paddr - INTSTACK + va);
2286 		DPRINTF(PDB_EXTRACT, ("pmap_extract (intstack): va=%lx pa=%llx\n",
2287 		    (u_long)va, (unsigned long long)pa));
2288 		if (pap != NULL)
2289 			*pap = pa;
2290 		return TRUE;
2291 	} else {
2292 		data = pseg_get(pm, va);
2293 		pa = data & TLB_PA_MASK;
2294 		if (pmapdebug & PDB_EXTRACT) {
2295 			paddr_t npa = ldxa((vaddr_t)&pm->pm_segs[va_to_seg(va)],
2296 					   ASI_PHYS_CACHED);
2297 			printf("pmap_extract: va=%p segs[%ld]=%llx",
2298 			       (void *)(u_long)va, (long)va_to_seg(va),
2299 			       (unsigned long long)npa);
2300 			if (npa) {
2301 				npa = (paddr_t)
2302 					ldxa((vaddr_t)&((paddr_t *)(u_long)npa)
2303 					     [va_to_dir(va)],
2304 					     ASI_PHYS_CACHED);
2305 				printf(" segs[%ld][%ld]=%lx",
2306 				       (long)va_to_seg(va),
2307 				       (long)va_to_dir(va), (long)npa);
2308 			}
2309 			if (npa)	{
2310 				npa = (paddr_t)
2311 					ldxa((vaddr_t)&((paddr_t *)(u_long)npa)
2312 					     [va_to_pte(va)],
2313 					     ASI_PHYS_CACHED);
2314 				printf(" segs[%ld][%ld][%ld]=%lx",
2315 				       (long)va_to_seg(va),
2316 				       (long)va_to_dir(va),
2317 				       (long)va_to_pte(va), (long)npa);
2318 			}
2319 			printf(" pseg_get: %lx\n", (long)pa);
2320 		}
2321 	}
2322 	if ((data & TLB_V) == 0)
2323 		return (FALSE);
2324 	if (pap != NULL)
2325 		*pap = pa + (va & PGOFSET);
2326 	return (TRUE);
2327 }
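
/*
 * Illustrative sketch, not compiled: pmap_extract() is a software
 * VA->PA translation; the page offset is preserved in the result, so
 * it can be used directly for diagnostics or DMA setup.  The helper
 * "example_va_to_pa" is hypothetical.
 */
#if 0	/* example only */
static paddr_t
example_va_to_pa(struct pmap *pm, vaddr_t va)
{
	paddr_t pa;

	if (!pmap_extract(pm, va, &pa))
		return 0;	/* no valid mapping at va */
	return pa;		/* pa already includes va & PGOFSET */
}
#endif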
2328 
2329 /*
2330  * Change protection on a kernel address.
2331  * This should only be called from MD code.
2332  */
2333 void
2334 pmap_kprotect(vaddr_t va, vm_prot_t prot)
2335 {
2336 	struct pmap *pm = pmap_kernel();
2337 	int64_t data;
2338 	int rv;
2339 
2340 	data = pseg_get(pm, va);
2341 	KASSERT(data & TLB_V);
2342 	if (prot & VM_PROT_WRITE) {
2343 		data |= (TLB_W|TLB_REAL_W);
2344 	} else {
2345 		data &= ~(TLB_W|TLB_REAL_W);
2346 	}
2347 	rv = pseg_set(pm, va, data, 0);
2348 	if (rv & 1)
2349 		panic("pmap_kprotect: pseg_set needs spare! rv=%d", rv);
2350 	KASSERT(pmap_ctx(pm)>=0);
2351 	tsb_invalidate(va, pm);
2352 	tlb_flush_pte(va, pm);
2353 }
2354 
2355 /*
2356  * Return the number bytes that pmap_dumpmmu() will dump.
2357  * Return the number of bytes that pmap_dumpmmu() will dump.
2358 int
2359 pmap_dumpsize(void)
2360 {
2361 	int	sz;
2362 
2363 	sz = ALIGN(sizeof(kcore_seg_t)) + ALIGN(sizeof(cpu_kcore_hdr_t));
2364 	sz += kernel_dtlb_slots * sizeof(struct cpu_kcore_4mbseg);
2365 	sz += phys_installed_size * sizeof(phys_ram_seg_t);
2366 
2367 	return btodb(sz + DEV_BSIZE - 1);
2368 }
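
/*
 * Worked example with hypothetical numbers: if the two ALIGN()ed
 * headers take 512 bytes, there are 16 locked dTLB slots of 16 bytes
 * each and 4 physical memory segments of 16 bytes each, then
 * sz = 512 + 256 + 64 = 832 and btodb(832 + DEV_BSIZE - 1) rounds the
 * dump header up to 2 DEV_BSIZE (512 byte) blocks.
 */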
2369 
2370 /*
2371  * Write the mmu contents to the dump device.
2372  * This gets appended to the end of a crash dump since
2373  * there is no in-core copy of kernel memory mappings on a 4/4c machine.
2374  *
2375  * Write the core dump headers and MD data to the dump device.
2376  * We dump the following items:
2377  *
2378  *	kcore_seg_t		 (MI header defined in <sys/kcore.h>)
2379  *	cpu_kcore_hdr_t		 (MD header defined in <machine/kcore.h>)
2380  *	phys_ram_seg_t[phys_installed_size]  physical memory segments
2381  */
2382 int
2383 pmap_dumpmmu(int (*dump)(dev_t, daddr_t, void *, size_t), daddr_t blkno)
2384 {
2385 	kcore_seg_t	*kseg;
2386 	cpu_kcore_hdr_t	*kcpu;
2387 	phys_ram_seg_t	memseg;
2388 	struct cpu_kcore_4mbseg ktlb;
2389 	int	error = 0;
2390 	int	i;
2391 	int	buffer[dbtob(1) / sizeof(int)];
2392 	int	*bp, *ep;
2393 
2394 #define EXPEDITE(p,n) do {						\
2395 	int *sp = (void *)(p);						\
2396 	int sz = (n);							\
2397 	while (sz > 0) {						\
2398 		*bp++ = *sp++;						\
2399 		if (bp >= ep) {						\
2400 			error = (*dump)(dumpdev, blkno,			\
2401 					(void *)buffer, dbtob(1));	\
2402 			if (error != 0)					\
2403 				return (error);				\
2404 			++blkno;					\
2405 			bp = buffer;					\
2406 		}							\
2407 		sz -= 4;						\
2408 	}								\
2409 } while (0)
2410 
2411 	/* Setup bookkeeping pointers */
2412 	bp = buffer;
2413 	ep = &buffer[sizeof(buffer) / sizeof(buffer[0])];
2414 
2415 	/* Fill in MI segment header */
2416 	kseg = (kcore_seg_t *)bp;
2417 	CORE_SETMAGIC(*kseg, KCORE_MAGIC, MID_MACHINE, CORE_CPU);
2418 	kseg->c_size = dbtob(pmap_dumpsize()) - ALIGN(sizeof(kcore_seg_t));
2419 
2420 	/* Fill in MD segment header (interpreted by MD part of libkvm) */
2421 	kcpu = (cpu_kcore_hdr_t *)((long)bp + ALIGN(sizeof(kcore_seg_t)));
2422 	kcpu->cputype = cputyp;
2423 	kcpu->kernbase = (uint64_t)KERNBASE;
2424 	kcpu->cpubase = (uint64_t)CPUINFO_VA;
2425 
2426 	/* Describe the locked text segment */
2427 	kcpu->ktextbase = (uint64_t)ktext;
2428 	kcpu->ktextp = (uint64_t)ktextp;
2429 	kcpu->ktextsz = (uint64_t)ektext - ktext;
2430 	if (kcpu->ktextsz > 4*MEG)
2431 		kcpu->ktextsz = 0;	/* old version can not work */
2432 
2433 	/* Describe locked data segment */
2434 	kcpu->kdatabase = (uint64_t)kdata;
2435 	kcpu->kdatap = (uint64_t)kdatap;
2436 	kcpu->kdatasz = (uint64_t)ekdatap - kdatap;
2437 
2438 	/* new version of locked segments description */
2439 	kcpu->newmagic = SPARC64_KCORE_NEWMAGIC;
2440 	kcpu->num4mbsegs = kernel_dtlb_slots;
2441 	kcpu->off4mbsegs = ALIGN(sizeof(cpu_kcore_hdr_t));
2442 
2443 	/* description of per-cpu mappings */
2444 	kcpu->numcpuinfos = sparc_ncpus;
2445 	kcpu->percpusz = 64 * 1024;	/* used to be 128k for some time */
2446 	kcpu->thiscpu = cpu_number();	/* which cpu is doing this dump */
2447 	kcpu->cpusp = cpu0paddr - 64 * 1024 * sparc_ncpus;
2448 
2449 	/* Now the memsegs */
2450 	kcpu->nmemseg = phys_installed_size;
2451 	kcpu->memsegoffset = kcpu->off4mbsegs
2452 		+ kernel_dtlb_slots * sizeof(struct cpu_kcore_4mbseg);
2453 
2454 	/* Now we need to point this at our kernel pmap. */
2455 	kcpu->nsegmap = STSZ;
2456 	kcpu->segmapoffset = (uint64_t)pmap_kernel()->pm_physaddr;
2457 
2458 	/* Note: we have assumed everything fits in buffer[] so far... */
2459 	bp = (int *)((long)kcpu + ALIGN(sizeof(cpu_kcore_hdr_t)));
2460 
2461 	/* write locked kernel 4MB TLBs */
2462 	for (i = 0; i < kernel_dtlb_slots; i++) {
2463 		ktlb.va = kernel_tlbs[i].te_va;
2464 		ktlb.pa = kernel_tlbs[i].te_pa;
2465 		EXPEDITE(&ktlb, sizeof(ktlb));
2466 	}
2467 
2468 	/* write memsegs */
2469 	for (i = 0; i < phys_installed_size; i++) {
2470 		memseg.start = phys_installed[i].start;
2471 		memseg.size = phys_installed[i].size;
2472 		EXPEDITE(&memseg, sizeof(phys_ram_seg_t));
2473 	}
2474 
2475 	if (bp != buffer)
2476 		error = (*dump)(dumpdev, blkno++, (void *)buffer, dbtob(1));
2477 
2478 	return (error);
2479 }
2480 
2481 /*
2482  * Determine (non)existence of physical page
2483  */
2484 int
2485 pmap_pa_exists(paddr_t pa)
2486 {
2487 	int i;
2488 
2489 	/* Just go through physical memory list & see if we're there */
2490 	for (i = 0; i < phys_installed_size; i++) {
2491 		if ((phys_installed[i].start <= pa) &&
2492 				(phys_installed[i].start +
2493 				 phys_installed[i].size >= pa))
2494 			return 1;
2495 	}
2496 	return 0;
2497 }
2498 
2499 /*
2500  * Lookup the appropriate TSB entry.
2501  *
2502  * Here is the full official pseudo code:
2503  *
2504  */
2505 
2506 #ifdef NOTYET
2507 int64 GenerateTSBPointer(
2508  	int64 va,		/* Missing VA			*/
2509  	PointerType type,	/* 8K_POINTER or 16K_POINTER	*/
2510  	int64 TSBBase,		/* TSB Register[63:13] << 13	*/
2511  	Boolean split,		/* TSB Register[12]		*/
2512  	int TSBSize)		/* TSB Register[2:0]		*/
2513 {
2514  	int64 vaPortion;
2515  	int64 TSBBaseMask;
2516  	int64 splitMask;
2517 
2518 	/* TSBBaseMask marks the bits from TSB Base Reg		*/
2519 	TSBBaseMask = 0xffffffffffffe000 <<
2520 		(split? (TSBsize + 1) : TSBsize);
2521 
2522 	/* Shift va towards lsb appropriately and		*/
2523 	/* zero out the original va page offset			*/
2524 	vaPortion = (va >> ((type == 8K_POINTER)? 9: 12)) &
2525 		0xfffffffffffffff0;
2526 
2527 	if (split) {
2528 		/* There's only one bit in question for split	*/
2529 		splitMask = 1 << (13 + TSBsize);
2530 		if (type == 8K_POINTER)
2531 			/* Make sure we're in the lower half	*/
2532 			vaPortion &= ~splitMask;
2533 		else
2534 			/* Make sure we're in the upper half	*/
2535 			vaPortion |= splitMask;
2536 	}
2537 	return (TSBBase & TSBBaseMask) | (vaPortion & ~TSBBaseMask);
2538 }
2539 #endif
2540 /*
2541  * Of course, since we are not using a split TSB or variable page sizes,
2542  * we can optimize this a bit.
2543  *
2544  * The following only works for a unified 8K TSB.  It will find the slot
2545  * for that particular va and return it.  IT MAY BE FOR ANOTHER MAPPING!
2546  */
2547 int
2548 ptelookup_va(vaddr_t va)
2549 {
2550 	long tsbptr;
2551 #define TSBBASEMASK	(0xffffffffffffe000LL << tsbsize)
2552 
2553 	tsbptr = (((va >> 9) & 0xfffffffffffffff0LL) & ~TSBBASEMASK);
2554 	return (tsbptr / sizeof(pte_t));
2555 }
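
/*
 * Worked example, assuming tsbsize == 0 (512-entry TSB) and
 * sizeof(pte_t) == 16: ~TSBBASEMASK is then 0x1fff, so for
 * va = 0x12346000
 *
 *	(va >> 9) & ~0xf	-> 0x91a30
 *	& 0x1fff		-> 0x1a30
 *	/ 16			-> 0x1a3	(TSB slot 419)
 *
 * i.e. the slot is selected by VA bits <21:13>; the tag still has to
 * be compared, since another mapping can hash to the same slot.
 */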
2556 
2557 /*
2558  * Do whatever is needed to sync the MOD/REF flags
2559  */
2560 
2561 bool
2562 pmap_clear_modify(struct vm_page *pg)
2563 {
2564 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2565 	pv_entry_t pv;
2566 	int rv;
2567 	int changed = 0;
2568 #ifdef DEBUG
2569 	int modified = 0;
2570 
2571 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify(%p)\n", pg));
2572 
2573 	modified = pmap_is_modified(pg);
2574 #endif
2575 	mutex_enter(&pmap_lock);
2576 	/* Clear all mappings */
2577 	pv = &md->mdpg_pvh;
2578 #ifdef DEBUG
2579 	if (pv->pv_va & PV_MOD)
2580 		pv->pv_va |= PV_WE;	/* Remember this was modified */
2581 #endif
2582 	if (pv->pv_va & PV_MOD) {
2583 		changed |= 1;
2584 		pv->pv_va &= ~PV_MOD;
2585 	}
2586 #ifdef DEBUG
2587 	if (pv->pv_next && !pv->pv_pmap) {
2588 		printf("pmap_clear_modify: npv but no pmap for pv %p\n", pv);
2589 		Debugger();
2590 	}
2591 #endif
2592 	if (pv->pv_pmap != NULL) {
2593 		for (; pv; pv = pv->pv_next) {
2594 			int64_t data;
2595 			struct pmap *pmap = pv->pv_pmap;
2596 			vaddr_t va = pv->pv_va & PV_VAMASK;
2597 
2598 			/* First clear the mod bit in the PTE and make it R/O */
2599 			data = pseg_get(pmap, va);
2600 			KASSERT(data & TLB_V);
2601 			/* Need to both clear the modify and write bits */
2602 			if (data & TLB_MODIFY)
2603 				changed |= 1;
2604 #ifdef HWREF
2605 			data &= ~(TLB_MODIFY|TLB_W);
2606 #else
2607 			data &= ~(TLB_MODIFY|TLB_W|TLB_REAL_W);
2608 #endif
2609 			rv = pseg_set(pmap, va, data, 0);
2610 			if (rv & 1)
2611 				printf("pmap_clear_modify: pseg_set needs"
2612 				    " spare! rv=%d\n", rv);
2613 			if (pmap_is_on_mmu(pmap)) {
2614 				KASSERT(pmap_ctx(pmap)>=0);
2615 				tsb_invalidate(va, pmap);
2616 				tlb_flush_pte(va, pmap);
2617 			}
2618 			/* Then clear the mod bit in the pv */
2619 			if (pv->pv_va & PV_MOD) {
2620 				changed |= 1;
2621 				pv->pv_va &= ~PV_MOD;
2622 			}
2623 		}
2624 	}
2625 	pv_check();
2626 	mutex_exit(&pmap_lock);
2627 #ifdef DEBUG
2628 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_modify: pg %p %s\n", pg,
2629 	    (changed ? "was modified" : "was not modified")));
2630 	if (modified && modified != changed) {
2631 		printf("pmap_clear_modify: modified %d changed %d\n",
2632 		       modified, changed);
2633 		Debugger();
2634 	}
2635 #endif
2636 	return (changed);
2637 }
2638 
2639 bool
2640 pmap_clear_reference(struct vm_page *pg)
2641 {
2642 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2643 	pv_entry_t pv;
2644 	int rv;
2645 	int changed = 0;
2646 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
2647 	int referenced = 0;
2648 #endif
2649 
2650 	mutex_enter(&pmap_lock);
2651 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
2652 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_clear_reference(%p)\n", pg));
2653 	referenced = pmap_is_referenced_locked(pg);
2654 #endif
2655 	/* Clear all references */
2656 	pv = &md->mdpg_pvh;
2657 	if (pv->pv_va & PV_REF) {
2658 		changed |= 1;
2659 		pv->pv_va &= ~PV_REF;
2660 	}
2661 #ifdef DEBUG
2662 	if (pv->pv_next && !pv->pv_pmap) {
2663 		printf("pmap_clear_reference: npv but no pmap for pv %p\n", pv);
2664 		Debugger();
2665 	}
2666 #endif
2667 	if (pv->pv_pmap != NULL) {
2668 		for (; pv; pv = pv->pv_next) {
2669 			int64_t data;
2670 			struct pmap *pmap = pv->pv_pmap;
2671 			vaddr_t va = pv->pv_va & PV_VAMASK;
2672 
2673 			data = pseg_get(pmap, va);
2674 			KASSERT(data & TLB_V);
2675 			DPRINTF(PDB_CHANGEPROT,
2676 			    ("clearing ref pm:%p va:%p ctx:%lx data:%llx\n",
2677 			     pmap, (void *)(u_long)va,
2678 			     (u_long)pmap_ctx(pmap),
2679 			     (long long)data));
2680 #ifdef HWREF
2681 			if (data & TLB_ACCESS) {
2682 				changed |= 1;
2683 				data &= ~TLB_ACCESS;
2684 			}
2685 #else
2686 			if (data < 0)
2687 				changed |= 1;
2688 			data = 0;
2689 #endif
2690 			rv = pseg_set(pmap, va, data, 0);
2691 			if (rv & 1)
2692 				panic("pmap_clear_reference: pseg_set needs"
2693 				    " spare! rv=%d\n", rv);
2694 			if (pmap_is_on_mmu(pmap)) {
2695 				KASSERT(pmap_ctx(pmap)>=0);
2696 				tsb_invalidate(va, pmap);
2697 				tlb_flush_pte(va, pmap);
2698 			}
2699 			if (pv->pv_va & PV_REF) {
2700 				changed |= 1;
2701 				pv->pv_va &= ~PV_REF;
2702 			}
2703 		}
2704 	}
2705 	dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
2706 	pv_check();
2707 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
2708 	if (pmap_is_referenced_locked(pg)) {
2709 		pv = &md->mdpg_pvh;
2710 		printf("pmap_clear_reference(): %p still referenced "
2711 			"(pmap = %p, ctx = %d)\n", pg, pv->pv_pmap,
2712 			pv->pv_pmap ? pmap_ctx(pv->pv_pmap) : 0);
2713 		Debugger();
2714 	}
2715 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2716 	    ("pmap_clear_reference: pg %p %s\n", pg,
2717 	     (changed ? "was referenced" : "was not referenced")));
2718 	if (referenced != changed) {
2719 		printf("pmap_clear_reference: referenced %d changed %d\n",
2720 		       referenced, changed);
2721 		Debugger();
2722 	} else {
2723 		mutex_exit(&pmap_lock);
2724 		return (referenced);
2725 	}
2726 #endif
2727 	mutex_exit(&pmap_lock);
2728 	return (changed);
2729 }
2730 
2731 bool
2732 pmap_is_modified(struct vm_page *pg)
2733 {
2734 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2735 	pv_entry_t pv, npv;
2736 	bool res = false;
2737 
2738 	/* Check if any mapping has been modified */
2739 	pv = &md->mdpg_pvh;
2740 	if (pv->pv_va & PV_MOD)
2741 		res = true;
2742 #ifdef HWREF
2743 #ifdef DEBUG
2744 	if (pv->pv_next && !pv->pv_pmap) {
2745 		printf("pmap_is_modified: npv but no pmap for pv %p\n", pv);
2746 		Debugger();
2747 	}
2748 #endif
2749 	if (!res && pv->pv_pmap != NULL) {
2750 		mutex_enter(&pmap_lock);
2751 		for (npv = pv; !res && npv && npv->pv_pmap;
2752 		     npv = npv->pv_next) {
2753 			int64_t data;
2754 
2755 			data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2756 			KASSERT(data & TLB_V);
2757 			if (data & TLB_MODIFY)
2758 				res = true;
2759 
2760 			/* Migrate modify info to head pv */
2761 			if (npv->pv_va & PV_MOD) {
2762 				res = true;
2763 				npv->pv_va &= ~PV_MOD;
2764 			}
2765 		}
2766 		/* Save modify info */
2767 		if (res)
2768 			pv->pv_va |= PV_MOD;
2769 #ifdef DEBUG
2770 		if (res)
2771 			pv->pv_va |= PV_WE;
2772 #endif
2773 		mutex_exit(&pmap_lock);
2774 	}
2775 #endif
2776 
2777 	DPRINTF(PDB_CHANGEPROT|PDB_REF, ("pmap_is_modified(%p) = %d\n", pg,
2778 	    res));
2779 	pv_check();
2780 	return res;
2781 }
2782 
2783 /*
2784  * Variant of pmap_is_reference() where caller already holds pmap_lock
2785  * Variant of pmap_is_referenced() where the caller already holds pmap_lock
2786 static bool
2787 pmap_is_referenced_locked(struct vm_page *pg)
2788 {
2789 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2790 	pv_entry_t pv, npv;
2791 	bool res = false;
2792 
2793 	KASSERT(mutex_owned(&pmap_lock));
2794 
2795 	/* Check if any mapping has been referenced */
2796 	pv = &md->mdpg_pvh;
2797 	if (pv->pv_va & PV_REF)
2798 		return true;
2799 
2800 #ifdef HWREF
2801 #ifdef DEBUG
2802 	if (pv->pv_next && !pv->pv_pmap) {
2803 		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
2804 		Debugger();
2805 	}
2806 #endif
2807 	if (pv->pv_pmap == NULL)
2808 		return false;
2809 
2810 	for (npv = pv; npv; npv = npv->pv_next) {
2811 		int64_t data;
2812 
2813 		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
2814 		KASSERT(data & TLB_V);
2815 		if (data & TLB_ACCESS)
2816 			res = true;
2817 
2818 		/* Migrate ref info to head pv */
2819 		if (npv->pv_va & PV_REF) {
2820 			res = true;
2821 			npv->pv_va &= ~PV_REF;
2822 		}
2823 	}
2824 	/* Save ref info */
2825 	if (res)
2826 		pv->pv_va |= PV_REF;
2827 #endif
2828 
2829 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2830 		("pmap_is_referenced(%p) = %d\n", pg, res));
2831 	pv_check();
2832 	return res;
2833 }
2834 
2835 bool
2836 pmap_is_referenced(struct vm_page *pg)
2837 {
2838 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2839 	pv_entry_t pv;
2840 	bool res = false;
2841 
2842 	/* Check if any mapping has been referenced */
2843 	pv = &md->mdpg_pvh;
2844 	if (pv->pv_va & PV_REF)
2845 		return true;
2846 
2847 #ifdef HWREF
2848 #ifdef DEBUG
2849 	if (pv->pv_next && !pv->pv_pmap) {
2850 		printf("pmap_is_referenced: npv but no pmap for pv %p\n", pv);
2851 		Debugger();
2852 	}
2853 #endif
2854 	if (pv->pv_pmap != NULL) {
2855 		mutex_enter(&pmap_lock);
2856 		res = pmap_is_referenced_locked(pg);
2857 		mutex_exit(&pmap_lock);
2858 	}
2859 #endif
2860 
2861 	DPRINTF(PDB_CHANGEPROT|PDB_REF,
2862 		("pmap_is_referenced(%p) = %d\n", pg, res));
2863 	pv_check();
2864 	return res;
2865 }
2866 
2867 
2868 
2869 /*
2870  *	Routine:	pmap_unwire
2871  *	Function:	Clear the wired attribute for a map/virtual-address
2872  *			pair.
2873  *	In/out conditions:
2874  *			The mapping must already exist in the pmap.
2875  */
2876 void
2877 pmap_unwire(pmap_t pmap, vaddr_t va)
2878 {
2879 	int64_t data;
2880 	int rv;
2881 
2882 	DPRINTF(PDB_MMU_STEAL, ("pmap_unwire(%p, %lx)\n", pmap, va));
2883 
2884 #ifdef DEBUG
2885 	/*
2886 	 * Is this part of the permanent 4MB mapping?
2887 	 */
2888 	if (pmap == pmap_kernel() && va >= ktext &&
2889 		va < roundup(ekdata, 4*MEG)) {
2890 		prom_printf("pmap_unwire: va=%08x in locked TLB\n", va);
2891 		prom_abort();
2892 		return;
2893 	}
2894 #endif
2895 	data = pseg_get(pmap, va & PV_VAMASK);
2896 	KASSERT(data & TLB_V);
2897 	data &= ~TLB_TSB_LOCK;
2898 	rv = pseg_set(pmap, va & PV_VAMASK, data, 0);
2899 	if (rv & 1)
2900 		panic("pmap_unwire: pseg_set needs spare! rv=%d\n", rv);
2901 	pv_check();
2902 }
2903 
2904 /*
2905  * Lower the protection on the specified physical page.
2906  *
2907  * Never enable writing as it will break COW
2908  */
2909 
2910 void
2911 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2912 {
2913 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
2914 	int64_t clear, set;
2915 	int64_t data = 0;
2916 	int rv;
2917 	pv_entry_t pv, npv, freepv = NULL;
2918 	struct pmap *pmap;
2919 	vaddr_t va;
2920 	bool needflush = FALSE;
2921 
2922 	DPRINTF(PDB_CHANGEPROT,
2923 	    ("pmap_page_protect: pg %p prot %x\n", pg, prot));
2924 
2925 	mutex_enter(&pmap_lock);
2926 	pv = &md->mdpg_pvh;
2927 	if (prot & (VM_PROT_READ|VM_PROT_EXECUTE)) {
2928 		/* copy_on_write */
2929 
2930 		set = TLB_V;
2931 		clear = TLB_REAL_W|TLB_W;
2932 		if (VM_PROT_EXECUTE & prot)
2933 			set |= TLB_EXEC;
2934 		else
2935 			clear |= TLB_EXEC;
2936 		if (VM_PROT_EXECUTE == prot)
2937 			set |= TLB_EXEC_ONLY;
2938 
2939 #ifdef DEBUG
2940 		if (pv->pv_next && !pv->pv_pmap) {
2941 			printf("pmap_page_protect: no pmap for pv %p\n", pv);
2942 			Debugger();
2943 		}
2944 #endif
2945 		if (pv->pv_pmap != NULL) {
2946 			for (; pv; pv = pv->pv_next) {
2947 				pmap = pv->pv_pmap;
2948 				va = pv->pv_va & PV_VAMASK;
2949 
2950 				DPRINTF(PDB_CHANGEPROT | PDB_REF,
2951 					("pmap_page_protect: "
2952 					 "RO va %p of pg %p...\n",
2953 					 (void *)(u_long)pv->pv_va, pg));
2954 				data = pseg_get(pmap, va);
2955 				KASSERT(data & TLB_V);
2956 
2957 				/* Save REF/MOD info */
2958 				if (data & TLB_ACCESS)
2959 					pv->pv_va |= PV_REF;
2960 				if (data & TLB_MODIFY)
2961 					pv->pv_va |= PV_MOD;
2962 
2963 				data &= ~clear;
2964 				data |= set;
2965 				rv = pseg_set(pmap, va, data, 0);
2966 				if (rv & 1)
2967 					panic("pmap_page_protect: "
2968 					       "pseg_set needs spare! rv=%d\n",
2969 					       rv);
2970 				if (pmap_is_on_mmu(pmap)) {
2971 					KASSERT(pmap_ctx(pmap)>=0);
2972 					tsb_invalidate(va, pmap);
2973 					tlb_flush_pte(va, pmap);
2974 				}
2975 			}
2976 		}
2977 	} else {
2978 		/* remove mappings */
2979 		DPRINTF(PDB_REMOVE,
2980 			("pmap_page_protect: demapping pg %p\n", pg));
2981 
2982 		/* First remove the entire list of continuation pv's */
2983 		for (npv = pv->pv_next; npv; npv = pv->pv_next) {
2984 			pmap = npv->pv_pmap;
2985 			va = npv->pv_va & PV_VAMASK;
2986 
2987 			/* We're removing npv from pv->pv_next */
2988 			DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE,
2989 				("pmap_page_protect: "
2990 				 "demap va %p of pg %p in pmap %p...\n",
2991 				 (void *)(u_long)va, pg, pmap));
2992 
2993 			/* clear the entry in the page table */
2994 			data = pseg_get(pmap, va);
2995 			KASSERT(data & TLB_V);
2996 
2997 			/* Save ref/mod info */
2998 			if (data & TLB_ACCESS)
2999 				pv->pv_va |= PV_REF;
3000 			if (data & TLB_MODIFY)
3001 				pv->pv_va |= PV_MOD;
3002 			/* Clear mapping */
3003 			rv = pseg_set(pmap, va, 0, 0);
3004 			if (rv & 1)
3005 				panic("pmap_page_protect: pseg_set needs"
3006 				     " spare! rv=%d\n", rv);
3007 			if (pmap_is_on_mmu(pmap)) {
3008 				KASSERT(pmap_ctx(pmap)>=0);
3009 				tsb_invalidate(va, pmap);
3010 				tlb_flush_pte(va, pmap);
3011 			}
3012 			if (pmap->pm_refs > 0) {
3013 				needflush = TRUE;
3014 			}
3015 
3016 			/* free the pv */
3017 			pv->pv_next = npv->pv_next;
3018 			npv->pv_next = freepv;
3019 			freepv = npv;
3020 		}
3021 
3022 		/* Then remove the primary pv */
3023 #ifdef DEBUG
3024 		if (pv->pv_next && !pv->pv_pmap) {
3025 			printf("pmap_page_protect: no pmap for pv %p\n", pv);
3026 			Debugger();
3027 		}
3028 #endif
3029 		if (pv->pv_pmap != NULL) {
3030 			pmap = pv->pv_pmap;
3031 			va = pv->pv_va & PV_VAMASK;
3032 
3033 			DPRINTF(PDB_CHANGEPROT|PDB_REF|PDB_REMOVE,
3034 				("pmap_page_protect: "
3035 				 "demap va %p of pg %p from pm %p...\n",
3036 				 (void *)(u_long)va, pg, pmap));
3037 
3038 			data = pseg_get(pmap, va);
3039 			KASSERT(data & TLB_V);
3040 			/* Save ref/mod info */
3041 			if (data & TLB_ACCESS)
3042 				pv->pv_va |= PV_REF;
3043 			if (data & TLB_MODIFY)
3044 				pv->pv_va |= PV_MOD;
3045 			rv = pseg_set(pmap, va, 0, 0);
3046 			if (rv & 1)
3047 				panic("pmap_page_protect: pseg_set needs"
3048 				    " spare! rv=%d\n", rv);
3049 			if (pmap_is_on_mmu(pmap)) {
3050 				KASSERT(pmap_ctx(pmap)>=0);
3051 				tsb_invalidate(va, pmap);
3052 				tlb_flush_pte(va, pmap);
3053 			}
3054 			if (pmap->pm_refs > 0) {
3055 				needflush = TRUE;
3056 			}
3057 			npv = pv->pv_next;
3058 			/* dump the first pv */
3059 			if (npv) {
3060 				/* First save mod/ref bits */
3061 				pv->pv_pmap = npv->pv_pmap;
3062 				pv->pv_va = (pv->pv_va & PV_MASK) | npv->pv_va;
3063 				pv->pv_next = npv->pv_next;
3064 				npv->pv_next = freepv;
3065 				freepv = npv;
3066 			} else {
3067 				pv->pv_pmap = NULL;
3068 				pv->pv_next = NULL;
3069 			}
3070 		}
3071 		if (needflush)
3072 			dcache_flush_page_all(VM_PAGE_TO_PHYS(pg));
3073 	}
3074 	/* We should really only flush the pages we demapped. */
3075 	pv_check();
3076 	mutex_exit(&pmap_lock);
3077 
3078 	/* Catch up on deferred frees. */
3079 	for (; freepv != NULL; freepv = npv) {
3080 		npv = freepv->pv_next;
3081 		pool_cache_put(&pmap_pv_cache, freepv);
3082 	}
3083 }
3084 
3085 #ifdef PMAP_COUNT_DEBUG
3086 /*
3087  * count pages in pmap -- this can be slow.
3088  */
3089 int
3090 pmap_count_res(struct pmap *pm)
3091 {
3092 	int64_t data;
3093 	paddr_t *pdir, *ptbl;
3094 	int i, j, k, n;
3095 
3096 	/* Don't want one of these pages reused while we're reading it. */
3097 	mutex_enter(&pmap_lock);
3098 	n = 0;
3099 	for (i = 0; i < STSZ; i++) {
3100 		pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i],
3101 					       ASI_PHYS_CACHED);
3102 		if (pdir == NULL) {
3103 			continue;
3104 		}
3105 		for (k = 0; k < PDSZ; k++) {
3106 			ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k],
3107 						       ASI_PHYS_CACHED);
3108 			if (ptbl == NULL) {
3109 				continue;
3110 			}
3111 			for (j = 0; j < PTSZ; j++) {
3112 				data = (int64_t)ldxa((vaddr_t)&ptbl[j],
3113 						     ASI_PHYS_CACHED);
3114 				if (data & TLB_V)
3115 					n++;
3116 			}
3117 		}
3118 	}
3119 	mutex_exit(&pmap_lock);
3120 
3121 	if (pm->pm_stats.resident_count != n)
3122 		printf("pmap_count_resident: pm_stats = %ld, counted: %d\n",
3123 		    pm->pm_stats.resident_count, n);
3124 
3125 	return n;
3126 }
3127 
3128 /*
3129  * count wired pages in pmap -- this can be slow.
3130  */
3131 int
3132 pmap_count_wired(struct pmap *pm)
3133 {
3134 	int64_t data;
3135 	paddr_t *pdir, *ptbl;
3136 	int i, j, k, n;
3137 
3138 	/* Don't want one of these pages reused while we're reading it. */
3139 	mutex_enter(&pmap_lock);	/* XXX uvmplock */
3140 	n = 0;
3141 	for (i = 0; i < STSZ; i++) {
3142 		pdir = (paddr_t *)(u_long)ldxa((vaddr_t)&pm->pm_segs[i],
3143 					       ASI_PHYS_CACHED);
3144 		if (pdir == NULL) {
3145 			continue;
3146 		}
3147 		for (k = 0; k < PDSZ; k++) {
3148 			ptbl = (paddr_t *)(u_long)ldxa((vaddr_t)&pdir[k],
3149 						       ASI_PHYS_CACHED);
3150 			if (ptbl == NULL) {
3151 				continue;
3152 			}
3153 			for (j = 0; j < PTSZ; j++) {
3154 				data = (int64_t)ldxa((vaddr_t)&ptbl[j],
3155 						     ASI_PHYS_CACHED);
3156 				if (data & TLB_TSB_LOCK)
3157 					n++;
3158 			}
3159 		}
3160 	}
3161 	mutex_exit(&pmap_lock);	/* XXX uvmplock */
3162 
3163 	if (pm->pm_stats.wired_count != n)
3164 		printf("pmap_count_wired: pm_stats = %ld, counted: %d\n",
3165 		    pm->pm_stats.wired_count, n);
3166 
3167 	return n;
3168 }
3169 #endif	/* PMAP_COUNT_DEBUG */
3170 
3171 void
3172 pmap_procwr(struct proc *p, vaddr_t va, size_t len)
3173 {
3174 
3175 	blast_icache();
3176 }
3177 
3178 /*
3179  * Allocate a hardware context to the given pmap.
3180  */
3181 static int
3182 ctx_alloc(struct pmap *pm)
3183 {
3184 	int i, ctx;
3185 
3186 	KASSERT(pm != pmap_kernel());
3187 	KASSERT(pm == curproc->p_vmspace->vm_map.pmap);
3188 	mutex_enter(&curcpu()->ci_ctx_lock);
3189 	ctx = curcpu()->ci_pmap_next_ctx++;
3190 
3191 	/*
3192 	 * if we have run out of contexts, remove all user entries from
3193 	 * the TSB, TLB and dcache and start over with context 1 again.
3194 	 */
3195 
3196 	if (ctx == curcpu()->ci_numctx) {
3197 		DPRINTF(PDB_CTX_ALLOC|PDB_CTX_FLUSHALL,
3198 			("ctx_alloc: cpu%d run out of contexts %d\n",
3199 			 cpu_number(), curcpu()->ci_numctx));
3200 		write_user_windows();
3201 		while (!LIST_EMPTY(&curcpu()->ci_pmap_ctxlist)) {
3202 #ifdef MULTIPROCESSOR
3203 			KASSERT(pmap_ctx(LIST_FIRST(&curcpu()->ci_pmap_ctxlist)) != 0);
3204 #endif
3205 			ctx_free(LIST_FIRST(&curcpu()->ci_pmap_ctxlist),
3206 				 curcpu());
3207 		}
3208 		for (i = TSBENTS - 1; i >= 0; i--) {
3209 			if (TSB_TAG_CTX(curcpu()->ci_tsb_dmmu[i].tag) != 0) {
3210 				clrx(&curcpu()->ci_tsb_dmmu[i].data);
3211 			}
3212 			if (TSB_TAG_CTX(curcpu()->ci_tsb_immu[i].tag) != 0) {
3213 				clrx(&curcpu()->ci_tsb_immu[i].data);
3214 			}
3215 		}
3216 		sp_tlb_flush_all();
3217 		ctx = 1;
3218 		curcpu()->ci_pmap_next_ctx = 2;
3219 	}
3220 	curcpu()->ci_ctxbusy[ctx] = pm->pm_physaddr;
3221 	LIST_INSERT_HEAD(&curcpu()->ci_pmap_ctxlist, pm, pm_list[cpu_number()]);
3222 	pmap_ctx(pm) = ctx;
3223 	mutex_exit(&curcpu()->ci_ctx_lock);
3224 	DPRINTF(PDB_CTX_ALLOC, ("ctx_alloc: cpu%d allocated ctx %d\n",
3225 		cpu_number(), ctx));
3226 	return ctx;
3227 }
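
/*
 * Note: context 0 is never handed out here -- it stays reserved for
 * the kernel -- so user pmaps cycle through contexts 1 .. ci_numctx-1
 * and, once those are exhausted, the allocator flushes everything and
 * restarts at context 1 (with ci_pmap_next_ctx reset to 2).
 */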
3228 
3229 /*
3230  * Give away a context.
3231  */
3232 static void
3233 ctx_free(struct pmap *pm, struct cpu_info *ci)
3234 {
3235 	int oldctx;
3236 	int cpunum;
3237 
3238 	KASSERT(mutex_owned(&ci->ci_ctx_lock));
3239 
3240 #ifdef MULTIPROCESSOR
3241 	cpunum = ci->ci_index;
3242 #else
3243 	/* Give the compiler a hint.. */
3244 	cpunum = 0;
3245 #endif
3246 
3247 	oldctx = pm->pm_ctx[cpunum];
3248 	if (oldctx == 0)
3249 		return;
3250 
3251 #ifdef DIAGNOSTIC
3252 	if (pm == pmap_kernel())
3253 		panic("ctx_free: freeing kernel context");
3254 	if (ci->ci_ctxbusy[oldctx] == 0)
3255 		printf("ctx_free: freeing free context %d\n", oldctx);
3256 	if (ci->ci_ctxbusy[oldctx] != pm->pm_physaddr) {
3257 		printf("ctx_free: freeing someone else's context\n "
3258 		       "ctxbusy[%d] = %p, pm(%p)->pm_ctx = %p\n",
3259 		       oldctx, (void *)(u_long)ci->ci_ctxbusy[oldctx], pm,
3260 		       (void *)(u_long)pm->pm_physaddr);
3261 		Debugger();
3262 	}
3263 #endif
3264 	/* We should verify it has not been stolen and reallocated... */
3265 	DPRINTF(PDB_CTX_ALLOC, ("ctx_free: cpu%d freeing ctx %d\n",
3266 		cpu_number(), oldctx));
3267 	ci->ci_ctxbusy[oldctx] = 0UL;
3268 	pm->pm_ctx[cpunum] = 0;
3269 	LIST_REMOVE(pm, pm_list[cpunum]);
3270 }
3271 
3272 /*
3273  * Enter the pmap and virtual address into the
3274  * physical to virtual map table.
3275  *
3276  * We enter here with the pmap locked.
3277  * The pv_entry_t in *npvp is replaced with NULL if this function
3278  * uses it, otherwise the caller needs to free it.
3279  */
3280 
3281 void
3282 pmap_enter_pv(struct pmap *pmap, vaddr_t va, paddr_t pa, struct vm_page *pg,
3283 	      pv_entry_t *npvp)
3284 {
3285 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
3286 	pv_entry_t pvh, npv;
3287 
3288 	KASSERT(mutex_owned(&pmap_lock));
3289 
3290 	pvh = &md->mdpg_pvh;
3291 	DPRINTF(PDB_ENTER, ("pmap_enter: pvh %p: was %lx/%p/%p\n",
3292 	    pvh, pvh->pv_va, pvh->pv_pmap, pvh->pv_next));
3293 	if (pvh->pv_pmap == NULL) {
3294 
3295 		/*
3296 		 * No entries yet, use header as the first entry
3297 		 */
3298 		DPRINTF(PDB_ENTER, ("pmap_enter: first pv: pmap %p va %lx\n",
3299 		    pmap, va));
3300 		ENTER_STAT(firstpv);
3301 		PV_SETVA(pvh, va);
3302 		pvh->pv_pmap = pmap;
3303 		pvh->pv_next = NULL;
3304 	} else {
3305 		if (pg->loan_count == 0 && !(pvh->pv_va & PV_ALIAS)) {
3306 
3307 			/*
3308 			 * There is at least one other VA mapping this page.
3309 			 * Check if they are cache index compatible. If not
3310 			 * remove all mappings, flush the cache and set page
3311 			 * to be mapped uncached. Caching will be restored
3312 			 * when pages are mapped compatible again.
3313 			 */
3314 			if ((pvh->pv_va ^ va) & VA_ALIAS_MASK) {
3315 				pvh->pv_va |= PV_ALIAS;
3316 				pmap_page_cache(pmap, pa, 0);
3317 				ENTER_STAT(ci);
3318 			}
3319 		}
3320 
3321 		/*
3322 		 * There is at least one other VA mapping this page.
3323 		 * Place this entry after the header.
3324 		 */
3325 
3326 		DPRINTF(PDB_ENTER, ("pmap_enter: new pv: pmap %p va %lx\n",
3327 		    pmap, va));
3328 		npv = *npvp;
3329 		*npvp = NULL;
3330 		npv->pv_pmap = pmap;
3331 		npv->pv_va = va & PV_VAMASK;
3332 		npv->pv_next = pvh->pv_next;
3333 		pvh->pv_next = npv;
3334 
3335 		if (!npv->pv_next) {
3336 			ENTER_STAT(secondpv);
3337 		}
3338 	}
3339 }
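
/*
 * Illustrative note (hypothetical addresses): the VA_ALIAS_MASK test
 * above detects virtual cache aliases.  If the mask covered the VA
 * bits above the 8K page offset that index the D$, mapping one
 * physical page at, say, 0x2000 in one pmap and 0x6000 in another
 * would select different cache indexes, so all mappings of the page
 * are made uncached until the conflict disappears (see
 * pmap_remove_pv()).
 */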
3340 
3341 /*
3342  * Remove a physical to virtual address translation.
3343  */
3344 
3345 pv_entry_t
3346 pmap_remove_pv(struct pmap *pmap, vaddr_t va, struct vm_page *pg)
3347 {
3348 	struct vm_page_md * const md = VM_PAGE_TO_MD(pg);
3349 	pv_entry_t pvh, npv, pv;
3350 	int64_t data = 0;
3351 
3352 	KASSERT(mutex_owned(&pmap_lock));
3353 
3354 	pvh = &md->mdpg_pvh;
3355 
3356 	DPRINTF(PDB_REMOVE, ("pmap_remove_pv(pm=%p, va=%p, pg=%p)\n", pmap,
3357 	    (void *)(u_long)va, pg));
3358 	pv_check();
3359 
3360 	/*
3361 	 * Remove page from the PV table.
3362 	 * If it is the first entry on the list, it is actually
3363 	 * in the header and we must copy the following entry up
3364 	 * to the header.  Otherwise we must search the list for
3365 	 * the entry.  In either case we free the now unused entry.
3366 	 */
3367 	if (pmap == pvh->pv_pmap && PV_MATCH(pvh, va)) {
3368 		data = pseg_get(pvh->pv_pmap, pvh->pv_va & PV_VAMASK);
3369 		KASSERT(data & TLB_V);
3370 		npv = pvh->pv_next;
3371 		if (npv) {
3372 			/* First save mod/ref bits */
3373 			pvh->pv_va = (pvh->pv_va & PV_MASK) | npv->pv_va;
3374 			pvh->pv_next = npv->pv_next;
3375 			pvh->pv_pmap = npv->pv_pmap;
3376 		} else {
3377 			pvh->pv_pmap = NULL;
3378 			pvh->pv_next = NULL;
3379 			pvh->pv_va &= (PV_REF|PV_MOD);
3380 		}
3381 		REMOVE_STAT(pvfirst);
3382 	} else {
3383 		for (pv = pvh, npv = pvh->pv_next; npv;
3384 		     pv = npv, npv = npv->pv_next) {
3385 			REMOVE_STAT(pvsearch);
3386 			if (pmap == npv->pv_pmap && PV_MATCH(npv, va))
3387 				break;
3388 		}
3389 		pv->pv_next = npv->pv_next;
3390 		data = pseg_get(npv->pv_pmap, npv->pv_va & PV_VAMASK);
3391 		KASSERT(data & TLB_V);
3392 	}
3393 
3394 	/* Save ref/mod info */
3395 	if (data & TLB_ACCESS)
3396 		pvh->pv_va |= PV_REF;
3397 	if (data & TLB_MODIFY)
3398 		pvh->pv_va |= PV_MOD;
3399 
3400 	/* Check to see if the alias went away */
3401 	if (pvh->pv_va & PV_ALIAS) {
3402 		pvh->pv_va &= ~PV_ALIAS;
3403 		for (pv = pvh; pv; pv = pv->pv_next) {
3404 			if ((pv->pv_va ^ pvh->pv_va) & VA_ALIAS_MASK) {
3405 				pvh->pv_va |= PV_ALIAS;
3406 				break;
3407 			}
3408 		}
3409 		if (!(pvh->pv_va & PV_ALIAS))
3410 			pmap_page_cache(pmap, VM_PAGE_TO_PHYS(pg), 1);
3411 	}
3412 	pv_check();
3413 	return npv;
3414 }
3415 
3416 /*
3417  *	pmap_page_cache:
3418  *
3419  *	Change all mappings of a page to cached/uncached.
3420  */
3421 void
3422 pmap_page_cache(struct pmap *pm, paddr_t pa, int mode)
3423 {
3424 	struct vm_page *pg;
3425 	struct vm_page_md *md;
3426 	pv_entry_t pv;
3427 	vaddr_t va;
3428 	int rv;
3429 
3430 #if 0
3431 	/*
3432 	 * Why is this?
3433 	 */
3434 	if (CPU_ISSUN4US || CPU_ISSUN4V)
3435 		return;
3436 #endif
3437 
3438 	KASSERT(mutex_owned(&pmap_lock));
3439 
3440 	DPRINTF(PDB_ENTER, ("pmap_page_cache(%llx)\n",
3441 	    (unsigned long long)pa));
3442 	pg = PHYS_TO_VM_PAGE(pa);
3443 	md = VM_PAGE_TO_MD(pg);
3444 	pv = &md->mdpg_pvh;
3445 	while (pv) {
3446 		va = pv->pv_va & PV_VAMASK;
3447 		if (pv->pv_va & PV_NC) {
3448 			int64_t data;
3449 
3450 			/* Non-cached -- I/O mapping */
3451 			data = pseg_get(pv->pv_pmap, va);
3452 			KASSERT(data & TLB_V);
3453 			rv = pseg_set(pv->pv_pmap, va,
3454 				     data & ~(TLB_CV|TLB_CP), 0);
3455 			if (rv & 1)
3456 				panic("pmap_page_cache: pseg_set needs"
3457 				     " spare! rv=%d\n", rv);
3458 		} else if (mode && (!(pv->pv_va & PV_NVC))) {
3459 			int64_t data;
3460 
3461 			/* Enable caching */
3462 			data = pseg_get(pv->pv_pmap, va);
3463 			KASSERT(data & TLB_V);
3464 			rv = pseg_set(pv->pv_pmap, va, data | TLB_CV, 0);
3465 			if (rv & 1)
3466 				panic("pmap_page_cache: pseg_set needs"
3467 				    " spare! rv=%d\n", rv);
3468 		} else {
3469 			int64_t data;
3470 
3471 			/* Disable caching */
3472 			data = pseg_get(pv->pv_pmap, va);
3473 			KASSERT(data & TLB_V);
3474 			rv = pseg_set(pv->pv_pmap, va, data & ~TLB_CV, 0);
3475 			if (rv & 1)
3476 				panic("pmap_page_cache: pseg_set needs"
3477 				    " spare! rv=%d\n", rv);
3478 		}
3479 		if (pmap_is_on_mmu(pv->pv_pmap)) {
3480 			/* Force reload -- cache bits have changed */
3481 			KASSERT(pmap_ctx(pv->pv_pmap)>=0);
3482 			tsb_invalidate(va, pv->pv_pmap);
3483 			tlb_flush_pte(va, pv->pv_pmap);
3484 		}
3485 		pv = pv->pv_next;
3486 	}
3487 }
3488 
3489 /*
3490  * Some routines to allocate and free PTPs.
3491  */
3492 static int
3493 pmap_get_page(paddr_t *p)
3494 {
3495 	struct vm_page *pg;
3496 	paddr_t pa;
3497 
3498 	if (uvm.page_init_done) {
3499 		pg = uvm_pagealloc(NULL, 0, NULL,
3500 		    UVM_PGA_ZERO | UVM_PGA_USERESERVE);
3501 		if (pg == NULL)
3502 			return (0);
3503 		pa = VM_PAGE_TO_PHYS(pg);
3504 	} else {
3505 		if (!uvm_page_physget(&pa))
3506 			return (0);
3507 		pmap_zero_page(pa);
3508 	}
3509 	*p = pa;
3510 	return (1);
3511 }
3512 
3513 static void
3514 pmap_free_page(paddr_t pa, sparc64_cpuset_t cs)
3515 {
3516 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
3517 
3518 	dcache_flush_page_cpuset(pa, cs);
3519 	uvm_pagefree(pg);
3520 }
3521 
3522 static void
3523 pmap_free_page_noflush(paddr_t pa)
3524 {
3525 	struct vm_page *pg = PHYS_TO_VM_PAGE(pa);
3526 
3527 	uvm_pagefree(pg);
3528 }
3529 
3530 #ifdef DDB
3531 
3532 void db_dump_pv(db_expr_t, int, db_expr_t, const char *);
3533 void
3534 db_dump_pv(db_expr_t addr, int have_addr, db_expr_t count, const char *modif)
3535 {
3536 	struct vm_page *pg;
3537 	struct vm_page_md *md;
3538 	struct pv_entry *pv;
3539 
3540 	if (!have_addr) {
3541 		db_printf("Need addr for pv\n");
3542 		return;
3543 	}
3544 
3545 	pg = PHYS_TO_VM_PAGE((paddr_t)addr);
3546 	if (pg == NULL) {
3547 		db_printf("page is not managed\n");
3548 		return;
3549 	}
3550 	md = VM_PAGE_TO_MD(pg);
3551 	for (pv = &md->mdpg_pvh; pv; pv = pv->pv_next)
3552 		db_printf("pv@%p: next=%p pmap=%p va=0x%llx\n",
3553 			  pv, pv->pv_next, pv->pv_pmap,
3554 			  (unsigned long long)pv->pv_va);
3555 }
3556 
3557 #endif
3558 
3559 #ifdef DEBUG
3560 /*
3561  * Test ref/modify handling.  */
3562 void pmap_testout(void);
3563 void
3564 pmap_testout(void)
3565 {
3566 	vaddr_t va;
3567 	volatile int *loc;
3568 	int val = 0;
3569 	paddr_t pa;
3570 	struct vm_page *pg;
3571 	int ref, mod;
3572 
3573 	/* Allocate a page */
3574 	va = (vaddr_t)(vmmap - PAGE_SIZE);
3575 	KASSERT(va != 0);
3576 	loc = (int*)va;
3577 
3578 	pmap_get_page(&pa);
3579 	pg = PHYS_TO_VM_PAGE(pa);
3580 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3581 	pmap_update(pmap_kernel());
3582 
3583 	/* Now clear reference and modify */
3584 	ref = pmap_clear_reference(pg);
3585 	mod = pmap_clear_modify(pg);
3586 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3587 	       (void *)(u_long)va, (long)pa,
3588 	       ref, mod);
3589 
3590 	/* Check it's properly cleared */
3591 	ref = pmap_is_referenced(pg);
3592 	mod = pmap_is_modified(pg);
3593 	printf("Checking cleared page: ref %d, mod %d\n",
3594 	       ref, mod);
3595 
3596 	/* Reference page */
3597 	val = *loc;
3598 
3599 	ref = pmap_is_referenced(pg);
3600 	mod = pmap_is_modified(pg);
3601 	printf("Referenced page: ref %d, mod %d val %x\n",
3602 	       ref, mod, val);
3603 
3604 	/* Now clear reference and modify */
3605 	ref = pmap_clear_reference(pg);
3606 	mod = pmap_clear_modify(pg);
3607 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3608 	       (void *)(u_long)va, (long)pa,
3609 	       ref, mod);
3610 
3611 	/* Modify page */
3612 	*loc = 1;
3613 
3614 	ref = pmap_is_referenced(pg);
3615 	mod = pmap_is_modified(pg);
3616 	printf("Modified page: ref %d, mod %d\n",
3617 	       ref, mod);
3618 
3619 	/* Now clear reference and modify */
3620 	ref = pmap_clear_reference(pg);
3621 	mod = pmap_clear_modify(pg);
3622 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3623 	       (void *)(u_long)va, (long)pa,
3624 	       ref, mod);
3625 
3626 	/* Check it's properly cleared */
3627 	ref = pmap_is_referenced(pg);
3628 	mod = pmap_is_modified(pg);
3629 	printf("Checking cleared page: ref %d, mod %d\n",
3630 	       ref, mod);
3631 
3632 	/* Modify page */
3633 	*loc = 1;
3634 
3635 	ref = pmap_is_referenced(pg);
3636 	mod = pmap_is_modified(pg);
3637 	printf("Modified page: ref %d, mod %d\n",
3638 	       ref, mod);
3639 
3640 	/* Check pmap_protect() */
3641 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_READ);
3642 	pmap_update(pmap_kernel());
3643 	ref = pmap_is_referenced(pg);
3644 	mod = pmap_is_modified(pg);
3645 	printf("pmap_protect(VM_PROT_READ): ref %d, mod %d\n",
3646 	       ref, mod);
3647 
3648 	/* Now clear reference and modify */
3649 	ref = pmap_clear_reference(pg);
3650 	mod = pmap_clear_modify(pg);
3651 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3652 	       (void *)(u_long)va, (long)pa,
3653 	       ref, mod);
3654 
3655 	/* Modify page */
3656 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3657 	pmap_update(pmap_kernel());
3658 	*loc = 1;
3659 
3660 	ref = pmap_is_referenced(pg);
3661 	mod = pmap_is_modified(pg);
3662 	printf("Modified page: ref %d, mod %d\n",
3663 	       ref, mod);
3664 
3665 	/* Check pmap_protect() */
3666 	pmap_protect(pmap_kernel(), va, va+1, VM_PROT_NONE);
3667 	pmap_update(pmap_kernel());
3668 	ref = pmap_is_referenced(pg);
3669 	mod = pmap_is_modified(pg);
3670 	printf("pmap_protect(VM_PROT_NONE): ref %d, mod %d\n",
3671 	       ref, mod);
3672 
3673 	/* Now clear reference and modify */
3674 	ref = pmap_clear_reference(pg);
3675 	mod = pmap_clear_modify(pg);
3676 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3677 	       (void *)(u_long)va, (long)pa,
3678 	       ref, mod);
3679 
3680 	/* Modify page */
3681 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3682 	pmap_update(pmap_kernel());
3683 	*loc = 1;
3684 
3685 	ref = pmap_is_referenced(pg);
3686 	mod = pmap_is_modified(pg);
3687 	printf("Modified page: ref %d, mod %d\n",
3688 	       ref, mod);
3689 
3690 	/* Check pmap_pag_protect() */
3691 	/* Check pmap_page_protect() */
3692 	ref = pmap_is_referenced(pg);
3693 	mod = pmap_is_modified(pg);
3694 	printf("pmap_page_protect(VM_PROT_READ): ref %d, mod %d\n",
3695 	       ref, mod);
3696 
3697 	/* Now clear reference and modify */
3698 	ref = pmap_clear_reference(pg);
3699 	mod = pmap_clear_modify(pg);
3700 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3701 	       (void *)(u_long)va, (long)pa,
3702 	       ref, mod);
3703 
3704 
3705 	/* Modify page */
3706 	pmap_enter(pmap_kernel(), va, pa, VM_PROT_ALL, VM_PROT_ALL);
3707 	pmap_update(pmap_kernel());
3708 	*loc = 1;
3709 
3710 	ref = pmap_is_referenced(pg);
3711 	mod = pmap_is_modified(pg);
3712 	printf("Modified page: ref %d, mod %d\n",
3713 	       ref, mod);
3714 
3715 	/* Check pmap_page_protect() */
3716 	pmap_page_protect(pg, VM_PROT_NONE);
3717 	ref = pmap_is_referenced(pg);
3718 	mod = pmap_is_modified(pg);
3719 	printf("pmap_page_protect(VM_PROT_NONE): ref %d, mod %d\n",
3720 	       ref, mod);
3721 
3722 	/* Now clear reference and modify */
3723 	ref = pmap_clear_reference(pg);
3724 	mod = pmap_clear_modify(pg);
3725 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3726 	       (void *)(u_long)va, (long)pa,
3727 	       ref, mod);
3728 
3729 	/* Unmap page */
3730 	pmap_remove(pmap_kernel(), va, va+1);
3731 	pmap_update(pmap_kernel());
3732 	ref = pmap_is_referenced(pg);
3733 	mod = pmap_is_modified(pg);
3734 	printf("Unmapped page: ref %d, mod %d\n", ref, mod);
3735 
3736 	/* Now clear reference and modify */
3737 	ref = pmap_clear_reference(pg);
3738 	mod = pmap_clear_modify(pg);
3739 	printf("Clearing page va %p pa %lx: ref %d, mod %d\n",
3740 	       (void *)(u_long)va, (long)pa, ref, mod);
3741 
3742 	/* Check it's properly cleared */
3743 	ref = pmap_is_referenced(pg);
3744 	mod = pmap_is_modified(pg);
3745 	printf("Checking cleared page: ref %d, mod %d\n",
3746 	       ref, mod);
3747 
3748 	pmap_remove(pmap_kernel(), va, va+1);
3749 	pmap_update(pmap_kernel());
3750 	pmap_free_page(pa, cpus_active);
3751 }
3752 #endif
3753 
3754 void
3755 pmap_update(struct pmap *pmap)
3756 {
3757 
3758 	if (pmap->pm_refs > 0) {
3759 		return;
3760 	}
3761 	pmap->pm_refs = 1;
3762 	pmap_activate_pmap(pmap);
3763 }
3764 
3765 /*
3766  * pmap_copy_page()/pmap_zero_page()
3767  *
3768  * we make sure that the destination page is flushed from all D$'s
3769  * before we perform the copy/zero.
3770  */
3771 extern int cold;
3772 void
3773 pmap_copy_page(paddr_t src, paddr_t dst)
3774 {
3775 
3776 	if (!cold)
3777 		dcache_flush_page_all(dst);
3778 	pmap_copy_page_phys(src, dst);
3779 }
3780 
3781 void
3782 pmap_zero_page(paddr_t pa)
3783 {
3784 
3785 	if (!cold)
3786 		dcache_flush_page_all(pa);
3787 	pmap_zero_page_phys(pa);
3788 }
3789 
3790 #ifdef _LP64
3791 int
3792 sparc64_mmap_range_test(vaddr_t addr, vaddr_t eaddr)
3793 {
3794 	const vaddr_t hole_start = 0x000007ffffffffff;
3795 	const vaddr_t hole_end   = 0xfffff80000000000;
3796 
3797 	if (addr >= hole_end)
3798 		return 0;
3799 	if (eaddr <= hole_start)
3800 		return 0;
3801 
3802 	return EINVAL;
3803 }
3804 #endif
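
/*
 * Worked example for sparc64_mmap_range_test(): a range entirely
 * below the VA hole (eaddr <= hole_start), e.g. [0x10000, 0x20000),
 * or entirely above it (addr >= hole_end), e.g. starting at
 * 0xfffff80000000000, is accepted and returns 0.  A range such as
 * [0x000007ffff000000, 0x0000080000000000) reaches into the hole and
 * gets EINVAL.
 */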
3805 
3806 #ifdef SUN4V
3807 void
3808 pmap_setup_intstack_sun4v(paddr_t pa)
3809 {
3810 	int64_t hv_rc;
3811 	int64_t data;
3812 	data = SUN4V_TSB_DATA(
3813 	    0 /* global */,
3814 	    PGSZ_64K,
3815 	    pa,
3816 	    1 /* priv */,
3817 	    1 /* Write */,
3818 	    1 /* Cacheable */,
3819 	    FORCE_ALIAS /* ALIAS -- Disable D$ */,
3820 	    1 /* valid */,
3821 	    0 /* IE */,
3822 	    0 /* wc */);
3823 	hv_rc = hv_mmu_map_perm_addr(INTSTACK, data, MAP_DTLB);
3824 	if ( hv_rc != H_EOK ) {
3825 		panic("hv_mmu_map_perm_addr() failed - rc = %" PRId64 "\n",
3826 		    hv_rc);
3827 	}
3828 }
3829 
3830 void
3831 pmap_setup_tsb_sun4v(struct tsb_desc* tsb_desc)
3832 {
3833 	int err;
3834 	paddr_t tsb_desc_p;
3835 	tsb_desc_p = pmap_kextract((vaddr_t)tsb_desc);
3836 	if (!tsb_desc_p) {
3837 		panic("pmap_setup_tsb_sun4v() pmap_kextract() failed");
3838 	}
3839 	err = hv_mmu_tsb_ctx0(1, tsb_desc_p);
3840 	if (err != H_EOK) {
3841 		prom_printf("hv_mmu_tsb_ctx0() err: %d\n", err);
3842 		panic("pmap_setup_tsb_sun4v() hv_mmu_tsb_ctx0() failed");
3843 	}
3844 	err = hv_mmu_tsb_ctxnon0(1, tsb_desc_p);
3845 	if (err != H_EOK) {
3846 		prom_printf("hv_mmu_tsb_ctxnon0() err: %d\n", err);
3847 		panic("pmap_setup_tsb_sun4v() hv_mmu_tsb_ctxnon0() failed");
3848 	}
3849 }
3850 
3851 #endif
3852