xref: /openbsd/sys/arch/riscv64/riscv64/pmap.c (revision 45f3019f)
1 /*	$OpenBSD: pmap.c,v 1.44 2025/01/19 20:18:38 kettenis Exp $	*/
2 
3 /*
4  * Copyright (c) 2019-2020 Brian Bamsch <bbamsch@google.com>
5  * Copyright (c) 2008-2009,2014-2016 Dale Rahn <drahn@dalerahn.com>
6  *
7  * Permission to use, copy, modify, and distribute this software for any
8  * purpose with or without fee is hereby granted, provided that the above
9  * copyright notice and this permission notice appear in all copies.
10  *
11  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18  */
19 
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/atomic.h>
23 #include <sys/pool.h>
24 #include <sys/proc.h>
25 
26 #include <uvm/uvm.h>
27 
28 #include <machine/cpufunc.h>
29 #include <machine/pmap.h>
30 #include <machine/riscvreg.h>
31 #include <machine/sbi.h>
32 
33 #include <dev/ofw/fdt.h>
34 
35 #ifdef MULTIPROCESSOR
36 
37 static inline int
38 pmap_is_active(struct pmap *pm, struct cpu_info *ci)
39 {
40 	return pm == pmap_kernel() || pm == ci->ci_curpm;
41 }
42 
43 #endif
44 
45 void
46 do_tlb_flush_page(pmap_t pm, vaddr_t va)
47 {
48 #ifdef MULTIPROCESSOR
49 	CPU_INFO_ITERATOR cii;
50 	struct cpu_info *ci;
51 	unsigned long hart_mask = 0;
52 
53 	CPU_INFO_FOREACH(cii, ci) {
54 		if (ci == curcpu())
55 			continue;
56 		if (pmap_is_active(pm, ci))
57 			hart_mask |= (1UL << ci->ci_hartid);
58 	}
59 
60 	/*
61 	 * From the RISC-V privileged spec:
62 	 *
63 	 * SFENCE.VMA orders only the local hart's implicit references
64 	 * to the memory-management data structures. Consequently, other
65 	 * harts must be notified separately when the memory-management
66 	 * data structures have been modified. One approach is to use 1)
67 	 * a local data fence to ensure local writes are visible
68 	 * globally, then 2) an interprocessor interrupt to the other
69 	 * thread, then 3) a local SFENCE.VMA in the interrupt handler
70 	 * of the remote thread, and finally 4) signal back to
71 	 * originating thread that operation is complete.
72 	 */
73 	if (hart_mask != 0) {
74 		membar_sync();
75 		sbi_remote_sfence_vma(&hart_mask, va, PAGE_SIZE);
76 	}
77 #endif
78 
79 	sfence_vma_page(va);
80 }
81 
82 void
83 do_tlb_flush(pmap_t pm)
84 {
85 #ifdef MULTIPROCESSOR
86 	CPU_INFO_ITERATOR cii;
87 	struct cpu_info *ci;
88 	unsigned long hart_mask = 0;
89 
90 	CPU_INFO_FOREACH(cii, ci) {
91 		if (ci == curcpu())
92 			continue;
93 		if (pmap_is_active(pm, ci))
94 			hart_mask |= (1UL << ci->ci_hartid);
95 	}
96 
97 	/*
98 	 * From the RISC-V privileged spec:
99 	 *
100 	 * SFENCE.VMA orders only the local hart's implicit references
101 	 * to the memory-management data structures. Consequently, other
102 	 * harts must be notified separately when the memory-management
103 	 * data structures have been modified. One approach is to use 1)
104 	 * a local data fence to ensure local writes are visible
105 	 * globally, then 2) an interprocessor interrupt to the other
106 	 * thread, then 3) a local SFENCE.VMA in the interrupt handler
107 	 * of the remote thread, and finally 4) signal back to
108 	 * originating thread that operation is complete.
109 	 */
110 	if (hart_mask != 0) {
111 		membar_sync();
112 		sbi_remote_sfence_vma(&hart_mask, 0, -1);
113 	}
114 #endif
115 
116 	sfence_vma();
117 }
118 
119 void
120 tlb_flush_page(pmap_t pm, vaddr_t va)
121 {
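	/*
	 * On parts with the SiFive CIP-1200 erratum an address-targeted
	 * SFENCE.VMA cannot be relied upon, so fall back to flushing the
	 * whole TLB there.
	 */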
122 	if (cpu_errata_sifive_cip_1200)
123 		do_tlb_flush(pm);
124 	else
125 		do_tlb_flush_page(pm, va);
126 }
127 
128 static inline void
129 icache_flush(void)
130 {
131 #ifdef MULTIPROCESSOR
132 	CPU_INFO_ITERATOR cii;
133 	struct cpu_info *ci;
134 	unsigned long hart_mask = 0;
135 #endif
136 
137 	fence_i();
138 
139 #ifdef MULTIPROCESSOR
140 	CPU_INFO_FOREACH(cii, ci) {
141 		if (ci == curcpu())
142 			continue;
143 		hart_mask |= (1UL << ci->ci_hartid);
144 	}
145 
146 	/*
147 	 * From the RISC-V ISA:
148 	 *
149 	 * To make a store to instruction memory visible to all RISC-V
150 	 * harts, the writing hart has to execute a data FENCE before
151 	 * requesting that all remote RISC-V harts execute a FENCE.I.
152 	 */
153 	if (hart_mask != 0) {
154 		membar_sync();
155 		sbi_remote_fence_i(&hart_mask);
156 	}
157 #endif
158 }
159 
160 struct pmap kernel_pmap_;
161 
162 LIST_HEAD(pted_pv_head, pte_desc);
163 
164 struct pte_desc {
165 	LIST_ENTRY(pte_desc) pted_pv_list;
166 	pt_entry_t pted_pte;
167 	pmap_t pted_pmap;
168 	vaddr_t pted_va;
169 };
170 
171 struct pmapvp1 {
172 	pt_entry_t l1[VP_IDX1_CNT];
173 	struct pmapvp2 *vp[VP_IDX1_CNT];
174 };
175 
176 struct pmapvp2 {
177 	pt_entry_t l2[VP_IDX2_CNT];
178 	struct pmapvp3 *vp[VP_IDX2_CNT];
179 };
180 
181 struct pmapvp3 {
182 	pt_entry_t l3[VP_IDX3_CNT];
183 	struct pte_desc *vp[VP_IDX3_CNT];
184 };
185 CTASSERT(sizeof(struct pmapvp1) == sizeof(struct pmapvp2));
186 CTASSERT(sizeof(struct pmapvp1) == sizeof(struct pmapvp3));
187 
188 void	pmap_vp_destroy(pmap_t);
189 
190 /* Allocator for VP pool. */
191 void	*pmap_vp_page_alloc(struct pool *, int, int *);
192 void	pmap_vp_page_free(struct pool *, void *);
193 
194 struct pool_allocator pmap_vp_allocator = {
195 	pmap_vp_page_alloc, pmap_vp_page_free, sizeof(struct pmapvp1)
196 };
197 
198 void	pmap_remove_pted(pmap_t, struct pte_desc *);
199 void	pmap_kremove_pg(vaddr_t);
200 void	pmap_set_l2(struct pmap *, uint64_t, struct pmapvp2 *, paddr_t);
201 void	pmap_set_l3(struct pmap *, uint64_t, struct pmapvp3 *, paddr_t);
202 void	pmap_set_satp(struct proc *);
203 
204 void	pmap_fill_pte(pmap_t, vaddr_t, paddr_t, struct pte_desc *,
205 	    vm_prot_t, int, int);
206 void	pmap_pte_insert(struct pte_desc *);
207 void	pmap_pte_remove(struct pte_desc *, int);
208 void	pmap_pte_update(struct pte_desc *, pt_entry_t *);
209 void	pmap_release(pmap_t);
210 paddr_t	pmap_steal_avail(size_t, int, void **);
211 void	pmap_remove_avail(paddr_t, paddr_t);
212 vaddr_t	pmap_map_stolen(vaddr_t);
213 
214 vaddr_t vmmap;
215 vaddr_t zero_page;
216 vaddr_t copy_src_page;
217 vaddr_t copy_dst_page;
218 
219 #define CPU_VENDOR_THEAD	0x5b7
220 
221 struct pool pmap_pmap_pool;
222 struct pool pmap_pted_pool;
223 struct pool pmap_vp_pool;
224 
225 int pmap_initialized = 0;
226 
227 struct mem_region {
228 	vaddr_t start;
229 	vsize_t size;
230 };
231 
232 struct mem_region pmap_avail_regions[10];
233 struct mem_region pmap_allocated_regions[10];
234 struct mem_region *pmap_avail = &pmap_avail_regions[0];
235 struct mem_region *pmap_allocated = &pmap_allocated_regions[0];
236 int pmap_cnt_avail, pmap_cnt_allocated;
237 uint64_t pmap_avail_kvo;
238 
239 paddr_t pmap_cached_start, pmap_cached_end;
240 paddr_t pmap_uncached_start, pmap_uncached_end;
241 
242 static inline void
243 pmap_lock(struct pmap *pmap)
244 {
245 	if (pmap != pmap_kernel())
246 		mtx_enter(&pmap->pm_mtx);
247 }
248 
249 static inline void
250 pmap_unlock(struct pmap *pmap)
251 {
252 	if (pmap != pmap_kernel())
253 		mtx_leave(&pmap->pm_mtx);
254 }
255 
256 /* virtual to physical helpers */
257 static inline int
258 VP_IDX1(vaddr_t va)
259 {
260 	return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
261 }
262 
263 static inline int
264 VP_IDX2(vaddr_t va)
265 {
266 	return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
267 }
268 
269 static inline int
270 VP_IDX3(vaddr_t va)
271 {
272 	return (va >> VP_IDX3_POS) & VP_IDX3_MASK;
273 }
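/*
 * Sv39 splits a 39-bit virtual address into a 12-bit page offset and
 * three 9-bit table indices: VP_IDX1 selects the L1 (root) slot from
 * va[38:30], VP_IDX2 the L2 slot from va[29:21] and VP_IDX3 the L3
 * slot from va[20:12].
 */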
274 
275 /*
276  * On RISC-V, the encodings for write permission without read
277  * permission (r=0, w=1, x=0, or r=0, w=1, x=1) are reserved, so
278  * PROT_WRITE implies PROT_READ.  We need to handle PROT_NONE
279  * separately (see pmap_pte_update()) since r=0, w=0, x=0 is reserved
280  * for non-leaf page table entries.
281  */
282 const pt_entry_t ap_bits_user[8] = {
283 	[PROT_NONE]				= 0,
284 	[PROT_READ]				= PTE_U|PTE_A|PTE_R,
285 	[PROT_WRITE]				= PTE_U|PTE_A|PTE_R|PTE_D|PTE_W,
286 	[PROT_WRITE|PROT_READ]			= PTE_U|PTE_A|PTE_R|PTE_D|PTE_W,
287 	[PROT_EXEC]				= PTE_U|PTE_A|PTE_X,
288 	[PROT_EXEC|PROT_READ]			= PTE_U|PTE_A|PTE_X|PTE_R,
289 	[PROT_EXEC|PROT_WRITE]			= PTE_U|PTE_A|PTE_X|PTE_R|PTE_D|PTE_W,
290 	[PROT_EXEC|PROT_WRITE|PROT_READ]	= PTE_U|PTE_A|PTE_X|PTE_R|PTE_D|PTE_W,
291 };
292 
293 const pt_entry_t ap_bits_kern[8] = {
294 	[PROT_NONE]				= 0,
295 	[PROT_READ]				= PTE_A|PTE_R,
296 	[PROT_WRITE]				= PTE_A|PTE_R|PTE_D|PTE_W,
297 	[PROT_WRITE|PROT_READ]			= PTE_A|PTE_R|PTE_D|PTE_W,
298 	[PROT_EXEC]				= PTE_A|PTE_X,
299 	[PROT_EXEC|PROT_READ]			= PTE_A|PTE_X|PTE_R,
300 	[PROT_EXEC|PROT_WRITE]			= PTE_A|PTE_X|PTE_R|PTE_D|PTE_W,
301 	[PROT_EXEC|PROT_WRITE|PROT_READ]	= PTE_A|PTE_X|PTE_R|PTE_D|PTE_W,
302 };
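/*
 * PTE_A (and PTE_D for writable mappings) are preset in these tables,
 * so the hardware never has to fault merely to record an access or a
 * store; referenced/modified state is tracked in software instead
 * (see pmap_fault_fixup()).
 */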
303 
304 /* PBMT encodings for the Svpbmt modes. */
305 uint64_t pmap_pma;
306 uint64_t pmap_nc;
307 uint64_t pmap_io;
308 
309 /*
310  * This is used for pmap_kernel() mappings; they are not to be removed
311  * from the vp table because they were statically initialized during
312  * the initial pmap initialization, so that no memory allocation is
313  * necessary for pmap_kernel() mappings.
314  * Otherwise bad race conditions can appear.
315  */
316 struct pte_desc *
317 pmap_vp_lookup(pmap_t pm, vaddr_t va, pt_entry_t **pl3entry)
318 {
319 	struct pmapvp1 *vp1;
320 	struct pmapvp2 *vp2;
321 	struct pmapvp3 *vp3;
322 	struct pte_desc *pted;
323 
324 	vp1 = pm->pm_vp.l1;
325 	if (vp1 == NULL) {
326 		return NULL;
327 	}
328 
329 	vp2 = vp1->vp[VP_IDX1(va)];
330 	if (vp2 == NULL) {
331 		return NULL;
332 	}
333 
334 	vp3 = vp2->vp[VP_IDX2(va)];
335 	if (vp3 == NULL) {
336 		return NULL;
337 	}
338 
339 	pted = vp3->vp[VP_IDX3(va)];
340 	if (pl3entry != NULL)
341 		*pl3entry = &(vp3->l3[VP_IDX3(va)]);
342 
343 	return pted;
344 }
345 
346 /*
347  * Create a V -> P mapping for the given pmap and virtual address
348  * with reference to the pte descriptor that is used to map the page.
349  * This code should track vp table allocations so that they can be
350  * freed efficiently.
351  *
352  * XXX it may be possible to save some bits of count in the
353  * upper address bits of the pa or the pte entry.
354  * However, that would make populating the other bits more tricky.
355  * Each level has 512 entries, so 9 bits would be needed to store the
356  * count; stash 3 bits each in the first 3 entries?
357  */
358 int
359 pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags)
360 {
361 	struct pmapvp1 *vp1;
362 	struct pmapvp2 *vp2;
363 	struct pmapvp3 *vp3;
364 
365 	vp1 = pm->pm_vp.l1;
366 
367 	vp2 = vp1->vp[VP_IDX1(va)];
368 	if (vp2 == NULL) {
369 		vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
370 		if (vp2 == NULL) {
371 			if ((flags & PMAP_CANFAIL) == 0)
372 				panic("%s: unable to allocate L2", __func__);
373 			return ENOMEM;
374 		}
375 		pmap_set_l2(pm, va, vp2, 0);
376 	}
377 
378 	vp3 = vp2->vp[VP_IDX2(va)];
379 	if (vp3 == NULL) {
380 		vp3 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
381 		if (vp3 == NULL) {
382 			if ((flags & PMAP_CANFAIL) == 0)
383 				panic("%s: unable to allocate L3", __func__);
384 			return ENOMEM;
385 		}
386 		pmap_set_l3(pm, va, vp3, 0);
387 	}
388 
389 	vp3->vp[VP_IDX3(va)] = pted;
390 	return 0;
391 }
392 
393 void
394 pmap_vp_populate(pmap_t pm, vaddr_t va)
395 {
396 	struct pte_desc *pted;
397 	struct pmapvp1 *vp1;
398 	struct pmapvp2 *vp2;
399 	struct pmapvp3 *vp3;
400 	void *vp;
401 
402 	pted = pool_get(&pmap_pted_pool, PR_WAITOK | PR_ZERO);
403 	vp = pool_get(&pmap_vp_pool, PR_WAITOK | PR_ZERO);
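	/*
	 * Allocate everything that might be needed up front, while it is
	 * still safe to sleep, then take the pmap lock; whatever ends up
	 * unused is returned to the pools below.
	 */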
404 
405 	pmap_lock(pm);
406 
407 	vp1 = pm->pm_vp.l1;
408 
409 	vp2 = vp1->vp[VP_IDX1(va)];
410 	if (vp2 == NULL) {
411 		vp2 = vp; vp = NULL;
412 		pmap_set_l2(pm, va, vp2, 0);
413 	}
414 
415 	if (vp == NULL) {
416 		pmap_unlock(pm);
417 		vp = pool_get(&pmap_vp_pool, PR_WAITOK | PR_ZERO);
418 		pmap_lock(pm);
419 	}
420 
421 	vp3 = vp2->vp[VP_IDX2(va)];
422 	if (vp3 == NULL) {
423 		vp3 = vp; vp = NULL;
424 		pmap_set_l3(pm, va, vp3, 0);
425 	}
426 
427 	if (vp3->vp[VP_IDX3(va)] == NULL) {
428 		vp3->vp[VP_IDX3(va)] = pted;
429 		pted = NULL;
430 	}
431 
432 	pmap_unlock(pm);
433 
434 	if (vp)
435 		pool_put(&pmap_vp_pool, vp);
436 	if (pted)
437 		pool_put(&pmap_pted_pool, pted);
438 }
439 
440 void *
441 pmap_vp_page_alloc(struct pool *pp, int flags, int *slowdown)
442 {
443 	struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
444 
445 	kd.kd_waitok = ISSET(flags, PR_WAITOK);
446 	kd.kd_trylock = ISSET(flags, PR_NOWAIT);
447 	kd.kd_slowdown = slowdown;
448 
449 	return km_alloc(pp->pr_pgsize, &kv_any, &kp_dirty, &kd);
450 }
451 
452 void
453 pmap_vp_page_free(struct pool *pp, void *v)
454 {
455 	km_free(v, pp->pr_pgsize, &kv_any, &kp_dirty);
456 }
457 
458 static inline u_int32_t
459 PTED_MANAGED(struct pte_desc *pted)
460 {
461 	return (pted->pted_va & PTED_VA_MANAGED_M);
462 }
463 
464 static inline u_int32_t
465 PTED_WIRED(struct pte_desc *pted)
466 {
467 	return (pted->pted_va & PTED_VA_WIRED_M);
468 }
469 
470 static inline u_int32_t
471 PTED_VALID(struct pte_desc *pted)
472 {
473 	return (pted->pted_pte != 0);
474 }
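/*
 * Since mapped virtual addresses are page aligned, the low bits of
 * pted_va are reused to store the wired/managed/exec flags, the cache
 * attribute and the requested protection; pted_pte likewise carries
 * the wanted protection in its low bits until pmap_pte_update()
 * expands it into real PTE access bits.
 */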
475 
476 /*
477  * PV entries -
478  * manipulate the physical to virtual translations for the entire system.
479  *
480  * QUESTION: should all mapped memory be stored in PV tables? Or
481  * is it alright to only store "ram" memory?  Currently device mappings
482  * are not stored.
483  * It makes sense to pre-allocate mappings for all of "ram" memory, since
484  * it is likely that it will be mapped at some point, but would it also
485  * make sense to use a tree/table like the one used for pmap to store device
486  * mappings?
487  * Further notes: It seems that the PV table is only used for pmap_protect
488  * and other paging related operations. Given this, it is not necessary
489  * to store any pmap_kernel() entries in PV tables and does not make
490  * sense to store device mappings in PV either.
491  *
492  * Note: unlike other powerpc pmap designs, the array is only an array
493  * of pointers.  The same structure is used for holding information
494  * in the VP table, the PV table, and for kernel mappings and wired
495  * entries, so one data structure holds all of the info instead of
496  * replicating it multiple times.
497  *
498  * One issue with making this a single data structure is that two pointers
499  * are wasted for every page which does not map ram (device mappings).  This
500  * should be a low percentage of mapped pages in the system, so it should
501  * not cause noticeable unnecessary ram consumption.
502  */
503 
504 void
505 pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg)
506 {
507 	/*
508 	 * XXX does this test mean that some pages try to be managed,
509 	 * but this is called too soon?
510 	 */
511 	if (__predict_false(!pmap_initialized))
512 		return;
513 
514 	mtx_enter(&pg->mdpage.pv_mtx);
515 	LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
516 	pted->pted_va |= PTED_VA_MANAGED_M;
517 	mtx_leave(&pg->mdpage.pv_mtx);
518 }
519 
520 void
521 pmap_remove_pv(struct pte_desc *pted)
522 {
523 	struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte & PTE_RPGN);
524 
525 	mtx_enter(&pg->mdpage.pv_mtx);
526 	LIST_REMOVE(pted, pted_pv_list);
527 	mtx_leave(&pg->mdpage.pv_mtx);
528 }
529 
530 int
531 pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
532 {
533 	struct pte_desc *pted;
534 	struct vm_page *pg;
535 	int error;
536 	int cache = PMAP_CACHE_WB;
537 
538 	if (pa & PMAP_NOCACHE)
539 		cache = PMAP_CACHE_CI;
540 	if (pa & PMAP_DEVICE)
541 		cache = PMAP_CACHE_DEV;
542 	pg = PHYS_TO_VM_PAGE(pa);
543 
544 	pmap_lock(pm);
545 	pted = pmap_vp_lookup(pm, va, NULL);
546 	if (pted && PTED_VALID(pted)) {
547 		pmap_remove_pted(pm, pted);
548 		/* we lost our pted if it was user */
549 		if (pm != pmap_kernel())
550 			pted = pmap_vp_lookup(pm, va, NULL);
551 	}
552 
553 	pm->pm_stats.resident_count++;
554 
555 	/* We do not have a pted for this va; get one and put it in the VP. */
556 	if (pted == NULL) {
557 		pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO);
558 		if (pted == NULL) {
559 			if ((flags & PMAP_CANFAIL) == 0)
560 				panic("%s: failed to allocate pted", __func__);
561 			error = ENOMEM;
562 			goto out;
563 		}
564 		if (pmap_vp_enter(pm, va, pted, flags)) {
565 			if ((flags & PMAP_CANFAIL) == 0)
566 				panic("%s: failed to allocate L2/L3", __func__);
567 			error = ENOMEM;
568 			pool_put(&pmap_pted_pool, pted);
569 			goto out;
570 		}
571 	}
572 
573 	/*
574 	 * If it should be enabled _right now_, we can skip doing ref/mod
575 	 * emulation. Any access includes reference, modified only by write.
576 	 */
577 	if (pg != NULL &&
578 	    ((flags & PROT_MASK) || (pg->pg_flags & PG_PMAP_REF))) {
579 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
580 		if ((prot & PROT_WRITE) && (flags & PROT_WRITE)) {
581 			atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD);
582 			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
583 		}
584 	}
585 
586 	pmap_fill_pte(pm, va, pa, pted, prot, flags, cache);
587 
588 	if (pg != NULL) {
589 		pmap_enter_pv(pted, pg); /* only managed mem */
590 	}
591 
592 	if (pg != NULL && (flags & PROT_EXEC)) {
593 		if ((pg->pg_flags & PG_PMAP_EXE) == 0)
594 			icache_flush();
595 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
596 	}
597 
598 	/*
599 	 * Insert into table, if this mapping said it needed to be mapped
600 	 * now.
601 	 */
602 	if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC|PMAP_WIRED)) {
603 		pmap_pte_insert(pted);
604 		tlb_flush_page(pm, va & ~PAGE_MASK);
605 	}
606 
607 	error = 0;
608 out:
609 	pmap_unlock(pm);
610 	return error;
611 }
612 
613 void
614 pmap_populate(pmap_t pm, vaddr_t va)
615 {
616 	pmap_vp_populate(pm, va);
617 }
618 
619 /*
620  * Remove the given range of mapping entries.
621  */
622 void
623 pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
624 {
625 	struct pte_desc *pted;
626 	vaddr_t va;
627 
628 	pmap_lock(pm);
629 	for (va = sva; va < eva; va += PAGE_SIZE) {
630 		pted = pmap_vp_lookup(pm, va, NULL);
631 
632 		if (pted == NULL)
633 			continue;
634 
635 		if (PTED_WIRED(pted)) {
636 			pm->pm_stats.wired_count--;
637 			pted->pted_va &= ~PTED_VA_WIRED_M;
638 		}
639 
640 		if (PTED_VALID(pted))
641 			pmap_remove_pted(pm, pted);
642 	}
643 	pmap_unlock(pm);
644 }
645 
646 /*
647  * remove a single mapping, notice that this code is O(1)
648  */
649 void
650 pmap_remove_pted(pmap_t pm, struct pte_desc *pted)
651 {
652 	pm->pm_stats.resident_count--;
653 
654 	if (PTED_WIRED(pted)) {
655 		pm->pm_stats.wired_count--;
656 		pted->pted_va &= ~PTED_VA_WIRED_M;
657 	}
658 
659 	pmap_pte_remove(pted, pm != pmap_kernel());
660 	tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
661 
662 	if (pted->pted_va & PTED_VA_EXEC_M) {
663 		pted->pted_va &= ~PTED_VA_EXEC_M;
664 	}
665 
666 	if (PTED_MANAGED(pted))
667 		pmap_remove_pv(pted);
668 
669 	pted->pted_pte = 0;
670 	pted->pted_va = 0;
671 
672 	if (pm != pmap_kernel())
673 		pool_put(&pmap_pted_pool, pted);
674 }
675 
676 
677 /*
678  * Populate a kernel mapping for the given page.
679  * kernel mappings have a larger set of prerequisites than normal mappings.
680  *
681  * 1. no memory should be allocated to create a kernel mapping.
682  * 2. a vp mapping should already exist, even if invalid. (see 1)
683  * 3. all vp tree mappings should already exist (see 1)
684  *
685  */
686 void
687 _pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, int flags, int cache)
688 {
689 	pmap_t pm = pmap_kernel();
690 	struct pte_desc *pted;
691 	struct vm_page *pg;
692 
693 	pted = pmap_vp_lookup(pm, va, NULL);
694 
695 	/* A pted must already have been preallocated for pmap_kernel(). */
696 	if (pted == NULL) {
697 		panic("pted not preallocated in pmap_kernel() va %lx pa %lx",
698 		    va, pa);
699 	}
700 
701 	if (pted && PTED_VALID(pted))
702 		pmap_kremove_pg(va); /* pted is reused */
703 
704 	pm->pm_stats.resident_count++;
705 
706 	flags |= PMAP_WIRED; /* kernel mappings are always wired. */
707 	/* Calculate PTE */
708 	pmap_fill_pte(pm, va, pa, pted, prot, flags, cache);
709 
710 	/*
711 	 * Insert into table
712 	 * We were told to map the page, probably called from vm_fault,
713 	 * so map the page!
714 	 */
715 	pmap_pte_insert(pted);
716 	tlb_flush_page(pm, va & ~PAGE_MASK);
717 
718 	pg = PHYS_TO_VM_PAGE(pa);
719 	if (pg && cache == PMAP_CACHE_CI)
720 		cpu_dcache_wbinv_range(pa & ~PAGE_MASK, PAGE_SIZE);
721 }
722 
723 void
724 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
725 {
726 	_pmap_kenter_pa(va, pa, prot, prot,
727 	    (pa & PMAP_NOCACHE) ? PMAP_CACHE_CI : PMAP_CACHE_WB);
728 }
729 
730 void
731 pmap_kenter_cache(vaddr_t va, paddr_t pa, vm_prot_t prot, int cacheable)
732 {
733 	_pmap_kenter_pa(va, pa, prot, prot, cacheable);
734 }
735 
736 /*
737  * remove kernel (pmap_kernel()) mapping, one page
738  */
739 void
740 pmap_kremove_pg(vaddr_t va)
741 {
742 	pmap_t pm = pmap_kernel();
743 	struct pte_desc *pted;
744 	int s;
745 
746 	pted = pmap_vp_lookup(pm, va, NULL);
747 	if (pted == NULL)
748 		return;
749 
750 	if (!PTED_VALID(pted))
751 		return; /* not mapped */
752 
753 	s = splvm();
754 
755 	pm->pm_stats.resident_count--;
756 
757 	/*
758 	 * The table needs to be locked here, as well as the pmap and the
759 	 * pv list, so that we know the mapping information is either valid
760 	 * or that the mapping is not present in the table.
761 	 */
762 	pmap_pte_remove(pted, 0);
763 	tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
764 
765 	if (pted->pted_va & PTED_VA_EXEC_M)
766 		pted->pted_va &= ~PTED_VA_EXEC_M;
767 
768 	if (PTED_MANAGED(pted))
769 		pmap_remove_pv(pted);
770 
771 	if (PTED_WIRED(pted))
772 		pm->pm_stats.wired_count--;
773 
774 	/* invalidate pted; */
775 	pted->pted_pte = 0;
776 	pted->pted_va = 0;
777 
778 	splx(s);
779 }
780 
781 /*
782  * remove kernel (pmap_kernel()) mappings
783  */
784 void
785 pmap_kremove(vaddr_t va, vsize_t len)
786 {
787 	for (len >>= PAGE_SHIFT; len > 0; len--, va += PAGE_SIZE)
788 		pmap_kremove_pg(va);
789 }
790 
791 void
792 pmap_fill_pte(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
793     vm_prot_t prot, int flags, int cache)
794 {
795 	pted->pted_va = va;
796 	pted->pted_pmap = pm;
797 
798 	switch (cache) {
799 	case PMAP_CACHE_WB:
800 		break;
801 	case PMAP_CACHE_CI:
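		/*
		 * Boards with a separate uncached alias of RAM (see the
		 * JH7100 ranges set up in pmap_init()) have uncached
		 * mappings redirected into that alias window;
		 * pmap_extract() reverses the offset.
		 */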
802 		if (pa >= pmap_cached_start && pa <= pmap_cached_end)
803 			pa += (pmap_uncached_start - pmap_cached_start);
804 		break;
805 	case PMAP_CACHE_DEV:
806 		break;
807 	default:
808 		panic("%s: invalid cache mode", __func__);
809 	}
810 	pted->pted_va |= cache;
811 
812 	pted->pted_va |= prot & (PROT_READ|PROT_WRITE|PROT_EXEC);
813 
814 	if (flags & PMAP_WIRED) {
815 		pted->pted_va |= PTED_VA_WIRED_M;
816 		pm->pm_stats.wired_count++;
817 	}
818 
819 	pted->pted_pte = pa & PTE_RPGN;
820 	pted->pted_pte |= flags & (PROT_READ|PROT_WRITE|PROT_EXEC);
821 }
822 
823 /*
824  * Fill the given physical page with zeros.
825  */
826 void
827 pmap_zero_page(struct vm_page *pg)
828 {
829 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
830 	vaddr_t va = zero_page + cpu_number() * PAGE_SIZE;
831 
832 	pmap_kenter_pa(va, pa, PROT_READ|PROT_WRITE);
833 	pagezero(va);
834 	pmap_kremove_pg(va);
835 }
836 
837 /*
838  * Copy the given physical page.
839  */
840 void
841 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
842 {
843 	paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
844 	paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
845 	vaddr_t srcva = copy_src_page + cpu_number() * PAGE_SIZE;
846 	vaddr_t dstva = copy_dst_page + cpu_number() * PAGE_SIZE;
847 	int s;
848 
849 	/*
850 	 * XXX The buffer flipper (incorrectly?) uses pmap_copy_page()
851 	 * (from uvm_pagerealloc_multi()) from interrupt context!
852 	 */
853 	s = splbio();
854 	pmap_kenter_pa(srcva, srcpa, PROT_READ);
855 	pmap_kenter_pa(dstva, dstpa, PROT_READ|PROT_WRITE);
856 	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
857 	pmap_kremove_pg(srcva);
858 	pmap_kremove_pg(dstva);
859 	splx(s);
860 }
861 
862 void
863 pmap_pinit(pmap_t pm)
864 {
865 	struct pmapvp1 *vp1, *kvp1;
866 	vaddr_t l1va;
867 	uint64_t l1pa;
868 
869 	/* Allocate a full L1 table. */
870 	while (pm->pm_vp.l1 == NULL) {
871 		pm->pm_vp.l1 = pool_get(&pmap_vp_pool,
872 		    PR_WAITOK | PR_ZERO);
873 	}
874 
875 	vp1 = pm->pm_vp.l1; /* top level is l1 */
876 	l1va = (vaddr_t)vp1->l1;
877 
878 	/* Fill kernel PTEs. */
879 	kvp1 = pmap_kernel()->pm_vp.l1;
880 	memcpy(&vp1->l1[L1_KERN_BASE], &kvp1->l1[L1_KERN_BASE],
881 	    L1_KERN_ENTRIES * sizeof(pt_entry_t));
882 	memcpy(&vp1->vp[L1_KERN_BASE], &kvp1->vp[L1_KERN_BASE],
883 	    L1_KERN_ENTRIES * sizeof(struct pmapvp2 *));
884 
885 	pmap_extract(pmap_kernel(), l1va, (paddr_t *)&l1pa);
886 	pm->pm_satp |= SATP_FORMAT_PPN(PPN(l1pa));
887 	pm->pm_satp |= SATP_MODE_SV39;
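	/*
	 * pm_satp now holds the Sv39 mode in its MODE field and the
	 * physical page number of this pmap's root (L1) table; the ASID
	 * field is left at zero.
	 */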
888 	pmap_reference(pm);
889 }
890 
891 int pmap_vp_poolcache = 0; /* force vp poolcache to allocate late */
892 
893 /*
894  * Create and return a physical map.
895  */
896 pmap_t
897 pmap_create(void)
898 {
899 	pmap_t pmap;
900 
901 	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK | PR_ZERO);
902 
903 	mtx_init(&pmap->pm_mtx, IPL_VM);
904 
905 	pmap_pinit(pmap);
906 	if (pmap_vp_poolcache == 0) {
907 		pool_setlowat(&pmap_vp_pool, 20);
908 		pmap_vp_poolcache = 20;
909 	}
910 	return (pmap);
911 }
912 
913 /*
914  * Add a reference to a given pmap.
915  */
916 void
917 pmap_reference(pmap_t pm)
918 {
919 	atomic_inc_int(&pm->pm_refs);
920 }
921 
922 /*
923  * Retire the given pmap from service.
924  * Should only be called if the map contains no valid mappings.
925  */
926 void
927 pmap_destroy(pmap_t pm)
928 {
929 	int refs;
930 
931 	refs = atomic_dec_int_nv(&pm->pm_refs);
932 	if (refs > 0)
933 		return;
934 
935 	/*
936 	 * reference count is zero, free pmap resources and free pmap.
937 	 */
938 	pmap_release(pm);
939 	pool_put(&pmap_pmap_pool, pm);
940 }
941 
942 /*
943  * Release any resources held by the given physical map.
944  * Called when a pmap initialized by pmap_pinit is being released.
945  */
946 void
947 pmap_release(pmap_t pm)
948 {
949 	pmap_vp_destroy(pm);
950 }
951 
952 void
953 pmap_vp_destroy(pmap_t pm)
954 {
955 	struct pmapvp1 *vp1;
956 	struct pmapvp2 *vp2;
957 	struct pmapvp3 *vp3;
958 	struct pte_desc *pted;
959 	int j, k, l;
960 
961 	vp1 = pm->pm_vp.l1;
962 	/*
963 	 * There is no separate supervisor and user page table root, so
964 	 * remove only the user page tables.
965 	 */
966 	for (j = 0; j < L1_KERN_BASE; j++) {
967 		vp2 = vp1->vp[j];
968 		if (vp2 == NULL)
969 			continue;
970 		vp1->vp[j] = NULL;
971 
972 		for (k = 0; k < VP_IDX2_CNT; k++) {
973 			vp3 = vp2->vp[k];
974 			if (vp3 == NULL)
975 				continue;
976 			vp2->vp[k] = NULL;
977 
978 			for (l = 0; l < VP_IDX3_CNT; l++) {
979 				pted = vp3->vp[l];
980 				if (pted == NULL)
981 					continue;
982 				vp3->vp[l] = NULL;
983 
984 				pool_put(&pmap_pted_pool, pted);
985 			}
986 			pool_put(&pmap_vp_pool, vp3);
987 		}
988 		pool_put(&pmap_vp_pool, vp2);
989 	}
990 	pool_put(&pmap_vp_pool, pm->pm_vp.l1);
991 	pm->pm_vp.l1 = NULL;
992 	return;
993 }
994 
995 vaddr_t virtual_avail;
996 int	pmap_virtual_space_called;
997 
998 static inline pt_entry_t
999 VP_Lx(paddr_t pa)
1000 {
1001 	/*
1002 	 * This function takes the given physical address and converts it
1003 	 * into the form that should be inserted into the page table.
1004 	 */
1005 	// NOTE: We always assume the entry is valid. OpenBSD/arm64 uses
1006 	// the least significant bits to differentiate between PTD / PTE.
1007 	// In riscv64 Sv39 address translation mode a PTD / PTE is distinguished
1008 	// by the lack of PTE_R / PTE_X on an entry with PTE_V set. For both
1009 	// a PTD and PTE, the PTE_V bit is set.
1010 	return (((pa & PTE_RPGN) >> PAGE_SHIFT) << PTE_PPN0_S) | PTE_V;
1011 }
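/*
 * For example, pa 0x80200000 yields ((0x80200000 >> PAGE_SHIFT) <<
 * PTE_PPN0_S) | PTE_V: the physical page number placed in the PPN
 * field with only the valid bit set, i.e. a non-leaf (PTD) entry
 * until R/W/X access bits are OR'd in by pmap_pte_update().
 */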
1012 
1013 /*
1014  * In pmap_bootstrap() we allocate the page tables for the first GB
1015  * of the kernel address space.
1016  */
1017 vaddr_t pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS + 1024 * 1024 * 1024;
1018 
1019 /*
1020  * Allocator for growing the kernel page tables.  We use a dedicated
1021  * submap to make sure we have the space to map them as we are called
1022  * when address space is tight!
1023  */
1024 
1025 struct vm_map *pmap_kvp_map;
1026 
1027 const struct kmem_va_mode kv_kvp = {
1028 	.kv_map = &pmap_kvp_map,
1029 	.kv_wait = 0
1030 };
1031 
1032 void *
1033 pmap_kvp_alloc(void)
1034 {
1035 	void *kvp;
1036 
1037 	if (!uvm.page_init_done && !pmap_virtual_space_called) {
1038 		paddr_t pa[2];
1039 		vaddr_t va;
1040 
1041 		if (!uvm_page_physget(&pa[0]) || !uvm_page_physget(&pa[1]))
1042 			panic("%s: out of memory", __func__);
1043 
1044 		va = virtual_avail;
1045 		virtual_avail += 2 * PAGE_SIZE;
1046 		KASSERT(virtual_avail <= pmap_maxkvaddr);
1047 		kvp = (void *)va;
1048 
1049 		pmap_kenter_pa(va, pa[0], PROT_READ|PROT_WRITE);
1050 		pmap_kenter_pa(va + PAGE_SIZE, pa[1], PROT_READ|PROT_WRITE);
1051 		pagezero(va);
1052 		pagezero(va + PAGE_SIZE);
1053 	} else {
1054 		kvp = km_alloc(sizeof(struct pmapvp1), &kv_kvp, &kp_zero,
1055 		    &kd_nowait);
1056 	}
1057 
1058 	return kvp;
1059 }
1060 
1061 struct pte_desc *
1062 pmap_kpted_alloc(void)
1063 {
1064 	static struct pte_desc *pted;
1065 	static int npted;
1066 
1067 	if (npted == 0) {
1068 		if (!uvm.page_init_done && !pmap_virtual_space_called) {
1069 			paddr_t pa;
1070 			vaddr_t va;
1071 
1072 			if (!uvm_page_physget(&pa))
1073 				panic("%s: out of memory", __func__);
1074 
1075 			va = virtual_avail;
1076 			virtual_avail += PAGE_SIZE;
1077 			KASSERT(virtual_avail <= pmap_maxkvaddr);
1078 			pted = (struct pte_desc *)va;
1079 
1080 			pmap_kenter_pa(va, pa, PROT_READ|PROT_WRITE);
1081 			pagezero(va);
1082 		} else {
1083 			pted = km_alloc(PAGE_SIZE, &kv_kvp, &kp_zero,
1084 			    &kd_nowait);
1085 			if (pted == NULL)
1086 				return NULL;
1087 		}
1088 
1089 		npted = PAGE_SIZE / sizeof(struct pte_desc);
1090 	}
1091 
1092 	npted--;
1093 	return pted++;
1094 }
1095 
1096 vaddr_t
1097 pmap_growkernel(vaddr_t maxkvaddr)
1098 {
1099 	struct pmapvp1 *vp1 = pmap_kernel()->pm_vp.l1;
1100 	struct pmapvp2 *vp2;
1101 	struct pmapvp3 *vp3;
1102 	struct pte_desc *pted;
1103 	paddr_t pa;
1104 	int lb_idx2, ub_idx2;
1105 	int i, j, k;
1106 	int s;
1107 
1108 	if (maxkvaddr <= pmap_maxkvaddr)
1109 		return pmap_maxkvaddr;
1110 
1111 	/*
1112 	 * Not strictly necessary, but we use an interrupt-safe map
1113 	 * and uvm asserts that we're at IPL_VM.
1114 	 */
1115 	s = splvm();
1116 
1117 	for (i = VP_IDX1(pmap_maxkvaddr); i <= VP_IDX1(maxkvaddr - 1); i++) {
1118 		vp2 = vp1->vp[i];
1119 		if (vp2 == NULL) {
1120 			vp2 = pmap_kvp_alloc();
1121 			if (vp2 == NULL)
1122 				goto fail;
1123 			pmap_extract(pmap_kernel(), (vaddr_t)vp2, &pa);
1124 			vp1->vp[i] = vp2;
1125 			vp1->l1[i] = VP_Lx(pa);
1126 		}
1127 
1128 		if (i == VP_IDX1(pmap_maxkvaddr)) {
1129 			lb_idx2 = VP_IDX2(pmap_maxkvaddr);
1130 		} else {
1131 			lb_idx2 = 0;
1132 		}
1133 
1134 		if (i == VP_IDX1(maxkvaddr - 1)) {
1135 			ub_idx2 = VP_IDX2(maxkvaddr - 1);
1136 		} else {
1137 			ub_idx2 = VP_IDX2_CNT - 1;
1138 		}
1139 
1140 		for (j = lb_idx2; j <= ub_idx2; j++) {
1141 			vp3 = vp2->vp[j];
1142 			if (vp3 == NULL) {
1143 				vp3 = pmap_kvp_alloc();
1144 				if (vp3 == NULL)
1145 					goto fail;
1146 				pmap_extract(pmap_kernel(), (vaddr_t)vp3, &pa);
1147 				vp2->vp[j] = vp3;
1148 				vp2->l2[j] = VP_Lx(pa);
1149 			}
1150 
1151 			for (k = 0; k <= VP_IDX3_CNT - 1; k++) {
1152 				if (vp3->vp[k] == NULL) {
1153 					pted = pmap_kpted_alloc();
1154 					if (pted == NULL)
1155 						goto fail;
1156 					vp3->vp[k] = pted;
1157 					pmap_maxkvaddr += PAGE_SIZE;
1158 				}
1159 			}
1160 		}
1161 	}
1162 	KASSERT(pmap_maxkvaddr >= maxkvaddr);
1163 
1164 fail:
1165 	splx(s);
1166 
1167 	return pmap_maxkvaddr;
1168 }
1169 
1170 void pmap_setup_avail(uint64_t memstart, uint64_t memend, uint64_t kvo);
1171 
1172 /*
1173  * Initialize pmap setup.
1174  * ALL of the code which deals with avail needs to be rewritten as an
1175  * actual memory allocation.
1176  */
1177 CTASSERT(sizeof(struct pmapvp1) == 2 * PAGE_SIZE);
1178 
1179 int mappings_allocated = 0;
1180 int pted_allocated = 0;
1181 
1182 extern char __text_start[], _etext[];
1183 extern char __rodata_start[], _erodata[];
1184 
1185 paddr_t dmap_phys_base;
1186 
1187 void
1188 pmap_bootstrap_dmap(vaddr_t kern_l1, paddr_t min_pa, paddr_t max_pa)
1189 {
1190 	vaddr_t va;
1191 	paddr_t pa;
1192 	pt_entry_t *l1;
1193 	u_int l1_slot;
1194 	pt_entry_t entry;
1195 	pn_t pn;
1196 
1197 	pa = dmap_phys_base = min_pa & ~L1_OFFSET;  // 1 GiB Align
1198 	va = DMAP_MIN_ADDRESS;
1199 	l1 = (pt_entry_t *)kern_l1;
1200 	l1_slot = VP_IDX1(DMAP_MIN_ADDRESS);
1201 
1202 	for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
1203 	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
1204 		KASSERT(l1_slot < Ln_ENTRIES);
1205 
1206 		/* gigapages */
1207 		pn = (pa / PAGE_SIZE);
1208 		entry = PTE_KERN | pmap_pma;
1209 		entry |= (pn << PTE_PPN0_S);
1210 		l1[l1_slot] = entry;
1211 	}
1212 
1213 	sfence_vma();
1214 }
1215 
1216 vaddr_t
1217 pmap_bootstrap(long kvo, vaddr_t l1pt, vaddr_t kernelstart, vaddr_t kernelend,
1218     paddr_t memstart, paddr_t memend)
1219 {
1220 	void  *va;
1221 	paddr_t pa, pt1pa;
1222 	struct pmapvp1 *vp1;
1223 	struct pmapvp2 *vp2;
1224 	struct pmapvp3 *vp3;
1225 	struct pte_desc *pted;
1226 	vaddr_t vstart;
1227 	int i, j, k;
1228 	int lb_idx2, ub_idx2;
1229 	uint64_t marchid, mimpid;
1230 	uint32_t mvendorid;
1231 
1232 	mvendorid = sbi_get_mvendorid();
1233 	marchid = sbi_get_marchid();
1234 	mimpid = sbi_get_mimpid();
1235 
1236 	/*
1237 	 * The T-Head cores implement a page attributes extension that
1238 	 * violates the RISC-V privileged architecture specification.
1239 	 * Work around this as best as we can by adding the
1240 	 * appropriate page attributes in a way that is mostly
1241 	 * compatible with the Svpbmt extension.
1242 	 */
1243 	if (mvendorid == CPU_VENDOR_THEAD && marchid == 0 && mimpid == 0) {
1244 		pmap_pma = PTE_THEAD_C | PTE_THEAD_B | PTE_THEAD_SH;
1245 		pmap_nc = PTE_THEAD_B | PTE_THEAD_SH;
1246 		pmap_io = PTE_THEAD_SO | PTE_THEAD_SH;
1247 	}
1248 
1249 	pmap_setup_avail(memstart, memend, kvo);
1250 	pmap_remove_avail(kernelstart + kvo, kernelend + kvo);
1251 
1252 	/*
1253 	 * KERNEL IS ASSUMED TO BE 39 bits (or less); start from L1,
1254 	 * not L0.  Also, kernel mappings may not cover enough ram to
1255 	 * bootstrap, so all accesses initializing tables must be done
1256 	 * via physical pointers.
1257 	 */
1258 
1259 	/* Map the initial 64MB block to the Direct Mapped Region. */
1260 	pmap_bootstrap_dmap(l1pt, memstart, memend);
1261 
1262 	pt1pa = pmap_steal_avail(2 * sizeof(struct pmapvp1), Lx_TABLE_ALIGN,
1263 	    &va);
1264 	vp1 = (struct pmapvp1 *) PHYS_TO_DMAP(pt1pa);
1265 	pmap_kernel()->pm_vp.l1 = (struct pmapvp1 *)va;
1266 	pmap_kernel()->pm_privileged = 1;
1267 	pmap_kernel()->pm_satp = SATP_MODE_SV39 | /* ASID = 0 */
1268 		((PPN(pt1pa) & SATP_PPN_MASK) << SATP_PPN_SHIFT);
1269 
1270 	/* allocate memory (in unit of pages) for l2 and l3 page table */
1271 	for (i = VP_IDX1(VM_MIN_KERNEL_ADDRESS);
1272 	    i <= VP_IDX1(pmap_maxkvaddr - 1);
1273 	    i++) {
1274 		mappings_allocated++;
1275 		pa = pmap_steal_avail(sizeof(struct pmapvp2), Lx_TABLE_ALIGN,
1276 		    &va);
1277 		vp2 = (struct pmapvp2 *)PHYS_TO_DMAP(pa);
1278 		vp1->vp[i] = va;
1279 		vp1->l1[i] = VP_Lx(pa);
1280 
1281 		if (i == VP_IDX1(VM_MIN_KERNEL_ADDRESS)) {
1282 			lb_idx2 = VP_IDX2(VM_MIN_KERNEL_ADDRESS);
1283 		} else {
1284 			lb_idx2 = 0;
1285 		}
1286 		if (i == VP_IDX1(pmap_maxkvaddr - 1)) {
1287 			ub_idx2 = VP_IDX2(pmap_maxkvaddr - 1);
1288 		} else {
1289 			ub_idx2 = VP_IDX2_CNT - 1;
1290 		}
1291 		for (j = lb_idx2; j <= ub_idx2; j++) {
1292 			mappings_allocated++;
1293 			pa = pmap_steal_avail(sizeof(struct pmapvp3),
1294 			    Lx_TABLE_ALIGN, &va);
1295 			vp3 = (struct pmapvp3 *)PHYS_TO_DMAP(pa);
1296 			vp2->vp[j] = va;
1297 			vp2->l2[j] = VP_Lx(pa);
1298 		}
1299 	}
1300 	/* allocate memory for pte_desc */
1301 	for (i = VP_IDX1(VM_MIN_KERNEL_ADDRESS);
1302 	    i <= VP_IDX1(pmap_maxkvaddr - 1);
1303 	    i++) {
1304 		vp2 = (void *)PHYS_TO_DMAP((long)vp1->vp[i] + kvo);
1305 
1306 		if (i == VP_IDX1(VM_MIN_KERNEL_ADDRESS)) {
1307 			lb_idx2 = VP_IDX2(VM_MIN_KERNEL_ADDRESS);
1308 		} else {
1309 			lb_idx2 = 0;
1310 		}
1311 		if (i == VP_IDX1(pmap_maxkvaddr - 1)) {
1312 			ub_idx2 = VP_IDX2(pmap_maxkvaddr - 1);
1313 		} else {
1314 			ub_idx2 = VP_IDX2_CNT - 1;
1315 		}
1316 		for (j = lb_idx2; j <= ub_idx2; j++) {
1317 			vp3 = (void *)PHYS_TO_DMAP((long)vp2->vp[j] + kvo);
1318 
1319 			for (k = 0; k <= VP_IDX3_CNT - 1; k++) {
1320 				pted_allocated++;
1321 				pa = pmap_steal_avail(sizeof(struct pte_desc),
1322 				    4, &va);
1323 				pted = va;
1324 				vp3->vp[k] = pted;
1325 			}
1326 		}
1327 	}
1328 
1329 	pmap_avail_fixup();
1330 
1331 	/*
1332 	 * At this point we are still running on the bootstrap page
1333 	 * tables however all memory for the final page tables is
1334 	 * 'allocated' and should now be mapped.  This means we are
1335 	 * able to use the virtual addressing to populate the final
1336 	 * mappings into the new mapping tables.
1337 	 */
1338 	vstart = pmap_map_stolen(kernelstart);
1339 
1340 	/*
1341 	 * Temporarily add the Direct Map Area into the kernel pmap
1342 	 * such that we can continue to access stolen memory by
1343 	 * physical address.
1344 	 */
1345 	pmap_bootstrap_dmap((vaddr_t)pmap_kernel()->pm_vp.l1, memstart, memend);
1346 
1347 	/* Switch to the new page tables. */
1348 	uint64_t satp = pmap_kernel()->pm_satp;
1349 	__asm volatile("csrw satp, %0" :: "r" (satp) : "memory");
1350 	sfence_vma();
1351 
1352 	curcpu()->ci_curpm = pmap_kernel();
1353 
1354 	vmmap = vstart;
1355 	vstart += PAGE_SIZE;
1356 
1357 	return vstart;
1358 }
1359 
1360 void
1361 pmap_set_l2(struct pmap *pm, uint64_t va, struct pmapvp2 *l2_va, paddr_t l2_pa)
1362 {
1363 	pt_entry_t pg_entry;
1364 	struct pmapvp1 *vp1;
1365 	int idx1;
1366 
1367 	if (l2_pa == 0) {
1368 		/*
1369 		 * if this is called from pmap_vp_enter, this is a
1370 		 * normally mapped page, call pmap_extract to get pa
1371 		 */
1372 		pmap_extract(pmap_kernel(), (vaddr_t)l2_va, &l2_pa);
1373 	}
1374 
1375 	if (l2_pa & (Lx_TABLE_ALIGN-1))
1376 		panic("misaligned L2 table");
1377 
1378 	pg_entry = VP_Lx(l2_pa);
1379 
1380 	idx1 = VP_IDX1(va);
1381 	vp1 = pm->pm_vp.l1;
1382 	vp1->vp[idx1] = l2_va;
1383 	vp1->l1[idx1] = pg_entry;
1384 }
1385 
1386 void
1387 pmap_set_l3(struct pmap *pm, uint64_t va, struct pmapvp3 *l3_va, paddr_t l3_pa)
1388 {
1389 	pt_entry_t pg_entry;
1390 	struct pmapvp1 *vp1;
1391 	struct pmapvp2 *vp2;
1392 	int idx1, idx2;
1393 
1394 	if (l3_pa == 0) {
1395 		/*
1396 		 * if this is called from pmap_vp_enter, this is a
1397 		 * normally mapped page, call pmap_extract to get pa
1398 		 */
1399 		pmap_extract(pmap_kernel(), (vaddr_t)l3_va, &l3_pa);
1400 	}
1401 
1402 	if (l3_pa & (Lx_TABLE_ALIGN-1))
1403 		panic("misaligned L3 table");
1404 
1405 	pg_entry = VP_Lx(l3_pa);
1406 
1407 	idx1 = VP_IDX1(va);
1408 	idx2 = VP_IDX2(va);
1409 	vp1 = pm->pm_vp.l1;
1410 	vp2 = vp1->vp[idx1];
1411 	vp2->vp[idx2] = l3_va;
1412 	vp2->l2[idx2] = pg_entry;
1413 }
1414 
1415 /*
1416  * activate a pmap entry
1417  */
1418 void
1419 pmap_activate(struct proc *p)
1420 {
1421 	pmap_t pm = p->p_vmspace->vm_map.pmap;
1422 	u_long sie;
1423 
1424 	sie = intr_disable();
1425 	if (p == curproc && pm != curcpu()->ci_curpm)
1426 		pmap_set_satp(p);
1427 	intr_restore(sie);
1428 }
1429 
1430 /*
1431  * deactivate a pmap entry
1432  */
1433 void
1434 pmap_deactivate(struct proc *p)
1435 {
1436 }
1437 
1438 /*
1439  * Get the physical page address for the given pmap/virtual address.
1440  */
1441 int
1442 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap)
1443 {
1444 	struct pte_desc *pted;
1445 	paddr_t pa;
1446 
1447 	pmap_lock(pm);
1448 	pted = pmap_vp_lookup(pm, va, NULL);
1449 	if (!pted || !PTED_VALID(pted)) {
1450 		pmap_unlock(pm);
1451 		return 0;
1452 	}
1453 	if (pap != NULL) {
1454 		pa = pted->pted_pte & PTE_RPGN;
1455 		if (pa >= pmap_uncached_start && pa <= pmap_uncached_end)
1456 			pa -= (pmap_uncached_start - pmap_cached_start);
1457 		*pap = pa | (va & PAGE_MASK);
1458 	}
1459 	pmap_unlock(pm);
1460 
1461 	return 1;
1462 }
1463 
1464 void
1465 pmap_page_ro(pmap_t pm, vaddr_t va, vm_prot_t prot)
1466 {
1467 	struct pte_desc *pted;
1468 	pt_entry_t *pl3;
1469 
1470 	/* Every VA needs a pted, even unmanaged ones. */
1471 	pted = pmap_vp_lookup(pm, va, &pl3);
1472 	if (!pted || !PTED_VALID(pted)) {
1473 		return;
1474 	}
1475 
1476 	pted->pted_va &= ~PROT_WRITE;
1477 	pted->pted_pte &= ~PROT_WRITE;
1478 	if ((prot & PROT_READ) == 0) {
1479 		pted->pted_va &= ~PROT_READ;
1480 		pted->pted_pte &= ~PROT_READ;
1481 	}
1482 	if ((prot & PROT_EXEC) == 0) {
1483 		pted->pted_va &= ~PROT_EXEC;
1484 		pted->pted_pte &= ~PROT_EXEC;
1485 	}
1486 	pmap_pte_update(pted, pl3);
1487 	tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
1488 }
1489 
1490 /*
1491  * Lower the protection on the specified physical page.
1492  *
1493  * There are only two cases, either the protection is going to 0,
1494  * or it is going to read-only.
1495  */
1496 void
1497 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1498 {
1499 	struct pte_desc *pted;
1500 	struct pmap *pm;
1501 
1502 	if (prot != PROT_NONE) {
1503 		mtx_enter(&pg->mdpage.pv_mtx);
1504 		LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1505 			pmap_page_ro(pted->pted_pmap, pted->pted_va, prot);
1506 		}
1507 		mtx_leave(&pg->mdpage.pv_mtx);
1508 		return;
1509 	}
1510 
1511 	mtx_enter(&pg->mdpage.pv_mtx);
1512 	while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
1513 		pmap_reference(pted->pted_pmap);
1514 		pm = pted->pted_pmap;
1515 		mtx_leave(&pg->mdpage.pv_mtx);
1516 
1517 		pmap_lock(pm);
1518 
1519 		/*
1520 		 * We dropped the pvlist lock before grabbing the pmap
1521 		 * lock to avoid lock ordering problems.  This means
1522 		 * we have to check the pvlist again since somebody
1523 		 * else might have modified it.  All we care about is
1524 		 * that the pvlist entry matches the pmap we just
1525 		 * locked.  If it doesn't, unlock the pmap and try
1526 		 * again.
1527 		 */
1528 		mtx_enter(&pg->mdpage.pv_mtx);
1529 		pted = LIST_FIRST(&(pg->mdpage.pv_list));
1530 		if (pted == NULL || pted->pted_pmap != pm) {
1531 			mtx_leave(&pg->mdpage.pv_mtx);
1532 			pmap_unlock(pm);
1533 			pmap_destroy(pm);
1534 			mtx_enter(&pg->mdpage.pv_mtx);
1535 			continue;
1536 		}
1537 		mtx_leave(&pg->mdpage.pv_mtx);
1538 
1539 		pmap_remove_pted(pm, pted);
1540 		pmap_unlock(pm);
1541 		pmap_destroy(pm);
1542 
1543 		mtx_enter(&pg->mdpage.pv_mtx);
1544 	}
1545 	/* page is being reclaimed, sync icache next use */
1546 	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
1547 	mtx_leave(&pg->mdpage.pv_mtx);
1548 }
1549 
1550 void
1551 pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1552 {
1553 	if (prot & (PROT_READ | PROT_EXEC)) {
1554 		pmap_lock(pm);
1555 		while (sva < eva) {
1556 			pmap_page_ro(pm, sva, prot);
1557 			sva += PAGE_SIZE;
1558 		}
1559 		pmap_unlock(pm);
1560 		return;
1561 	}
1562 	pmap_remove(pm, sva, eva);
1563 }
1564 
1565 void
1566 pmap_init(void)
1567 {
1568 	struct pmapvp1 *kvp1;
1569 	void *node;
1570 
1571 	node = fdt_find_node("/");
1572 	if (fdt_is_compatible(node, "starfive,jh7100")) {
1573 		pmap_cached_start = 0x0080000000ULL;
1574 		pmap_cached_end = 0x087fffffffULL;
1575 		pmap_uncached_start = 0x1000000000ULL;
1576 		pmap_uncached_end = 0x17ffffffffULL;
1577 	}
1578 
1579 	/* Clear DMAP PTEs. */
1580 	kvp1 = pmap_kernel()->pm_vp.l1;
1581 	memset(&kvp1->l1[L1_DMAP_BASE], 0,
1582 	    L1_DMAP_ENTRIES * sizeof(pt_entry_t));
1583 	memset(&kvp1->vp[L1_DMAP_BASE], 0,
1584 	    L1_DMAP_ENTRIES * sizeof(struct pmapvp2 *));
1585 	sfence_vma();
1586 
1587 	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
1588 	    "pmap", NULL);
1589 	pool_setlowat(&pmap_pmap_pool, 2);
1590 	pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
1591 	    "pted", NULL);
1592 	pool_setlowat(&pmap_pted_pool, 20);
1593 	pool_init(&pmap_vp_pool, sizeof(struct pmapvp1), PAGE_SIZE, IPL_VM, 0,
1594 	    "vp", &pmap_vp_allocator);
1595 	pool_setlowat(&pmap_vp_pool, 20);
1596 
1597 	pmap_initialized = 1;
1598 }
1599 
1600 void
1601 pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
1602 {
1603 	icache_flush();
1604 }
1605 
1606 void
1607 pmap_pte_insert(struct pte_desc *pted)
1608 {
1609 	/* put entry into table */
1610 	/* need to deal with ref/change here */
1611 	pmap_t pm = pted->pted_pmap;
1612 	pt_entry_t *pl3;
1613 
1614 	if (pmap_vp_lookup(pm, pted->pted_va, &pl3) == NULL) {
1615 		panic("%s: have a pted, but missing a vp"
1616 		    " for %lx va pmap %p", __func__, pted->pted_va, pm);
1617 	}
1618 
1619 	pmap_pte_update(pted, pl3);
1620 }
1621 
1622 void
1623 pmap_pte_update(struct pte_desc *pted, uint64_t *pl3)
1624 {
1625 	uint64_t pte, access_bits;
1626 	pmap_t pm = pted->pted_pmap;
1627 	uint64_t attr = 0;
1628 
1629 	switch (pted->pted_va & PMAP_CACHE_BITS) {
1630 	case PMAP_CACHE_WB:
1631 		attr |= pmap_pma;
1632 		break;
1633 	case PMAP_CACHE_CI:
1634 		attr |= pmap_nc;
1635 		break;
1636 	case PMAP_CACHE_DEV:
1637 		attr |= pmap_io;
1638 		break;
1639 	default:
1640 		panic("%s: invalid cache mode", __func__);
1641 	}
1642 
1643 	if (pm->pm_privileged)
1644 		access_bits = ap_bits_kern[pted->pted_pte & PROT_MASK];
1645 	else
1646 		access_bits = ap_bits_user[pted->pted_pte & PROT_MASK];
1647 
1648 	pte = VP_Lx(pted->pted_pte) | attr | access_bits | PTE_V;
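	/*
	 * A mapping with no access bits must be written as an invalid
	 * PTE: V=1 with R/W/X all clear would be interpreted as a
	 * pointer to a next-level table.
	 */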
1649 	*pl3 = access_bits ? pte : 0;
1650 }
1651 
1652 void
1653 pmap_pte_remove(struct pte_desc *pted, int remove_pted)
1654 {
1655 	struct pmapvp1 *vp1;
1656 	struct pmapvp2 *vp2;
1657 	struct pmapvp3 *vp3;
1658 	pmap_t pm = pted->pted_pmap;
1659 
1660 	vp1 = pm->pm_vp.l1;
1661 	if (vp1->vp[VP_IDX1(pted->pted_va)] == NULL) {
1662 		panic("have a pted, but missing the l2 for %lx va pmap %p",
1663 		    pted->pted_va, pm);
1664 	}
1665 	vp2 = vp1->vp[VP_IDX1(pted->pted_va)];
1666 	if (vp2 == NULL) {
1667 		panic("have a pted, but missing the l2 for %lx va pmap %p",
1668 		    pted->pted_va, pm);
1669 	}
1670 	vp3 = vp2->vp[VP_IDX2(pted->pted_va)];
1671 	if (vp3 == NULL) {
1672 		panic("have a pted, but missing the l3 for %lx va pmap %p",
1673 		    pted->pted_va, pm);
1674 	}
1675 	vp3->l3[VP_IDX3(pted->pted_va)] = 0;
1676 	if (remove_pted)
1677 		vp3->vp[VP_IDX3(pted->pted_va)] = NULL;
1678 }
1679 
1680 /*
1681  * This function exists to do software referenced/modified emulation.
1682  * Its purpose is to tell the caller whether the fault was generated
1683  * for this emulation, or whether it is a legitimate fault.
1684  */
1685 int
1686 pmap_fault_fixup(pmap_t pm, vaddr_t va, vm_prot_t ftype)
1687 {
1688 	struct pte_desc *pted;
1689 	struct vm_page *pg;
1690 	paddr_t pa;
1691 	pt_entry_t *pl3 = NULL;
1692 	int retcode = 0;
1693 
1694 	pmap_lock(pm);
1695 
1696 	/* Every VA needs a pted, even unmanaged ones. */
1697 	pted = pmap_vp_lookup(pm, va, &pl3);
1698 	if (!pted || !PTED_VALID(pted))
1699 		goto done;
1700 
1701 	/* There has to be a PA for the VA, get it. */
1702 	pa = (pted->pted_pte & PTE_RPGN);
1703 
1704 	/* If it's unmanaged, it must not fault. */
1705 	pg = PHYS_TO_VM_PAGE(pa);
1706 	if (pg == NULL)
1707 		goto done;
1708 
1709 	/*
1710 	 * Check the fault types to find out if we were doing
1711 	 * any mod/ref emulation and fixup the PTE if we were.
1712 	 */
1713 	if ((ftype & PROT_WRITE) && /* fault caused by a write */
1714 	    !(pted->pted_pte & PROT_WRITE) && /* and write is disabled now */
1715 	    (pted->pted_va & PROT_WRITE)) { /* but is supposedly allowed */
1716 
1717 		/*
1718 		 * Page modified emulation. A write always includes
1719 		 * a reference.  This means that we can enable read and
1720 		 * exec as well, akin to the page reference emulation.
1721 		 */
1722 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD|PG_PMAP_REF);
1723 		atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
1724 
1725 		/* Thus, enable read, write and exec. */
1726 		pted->pted_pte |=
1727 		    (pted->pted_va & (PROT_READ|PROT_WRITE|PROT_EXEC));
1728 	} else if ((ftype & PROT_EXEC) && /* fault caused by an exec */
1729 	    !(pted->pted_pte & PROT_EXEC) && /* and exec is disabled now */
1730 	    (pted->pted_va & PROT_EXEC)) { /* but is supposedly allowed */
1731 
1732 		/*
1733 		 * Exec always includes a reference. Since we now know
1734 		 * the page has been accessed, we can enable read as well
1735 		 * if UVM allows it.
1736 		 */
1737 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
1738 
1739 		/* Thus, enable read and exec. */
1740 		pted->pted_pte |= (pted->pted_va & (PROT_READ|PROT_EXEC));
1741 	} else if ((ftype & PROT_READ) && /* fault caused by a read */
1742 	    !(pted->pted_pte & PROT_READ) && /* and read is disabled now */
1743 	    (pted->pted_va & PROT_READ)) { /* but is supposedly allowed */
1744 
1745 		/*
1746 		 * Page referenced emulation. Since we now know the page
1747 		 * has been accessed, we can enable exec as well if UVM
1748 		 * allows it.
1749 		 */
1750 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
1751 
1752 		/* Thus, enable read and exec. */
1753 		pted->pted_pte |= (pted->pted_va & (PROT_READ|PROT_EXEC));
1754 	} else {
1755 		/* didn't catch it, so probably broken */
1756 		goto done;
1757 	}
1758 
1759 	/*
1760 	 * If this is a page that can be executed, make sure to invalidate
1761 	 * the instruction cache if the page has been modified or not used
1762 	 * yet.
1763 	 */
1764 	if (pted->pted_va & PROT_EXEC) {
1765 		if ((pg->pg_flags & PG_PMAP_EXE) == 0)
1766 			icache_flush();
1767 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
1768 	}
1769 
1770 	/* We actually made a change, so flush it and sync. */
1771 	pmap_pte_update(pted, pl3);
1772 	tlb_flush_page(pm, va & ~PAGE_MASK);
1773 
1774 	retcode = 1;
1775 done:
1776 	pmap_unlock(pm);
1777 	return retcode;
1778 }
1779 
1780 void
1781 pmap_postinit(void)
1782 {
1783 	vaddr_t minaddr, maxaddr;
1784 	u_long npteds, npages;
1785 
1786 	/*
1787 	 * Reserve enough virtual address space to grow the kernel
1788 	 * page tables.  We need a descriptor for each page as well as
1789 	 * an extra page for level 1/2/3 page tables for management.
1790 	 * To simplify the code, we always allocate full tables at
1791 	 * level 3, so take that into account.
1792 	 */
1793 	npteds = (VM_MAX_KERNEL_ADDRESS - pmap_maxkvaddr + 1) / PAGE_SIZE;
1794 	npteds = roundup(npteds, VP_IDX3_CNT);
1795 	npages = howmany(npteds, PAGE_SIZE / (sizeof(struct pte_desc)));
1796 	npages += 2 * howmany(npteds, VP_IDX3_CNT);
1797 	npages += 2 * howmany(npteds, VP_IDX3_CNT * VP_IDX2_CNT);
1798 	npages += 2 * howmany(npteds, VP_IDX3_CNT * VP_IDX2_CNT * VP_IDX1_CNT);
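	/*
	 * The factors of two reflect that each pmapvp table spans two
	 * pages (sizeof(struct pmapvp1) == 2 * PAGE_SIZE, and all three
	 * levels are the same size).
	 */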
1799 
1800 	/*
1801 	 * Use an interrupt safe map such that we don't recurse into
1802 	 * uvm_map() to allocate map entries.
1803 	 */
1804 	minaddr = vm_map_min(kernel_map);
1805 	pmap_kvp_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
1806 	    npages * PAGE_SIZE, VM_MAP_INTRSAFE, FALSE, NULL);
1807 }
1808 
1809 void
1810 pmap_init_percpu(void)
1811 {
1812 	pool_cache_init(&pmap_pted_pool);
1813 	pool_cache_init(&pmap_vp_pool);
1814 }
1815 
1816 void
1817 pmap_update(pmap_t pm)
1818 {
1819 }
1820 
1821 int
1822 pmap_is_referenced(struct vm_page *pg)
1823 {
1824 	return ((pg->pg_flags & PG_PMAP_REF) != 0);
1825 }
1826 
1827 int
1828 pmap_is_modified(struct vm_page *pg)
1829 {
1830 	return ((pg->pg_flags & PG_PMAP_MOD) != 0);
1831 }
1832 
1833 int
1834 pmap_clear_modify(struct vm_page *pg)
1835 {
1836 	struct pte_desc *pted;
1837 
1838 	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);
1839 
1840 	mtx_enter(&pg->mdpage.pv_mtx);
1841 	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1842 		pted->pted_pte &= ~PROT_WRITE;
1843 		pmap_pte_insert(pted);
1844 		tlb_flush_page(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
1845 	}
1846 	mtx_leave(&pg->mdpage.pv_mtx);
1847 
1848 	return 0;
1849 }
1850 
1851 /*
1852  * When this turns off read permissions it also disables write permissions
1853  * so that mod is correctly tracked after clear_ref; FAULT_READ; FAULT_WRITE;
1854  */
1855 int
1856 pmap_clear_reference(struct vm_page *pg)
1857 {
1858 	struct pte_desc *pted;
1859 
1860 	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);
1861 
1862 	mtx_enter(&pg->mdpage.pv_mtx);
1863 	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1864 		pted->pted_pte &= ~PROT_MASK;
1865 		pmap_pte_insert(pted);
1866 		tlb_flush_page(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
1867 	}
1868 	mtx_leave(&pg->mdpage.pv_mtx);
1869 
1870 	return 0;
1871 }
1872 
1873 void
1874 pmap_unwire(pmap_t pm, vaddr_t va)
1875 {
1876 	struct pte_desc *pted;
1877 
1878 	pmap_lock(pm);
1879 	pted = pmap_vp_lookup(pm, va, NULL);
1880 	if (pted != NULL && PTED_WIRED(pted)) {
1881 		pm->pm_stats.wired_count--;
1882 		pted->pted_va &= ~PTED_VA_WIRED_M;
1883 	}
1884 	pmap_unlock(pm);
1885 }
1886 
1887 void
1888 pmap_remove_holes(struct vmspace *vm)
1889 {
1890 	/* NOOP */
1891 }
1892 
1893 void
1894 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1895 {
1896 	*start = virtual_avail;
1897 	*end = VM_MAX_KERNEL_ADDRESS;
1898 
1899 	/* Prevent further KVA stealing. */
1900 	pmap_virtual_space_called = 1;
1901 }
1902 
1903 void
1904 pmap_setup_avail(uint64_t memstart, uint64_t memend, uint64_t kvo)
1905 {
1906 	/* This makes several assumptions:
1907 	 * 1) the kernel will be located 'low' in memory
1908 	 * 2) memory will not start at VM_MIN_KERNEL_ADDRESS
1909 	 * 3) several MB of memory starting just after the kernel will
1910 	 *    be premapped at the kernel address in the bootstrap mappings
1911 	 * 4) kvo is the 64-bit offset subtracted from a RAM physical
1912 	 *    address to obtain its kernel virtual mapping (kvo == pa - va)
1913 	 * 5) these translations are assumed to occur with large
1914 	 *    granularity; at minimum the translation will be page
1915 	 *    aligned, if not 'section' sized or greater.
1916 	 */
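	/*
	 * Example with hypothetical addresses: if RAM at PA 0x80200000 is
	 * premapped at VA 0xffffffc000200000, then kvo == pa - va and a
	 * physical address is converted back to its kernel virtual address
	 * as va = pa - pmap_avail_kvo (see pmap_map_stolen()).
	 */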
1917 
1918 	pmap_avail_kvo = kvo;
1919 	pmap_avail[0].start = memstart;
1920 	pmap_avail[0].size = memend - memstart;
1921 	pmap_cnt_avail = 1;
1922 
1923 	pmap_avail_fixup();
1924 }
1925 
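/*
 * Trim every region in pmap_avail to page boundaries, removing the
 * unaligned head or tail of a region via pmap_remove_avail().
 */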
1926 void
1927 pmap_avail_fixup(void)
1928 {
1929 	struct mem_region *mp;
1930 	vaddr_t align;
1931 	vaddr_t end;
1932 
1933 	mp = pmap_avail;
1934 	while (mp->size != 0) {
1935 		align = round_page(mp->start);
1936 		if (mp->start != align) {
1937 			pmap_remove_avail(mp->start, align);
1938 			mp = pmap_avail;
1939 			continue;
1940 		}
1941 		end = mp->start + mp->size;
1942 		align = trunc_page(end);
1943 		if (end != align) {
1944 			pmap_remove_avail(align, end);
1945 			mp = pmap_avail;
1946 			continue;
1947 		}
1948 		mp++;
1949 	}
1950 }
1951 
1952 /* remove a given region from avail memory */
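/*
 * Depending on the overlap, the matching pmap_avail entry is dropped,
 * shrunk at either end, or split in two; the removed range is then
 * recorded in (or merged into) the pmap_allocated array.
 */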
1953 void
1954 pmap_remove_avail(paddr_t base, paddr_t end)
1955 {
1956 	struct mem_region *mp;
1957 	int i;
1958 	long mpend;
1959 
1960 	/* remove given region from available */
1961 	for (mp = pmap_avail; mp->size; mp++) {
1962 		/*
1963 		 * Check whether this avail region overlaps the region to remove
1964 		 */
1965 		mpend = mp->start + mp->size;
1966 		if (base > mpend) {
1967 			continue;
1968 		}
1969 		if (base <= mp->start) {
1970 			if (end <= mp->start)
1971 				break; /* region to remove is not present */
1972 
1973 			if (end >= mpend) {
1974 				/* covers whole region */
1975 				/* shorten */
1976 				for (i = mp - pmap_avail;
1977 				    i < pmap_cnt_avail;
1978 				    i++) {
1979 					pmap_avail[i] = pmap_avail[i+1];
1980 				}
1981 				pmap_cnt_avail--;
1982 				pmap_avail[pmap_cnt_avail].size = 0;
1983 			} else {
1984 				mp->start = end;
1985 				mp->size = mpend - end;
1986 			}
1987 		} else {
1988 			/* start after the beginning */
1989 			if (end >= mpend) {
1990 				/* just truncate */
1991 				mp->size = base - mp->start;
1992 			} else {
1993 				/* split */
1994 				for (i = pmap_cnt_avail;
1995 				    i > (mp - pmap_avail);
1996 				    i--) {
1997 					pmap_avail[i] = pmap_avail[i - 1];
1998 				}
1999 				pmap_cnt_avail++;
2000 				mp->size = base - mp->start;
2001 				mp++;
2002 				mp->start = end;
2003 				mp->size = mpend - end;
2004 			}
2005 		}
2006 	}
2007 	for (mp = pmap_allocated; mp->size != 0; mp++) {
2008 		if (base < mp->start) {
2009 			if (end == mp->start) {
2010 				mp->start = base;
2011 				mp->size += end - base;
2012 				break;
2013 			}
2014 			/* lengthen */
2015 			for (i = pmap_cnt_allocated; i > (mp - pmap_allocated);
2016 			    i--) {
2017 				pmap_allocated[i] = pmap_allocated[i - 1];
2018 			}
2019 			pmap_cnt_allocated++;
2020 			mp->start = base;
2021 			mp->size = end - base;
2022 			return;
2023 		}
2024 		if (base == (mp->start + mp->size)) {
2025 			mp->size += end - base;
2026 			return;
2027 		}
2028 	}
2029 	if (mp->size == 0) {
2030 		mp->start = base;
2031 		mp->size  = end - base;
2032 		pmap_cnt_allocated++;
2033 	}
2034 }
2035 
2036 /* XXX - this zeros pages via their direct mapped address */
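/*
 * Steal a physically contiguous region of 'size' bytes, aligned to
 * 'align' bytes, from the pmap_avail regions during bootstrap.  If kva
 * is not NULL, the kernel virtual address of the stolen region is
 * returned through it.
 */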
2037 paddr_t
2038 pmap_steal_avail(size_t size, int align, void **kva)
2039 {
2040 	struct mem_region *mp;
2041 	long start;
2042 	long remsize;
2043 
2044 	for (mp = pmap_avail; mp->size; mp++) {
2045 		if (mp->size > size) {
2046 			start = (mp->start + (align - 1)) & ~(align - 1);
2047 			remsize = mp->size - (start - mp->start);
2048 			if (remsize >= (long)size) {
2049 				pmap_remove_avail(start, start + size);
2050 				if (kva != NULL) {
2051 					*kva = (void *)(start - pmap_avail_kvo);
2052 				}
2053 				/* XXX We clear the page based on its Direct
2054 				 * Mapped address for now.  Physical Addresses
2055 				 * are not available because we have unmapped
2056 				 * our identity mapped kernel.  Should consider
2057 				 * if we need to keep the identity mapping
2058 				 * during pmap bootstrapping. */
2059 				vaddr_t start_dmap = PHYS_TO_DMAP(start);
2060 				bzero((void *)start_dmap, size);
2061 				return start;
2062 			}
2063 		}
2064 	}
2065 	panic("unable to allocate region with size %lx align %x",
2066 	    size, align);
2067 }
2068 
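/*
 * Enter cached kernel mappings for all memory recorded in pmap_allocated
 * (everything stolen during bootstrap): read/execute for the kernel text,
 * read-only for rodata and read/write for the rest.  Returns the kernel
 * virtual address just past the last mapping entered.
 */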
2069 vaddr_t
2070 pmap_map_stolen(vaddr_t kernel_start)
2071 {
2072 	struct mem_region *mp;
2073 	paddr_t pa;
2074 	vaddr_t va;
2075 	uint64_t e;
2076 
2077 	for (mp = pmap_allocated; mp->size; mp++) {
2078 		for (e = 0; e < mp->size; e += PAGE_SIZE) {
2079 			int prot = PROT_READ | PROT_WRITE;
2080 
2081 			pa = mp->start + e;
2082 			va = pa - pmap_avail_kvo;
2083 
2084 			if (va < VM_MIN_KERNEL_ADDRESS ||
2085 			    va >= VM_MAX_KERNEL_ADDRESS)
2086 				continue;
2087 
2088 			if (va >= (vaddr_t)__text_start &&
2089 			    va < (vaddr_t)_etext)
2090 				prot = PROT_READ | PROT_EXEC;
2091 			else if (va >= (vaddr_t)__rodata_start &&
2092 			    va < (vaddr_t)_erodata)
2093 				prot = PROT_READ;
2094 
2095 			pmap_kenter_cache(va, pa, prot, PMAP_CACHE_WB);
2096 		}
2097 	}
2098 
2099 	return va + PAGE_SIZE;
2100 }
2101 
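/*
 * Hand the remaining pmap_avail regions, trimmed to whole pages, to
 * uvm(9) as managed physical memory via uvm_page_physload().
 */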
2102 void
2103 pmap_physload_avail(void)
2104 {
2105 	struct mem_region *mp;
2106 	uint64_t start, end;
2107 
2108 	for (mp = pmap_avail; mp->size; mp++) {
2109 		if (mp->size < PAGE_SIZE) {
2110 			printf(" skipped - too small\n");
2111 			continue;
2112 		}
2113 		start = mp->start;
2114 		if (start & PAGE_MASK) {
2115 			start = PAGE_SIZE + (start & PMAP_PA_MASK);
2116 		}
2117 		end = mp->start + mp->size;
2118 		if (end & PAGE_MASK) {
2119 			end = (end & PMAP_PA_MASK);
2120 		}
2121 		uvm_page_physload(atop(start), atop(end),
2122 		    atop(start), atop(end), 0);
2123 
2124 	}
2125 }
2126 
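/*
 * Debug helper: walk the page table hierarchy for va and print the
 * table pointers and entries found at each level.
 */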
2127 void
2128 pmap_show_mapping(uint64_t va)
2129 {
2130 	struct pmapvp1 *vp1;
2131 	struct pmapvp2 *vp2;
2132 	struct pmapvp3 *vp3;
2133 	struct pte_desc *pted;
2134 	struct pmap *pm;
2135 	uint64_t satp;
2136 
2137 	printf("showing mapping of %llx\n", va);
2138 
2139 	if (va & 1ULL << 63)
2140 		pm = pmap_kernel();
2141 	else
2142 		pm = curproc->p_vmspace->vm_map.pmap;
2143 
2144 	vp1 = pm->pm_vp.l1;
2145 
2146 	__asm volatile ("csrr %0, satp" : "=r" (satp));
2147 	printf("  satp %llx %llx\n", satp, SATP_PPN(pm->pm_satp) << PAGE_SHIFT);
2148 	printf("  vp1 = %p\n", vp1);
2149 
2150 	vp2 = vp1->vp[VP_IDX1(va)];
2151 	printf("  vp2 = %p lp2 = %llx idx1 off %x\n",
2152 		vp2, vp1->l1[VP_IDX1(va)], VP_IDX1(va)*8);
2153 	if (vp2 == NULL)
2154 		return;
2155 
2156 	vp3 = vp2->vp[VP_IDX2(va)];
2157 	printf("  vp3 = %p lp3 = %llx idx2 off %x\n",
2158 		vp3, vp2->l2[VP_IDX2(va)], VP_IDX2(va)*8);
2159 	if (vp3 == NULL)
2160 		return;
2161 
2162 	pted = vp3->vp[VP_IDX3(va)];
2163 	printf("  pted = %p lp3 = %llx idx3 off %x\n",
2164 		pted, vp3->l3[VP_IDX3(va)], VP_IDX3(va)*8);
2165 }
2166 
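/*
 * Switch the current hart to the address space of proc p by loading
 * its SATP value and flushing the local TLB.
 */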
2167 void
2168 pmap_set_satp(struct proc *p)
2169 {
2170 	struct cpu_info *ci = curcpu();
2171 	pmap_t pm = p->p_vmspace->vm_map.pmap;
2172 
2173 	ci->ci_curpm = pm;
2174 	load_satp(pm->pm_satp);
2175 	sfence_vma();
2176 }
2177