1 /* $OpenBSD: pmap.c,v 1.44 2025/01/19 20:18:38 kettenis Exp $ */
2
3 /*
4 * Copyright (c) 2019-2020 Brian Bamsch <bbamsch@google.com>
5 * Copyright (c) 2008-2009,2014-2016 Dale Rahn <drahn@dalerahn.com>
6 *
7 * Permission to use, copy, modify, and distribute this software for any
8 * purpose with or without fee is hereby granted, provided that the above
9 * copyright notice and this permission notice appear in all copies.
10 *
11 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
12 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
14 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18 */
19
20 #include <sys/param.h>
21 #include <sys/systm.h>
22 #include <sys/atomic.h>
23 #include <sys/pool.h>
24 #include <sys/proc.h>
25
26 #include <uvm/uvm.h>
27
28 #include <machine/cpufunc.h>
29 #include <machine/pmap.h>
30 #include <machine/riscvreg.h>
31 #include <machine/sbi.h>
32
33 #include <dev/ofw/fdt.h>
34
35 #ifdef MULTIPROCESSOR
36
37 static inline int
38 pmap_is_active(struct pmap *pm, struct cpu_info *ci)
39 {
40 return pm == pmap_kernel() || pm == ci->ci_curpm;
41 }
42
43 #endif
44
45 void
46 do_tlb_flush_page(pmap_t pm, vaddr_t va)
47 {
48 #ifdef MULTIPROCESSOR
49 CPU_INFO_ITERATOR cii;
50 struct cpu_info *ci;
51 unsigned long hart_mask = 0;
52
53 CPU_INFO_FOREACH(cii, ci) {
54 if (ci == curcpu())
55 continue;
56 if (pmap_is_active(pm, ci))
57 hart_mask |= (1UL << ci->ci_hartid);
58 }
59
60 /*
61 * From the RISC-V privileged spec:
62 *
63 * SFENCE.VMA orders only the local hart's implicit references
64 * to the memory-management data structures. Consequently, other
65 * harts must be notified separately when the memory-management
66 * data structures have been modified. One approach is to use 1)
67 * a local data fence to ensure local writes are visible
68 * globally, then 2) an interprocessor interrupt to the other
69 * thread, then 3) a local SFENCE.VMA in the interrupt handler
70 * of the remote thread, and finally 4) signal back to
71 * originating thread that operation is complete.
72 */
73 if (hart_mask != 0) {
74 membar_sync();
75 sbi_remote_sfence_vma(&hart_mask, va, PAGE_SIZE);
76 }
77 #endif
78
79 sfence_vma_page(va);
80 }
81
82 void
83 do_tlb_flush(pmap_t pm)
84 {
85 #ifdef MULTIPROCESSOR
86 CPU_INFO_ITERATOR cii;
87 struct cpu_info *ci;
88 unsigned long hart_mask = 0;
89
90 CPU_INFO_FOREACH(cii, ci) {
91 if (ci == curcpu())
92 continue;
93 if (pmap_is_active(pm, ci))
94 hart_mask |= (1UL << ci->ci_hartid);
95 }
96
97 /*
98 * From the RISC-V privileged spec:
99 *
100 * SFENCE.VMA orders only the local hart's implicit references
101 * to the memory-management data structures. Consequently, other
102 * harts must be notified separately when the memory-management
103 * data structures have been modified. One approach is to use 1)
104 * a local data fence to ensure local writes are visible
105 * globally, then 2) an interprocessor interrupt to the other
106 * thread, then 3) a local SFENCE.VMA in the interrupt handler
107 * of the remote thread, and finally 4) signal back to
108 * originating thread that operation is complete.
109 */
110 if (hart_mask != 0) {
111 membar_sync();
112 sbi_remote_sfence_vma(&hart_mask, 0, -1);
113 }
114 #endif
115
116 sfence_vma();
117 }
118
119 void
120 tlb_flush_page(pmap_t pm, vaddr_t va)
121 {
122 if (cpu_errata_sifive_cip_1200)
123 do_tlb_flush(pm);
124 else
125 do_tlb_flush_page(pm, va);
126 }
127
128 static inline void
129 icache_flush(void)
130 {
131 #ifdef MULTIPROCESSOR
132 CPU_INFO_ITERATOR cii;
133 struct cpu_info *ci;
134 unsigned long hart_mask = 0;
135 #endif
136
137 fence_i();
138
139 #ifdef MULTIPROCESSOR
140 CPU_INFO_FOREACH(cii, ci) {
141 if (ci == curcpu())
142 continue;
143 hart_mask |= (1UL << ci->ci_hartid);
144 }
145
146 /*
147 * From the RISC-V ISA:
148 *
149 * To make a store to instruction memory visible to all RISC-V
150 * harts, the writing hart has to execute a data FENCE before
151 * requesting that all remote RISC-V harts execute a FENCE.I.
152 */
153 if (hart_mask != 0) {
154 membar_sync();
155 sbi_remote_fence_i(&hart_mask);
156 }
157 #endif
158 }
159
160 struct pmap kernel_pmap_;
161
162 LIST_HEAD(pted_pv_head, pte_desc);
163
164 struct pte_desc {
165 LIST_ENTRY(pte_desc) pted_pv_list;
166 pt_entry_t pted_pte;
167 pmap_t pted_pmap;
168 vaddr_t pted_va;
169 };
170
171 struct pmapvp1 {
172 pt_entry_t l1[VP_IDX1_CNT];
173 struct pmapvp2 *vp[VP_IDX1_CNT];
174 };
175
176 struct pmapvp2 {
177 pt_entry_t l2[VP_IDX2_CNT];
178 struct pmapvp3 *vp[VP_IDX2_CNT];
179 };
180
181 struct pmapvp3 {
182 pt_entry_t l3[VP_IDX3_CNT];
183 struct pte_desc *vp[VP_IDX3_CNT];
184 };
185 CTASSERT(sizeof(struct pmapvp1) == sizeof(struct pmapvp2));
186 CTASSERT(sizeof(struct pmapvp1) == sizeof(struct pmapvp3));
187
188 void pmap_vp_destroy(pmap_t);
189
190 /* Allocator for VP pool. */
191 void *pmap_vp_page_alloc(struct pool *, int, int *);
192 void pmap_vp_page_free(struct pool *, void *);
193
194 struct pool_allocator pmap_vp_allocator = {
195 pmap_vp_page_alloc, pmap_vp_page_free, sizeof(struct pmapvp1)
196 };
197
198 void pmap_remove_pted(pmap_t, struct pte_desc *);
199 void pmap_kremove_pg(vaddr_t);
200 void pmap_set_l2(struct pmap *, uint64_t, struct pmapvp2 *, paddr_t);
201 void pmap_set_l3(struct pmap *, uint64_t, struct pmapvp3 *, paddr_t);
202 void pmap_set_satp(struct proc *);
203
204 void pmap_fill_pte(pmap_t, vaddr_t, paddr_t, struct pte_desc *,
205 vm_prot_t, int, int);
206 void pmap_pte_insert(struct pte_desc *);
207 void pmap_pte_remove(struct pte_desc *, int);
208 void pmap_pte_update(struct pte_desc *, pt_entry_t *);
209 void pmap_release(pmap_t);
210 paddr_t pmap_steal_avail(size_t, int, void **);
211 void pmap_remove_avail(paddr_t, paddr_t);
212 vaddr_t pmap_map_stolen(vaddr_t);
213
214 vaddr_t vmmap;
215 vaddr_t zero_page;
216 vaddr_t copy_src_page;
217 vaddr_t copy_dst_page;
218
219 #define CPU_VENDOR_THEAD 0x5b7
220
221 struct pool pmap_pmap_pool;
222 struct pool pmap_pted_pool;
223 struct pool pmap_vp_pool;
224
225 int pmap_initialized = 0;
226
227 struct mem_region {
228 vaddr_t start;
229 vsize_t size;
230 };
231
232 struct mem_region pmap_avail_regions[10];
233 struct mem_region pmap_allocated_regions[10];
234 struct mem_region *pmap_avail = &pmap_avail_regions[0];
235 struct mem_region *pmap_allocated = &pmap_allocated_regions[0];
236 int pmap_cnt_avail, pmap_cnt_allocated;
237 uint64_t pmap_avail_kvo;
238
239 paddr_t pmap_cached_start, pmap_cached_end;
240 paddr_t pmap_uncached_start, pmap_uncached_end;
241
242 static inline void
243 pmap_lock(struct pmap *pmap)
244 {
245 if (pmap != pmap_kernel())
246 mtx_enter(&pmap->pm_mtx);
247 }
248
249 static inline void
250 pmap_unlock(struct pmap *pmap)
251 {
252 if (pmap != pmap_kernel())
253 mtx_leave(&pmap->pm_mtx);
254 }
255
256 /* virtual to physical helpers */
257 static inline int
258 VP_IDX1(vaddr_t va)
259 {
260 return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
261 }
262
263 static inline int
264 VP_IDX2(vaddr_t va)
265 {
266 return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
267 }
268
269 static inline int
270 VP_IDX3(vaddr_t va)
271 {
272 return (va >> VP_IDX3_POS) & VP_IDX3_MASK;
273 }
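
/*
 * Illustrative sketch (not part of the pmap API): with the Sv39 layout
 * assumed here (VP_IDX1_POS == 30, VP_IDX2_POS == 21, VP_IDX3_POS == 12
 * and 9-bit masks), a virtual address decomposes into one index per
 * page table level walked by pmap_vp_lookup():
 *
 *	VP_IDX1(va) == (va >> 30) & 0x1ff
 *	VP_IDX2(va) == (va >> 21) & 0x1ff
 *	VP_IDX3(va) == (va >> 12) & 0x1ff
 */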
274
275 /*
276 * On RISC-V, the encodings for write permission without read
277 * permission (r=0, w=1, x=0, or r=0, w=1, x=1) are reserved, so
278 * PROT_WRITE implies PROT_READ. We need to handle PROT_NONE
279 * separately (see pmap_pte_update()) since r=0, w=0, x=0 is reserved
280 * for non-leaf page table entries.
281 */
282 const pt_entry_t ap_bits_user[8] = {
283 [PROT_NONE] = 0,
284 [PROT_READ] = PTE_U|PTE_A|PTE_R,
285 [PROT_WRITE] = PTE_U|PTE_A|PTE_R|PTE_D|PTE_W,
286 [PROT_WRITE|PROT_READ] = PTE_U|PTE_A|PTE_R|PTE_D|PTE_W,
287 [PROT_EXEC] = PTE_U|PTE_A|PTE_X,
288 [PROT_EXEC|PROT_READ] = PTE_U|PTE_A|PTE_X|PTE_R,
289 [PROT_EXEC|PROT_WRITE] = PTE_U|PTE_A|PTE_X|PTE_R|PTE_D|PTE_W,
290 [PROT_EXEC|PROT_WRITE|PROT_READ] = PTE_U|PTE_A|PTE_X|PTE_R|PTE_D|PTE_W,
291 };
292
293 const pt_entry_t ap_bits_kern[8] = {
294 [PROT_NONE] = 0,
295 [PROT_READ] = PTE_A|PTE_R,
296 [PROT_WRITE] = PTE_A|PTE_R|PTE_D|PTE_W,
297 [PROT_WRITE|PROT_READ] = PTE_A|PTE_R|PTE_D|PTE_W,
298 [PROT_EXEC] = PTE_A|PTE_X,
299 [PROT_EXEC|PROT_READ] = PTE_A|PTE_X|PTE_R,
300 [PROT_EXEC|PROT_WRITE] = PTE_A|PTE_X|PTE_R|PTE_D|PTE_W,
301 [PROT_EXEC|PROT_WRITE|PROT_READ] = PTE_A|PTE_X|PTE_R|PTE_D|PTE_W,
302 };
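
/*
 * Illustrative use of the tables above (a sketch, not a new API): a
 * kernel read/write mapping gets
 *
 *	ap_bits_kern[PROT_READ|PROT_WRITE] == PTE_A|PTE_R|PTE_D|PTE_W
 *
 * in its leaf PTE, i.e. PROT_WRITE always carries PTE_R because the
 * write-only encodings are reserved.  PROT_NONE yields 0, which makes
 * pmap_pte_update() store an invalid (zero) PTE instead of a reserved
 * encoding.
 */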
303
304 /* PBMT encodings for the Svpbmt modes. */
305 uint64_t pmap_pma;
306 uint64_t pmap_nc;
307 uint64_t pmap_io;
308
309 /*
310 * This is used for pmap_kernel() mappings; they are not to be removed
311 * from the vp table because they were statically set up during the
312 * initial pmap initialization. That way no memory allocation is ever
313 * needed for pmap_kernel() mappings, which would otherwise open the
314 * door to bad race conditions.
315 */
316 struct pte_desc *
317 pmap_vp_lookup(pmap_t pm, vaddr_t va, pt_entry_t **pl3entry)
318 {
319 struct pmapvp1 *vp1;
320 struct pmapvp2 *vp2;
321 struct pmapvp3 *vp3;
322 struct pte_desc *pted;
323
324 vp1 = pm->pm_vp.l1;
325 if (vp1 == NULL) {
326 return NULL;
327 }
328
329 vp2 = vp1->vp[VP_IDX1(va)];
330 if (vp2 == NULL) {
331 return NULL;
332 }
333
334 vp3 = vp2->vp[VP_IDX2(va)];
335 if (vp3 == NULL) {
336 return NULL;
337 }
338
339 pted = vp3->vp[VP_IDX3(va)];
340 if (pl3entry != NULL)
341 *pl3entry = &(vp3->l3[VP_IDX3(va)]);
342
343 return pted;
344 }
345
346 /*
347 * Create a V -> P mapping for the given pmap and virtual address
348 * with reference to the pte descriptor that is used to map the page.
349 * This code should track vp table allocations
350 * so they can be freed efficiently.
351 *
352 * XXX it may be possible to save some bits of count in the
353 * upper address bits of the pa or the pte entry.
354 * However, that does make populating the other bits more tricky.
355 * Each level has 512 entries, so that means 9 bits to store;
356 * stash 3 bits each in the first 3 entries?
357 */
358 int
359 pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags)
360 {
361 struct pmapvp1 *vp1;
362 struct pmapvp2 *vp2;
363 struct pmapvp3 *vp3;
364
365 vp1 = pm->pm_vp.l1;
366
367 vp2 = vp1->vp[VP_IDX1(va)];
368 if (vp2 == NULL) {
369 vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
370 if (vp2 == NULL) {
371 if ((flags & PMAP_CANFAIL) == 0)
372 panic("%s: unable to allocate L2", __func__);
373 return ENOMEM;
374 }
375 pmap_set_l2(pm, va, vp2, 0);
376 }
377
378 vp3 = vp2->vp[VP_IDX2(va)];
379 if (vp3 == NULL) {
380 vp3 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
381 if (vp3 == NULL) {
382 if ((flags & PMAP_CANFAIL) == 0)
383 panic("%s: unable to allocate L3", __func__);
384 return ENOMEM;
385 }
386 pmap_set_l3(pm, va, vp3, 0);
387 }
388
389 vp3->vp[VP_IDX3(va)] = pted;
390 return 0;
391 }
392
393 void
394 pmap_vp_populate(pmap_t pm, vaddr_t va)
395 {
396 struct pte_desc *pted;
397 struct pmapvp1 *vp1;
398 struct pmapvp2 *vp2;
399 struct pmapvp3 *vp3;
400 void *vp;
401
402 pted = pool_get(&pmap_pted_pool, PR_WAITOK | PR_ZERO);
403 vp = pool_get(&pmap_vp_pool, PR_WAITOK | PR_ZERO);
404
405 pmap_lock(pm);
406
407 vp1 = pm->pm_vp.l1;
408
409 vp2 = vp1->vp[VP_IDX1(va)];
410 if (vp2 == NULL) {
411 vp2 = vp; vp = NULL;
412 pmap_set_l2(pm, va, vp2, 0);
413 }
414
415 if (vp == NULL) {
416 pmap_unlock(pm);
417 vp = pool_get(&pmap_vp_pool, PR_WAITOK | PR_ZERO);
418 pmap_lock(pm);
419 }
420
421 vp3 = vp2->vp[VP_IDX2(va)];
422 if (vp3 == NULL) {
423 vp3 = vp; vp = NULL;
424 pmap_set_l3(pm, va, vp3, 0);
425 }
426
427 if (vp3->vp[VP_IDX3(va)] == NULL) {
428 vp3->vp[VP_IDX3(va)] = pted;
429 pted = NULL;
430 }
431
432 pmap_unlock(pm);
433
434 if (vp)
435 pool_put(&pmap_vp_pool, vp);
436 if (pted)
437 pool_put(&pmap_pted_pool, pted);
438 }
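
/*
 * Note on the pattern above (descriptive only): the pool allocations
 * can sleep, so they are done before taking the pmap lock; after the
 * lock is (re)acquired every level is checked again, and any allocation
 * that lost the race is simply returned to its pool at the end.
 */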
439
440 void *
441 pmap_vp_page_alloc(struct pool *pp, int flags, int *slowdown)
442 {
443 struct kmem_dyn_mode kd = KMEM_DYN_INITIALIZER;
444
445 kd.kd_waitok = ISSET(flags, PR_WAITOK);
446 kd.kd_trylock = ISSET(flags, PR_NOWAIT);
447 kd.kd_slowdown = slowdown;
448
449 return km_alloc(pp->pr_pgsize, &kv_any, &kp_dirty, &kd);
450 }
451
452 void
453 pmap_vp_page_free(struct pool *pp, void *v)
454 {
455 km_free(v, pp->pr_pgsize, &kv_any, &kp_dirty);
456 }
457
458 static inline u_int32_t
459 PTED_MANAGED(struct pte_desc *pted)
460 {
461 return (pted->pted_va & PTED_VA_MANAGED_M);
462 }
463
464 static inline u_int32_t
465 PTED_WIRED(struct pte_desc *pted)
466 {
467 return (pted->pted_va & PTED_VA_WIRED_M);
468 }
469
470 static inline u_int32_t
471 PTED_VALID(struct pte_desc *pted)
472 {
473 return (pted->pted_pte != 0);
474 }
475
476 /*
477 * PV entries -
478 * manipulate the physical to virtual translations for the entire system.
479 *
480 * QUESTION: should all mapped memory be stored in PV tables? Or
481 * is it alright to only store "ram" memory? Currently device mappings
482 * are not stored.
483 * It makes sense to pre-allocate mappings for all of "ram" memory, since
484 * it is likely that it will be mapped at some point, but would it also
485 * make sense to use a tree/table like is used for the pmap to store device
486 * mappings?
487 * Further notes: It seems that the PV table is only used for pmap_protect
488 * and other paging related operations. Given this, it is not necessary
489 * to store any pmap_kernel() entries in PV tables and it does not make
490 * sense to store device mappings in PV either.
491 *
492 * Note: unlike other powerpc pmap designs, the array is only an array
493 * of pointers. The same structure is used for holding information
494 * in the VP table, the PV table, and for kernel mappings (the wired entries),
495 * so one data structure holds all of the info instead of replicating
496 * it multiple times.
497 *
498 * One issue with making this a single data structure is that two pointers are
499 * wasted for every page which does not map ram (device mappings). This
500 * should be a low percentage of the mapped pages in the system, so it should
501 * not cause too noticeable an amount of unnecessary ram consumption.
502 */
503
504 void
505 pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg)
506 {
507 /*
508 * XXX does this test mean that some pages try to be managed,
509 * but this is called too soon?
510 */
511 if (__predict_false(!pmap_initialized))
512 return;
513
514 mtx_enter(&pg->mdpage.pv_mtx);
515 LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
516 pted->pted_va |= PTED_VA_MANAGED_M;
517 mtx_leave(&pg->mdpage.pv_mtx);
518 }
519
520 void
521 pmap_remove_pv(struct pte_desc *pted)
522 {
523 struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte & PTE_RPGN);
524
525 mtx_enter(&pg->mdpage.pv_mtx);
526 LIST_REMOVE(pted, pted_pv_list);
527 mtx_leave(&pg->mdpage.pv_mtx);
528 }
529
530 int
531 pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
532 {
533 struct pte_desc *pted;
534 struct vm_page *pg;
535 int error;
536 int cache = PMAP_CACHE_WB;
537
538 if (pa & PMAP_NOCACHE)
539 cache = PMAP_CACHE_CI;
540 if (pa & PMAP_DEVICE)
541 cache = PMAP_CACHE_DEV;
542 pg = PHYS_TO_VM_PAGE(pa);
543
544 pmap_lock(pm);
545 pted = pmap_vp_lookup(pm, va, NULL);
546 if (pted && PTED_VALID(pted)) {
547 pmap_remove_pted(pm, pted);
548 /* we lost our pted if it was user */
549 if (pm != pmap_kernel())
550 pted = pmap_vp_lookup(pm, va, NULL);
551 }
552
553 pm->pm_stats.resident_count++;
554
555 /* Do not have pted for this, get one and put it in VP */
556 if (pted == NULL) {
557 pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO);
558 if (pted == NULL) {
559 if ((flags & PMAP_CANFAIL) == 0)
560 panic("%s: failed to allocate pted", __func__);
561 error = ENOMEM;
562 goto out;
563 }
564 if (pmap_vp_enter(pm, va, pted, flags)) {
565 if ((flags & PMAP_CANFAIL) == 0)
566 panic("%s: failed to allocate L2/L3", __func__);
567 error = ENOMEM;
568 pool_put(&pmap_pted_pool, pted);
569 goto out;
570 }
571 }
572
573 /*
574 * If it should be enabled _right now_, we can skip doing ref/mod
575 * emulation. Any access includes reference, modified only by write.
576 */
577 if (pg != NULL &&
578 ((flags & PROT_MASK) || (pg->pg_flags & PG_PMAP_REF))) {
579 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
580 if ((prot & PROT_WRITE) && (flags & PROT_WRITE)) {
581 atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD);
582 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
583 }
584 }
585
586 pmap_fill_pte(pm, va, pa, pted, prot, flags, cache);
587
588 if (pg != NULL) {
589 pmap_enter_pv(pted, pg); /* only managed mem */
590 }
591
592 if (pg != NULL && (flags & PROT_EXEC)) {
593 if ((pg->pg_flags & PG_PMAP_EXE) == 0)
594 icache_flush();
595 atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
596 }
597
598 /*
599 * Insert into table, if this mapping said it needed to be mapped
600 * now.
601 */
602 if (flags & (PROT_READ|PROT_WRITE|PROT_EXEC|PMAP_WIRED)) {
603 pmap_pte_insert(pted);
604 tlb_flush_page(pm, va & ~PAGE_MASK);
605 }
606
607 error = 0;
608 out:
609 pmap_unlock(pm);
610 return error;
611 }
612
613 void
614 pmap_populate(pmap_t pm, vaddr_t va)
615 {
616 pmap_vp_populate(pm, va);
617 }
618
619 /*
620 * Remove the given range of mapping entries.
621 */
622 void
623 pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
624 {
625 struct pte_desc *pted;
626 vaddr_t va;
627
628 pmap_lock(pm);
629 for (va = sva; va < eva; va += PAGE_SIZE) {
630 pted = pmap_vp_lookup(pm, va, NULL);
631
632 if (pted == NULL)
633 continue;
634
635 if (PTED_WIRED(pted)) {
636 pm->pm_stats.wired_count--;
637 pted->pted_va &= ~PTED_VA_WIRED_M;
638 }
639
640 if (PTED_VALID(pted))
641 pmap_remove_pted(pm, pted);
642 }
643 pmap_unlock(pm);
644 }
645
646 /*
647 * Remove a single mapping; note that this code is O(1).
648 */
649 void
650 pmap_remove_pted(pmap_t pm, struct pte_desc *pted)
651 {
652 pm->pm_stats.resident_count--;
653
654 if (PTED_WIRED(pted)) {
655 pm->pm_stats.wired_count--;
656 pted->pted_va &= ~PTED_VA_WIRED_M;
657 }
658
659 pmap_pte_remove(pted, pm != pmap_kernel());
660 tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
661
662 if (pted->pted_va & PTED_VA_EXEC_M) {
663 pted->pted_va &= ~PTED_VA_EXEC_M;
664 }
665
666 if (PTED_MANAGED(pted))
667 pmap_remove_pv(pted);
668
669 pted->pted_pte = 0;
670 pted->pted_va = 0;
671
672 if (pm != pmap_kernel())
673 pool_put(&pmap_pted_pool, pted);
674 }
675
676
677 /*
678 * Populate a kernel mapping for the given page.
679 * kernel mappings have a larger set of prerequisites than normal mappings.
680 *
681 * 1. no memory should be allocated to create a kernel mapping.
682 * 2. a vp mapping should already exist, even if invalid. (see 1)
683 * 3. all vp tree mappings should already exist (see 1)
684 *
685 */
686 void
687 _pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot, int flags, int cache)
688 {
689 pmap_t pm = pmap_kernel();
690 struct pte_desc *pted;
691 struct vm_page *pg;
692
693 pted = pmap_vp_lookup(pm, va, NULL);
694
695 /* Do not have pted for this, get one and put it in VP */
696 if (pted == NULL) {
697 panic("pted not preallocated in pmap_kernel() va %lx pa %lx",
698 va, pa);
699 }
700
701 if (pted && PTED_VALID(pted))
702 pmap_kremove_pg(va); /* pted is reused */
703
704 pm->pm_stats.resident_count++;
705
706 flags |= PMAP_WIRED; /* kernel mappings are always wired. */
707 /* Calculate PTE */
708 pmap_fill_pte(pm, va, pa, pted, prot, flags, cache);
709
710 /*
711 * Insert into table
712 * We were told to map the page, probably called from vm_fault,
713 * so map the page!
714 */
715 pmap_pte_insert(pted);
716 tlb_flush_page(pm, va & ~PAGE_MASK);
717
718 pg = PHYS_TO_VM_PAGE(pa);
719 if (pg && cache == PMAP_CACHE_CI)
720 cpu_dcache_wbinv_range(pa & ~PAGE_MASK, PAGE_SIZE);
721 }
722
723 void
724 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
725 {
726 _pmap_kenter_pa(va, pa, prot, prot,
727 (pa & PMAP_NOCACHE) ? PMAP_CACHE_CI : PMAP_CACHE_WB);
728 }
729
730 void
731 pmap_kenter_cache(vaddr_t va, paddr_t pa, vm_prot_t prot, int cacheable)
732 {
733 _pmap_kenter_pa(va, pa, prot, prot, cacheable);
734 }
735
736 /*
737 * remove kernel (pmap_kernel()) mapping, one page
738 */
739 void
740 pmap_kremove_pg(vaddr_t va)
741 {
742 pmap_t pm = pmap_kernel();
743 struct pte_desc *pted;
744 int s;
745
746 pted = pmap_vp_lookup(pm, va, NULL);
747 if (pted == NULL)
748 return;
749
750 if (!PTED_VALID(pted))
751 return; /* not mapped */
752
753 s = splvm();
754
755 pm->pm_stats.resident_count--;
756
757 /*
758 * The table needs to be locked here, as well as the pmap and the pv
759 * list, so that we know the mapping information is either valid or
760 * that the mapping is not present in the table.
761 */
762 pmap_pte_remove(pted, 0);
763 tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
764
765 if (pted->pted_va & PTED_VA_EXEC_M)
766 pted->pted_va &= ~PTED_VA_EXEC_M;
767
768 if (PTED_MANAGED(pted))
769 pmap_remove_pv(pted);
770
771 if (PTED_WIRED(pted))
772 pm->pm_stats.wired_count--;
773
774 /* invalidate pted; */
775 pted->pted_pte = 0;
776 pted->pted_va = 0;
777
778 splx(s);
779 }
780
781 /*
782 * remove kernel (pmap_kernel()) mappings
783 */
784 void
785 pmap_kremove(vaddr_t va, vsize_t len)
786 {
787 for (len >>= PAGE_SHIFT; len >0; len--, va += PAGE_SIZE)
788 pmap_kremove_pg(va);
789 }
790
791 void
792 pmap_fill_pte(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
793 vm_prot_t prot, int flags, int cache)
794 {
795 pted->pted_va = va;
796 pted->pted_pmap = pm;
797
798 switch (cache) {
799 case PMAP_CACHE_WB:
800 break;
801 case PMAP_CACHE_CI:
802 if (pa >= pmap_cached_start && pa <= pmap_cached_end)
803 pa += (pmap_uncached_start - pmap_cached_start);
804 break;
805 case PMAP_CACHE_DEV:
806 break;
807 default:
808 panic("%s: invalid cache mode", __func__);
809 }
810 pted->pted_va |= cache;
811
812 pted->pted_va |= prot & (PROT_READ|PROT_WRITE|PROT_EXEC);
813
814 if (flags & PMAP_WIRED) {
815 pted->pted_va |= PTED_VA_WIRED_M;
816 pm->pm_stats.wired_count++;
817 }
818
819 pted->pted_pte = pa & PTE_RPGN;
820 pted->pted_pte |= flags & (PROT_READ|PROT_WRITE|PROT_EXEC);
821 }
822
823 /*
824 * Fill the given physical page with zeros.
825 */
826 void
827 pmap_zero_page(struct vm_page *pg)
828 {
829 paddr_t pa = VM_PAGE_TO_PHYS(pg);
830 vaddr_t va = zero_page + cpu_number() * PAGE_SIZE;
831
832 pmap_kenter_pa(va, pa, PROT_READ|PROT_WRITE);
833 pagezero(va);
834 pmap_kremove_pg(va);
835 }
836
837 /*
838 * Copy the given physical page.
839 */
840 void
841 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
842 {
843 paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
844 paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
845 vaddr_t srcva = copy_src_page + cpu_number() * PAGE_SIZE;
846 vaddr_t dstva = copy_dst_page + cpu_number() * PAGE_SIZE;
847 int s;
848
849 /*
850 * XXX The buffer flipper (incorrectly?) uses pmap_copy_page()
851 * (from uvm_pagerealloc_multi()) from interrupt context!
852 */
853 s = splbio();
854 pmap_kenter_pa(srcva, srcpa, PROT_READ);
855 pmap_kenter_pa(dstva, dstpa, PROT_READ|PROT_WRITE);
856 memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
857 pmap_kremove_pg(srcva);
858 pmap_kremove_pg(dstva);
859 splx(s);
860 }
861
862 void
863 pmap_pinit(pmap_t pm)
864 {
865 struct pmapvp1 *vp1, *kvp1;
866 vaddr_t l1va;
867 uint64_t l1pa;
868
869 /* Allocate a full L1 table. */
870 while (pm->pm_vp.l1 == NULL) {
871 pm->pm_vp.l1 = pool_get(&pmap_vp_pool,
872 PR_WAITOK | PR_ZERO);
873 }
874
875 vp1 = pm->pm_vp.l1; /* top level is l1 */
876 l1va = (vaddr_t)vp1->l1;
877
878 /* Fill kernel PTEs. */
879 kvp1 = pmap_kernel()->pm_vp.l1;
880 memcpy(&vp1->l1[L1_KERN_BASE], &kvp1->l1[L1_KERN_BASE],
881 L1_KERN_ENTRIES * sizeof(pt_entry_t));
882 memcpy(&vp1->vp[L1_KERN_BASE], &kvp1->vp[L1_KERN_BASE],
883 L1_KERN_ENTRIES * sizeof(struct pmapvp2 *));
884
885 pmap_extract(pmap_kernel(), l1va, (paddr_t *)&l1pa);
886 pm->pm_satp |= SATP_FORMAT_PPN(PPN(l1pa));
887 pm->pm_satp |= SATP_MODE_SV39;
888 pmap_reference(pm);
889 }
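
/*
 * A sketch of the resulting satp value (standard RV64 layout; the ASID
 * is left at 0 here): MODE occupies bits 63:60, ASID bits 59:44 and the
 * physical page number of the root (L1) table bits 43:0, i.e.
 *
 *	pm->pm_satp == SATP_MODE_SV39 | SATP_FORMAT_PPN(PPN(l1pa));
 *
 * pmap_set_satp() later loads this value into the satp CSR when the
 * pmap becomes active on a CPU.
 */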
890
891 int pmap_vp_poolcache = 0; /* force vp poolcache to allocate late */
892
893 /*
894 * Create and return a physical map.
895 */
896 pmap_t
897 pmap_create(void)
898 {
899 pmap_t pmap;
900
901 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK | PR_ZERO);
902
903 mtx_init(&pmap->pm_mtx, IPL_VM);
904
905 pmap_pinit(pmap);
906 if (pmap_vp_poolcache == 0) {
907 pool_setlowat(&pmap_vp_pool, 20);
908 pmap_vp_poolcache = 20;
909 }
910 return (pmap);
911 }
912
913 /*
914 * Add a reference to a given pmap.
915 */
916 void
917 pmap_reference(pmap_t pm)
918 {
919 atomic_inc_int(&pm->pm_refs);
920 }
921
922 /*
923 * Retire the given pmap from service.
924 * Should only be called if the map contains no valid mappings.
925 */
926 void
927 pmap_destroy(pmap_t pm)
928 {
929 int refs;
930
931 refs = atomic_dec_int_nv(&pm->pm_refs);
932 if (refs > 0)
933 return;
934
935 /*
936 * reference count is zero, free pmap resources and free pmap.
937 */
938 pmap_release(pm);
939 pool_put(&pmap_pmap_pool, pm);
940 }
941
942 /*
943 * Release any resources held by the given physical map.
944 * Called when a pmap initialized by pmap_pinit is being released.
945 */
946 void
947 pmap_release(pmap_t pm)
948 {
949 pmap_vp_destroy(pm);
950 }
951
952 void
953 pmap_vp_destroy(pmap_t pm)
954 {
955 struct pmapvp1 *vp1;
956 struct pmapvp2 *vp2;
957 struct pmapvp3 *vp3;
958 struct pte_desc *pted;
959 int j, k, l;
960
961 vp1 = pm->pm_vp.l1;
962 /*
963 * There is no separate supervisor and user page table root, so
964 * remove only the user page tables here.
965 */
966 for (j = 0; j < L1_KERN_BASE; j++) {
967 vp2 = vp1->vp[j];
968 if (vp2 == NULL)
969 continue;
970 vp1->vp[j] = NULL;
971
972 for (k = 0; k < VP_IDX2_CNT; k++) {
973 vp3 = vp2->vp[k];
974 if (vp3 == NULL)
975 continue;
976 vp2->vp[k] = NULL;
977
978 for (l = 0; l < VP_IDX3_CNT; l++) {
979 pted = vp3->vp[l];
980 if (pted == NULL)
981 continue;
982 vp3->vp[l] = NULL;
983
984 pool_put(&pmap_pted_pool, pted);
985 }
986 pool_put(&pmap_vp_pool, vp3);
987 }
988 pool_put(&pmap_vp_pool, vp2);
989 }
990 pool_put(&pmap_vp_pool, pm->pm_vp.l1);
991 pm->pm_vp.l1 = NULL;
992 return;
993 }
994
995 vaddr_t virtual_avail;
996 int pmap_virtual_space_called;
997
998 static inline pt_entry_t
999 VP_Lx(paddr_t pa)
1000 {
1001 /*
1002 * This function takes the pa address given and manipulates it
1003 * into the form that should be inserted into the VM table.
1004 */
1005 // NOTE: We always assume the entry is valid. OpenBSD/arm64 uses
1006 // the least significant bits to differentiate between PTD / PTE.
1007 // In the riscv64 Sv39 address translation mode, a PTD and a PTE are
1008 // distinguished by the lack of PTE_R / PTE_X on an entry with PTE_V
1009 // set. For both a PTD and a PTE, the PTE_V bit is set.
1010 return (((pa & PTE_RPGN) >> PAGE_SHIFT) << PTE_PPN0_S) | PTE_V;
1011 }
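
/*
 * Worked example (illustrative only, assuming PAGE_SHIFT == 12 and
 * PTE_PPN0_S == 10 as in the RISC-V privileged spec): VP_Lx() masks pa
 * to PTE_RPGN and produces roughly
 *
 *	pte = ((pa >> 12) << 10) | PTE_V;
 *
 * Leaf permissions (PTE_R/PTE_W/PTE_X and friends) are OR'ed in
 * separately by pmap_pte_update(); without them the entry acts as a
 * pointer to the next table level rather than as a leaf.
 */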
1012
1013 /*
1014 * In pmap_bootstrap() we allocate the page tables for the first GB
1015 * of the kernel address space.
1016 */
1017 vaddr_t pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS + 1024 * 1024 * 1024;
1018
1019 /*
1020 * Allocator for growing the kernel page tables. We use a dedicated
1021 * submap to make sure we have the space to map them as we are called
1022 * when address space is tight!
1023 */
1024
1025 struct vm_map *pmap_kvp_map;
1026
1027 const struct kmem_va_mode kv_kvp = {
1028 .kv_map = &pmap_kvp_map,
1029 .kv_wait = 0
1030 };
1031
1032 void *
1033 pmap_kvp_alloc(void)
1034 {
1035 void *kvp;
1036
1037 if (!uvm.page_init_done && !pmap_virtual_space_called) {
1038 paddr_t pa[2];
1039 vaddr_t va;
1040
1041 if (!uvm_page_physget(&pa[0]) || !uvm_page_physget(&pa[1]))
1042 panic("%s: out of memory", __func__);
1043
1044 va = virtual_avail;
1045 virtual_avail += 2 * PAGE_SIZE;
1046 KASSERT(virtual_avail <= pmap_maxkvaddr);
1047 kvp = (void *)va;
1048
1049 pmap_kenter_pa(va, pa[0], PROT_READ|PROT_WRITE);
1050 pmap_kenter_pa(va + PAGE_SIZE, pa[1], PROT_READ|PROT_WRITE);
1051 pagezero(va);
1052 pagezero(va + PAGE_SIZE);
1053 } else {
1054 kvp = km_alloc(sizeof(struct pmapvp1), &kv_kvp, &kp_zero,
1055 &kd_nowait);
1056 }
1057
1058 return kvp;
1059 }
1060
1061 struct pte_desc *
1062 pmap_kpted_alloc(void)
1063 {
1064 static struct pte_desc *pted;
1065 static int npted;
1066
1067 if (npted == 0) {
1068 if (!uvm.page_init_done && !pmap_virtual_space_called) {
1069 paddr_t pa;
1070 vaddr_t va;
1071
1072 if (!uvm_page_physget(&pa))
1073 panic("%s: out of memory", __func__);
1074
1075 va = virtual_avail;
1076 virtual_avail += PAGE_SIZE;
1077 KASSERT(virtual_avail <= pmap_maxkvaddr);
1078 pted = (struct pte_desc *)va;
1079
1080 pmap_kenter_pa(va, pa, PROT_READ|PROT_WRITE);
1081 pagezero(va);
1082 } else {
1083 pted = km_alloc(PAGE_SIZE, &kv_kvp, &kp_zero,
1084 &kd_nowait);
1085 if (pted == NULL)
1086 return NULL;
1087 }
1088
1089 npted = PAGE_SIZE / sizeof(struct pte_desc);
1090 }
1091
1092 npted--;
1093 return pted++;
1094 }
1095
1096 vaddr_t
1097 pmap_growkernel(vaddr_t maxkvaddr)
1098 {
1099 struct pmapvp1 *vp1 = pmap_kernel()->pm_vp.l1;
1100 struct pmapvp2 *vp2;
1101 struct pmapvp3 *vp3;
1102 struct pte_desc *pted;
1103 paddr_t pa;
1104 int lb_idx2, ub_idx2;
1105 int i, j, k;
1106 int s;
1107
1108 if (maxkvaddr <= pmap_maxkvaddr)
1109 return pmap_maxkvaddr;
1110
1111 /*
1112 * Not strictly necessary, but we use an interrupt-safe map
1113 * and uvm asserts that we're at IPL_VM.
1114 */
1115 s = splvm();
1116
1117 for (i = VP_IDX1(pmap_maxkvaddr); i <= VP_IDX1(maxkvaddr - 1); i++) {
1118 vp2 = vp1->vp[i];
1119 if (vp2 == NULL) {
1120 vp2 = pmap_kvp_alloc();
1121 if (vp2 == NULL)
1122 goto fail;
1123 pmap_extract(pmap_kernel(), (vaddr_t)vp2, &pa);
1124 vp1->vp[i] = vp2;
1125 vp1->l1[i] = VP_Lx(pa);
1126 }
1127
1128 if (i == VP_IDX1(pmap_maxkvaddr)) {
1129 lb_idx2 = VP_IDX2(pmap_maxkvaddr);
1130 } else {
1131 lb_idx2 = 0;
1132 }
1133
1134 if (i == VP_IDX1(maxkvaddr - 1)) {
1135 ub_idx2 = VP_IDX2(maxkvaddr - 1);
1136 } else {
1137 ub_idx2 = VP_IDX2_CNT - 1;
1138 }
1139
1140 for (j = lb_idx2; j <= ub_idx2; j++) {
1141 vp3 = vp2->vp[j];
1142 if (vp3 == NULL) {
1143 vp3 = pmap_kvp_alloc();
1144 if (vp3 == NULL)
1145 goto fail;
1146 pmap_extract(pmap_kernel(), (vaddr_t)vp3, &pa);
1147 vp2->vp[j] = vp3;
1148 vp2->l2[j] = VP_Lx(pa);
1149 }
1150
1151 for (k = 0; k <= VP_IDX3_CNT - 1; k++) {
1152 if (vp3->vp[k] == NULL) {
1153 pted = pmap_kpted_alloc();
1154 if (pted == NULL)
1155 goto fail;
1156 vp3->vp[k] = pted;
1157 pmap_maxkvaddr += PAGE_SIZE;
1158 }
1159 }
1160 }
1161 }
1162 KASSERT(pmap_maxkvaddr >= maxkvaddr);
1163
1164 fail:
1165 splx(s);
1166
1167 return pmap_maxkvaddr;
1168 }
1169
1170 void pmap_setup_avail(uint64_t memstart, uint64_t memend, uint64_t kvo);
1171
1172 /*
1173 * Initialize pmap setup.
1174 * ALL of the code which deals with avail needs to be rewritten as an actual
1175 * memory allocation.
1176 */
1177 CTASSERT(sizeof(struct pmapvp1) == 2 * PAGE_SIZE);
1178
1179 int mappings_allocated = 0;
1180 int pted_allocated = 0;
1181
1182 extern char __text_start[], _etext[];
1183 extern char __rodata_start[], _erodata[];
1184
1185 paddr_t dmap_phys_base;
1186
1187 void
1188 pmap_bootstrap_dmap(vaddr_t kern_l1, paddr_t min_pa, paddr_t max_pa)
1189 {
1190 vaddr_t va;
1191 paddr_t pa;
1192 pt_entry_t *l1;
1193 u_int l1_slot;
1194 pt_entry_t entry;
1195 pn_t pn;
1196
1197 pa = dmap_phys_base = min_pa & ~L1_OFFSET; // 1 GiB Align
1198 va = DMAP_MIN_ADDRESS;
1199 l1 = (pt_entry_t *)kern_l1;
1200 l1_slot = VP_IDX1(DMAP_MIN_ADDRESS);
1201
1202 for (; va < DMAP_MAX_ADDRESS && pa < max_pa;
1203 pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
1204 KASSERT(l1_slot < Ln_ENTRIES);
1205
1206 /* gigapages */
1207 pn = (pa / PAGE_SIZE);
1208 entry = PTE_KERN | pmap_pma;
1209 entry |= (pn << PTE_PPN0_S);
1210 l1[l1_slot] = entry;
1211 }
1212
1213 sfence_vma();
1214 }
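
/*
 * With the direct map installed, stolen physical memory can be reached
 * without a pmap_enter() call.  A minimal sketch, assuming the usual
 * definition of the translation macro:
 *
 *	vaddr_t va = PHYS_TO_DMAP(pa);
 *	    == DMAP_MIN_ADDRESS + (pa - dmap_phys_base)
 *
 * which is how pmap_bootstrap() below initializes the freshly stolen
 * page table memory while still running on the bootstrap tables.
 */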
1215
1216 vaddr_t
1217 pmap_bootstrap(long kvo, vaddr_t l1pt, vaddr_t kernelstart, vaddr_t kernelend,
1218 paddr_t memstart, paddr_t memend)
1219 {
1220 void *va;
1221 paddr_t pa, pt1pa;
1222 struct pmapvp1 *vp1;
1223 struct pmapvp2 *vp2;
1224 struct pmapvp3 *vp3;
1225 struct pte_desc *pted;
1226 vaddr_t vstart;
1227 int i, j, k;
1228 int lb_idx2, ub_idx2;
1229 uint64_t marchid, mimpid;
1230 uint32_t mvendorid;
1231
1232 mvendorid = sbi_get_mvendorid();
1233 marchid = sbi_get_marchid();
1234 mimpid = sbi_get_mimpid();
1235
1236 /*
1237 * The T-Head cores implement a page attributes extension that
1238 * violates the RISC-V privileged architecture specification.
1239 * Work around this as best as we can by adding the
1240 * appropriate page attributes in a way that is mostly
1241 * compatible with the Svpbmt extension.
1242 */
1243 if (mvendorid == CPU_VENDOR_THEAD && marchid == 0 && mimpid == 0) {
1244 pmap_pma = PTE_THEAD_C | PTE_THEAD_B | PTE_THEAD_SH;
1245 pmap_nc = PTE_THEAD_B | PTE_THEAD_SH;
1246 pmap_io = PTE_THEAD_SO | PTE_THEAD_SH;
1247 }
1248
1249 pmap_setup_avail(memstart, memend, kvo);
1250 pmap_remove_avail(kernelstart + kvo, kernelend + kvo);
1251
1252 /*
1253 * The kernel is assumed to use 39-bit (or smaller) virtual addresses,
1254 * so translation starts from L1, not L0. Also, the kernel mappings may
1255 * not cover enough ram to bootstrap, so all accesses that initialize
1256 * the tables must be done via physical pointers.
1257 */
1258
1259 /* Map the physical memory range into the Direct Mapped Region. */
1260 pmap_bootstrap_dmap(l1pt, memstart, memend);
1261
1262 pt1pa = pmap_steal_avail(2 * sizeof(struct pmapvp1), Lx_TABLE_ALIGN,
1263 &va);
1264 vp1 = (struct pmapvp1 *) PHYS_TO_DMAP(pt1pa);
1265 pmap_kernel()->pm_vp.l1 = (struct pmapvp1 *)va;
1266 pmap_kernel()->pm_privileged = 1;
1267 pmap_kernel()->pm_satp = SATP_MODE_SV39 | /* ASID = 0 */
1268 ((PPN(pt1pa) & SATP_PPN_MASK) << SATP_PPN_SHIFT);
1269
1270 /* allocate memory (in unit of pages) for l2 and l3 page table */
1271 for (i = VP_IDX1(VM_MIN_KERNEL_ADDRESS);
1272 i <= VP_IDX1(pmap_maxkvaddr - 1);
1273 i++) {
1274 mappings_allocated++;
1275 pa = pmap_steal_avail(sizeof(struct pmapvp2), Lx_TABLE_ALIGN,
1276 &va);
1277 vp2 = (struct pmapvp2 *)PHYS_TO_DMAP(pa);
1278 vp1->vp[i] = va;
1279 vp1->l1[i] = VP_Lx(pa);
1280
1281 if (i == VP_IDX1(VM_MIN_KERNEL_ADDRESS)) {
1282 lb_idx2 = VP_IDX2(VM_MIN_KERNEL_ADDRESS);
1283 } else {
1284 lb_idx2 = 0;
1285 }
1286 if (i == VP_IDX1(pmap_maxkvaddr - 1)) {
1287 ub_idx2 = VP_IDX2(pmap_maxkvaddr - 1);
1288 } else {
1289 ub_idx2 = VP_IDX2_CNT - 1;
1290 }
1291 for (j = lb_idx2; j <= ub_idx2; j++) {
1292 mappings_allocated++;
1293 pa = pmap_steal_avail(sizeof(struct pmapvp3),
1294 Lx_TABLE_ALIGN, &va);
1295 vp3 = (struct pmapvp3 *)PHYS_TO_DMAP(pa);
1296 vp2->vp[j] = va;
1297 vp2->l2[j] = VP_Lx(pa);
1298 }
1299 }
1300 /* allocate memory for pte_desc */
1301 for (i = VP_IDX1(VM_MIN_KERNEL_ADDRESS);
1302 i <= VP_IDX1(pmap_maxkvaddr - 1);
1303 i++) {
1304 vp2 = (void *)PHYS_TO_DMAP((long)vp1->vp[i] + kvo);
1305
1306 if (i == VP_IDX1(VM_MIN_KERNEL_ADDRESS)) {
1307 lb_idx2 = VP_IDX2(VM_MIN_KERNEL_ADDRESS);
1308 } else {
1309 lb_idx2 = 0;
1310 }
1311 if (i == VP_IDX1(pmap_maxkvaddr - 1)) {
1312 ub_idx2 = VP_IDX2(pmap_maxkvaddr - 1);
1313 } else {
1314 ub_idx2 = VP_IDX2_CNT - 1;
1315 }
1316 for (j = lb_idx2; j <= ub_idx2; j++) {
1317 vp3 = (void *)PHYS_TO_DMAP((long)vp2->vp[j] + kvo);
1318
1319 for (k = 0; k <= VP_IDX3_CNT - 1; k++) {
1320 pted_allocated++;
1321 pa = pmap_steal_avail(sizeof(struct pte_desc),
1322 4, &va);
1323 pted = va;
1324 vp3->vp[k] = pted;
1325 }
1326 }
1327 }
1328
1329 pmap_avail_fixup();
1330
1331 /*
1332 * At this point we are still running on the bootstrap page
1333 * tables however all memory for the final page tables is
1334 * 'allocated' and should now be mapped. This means we are
1335 * able to use the virtual addressing to populate the final
1336 * mappings into the new mapping tables.
1337 */
1338 vstart = pmap_map_stolen(kernelstart);
1339
1340 /*
1341 * Temporarily add the Direct Map Area into the kernel pmap
1342 * such that we can continue to access stolen memory by
1343 * physical address.
1344 */
1345 pmap_bootstrap_dmap((vaddr_t)pmap_kernel()->pm_vp.l1, memstart, memend);
1346
1347 /* Switch to the new page tables. */
1348 uint64_t satp = pmap_kernel()->pm_satp;
1349 __asm volatile("csrw satp, %0" :: "r" (satp) : "memory");
1350 sfence_vma();
1351
1352 curcpu()->ci_curpm = pmap_kernel();
1353
1354 vmmap = vstart;
1355 vstart += PAGE_SIZE;
1356
1357 return vstart;
1358 }
1359
1360 void
1361 pmap_set_l2(struct pmap *pm, uint64_t va, struct pmapvp2 *l2_va, paddr_t l2_pa)
1362 {
1363 pt_entry_t pg_entry;
1364 struct pmapvp1 *vp1;
1365 int idx1;
1366
1367 if (l2_pa == 0) {
1368 /*
1369 * if this is called from pmap_vp_enter, this is a
1370 * normally mapped page, call pmap_extract to get pa
1371 */
1372 pmap_extract(pmap_kernel(), (vaddr_t)l2_va, &l2_pa);
1373 }
1374
1375 if (l2_pa & (Lx_TABLE_ALIGN-1))
1376 panic("misaligned L2 table");
1377
1378 pg_entry = VP_Lx(l2_pa);
1379
1380 idx1 = VP_IDX1(va);
1381 vp1 = pm->pm_vp.l1;
1382 vp1->vp[idx1] = l2_va;
1383 vp1->l1[idx1] = pg_entry;
1384 }
1385
1386 void
1387 pmap_set_l3(struct pmap *pm, uint64_t va, struct pmapvp3 *l3_va, paddr_t l3_pa)
1388 {
1389 pt_entry_t pg_entry;
1390 struct pmapvp1 *vp1;
1391 struct pmapvp2 *vp2;
1392 int idx1, idx2;
1393
1394 if (l3_pa == 0) {
1395 /*
1396 * if this is called from pmap_vp_enter, this is a
1397 * normally mapped page, call pmap_extract to get pa
1398 */
1399 pmap_extract(pmap_kernel(), (vaddr_t)l3_va, &l3_pa);
1400 }
1401
1402 if (l3_pa & (Lx_TABLE_ALIGN-1))
1403 panic("misaligned L3 table");
1404
1405 pg_entry = VP_Lx(l3_pa);
1406
1407 idx1 = VP_IDX1(va);
1408 idx2 = VP_IDX2(va);
1409 vp1 = pm->pm_vp.l1;
1410 vp2 = vp1->vp[idx1];
1411 vp2->vp[idx2] = l3_va;
1412 vp2->l2[idx2] = pg_entry;
1413 }
1414
1415 /*
1416 * activate a pmap entry
1417 */
1418 void
1419 pmap_activate(struct proc *p)
1420 {
1421 pmap_t pm = p->p_vmspace->vm_map.pmap;
1422 u_long sie;
1423
1424 sie = intr_disable();
1425 if (p == curproc && pm != curcpu()->ci_curpm)
1426 pmap_set_satp(p);
1427 intr_restore(sie);
1428 }
1429
1430 /*
1431 * deactivate a pmap entry
1432 */
1433 void
1434 pmap_deactivate(struct proc *p)
1435 {
1436 }
1437
1438 /*
1439 * Get the physical page address for the given pmap/virtual address.
1440 */
1441 int
1442 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pap)
1443 {
1444 struct pte_desc *pted;
1445 paddr_t pa;
1446
1447 pmap_lock(pm);
1448 pted = pmap_vp_lookup(pm, va, NULL);
1449 if (!pted || !PTED_VALID(pted)) {
1450 pmap_unlock(pm);
1451 return 0;
1452 }
1453 if (pap != NULL) {
1454 pa = pted->pted_pte & PTE_RPGN;
1455 if (pa >= pmap_uncached_start && pa <= pmap_uncached_end)
1456 pa -= (pmap_uncached_start - pmap_cached_start);
1457 *pap = pa | (va & PAGE_MASK);
1458 }
1459 pmap_unlock(pm);
1460
1461 return 1;
1462 }
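
/*
 * Typical use (sketch): callers that need the physical address behind a
 * mapped virtual address do something like
 *
 *	paddr_t pa;
 *	if (pmap_extract(pmap_kernel(), va, &pa) == 0)
 *		panic("unmapped va");
 *
 * which is how pmap_pinit() and pmap_growkernel() above resolve their
 * freshly allocated page table pages.
 */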
1463
1464 void
1465 pmap_page_ro(pmap_t pm, vaddr_t va, vm_prot_t prot)
1466 {
1467 struct pte_desc *pted;
1468 pt_entry_t *pl3;
1469
1470 /* Every VA needs a pted, even unmanaged ones. */
1471 pted = pmap_vp_lookup(pm, va, &pl3);
1472 if (!pted || !PTED_VALID(pted)) {
1473 return;
1474 }
1475
1476 pted->pted_va &= ~PROT_WRITE;
1477 pted->pted_pte &= ~PROT_WRITE;
1478 if ((prot & PROT_READ) == 0) {
1479 pted->pted_va &= ~PROT_READ;
1480 pted->pted_pte &= ~PROT_READ;
1481 }
1482 if ((prot & PROT_EXEC) == 0) {
1483 pted->pted_va &= ~PROT_EXEC;
1484 pted->pted_pte &= ~PROT_EXEC;
1485 }
1486 pmap_pte_update(pted, pl3);
1487 tlb_flush_page(pm, pted->pted_va & ~PAGE_MASK);
1488 }
1489
1490 /*
1491 * Lower the protection on the specified physical page.
1492 *
1493 * There are only two cases, either the protection is going to 0,
1494 * or it is going to read-only.
1495 */
1496 void
1497 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1498 {
1499 struct pte_desc *pted;
1500 struct pmap *pm;
1501
1502 if (prot != PROT_NONE) {
1503 mtx_enter(&pg->mdpage.pv_mtx);
1504 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1505 pmap_page_ro(pted->pted_pmap, pted->pted_va, prot);
1506 }
1507 mtx_leave(&pg->mdpage.pv_mtx);
1508 return;
1509 }
1510
1511 mtx_enter(&pg->mdpage.pv_mtx);
1512 while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
1513 pmap_reference(pted->pted_pmap);
1514 pm = pted->pted_pmap;
1515 mtx_leave(&pg->mdpage.pv_mtx);
1516
1517 pmap_lock(pm);
1518
1519 /*
1520 * We dropped the pvlist lock before grabbing the pmap
1521 * lock to avoid lock ordering problems. This means
1522 * we have to check the pvlist again since somebody
1523 * else might have modified it. All we care about is
1524 * that the pvlist entry matches the pmap we just
1525 * locked. If it doesn't, unlock the pmap and try
1526 * again.
1527 */
1528 mtx_enter(&pg->mdpage.pv_mtx);
1529 pted = LIST_FIRST(&(pg->mdpage.pv_list));
1530 if (pted == NULL || pted->pted_pmap != pm) {
1531 mtx_leave(&pg->mdpage.pv_mtx);
1532 pmap_unlock(pm);
1533 pmap_destroy(pm);
1534 mtx_enter(&pg->mdpage.pv_mtx);
1535 continue;
1536 }
1537 mtx_leave(&pg->mdpage.pv_mtx);
1538
1539 pmap_remove_pted(pm, pted);
1540 pmap_unlock(pm);
1541 pmap_destroy(pm);
1542
1543 mtx_enter(&pg->mdpage.pv_mtx);
1544 }
1545 /* page is being reclaimed, sync icache next use */
1546 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
1547 mtx_leave(&pg->mdpage.pv_mtx);
1548 }
1549
1550 void
1551 pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1552 {
1553 if (prot & (PROT_READ | PROT_EXEC)) {
1554 pmap_lock(pm);
1555 while (sva < eva) {
1556 pmap_page_ro(pm, sva, prot);
1557 sva += PAGE_SIZE;
1558 }
1559 pmap_unlock(pm);
1560 return;
1561 }
1562 pmap_remove(pm, sva, eva);
1563 }
1564
1565 void
1566 pmap_init(void)
1567 {
1568 struct pmapvp1 *kvp1;
1569 void *node;
1570
1571 node = fdt_find_node("/");
1572 if (fdt_is_compatible(node, "starfive,jh7100")) {
1573 pmap_cached_start = 0x0080000000ULL;
1574 pmap_cached_end = 0x087fffffffULL;
1575 pmap_uncached_start = 0x1000000000ULL;
1576 pmap_uncached_end = 0x17ffffffffULL;
1577 }
1578
1579 /* Clear DMAP PTEs. */
1580 kvp1 = pmap_kernel()->pm_vp.l1;
1581 memset(&kvp1->l1[L1_DMAP_BASE], 0,
1582 L1_DMAP_ENTRIES * sizeof(pt_entry_t));
1583 memset(&kvp1->vp[L1_DMAP_BASE], 0,
1584 L1_DMAP_ENTRIES * sizeof(struct pmapvp2 *));
1585 sfence_vma();
1586
1587 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
1588 "pmap", NULL);
1589 pool_setlowat(&pmap_pmap_pool, 2);
1590 pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
1591 "pted", NULL);
1592 pool_setlowat(&pmap_pted_pool, 20);
1593 pool_init(&pmap_vp_pool, sizeof(struct pmapvp1), PAGE_SIZE, IPL_VM, 0,
1594 "vp", &pmap_vp_allocator);
1595 pool_setlowat(&pmap_vp_pool, 20);
1596
1597 pmap_initialized = 1;
1598 }
1599
1600 void
1601 pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
1602 {
1603 icache_flush();
1604 }
1605
1606 void
1607 pmap_pte_insert(struct pte_desc *pted)
1608 {
1609 /* put entry into table */
1610 /* need to deal with ref/change here */
1611 pmap_t pm = pted->pted_pmap;
1612 pt_entry_t *pl3;
1613
1614 if (pmap_vp_lookup(pm, pted->pted_va, &pl3) == NULL) {
1615 panic("%s: have a pted, but missing a vp"
1616 " for %lx va pmap %p", __func__, pted->pted_va, pm);
1617 }
1618
1619 pmap_pte_update(pted, pl3);
1620 }
1621
1622 void
1623 pmap_pte_update(struct pte_desc *pted, uint64_t *pl3)
1624 {
1625 uint64_t pte, access_bits;
1626 pmap_t pm = pted->pted_pmap;
1627 uint64_t attr = 0;
1628
1629 switch (pted->pted_va & PMAP_CACHE_BITS) {
1630 case PMAP_CACHE_WB:
1631 attr |= pmap_pma;
1632 break;
1633 case PMAP_CACHE_CI:
1634 attr |= pmap_nc;
1635 break;
1636 case PMAP_CACHE_DEV:
1637 attr |= pmap_io;
1638 break;
1639 default:
1640 panic("%s: invalid cache mode", __func__);
1641 }
1642
1643 if (pm->pm_privileged)
1644 access_bits = ap_bits_kern[pted->pted_pte & PROT_MASK];
1645 else
1646 access_bits = ap_bits_user[pted->pted_pte & PROT_MASK];
1647
1648 pte = VP_Lx(pted->pted_pte) | attr | access_bits | PTE_V;
1649 *pl3 = access_bits ? pte : 0;
1650 }
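
/*
 * Note on the final store above (descriptive only): the PROT_* bits
 * kept in pted_pte reflect only the accesses that are enabled right
 * now, so a mapping that still needs referenced/modified emulation
 * looks like PROT_NONE here, access_bits is 0 and an invalid (zero)
 * PTE is written.  The first access then faults and pmap_fault_fixup()
 * upgrades the PTE.
 */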
1651
1652 void
1653 pmap_pte_remove(struct pte_desc *pted, int remove_pted)
1654 {
1655 struct pmapvp1 *vp1;
1656 struct pmapvp2 *vp2;
1657 struct pmapvp3 *vp3;
1658 pmap_t pm = pted->pted_pmap;
1659
1660 vp1 = pm->pm_vp.l1;
1661 if (vp1->vp[VP_IDX1(pted->pted_va)] == NULL) {
1662 panic("have a pted, but missing the l2 for %lx va pmap %p",
1663 pted->pted_va, pm);
1664 }
1665 vp2 = vp1->vp[VP_IDX1(pted->pted_va)];
1666 if (vp2 == NULL) {
1667 panic("have a pted, but missing the l2 for %lx va pmap %p",
1668 pted->pted_va, pm);
1669 }
1670 vp3 = vp2->vp[VP_IDX2(pted->pted_va)];
1671 if (vp3 == NULL) {
1672 panic("have a pted, but missing the l3 for %lx va pmap %p",
1673 pted->pted_va, pm);
1674 }
1675 vp3->l3[VP_IDX3(pted->pted_va)] = 0;
1676 if (remove_pted)
1677 vp3->vp[VP_IDX3(pted->pted_va)] = NULL;
1678 }
1679
1680 /*
1681 * This function exists to do software referenced/modified emulation.
1682 * Its purpose is to tell the caller that a fault was generated either
1683 * for this emulation, or to tell the caller that it's a legit fault.
1684 */
1685 int
1686 pmap_fault_fixup(pmap_t pm, vaddr_t va, vm_prot_t ftype)
1687 {
1688 struct pte_desc *pted;
1689 struct vm_page *pg;
1690 paddr_t pa;
1691 pt_entry_t *pl3 = NULL;
1692 int retcode = 0;
1693
1694 pmap_lock(pm);
1695
1696 /* Every VA needs a pted, even unmanaged ones. */
1697 pted = pmap_vp_lookup(pm, va, &pl3);
1698 if (!pted || !PTED_VALID(pted))
1699 goto done;
1700
1701 /* There has to be a PA for the VA, get it. */
1702 pa = (pted->pted_pte & PTE_RPGN);
1703
1704 /* If it's unmanaged, it must not fault. */
1705 pg = PHYS_TO_VM_PAGE(pa);
1706 if (pg == NULL)
1707 goto done;
1708
1709 /*
1710 * Check the fault types to find out if we were doing
1711 * any mod/ref emulation and fixup the PTE if we were.
1712 */
1713 if ((ftype & PROT_WRITE) && /* fault caused by a write */
1714 !(pted->pted_pte & PROT_WRITE) && /* and write is disabled now */
1715 (pted->pted_va & PROT_WRITE)) { /* but is supposedly allowed */
1716
1717 /*
1718 * Page modified emulation. A write always includes
1719 * a reference. This means that we can enable read and
1720 * exec as well, akin to the page reference emulation.
1721 */
1722 atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD|PG_PMAP_REF);
1723 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
1724
1725 /* Thus, enable read, write and exec. */
1726 pted->pted_pte |=
1727 (pted->pted_va & (PROT_READ|PROT_WRITE|PROT_EXEC));
1728 } else if ((ftype & PROT_EXEC) && /* fault caused by an exec */
1729 !(pted->pted_pte & PROT_EXEC) && /* and exec is disabled now */
1730 (pted->pted_va & PROT_EXEC)) { /* but is supposedly allowed */
1731
1732 /*
1733 * Exec always includes a reference. Since we now know
1734 * the page has been accessed, we can enable read as well
1735 * if UVM allows it.
1736 */
1737 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
1738
1739 /* Thus, enable read and exec. */
1740 pted->pted_pte |= (pted->pted_va & (PROT_READ|PROT_EXEC));
1741 } else if ((ftype & PROT_READ) && /* fault caused by a read */
1742 !(pted->pted_pte & PROT_READ) && /* and read is disabled now */
1743 (pted->pted_va & PROT_READ)) { /* but is supposedly allowed */
1744
1745 /*
1746 * Page referenced emulation. Since we now know the page
1747 * has been accessed, we can enable exec as well if UVM
1748 * allows it.
1749 */
1750 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
1751
1752 /* Thus, enable read and exec. */
1753 pted->pted_pte |= (pted->pted_va & (PROT_READ|PROT_EXEC));
1754 } else {
1755 /* didn't catch it, so probably broken */
1756 goto done;
1757 }
1758
1759 /*
1760 * If this is a page that can be executed, make sure to invalidate
1761 * the instruction cache if the page has been modified or not used
1762 * yet.
1763 */
1764 if (pted->pted_va & PROT_EXEC) {
1765 if ((pg->pg_flags & PG_PMAP_EXE) == 0)
1766 icache_flush();
1767 atomic_setbits_int(&pg->pg_flags, PG_PMAP_EXE);
1768 }
1769
1770 /* We actually made a change, so flush it and sync. */
1771 pmap_pte_update(pted, pl3);
1772 tlb_flush_page(pm, va & ~PAGE_MASK);
1773
1774 retcode = 1;
1775 done:
1776 pmap_unlock(pm);
1777 return retcode;
1778 }
1779
1780 void
1781 pmap_postinit(void)
1782 {
1783 vaddr_t minaddr, maxaddr;
1784 u_long npteds, npages;
1785
1786 /*
1787 * Reserve enough virtual address space to grow the kernel
1788 * page tables. We need a descriptor for each page as well as
1789 * an extra page for level 1/2/3 page tables for management.
1790 * To simplify the code, we always allocate full tables at
1791 * level 3, so take that into account.
1792 */
1793 npteds = (VM_MAX_KERNEL_ADDRESS - pmap_maxkvaddr + 1) / PAGE_SIZE;
1794 npteds = roundup(npteds, VP_IDX3_CNT);
1795 npages = howmany(npteds, PAGE_SIZE / (sizeof(struct pte_desc)));
1796 npages += 2 * howmany(npteds, VP_IDX3_CNT);
1797 npages += 2 * howmany(npteds, VP_IDX3_CNT * VP_IDX2_CNT);
1798 npages += 2 * howmany(npteds, VP_IDX3_CNT * VP_IDX2_CNT * VP_IDX1_CNT);
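
/*
 * Illustrative arithmetic (assuming VP_IDX3_CNT == 512 and the 2-page
 * vp structures asserted earlier in this file): a fully populated L3
 * table covers 512 pages (2 MB with 4 KB pages) of KVA and costs 512
 * pte_desc entries plus two pages for the pmapvp3 itself, which is the
 * "2 * howmany(npteds, VP_IDX3_CNT)" term; the L2 and L1 terms repeat
 * the same accounting one and two levels up.
 */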
1799
1800 /*
1801 * Use an interrupt safe map such that we don't recurse into
1802 * uvm_map() to allocate map entries.
1803 */
1804 minaddr = vm_map_min(kernel_map);
1805 pmap_kvp_map = uvm_km_suballoc(kernel_map, &minaddr, &maxaddr,
1806 npages * PAGE_SIZE, VM_MAP_INTRSAFE, FALSE, NULL);
1807 }
1808
1809 void
1810 pmap_init_percpu(void)
1811 {
1812 pool_cache_init(&pmap_pted_pool);
1813 pool_cache_init(&pmap_vp_pool);
1814 }
1815
1816 void
1817 pmap_update(pmap_t pm)
1818 {
1819 }
1820
1821 int
1822 pmap_is_referenced(struct vm_page *pg)
1823 {
1824 return ((pg->pg_flags & PG_PMAP_REF) != 0);
1825 }
1826
1827 int
1828 pmap_is_modified(struct vm_page *pg)
1829 {
1830 return ((pg->pg_flags & PG_PMAP_MOD) != 0);
1831 }
1832
1833 int
1834 pmap_clear_modify(struct vm_page *pg)
1835 {
1836 struct pte_desc *pted;
1837
1838 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);
1839
1840 mtx_enter(&pg->mdpage.pv_mtx);
1841 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1842 pted->pted_pte &= ~PROT_WRITE;
1843 pmap_pte_insert(pted);
1844 tlb_flush_page(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
1845 }
1846 mtx_leave(&pg->mdpage.pv_mtx);
1847
1848 return 0;
1849 }
1850
1851 /*
1852 * When this turns off read permissions it also disables write permissions
1853 * so that mod is correctly tracked after clear_ref; FAULT_READ; FAULT_WRITE;
1854 */
1855 int
1856 pmap_clear_reference(struct vm_page *pg)
1857 {
1858 struct pte_desc *pted;
1859
1860 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);
1861
1862 mtx_enter(&pg->mdpage.pv_mtx);
1863 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1864 pted->pted_pte &= ~PROT_MASK;
1865 pmap_pte_insert(pted);
1866 tlb_flush_page(pted->pted_pmap, pted->pted_va & ~PAGE_MASK);
1867 }
1868 mtx_leave(&pg->mdpage.pv_mtx);
1869
1870 return 0;
1871 }
1872
1873 void
1874 pmap_unwire(pmap_t pm, vaddr_t va)
1875 {
1876 struct pte_desc *pted;
1877
1878 pmap_lock(pm);
1879 pted = pmap_vp_lookup(pm, va, NULL);
1880 if (pted != NULL && PTED_WIRED(pted)) {
1881 pm->pm_stats.wired_count--;
1882 pted->pted_va &= ~PTED_VA_WIRED_M;
1883 }
1884 pmap_unlock(pm);
1885 }
1886
1887 void
1888 pmap_remove_holes(struct vmspace *vm)
1889 {
1890 /* NOOP */
1891 }
1892
1893 void
1894 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
1895 {
1896 *start = virtual_avail;
1897 *end = VM_MAX_KERNEL_ADDRESS;
1898
1899 /* Prevent further KVA stealing. */
1900 pmap_virtual_space_called = 1;
1901 }
1902
1903 void
1904 pmap_setup_avail(uint64_t memstart, uint64_t memend, uint64_t kvo)
1905 {
1906 /* This makes several assumptions
1907 * 1) kernel will be located 'low' in memory
1908 * 2) memory will not start at VM_MIN_KERNEL_ADDRESS
1909 * 3) several MB of memory starting just after the kernel will
1910 * be premapped at the kernel address in the bootstrap mappings
1911 * 4) kvo will be the 64 bit number to add to the ram address to
1912 * obtain the kernel virtual mapping of the ram. KVO == PA -> VA
1913 * 5) it is generally assumed that these translations will occur with
1914 * large granularity, at minimum the translation will be page
1915 * aligned, if not 'section' or greater.
1916 */
1917
1918 pmap_avail_kvo = kvo;
1919 pmap_avail[0].start = memstart;
1920 pmap_avail[0].size = memend - memstart;
1921 pmap_cnt_avail = 1;
1922
1923 pmap_avail_fixup();
1924 }
1925
1926 void
1927 pmap_avail_fixup(void)
1928 {
1929 struct mem_region *mp;
1930 vaddr_t align;
1931 vaddr_t end;
1932
1933 mp = pmap_avail;
1934 while (mp->size !=0) {
1935 align = round_page(mp->start);
1936 if (mp->start != align) {
1937 pmap_remove_avail(mp->start, align);
1938 mp = pmap_avail;
1939 continue;
1940 }
1941 end = mp->start+mp->size;
1942 align = trunc_page(end);
1943 if (end != align) {
1944 pmap_remove_avail(align, end);
1945 mp = pmap_avail;
1946 continue;
1947 }
1948 mp++;
1949 }
1950 }
1951
1952 /* remove a given region from avail memory */
1953 void
1954 pmap_remove_avail(paddr_t base, paddr_t end)
1955 {
1956 struct mem_region *mp;
1957 int i;
1958 long mpend;
1959
1960 /* remove given region from available */
1961 for (mp = pmap_avail; mp->size; mp++) {
1962 /*
1963 * Check if this region holds all of the region
1964 */
1965 mpend = mp->start + mp->size;
1966 if (base > mpend) {
1967 continue;
1968 }
1969 if (base <= mp->start) {
1970 if (end <= mp->start)
1971 break; /* region not present -??? */
1972
1973 if (end >= mpend) {
1974 /* covers whole region */
1975 /* shorten */
1976 for (i = mp - pmap_avail;
1977 i < pmap_cnt_avail;
1978 i++) {
1979 pmap_avail[i] = pmap_avail[i+1];
1980 }
1981 pmap_cnt_avail--;
1982 pmap_avail[pmap_cnt_avail].size = 0;
1983 } else {
1984 mp->start = end;
1985 mp->size = mpend - end;
1986 }
1987 } else {
1988 /* start after the beginning */
1989 if (end >= mpend) {
1990 /* just truncate */
1991 mp->size = base - mp->start;
1992 } else {
1993 /* split */
1994 for (i = pmap_cnt_avail;
1995 i > (mp - pmap_avail);
1996 i--) {
1997 pmap_avail[i] = pmap_avail[i - 1];
1998 }
1999 pmap_cnt_avail++;
2000 mp->size = base - mp->start;
2001 mp++;
2002 mp->start = end;
2003 mp->size = mpend - end;
2004 }
2005 }
2006 }
2007 for (mp = pmap_allocated; mp->size != 0; mp++) {
2008 if (base < mp->start) {
2009 if (end == mp->start) {
2010 mp->start = base;
2011 mp->size += end - base;
2012 break;
2013 }
2014 /* lengthen */
2015 for (i = pmap_cnt_allocated; i > (mp - pmap_allocated);
2016 i--) {
2017 pmap_allocated[i] = pmap_allocated[i - 1];
2018 }
2019 pmap_cnt_allocated++;
2020 mp->start = base;
2021 mp->size = end - base;
2022 return;
2023 }
2024 if (base == (mp->start + mp->size)) {
2025 mp->size += end - base;
2026 return;
2027 }
2028 }
2029 if (mp->size == 0) {
2030 mp->start = base;
2031 mp->size = end - base;
2032 pmap_cnt_allocated++;
2033 }
2034 }
2035
2036 /* XXX - this zeros pages via their physical address */
2037 paddr_t
2038 pmap_steal_avail(size_t size, int align, void **kva)
2039 {
2040 struct mem_region *mp;
2041 long start;
2042 long remsize;
2043
2044 for (mp = pmap_avail; mp->size; mp++) {
2045 if (mp->size > size) {
2046 start = (mp->start + (align -1)) & ~(align -1);
2047 remsize = mp->size - (start - mp->start);
2048 if (remsize >= 0) {//XXX buggy?? should be remsize >= size
2049 pmap_remove_avail(start, start+size);
2050 if (kva != NULL){
2051 *kva = (void *)(start - pmap_avail_kvo);
2052 }
2053 // XXX We clear the page based on its Direct
2054 // Mapped address for now. Physical Addresses
2055 // are not available because we have unmapped
2056 // our identity mapped kernel. Should consider
2057 // if we need to keep the identity mapping
2058 // during pmap bootstrapping.
2059 vaddr_t start_dmap = PHYS_TO_DMAP(start);
2060 bzero((void*)(start_dmap), size);
2061 return start;
2062 }
2063 }
2064 }
2065 panic("unable to allocate region with size %lx align %x",
2066 size, align);
2067 }
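
/*
 * Example of the bootstrap allocation pattern (sketch): the caller gets
 * back the physical address and, via *kva, the virtual address the
 * region will have once the stolen ranges are mapped, e.g.
 *
 *	pa = pmap_steal_avail(sizeof(struct pmapvp2), Lx_TABLE_ALIGN, &va);
 *	vp2 = (struct pmapvp2 *)PHYS_TO_DMAP(pa);
 *	vp1->vp[i] = va;
 *
 * where the DMAP pointer is usable immediately and the stolen va only
 * becomes valid once pmap_map_stolen() has run, as in pmap_bootstrap()
 * above.
 */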
2068
2069 vaddr_t
2070 pmap_map_stolen(vaddr_t kernel_start)
2071 {
2072 struct mem_region *mp;
2073 paddr_t pa;
2074 vaddr_t va;
2075 uint64_t e;
2076
2077 for (mp = pmap_allocated; mp->size; mp++) {
2078 for (e = 0; e < mp->size; e += PAGE_SIZE) {
2079 int prot = PROT_READ | PROT_WRITE;
2080
2081 pa = mp->start + e;
2082 va = pa - pmap_avail_kvo;
2083
2084 if (va < VM_MIN_KERNEL_ADDRESS ||
2085 va >= VM_MAX_KERNEL_ADDRESS)
2086 continue;
2087
2088 if (va >= (vaddr_t)__text_start &&
2089 va < (vaddr_t)_etext)
2090 prot = PROT_READ | PROT_EXEC;
2091 else if (va >= (vaddr_t)__rodata_start &&
2092 va < (vaddr_t)_erodata)
2093 prot = PROT_READ;
2094
2095 pmap_kenter_cache(va, pa, prot, PMAP_CACHE_WB);
2096 }
2097 }
2098
2099 return va + PAGE_SIZE;
2100 }
2101
2102 void
2103 pmap_physload_avail(void)
2104 {
2105 struct mem_region *mp;
2106 uint64_t start, end;
2107
2108 for (mp = pmap_avail; mp->size; mp++) {
2109 if (mp->size < PAGE_SIZE) {
2110 printf(" skipped - too small\n");
2111 continue;
2112 }
2113 start = mp->start;
2114 if (start & PAGE_MASK) {
2115 start = PAGE_SIZE + (start & PMAP_PA_MASK);
2116 }
2117 end = mp->start + mp->size;
2118 if (end & PAGE_MASK) {
2119 end = (end & PMAP_PA_MASK);
2120 }
2121 uvm_page_physload(atop(start), atop(end),
2122 atop(start), atop(end), 0);
2123
2124 }
2125 }
2126
2127 void
2128 pmap_show_mapping(uint64_t va)
2129 {
2130 struct pmapvp1 *vp1;
2131 struct pmapvp2 *vp2;
2132 struct pmapvp3 *vp3;
2133 struct pte_desc *pted;
2134 struct pmap *pm;
2135 uint64_t satp;
2136
2137 printf("showing mapping of %llx\n", va);
2138
2139 if (va & 1ULL << 63)
2140 pm = pmap_kernel();
2141 else
2142 pm = curproc->p_vmspace->vm_map.pmap;
2143
2144 vp1 = pm->pm_vp.l1;
2145
2146 __asm volatile ("csrr %0, satp" : "=r" (satp));
2147 printf(" satp %llx %llx\n", satp, SATP_PPN(pm->pm_satp) << PAGE_SHIFT);
2148 printf(" vp1 = %p\n", vp1);
2149
2150 vp2 = vp1->vp[VP_IDX1(va)];
2151 printf(" vp2 = %p lp2 = %llx idx1 off %x\n",
2152 vp2, vp1->l1[VP_IDX1(va)], VP_IDX1(va)*8);
2153 if (vp2 == NULL)
2154 return;
2155
2156 vp3 = vp2->vp[VP_IDX2(va)];
2157 printf(" vp3 = %p lp3 = %llx idx2 off %x\n",
2158 vp3, vp2->l2[VP_IDX2(va)], VP_IDX2(va)*8);
2159 if (vp3 == NULL)
2160 return;
2161
2162 pted = vp3->vp[VP_IDX3(va)];
2163 printf(" pted = %p lp3 = %llx idx3 off %x\n",
2164 pted, vp3->l3[VP_IDX3(va)], VP_IDX3(va)*8);
2165 }
2166
2167 void
2168 pmap_set_satp(struct proc *p)
2169 {
2170 struct cpu_info *ci = curcpu();
2171 pmap_t pm = p->p_vmspace->vm_map.pmap;
2172
2173 ci->ci_curpm = pm;
2174 load_satp(pm->pm_satp);
2175 sfence_vma();
2176 }
2177