/*	$OpenBSD: pmap.c,v 1.62 2024/06/04 17:31:59 gkoehler Exp $	*/

/*
 * Copyright (c) 2015 Martin Pieuchot
 * Copyright (c) 2001, 2002, 2007 Dale Rahn.
 * All rights reserved.
 *
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * Effort sponsored in part by the Defense Advanced Research Projects
 * Agency (DARPA) and Air Force Research Laboratory, Air Force
 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
 */

/*
 * Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/pool.h>
#include <sys/proc.h>
#include <sys/user.h>

#include <uvm/uvm_extern.h>

#include <machine/cpufunc.h>
#include <machine/pcb.h>
#include <machine/pmap.h>
#include <machine/pte.h>

#include <dev/ofw/fdt.h>

extern char _start[], _etext[], _erodata[], _end[];

#ifdef MULTIPROCESSOR

struct mutex pmap_hash_lock = MUTEX_INITIALIZER(IPL_HIGH);

#define PMAP_HASH_LOCK(s)				\
do {							\
	(void)s;					\
	mtx_enter(&pmap_hash_lock);			\
} while (0)

#define PMAP_HASH_UNLOCK(s)				\
do {							\
	mtx_leave(&pmap_hash_lock);			\
} while (0)

#define PMAP_VP_LOCK_INIT(pm)	mtx_init(&pm->pm_mtx, IPL_VM)

#define PMAP_VP_LOCK(pm)				\
do {							\
	if (pm != pmap_kernel())			\
		mtx_enter(&pm->pm_mtx);			\
} while (0)

#define PMAP_VP_UNLOCK(pm)				\
do {							\
	if (pm != pmap_kernel())			\
		mtx_leave(&pm->pm_mtx);			\
} while (0)

#define PMAP_VP_ASSERT_LOCKED(pm)			\
do {							\
	if (pm != pmap_kernel())			\
		MUTEX_ASSERT_LOCKED(&pm->pm_mtx);	\
} while (0)

#else

#define PMAP_HASH_LOCK(s)		(void)s
#define PMAP_HASH_UNLOCK(s)		/* nothing */

#define PMAP_VP_LOCK_INIT(pm)		/* nothing */
#define PMAP_VP_LOCK(pm)		/* nothing */
#define PMAP_VP_UNLOCK(pm)		/* nothing */
#define PMAP_VP_ASSERT_LOCKED(pm)	/* nothing */

#endif

struct pmap kernel_pmap_store;

struct pte *pmap_ptable;
int	pmap_ptab_cnt;
uint64_t pmap_ptab_mask;

#define HTABMEMSZ	(pmap_ptab_cnt * 8 * sizeof(struct pte))
#define HTABSIZE	(ffs(pmap_ptab_cnt) - 12)

struct pate *pmap_pat;

#define PATMEMSZ	(64 * 1024)
#define PATSIZE		(ffs(PATMEMSZ) - 12)
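
/*
 * The hardware wants these table sizes encoded as exponents rather
 * than byte counts; for a power of two n, ffs(n) == log2(n) + 1.
 * HTABSIZE works out to log2(HTABMEMSZ) - 18, the format SDR1 expects
 * (the architected minimum hash table is 2^18 bytes), and PATSIZE is
 * presumably the analogous encoding for the partition table control
 * register.
 */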

struct pte_desc {
	/* Linked list of phys -> virt entries */
	LIST_ENTRY(pte_desc) pted_pv_list;
	struct pte pted_pte;
	pmap_t pted_pmap;
	vaddr_t pted_va;
	uint64_t pted_vsid;
};

#define PTED_VA_PTEGIDX_M	0x07
#define PTED_VA_HID_M		0x08
#define PTED_VA_MANAGED_M	0x10
#define PTED_VA_WIRED_M		0x20
#define PTED_VA_EXEC_M		0x40

void	pmap_pted_syncicache(struct pte_desc *);
void	pmap_flush_page(struct vm_page *);

struct slb_desc {
	LIST_ENTRY(slb_desc) slbd_list;
	uint64_t slbd_esid;
	uint64_t slbd_vsid;
	struct pmapvp1 *slbd_vp;
};

/* Preallocated SLB entries for the kernel. */
struct slb_desc	kernel_slb_desc[16 + VM_KERNEL_SPACE_SIZE / SEGMENT_SIZE];

struct slb_desc *pmap_slbd_lookup(pmap_t, vaddr_t);

struct pmapvp1 {
	struct pmapvp2 *vp[VP_IDX1_CNT];
};

struct pmapvp2 {
	struct pte_desc *vp[VP_IDX2_CNT];
};

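/*
 * Both levels of the VP tree are allocated from the same pool
 * (pmap_vp_pool below), so the two structs must have the same size;
 * the assertion makes that assumption explicit.
 */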
CTASSERT(sizeof(struct pmapvp1) == sizeof(struct pmapvp2));

static inline int
VP_IDX1(vaddr_t va)
{
	return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
}

static inline int
VP_IDX2(vaddr_t va)
{
	return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
}

void	pmap_vp_destroy(pmap_t);
void	pmap_release(pmap_t);

struct pool pmap_pmap_pool;
struct pool pmap_vp_pool;
struct pool pmap_pted_pool;
struct pool pmap_slbd_pool;

int pmap_initialized = 0;

/*
 * We use only 4K pages and 256MB segments.  That means p = b = 12 and
 * s = 28.
 */

#define KERNEL_VSID_BIT		0x0000001000000000ULL
#define VSID_HASH_MASK		0x0000007fffffffffULL

static inline int
PTED_HID(struct pte_desc *pted)
{
	return !!(pted->pted_va & PTED_VA_HID_M);
}

static inline int
PTED_PTEGIDX(struct pte_desc *pted)
{
	return (pted->pted_va & PTED_VA_PTEGIDX_M);
}

static inline int
PTED_MANAGED(struct pte_desc *pted)
{
	return !!(pted->pted_va & PTED_VA_MANAGED_M);
}

static inline int
PTED_WIRED(struct pte_desc *pted)
{
	return !!(pted->pted_va & PTED_VA_WIRED_M);
}

static inline int
PTED_VALID(struct pte_desc *pted)
{
	return !!(pted->pted_pte.pte_hi & PTE_VALID);
}

#define TLBIEL_MAX_SETS		4096
#define TLBIEL_SET_SHIFT	12
#define TLBIEL_INVAL_SET	(0x3 << 10)

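/*
 * Flush the entire local TLB.  tlbiel invalidates one congruence
 * class (set) per instruction, so we step through every set; the
 * IS field encoded in TLBIEL_INVAL_SET asks for the whole set to be
 * invalidated regardless of what it matches.
 */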
void
tlbia(void)
{
	int set;

	for (set = 0; set < TLBIEL_MAX_SETS; set++)
		tlbiel((set << TLBIEL_SET_SHIFT) | TLBIEL_INVAL_SET);
}

/*
 * Return AVA for use with TLB invalidate instructions.
 */
static inline uint64_t
pmap_ava(uint64_t vsid, vaddr_t va)
{
	return ((vsid << ADDR_VSID_SHIFT) | (va & ADDR_PIDX));
}

/*
 * Return AVA for a PTE descriptor.
 */
static inline uint64_t
pmap_pted2ava(struct pte_desc *pted)
{
	return pmap_ava(pted->pted_vsid, pted->pted_va);
}

/*
 * Return the top 64 bits of the (80-bit) VPN for a PTE descriptor.
 */
static inline uint64_t
pmap_pted2avpn(struct pte_desc *pted)
{
	return (pted->pted_vsid << (PTE_VSID_SHIFT) |
	    (pted->pted_va & ADDR_PIDX) >>
	    (ADDR_VSID_SHIFT - PTE_VSID_SHIFT));
}

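/*
 * Derive the VSID for a kernel segment directly from the ESID.  The
 * shift-and-or appears intended as a cheap rotation of the ESID bits
 * and the multiplication scatters the result, much like the
 * KERNEL_VSID hash other BSDs use on this hardware; setting
 * KERNEL_VSID_BIT keeps the result disjoint from user VSIDs, which
 * pmap_alloc_vsid() hands out below that bit.
 */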
static inline uint64_t
pmap_kernel_vsid(uint64_t esid)
{
	uint64_t vsid;
	vsid = (((esid << 8) | (esid >> 28)) * 0x13bb) & (KERNEL_VSID_BIT - 1);
	return vsid | KERNEL_VSID_BIT;
}

static inline uint64_t
pmap_va2vsid(pmap_t pm, vaddr_t va)
{
	uint64_t esid = va >> ADDR_ESID_SHIFT;
	struct slb_desc *slbd;

	if (pm == pmap_kernel())
		return pmap_kernel_vsid(esid);

	slbd = pmap_slbd_lookup(pm, va);
	if (slbd)
		return slbd->slbd_vsid;

	return 0;
}

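/*
 * Locate the hash table slot a descriptor was inserted into, if it
 * is still there.  The secondary PTEG index is the bitwise
 * complement (within pmap_ptab_mask) of the primary one, which is
 * why XORing with the mask flips between the two groups.
 */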
struct pte *
pmap_ptedinhash(struct pte_desc *pted)
{
	struct pte *pte;
	vaddr_t va;
	uint64_t vsid, hash;
	int idx;

	va = pted->pted_va & ~PAGE_MASK;
	vsid = pted->pted_vsid;
	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
	idx = (hash & pmap_ptab_mask);

	idx ^= (PTED_HID(pted) ? pmap_ptab_mask : 0);
	pte = pmap_ptable + (idx * 8);
	pte += PTED_PTEGIDX(pted); /* increment by index into pteg */

	/*
	 * We now have the pointer to where it will be, if it is
	 * currently mapped.  If the mapping was thrown away in
	 * exchange for another page mapping, then this page is not
	 * currently in the hash.
	 */
	if ((pted->pted_pte.pte_hi |
	    (PTED_HID(pted) ? PTE_HID : 0)) == pte->pte_hi)
		return pte;

	return NULL;
}

struct slb_desc *
pmap_slbd_lookup(pmap_t pm, vaddr_t va)
{
	uint64_t esid = va >> ADDR_ESID_SHIFT;
	struct slb_desc *slbd;

	PMAP_VP_ASSERT_LOCKED(pm);

	LIST_FOREACH(slbd, &pm->pm_slbd, slbd_list) {
		if (slbd->slbd_esid == esid)
			return slbd;
	}

	return NULL;
}

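/*
 * Remember the segment translation in the current process's PCB so
 * that the low-level trap code, which presumably reloads pcb_slb
 * into the hardware SLB, finds it.  If every PCB slot is taken,
 * evict a victim at random; the evicted segment simply faults back
 * in through pmap_slbd_fault().
 */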
void
pmap_slbd_cache(pmap_t pm, struct slb_desc *slbd)
{
	struct pcb *pcb = &curproc->p_addr->u_pcb;
	uint64_t slbe, slbv;
	int idx;

	KASSERT(curproc->p_vmspace->vm_map.pmap == pm);

	for (idx = 0; idx < nitems(pcb->pcb_slb); idx++) {
		if (pcb->pcb_slb[idx].slb_slbe == 0)
			break;
	}
	if (idx == nitems(pcb->pcb_slb))
		idx = arc4random_uniform(nitems(pcb->pcb_slb));

	slbe = (slbd->slbd_esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
	slbv = slbd->slbd_vsid << SLBV_VSID_SHIFT;

	pcb->pcb_slb[idx].slb_slbe = slbe;
	pcb->pcb_slb[idx].slb_slbv = slbv;
}

int
pmap_slbd_fault(pmap_t pm, vaddr_t va)
{
	struct slb_desc *slbd;

	PMAP_VP_LOCK(pm);
	slbd = pmap_slbd_lookup(pm, va);
	if (slbd) {
		pmap_slbd_cache(pm, slbd);
		PMAP_VP_UNLOCK(pm);
		return 0;
	}
	PMAP_VP_UNLOCK(pm);

	return EFAULT;
}

#define NUM_VSID	(1 << 20)
uint32_t pmap_vsid[NUM_VSID / 32];

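/*
 * Allocate a user VSID from the bitmap above.  Probing at random and
 * publishing the bit with a compare-and-swap lets this run without a
 * lock: if another CPU changed the word between the read and the
 * CAS, the CAS fails and we try again with a fresh candidate.
 */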
uint64_t
pmap_alloc_vsid(void)
{
	uint32_t bits;
	uint32_t vsid, bit;

	for (;;) {
		do {
			vsid = arc4random() & (NUM_VSID - 1);
			bit = (vsid & (32 - 1));
			bits = pmap_vsid[vsid / 32];
		} while (bits & (1U << bit));

		if (atomic_cas_uint(&pmap_vsid[vsid / 32], bits,
		    bits | (1U << bit)) == bits)
			return vsid;
	}
}

void
pmap_free_vsid(uint64_t vsid)
{
	uint32_t bits;
	int bit;

	KASSERT(vsid < NUM_VSID);

	bit = (vsid & (32 - 1));
	for (;;) {
		bits = pmap_vsid[vsid / 32];
		if (atomic_cas_uint(&pmap_vsid[vsid / 32], bits,
		    bits & ~(1U << bit)) == bits)
			break;
	}
}

struct slb_desc *
pmap_slbd_alloc(pmap_t pm, vaddr_t va)
{
	uint64_t esid = va >> ADDR_ESID_SHIFT;
	struct slb_desc *slbd;

	KASSERT(pm != pmap_kernel());
	PMAP_VP_ASSERT_LOCKED(pm);

	slbd = pool_get(&pmap_slbd_pool, PR_NOWAIT | PR_ZERO);
	if (slbd == NULL)
		return NULL;

	slbd->slbd_esid = esid;
	slbd->slbd_vsid = pmap_alloc_vsid();
	KASSERT((slbd->slbd_vsid & KERNEL_VSID_BIT) == 0);
	LIST_INSERT_HEAD(&pm->pm_slbd, slbd, slbd_list);

	/* We're almost certainly going to use it soon. */
	pmap_slbd_cache(pm, slbd);

	return slbd;
}

int
pmap_slbd_enter(pmap_t pm, vaddr_t va)
{
	struct slb_desc *slbd;

	PMAP_VP_LOCK(pm);
	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL)
		slbd = pmap_slbd_alloc(pm, va);
	PMAP_VP_UNLOCK(pm);

	return slbd ? 0 : EFAULT;
}

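/*
 * Map one 256MB user segment into the kernel's address space by
 * claiming the last hardware SLB slot (31), giving the kernel a
 * window at USER_ADDR onto the user segment containing va; this
 * appears to be what copyin/copyout-style access to user memory
 * relies on.  Returns the kernel VA of the window and how much of
 * the segment lies beyond va.
 */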
int
pmap_set_user_slb(pmap_t pm, vaddr_t va, vaddr_t *kva, vsize_t *len)
{
	struct cpu_info *ci = curcpu();
	struct slb_desc *slbd;
	uint64_t slbe, slbv;
	uint64_t vsid;

	KASSERT(pm != pmap_kernel());

	PMAP_VP_LOCK(pm);
	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL) {
		slbd = pmap_slbd_alloc(pm, va);
		if (slbd == NULL) {
			PMAP_VP_UNLOCK(pm);
			return EFAULT;
		}
	}
	vsid = slbd->slbd_vsid;
	PMAP_VP_UNLOCK(pm);

	/*
	 * We might get here while another process is sleeping while
	 * handling a page fault.  Kill their SLB entry before
	 * inserting our own.
	 */
	if (ci->ci_kernel_slb[31].slb_slbe != 0) {
		isync();
		slbie(ci->ci_kernel_slb[31].slb_slbe);
		isync();
	}

	slbe = (USER_ESID << SLBE_ESID_SHIFT) | SLBE_VALID | 31;
	slbv = vsid << SLBV_VSID_SHIFT;

	ci->ci_kernel_slb[31].slb_slbe = slbe;
	ci->ci_kernel_slb[31].slb_slbv = slbv;

	isync();
	slbmte(slbv, slbe);
	isync();

	curpcb->pcb_userva = (va & ~SEGMENT_MASK);

	if (kva)
		*kva = USER_ADDR | (va & SEGMENT_MASK);
	if (len)
		*len = SEGMENT_SIZE - (va & SEGMENT_MASK);

	return 0;
}

void
pmap_clear_user_slb(void)
{
	struct cpu_info *ci = curcpu();

	if (ci->ci_kernel_slb[31].slb_slbe != 0) {
		isync();
		slbie(ci->ci_kernel_slb[31].slb_slbe);
		isync();
	}

	ci->ci_kernel_slb[31].slb_slbe = 0;
	ci->ci_kernel_slb[31].slb_slbv = 0;
}

void
pmap_unset_user_slb(void)
{
	curpcb->pcb_userva = 0;
	pmap_clear_user_slb();
}

/*
 * VP routines, virtual to physical translation information.
 * These data structures are based off of the pmap, per process.
 */

struct pte_desc *
pmap_vp_lookup(pmap_t pm, vaddr_t va)
{
	struct slb_desc *slbd;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;

	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL)
		return NULL;

	vp1 = slbd->slbd_vp;
	if (vp1 == NULL)
		return NULL;

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL)
		return NULL;

	return vp2->vp[VP_IDX2(va)];
}

/*
 * Remove, and return, pted at specified address, NULL if not present.
 */
struct pte_desc *
pmap_vp_remove(pmap_t pm, vaddr_t va)
{
	struct slb_desc *slbd;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pte_desc *pted;

	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL)
		return NULL;

	vp1 = slbd->slbd_vp;
	if (vp1 == NULL)
		return NULL;

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL)
		return NULL;

	pted = vp2->vp[VP_IDX2(va)];
	vp2->vp[VP_IDX2(va)] = NULL;

	return pted;
}

/*
 * Create a V -> P mapping for the given pmap and virtual address
 * with reference to the pte descriptor that is used to map the page.
 * This code should track vp table allocations so they can be freed
 * efficiently.
 */
int
pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags)
{
	struct slb_desc *slbd;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;

	slbd = pmap_slbd_lookup(pm, va);
	if (slbd == NULL) {
		slbd = pmap_slbd_alloc(pm, va);
		if (slbd == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: unable to allocate slbd", __func__);
			return ENOMEM;
		}
	}

	vp1 = slbd->slbd_vp;
	if (vp1 == NULL) {
		vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
		if (vp1 == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: unable to allocate L1", __func__);
			return ENOMEM;
		}
		slbd->slbd_vp = vp1;
	}

	vp2 = vp1->vp[VP_IDX1(va)];
	if (vp2 == NULL) {
		vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
		if (vp2 == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: unable to allocate L2", __func__);
			return ENOMEM;
		}
		vp1->vp[VP_IDX1(va)] = vp2;
	}

	vp2->vp[VP_IDX2(va)] = pted;
	return 0;
}

void
pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg)
{
	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
	pted->pted_va |= PTED_VA_MANAGED_M;
	mtx_leave(&pg->mdpage.pv_mtx);
}

void
pmap_remove_pv(struct pte_desc *pted)
{
	struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte.pte_lo & PTE_RPGN);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_REMOVE(pted, pted_pv_list);
	mtx_leave(&pg->mdpage.pv_mtx);
}

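/*
 * Search both the primary and the secondary PTEG for a valid entry
 * matching the given segment and page.  PTE_WIRED is masked off in
 * the comparison since it is a software-managed bit that may or may
 * not be set in the stored entry.
 */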
struct pte *
pte_lookup(uint64_t vsid, vaddr_t va)
{
	uint64_t hash, avpn, pte_hi;
	struct pte *pte;
	int idx, i;

	/* Primary hash. */
	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
	idx = (hash & pmap_ptab_mask);
	pte = pmap_ptable + (idx * 8);
	avpn = (vsid << PTE_VSID_SHIFT) |
	    (va & ADDR_PIDX) >> (ADDR_VSID_SHIFT - PTE_VSID_SHIFT);
	pte_hi = (avpn & PTE_AVPN) | PTE_VALID;

	for (i = 0; i < 8; i++) {
		if ((pte[i].pte_hi & ~PTE_WIRED) == pte_hi)
			return &pte[i];
	}

	/* Secondary hash. */
	idx ^= pmap_ptab_mask;
	pte = pmap_ptable + (idx * 8);
	pte_hi |= PTE_HID;

	for (i = 0; i < 8; i++) {
		if ((pte[i].pte_hi & ~PTE_WIRED) == pte_hi)
			return &pte[i];
	}

	return NULL;
}

/*
 * Delete a Page Table Entry, section 5.10.1.3.
 *
 * Note: hash table must be locked.
 */
void
pte_del(struct pte *pte, uint64_t ava)
{
	pte->pte_hi &= ~PTE_VALID;
	ptesync();	/* Ensure update completed. */
	tlbie(ava);	/* Invalidate old translation. */
	eieio();	/* Order tlbie before tlbsync. */
	tlbsync();	/* Ensure tlbie completed on all processors. */
	ptesync();	/* Ensure tlbsync and update completed. */
}

void
pte_zap(struct pte *pte, struct pte_desc *pted)
{
	pte_del(pte, pmap_pted2ava(pted));
}

void
pmap_fill_pte(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
    vm_prot_t prot, int cache)
{
	struct pte *pte = &pted->pted_pte;

	pted->pted_pmap = pm;
	pted->pted_va = va & ~PAGE_MASK;
	pted->pted_vsid = pmap_va2vsid(pm, va);
	KASSERT(pted->pted_vsid != 0);

	pte->pte_hi = (pmap_pted2avpn(pted) & PTE_AVPN) | PTE_VALID;
	pte->pte_lo = (pa & PTE_RPGN);

	if (pm == pmap_kernel())
		pte->pte_hi |= PTE_WIRED;

	if (prot & PROT_WRITE)
		pte->pte_lo |= PTE_RW;
	else
		pte->pte_lo |= PTE_RO;
	if (prot & PROT_EXEC)
		pted->pted_va |= PTED_VA_EXEC_M;
	else
		pte->pte_lo |= PTE_N;

	if (cache == PMAP_CACHE_WB)
		pte->pte_lo |= PTE_M;
	else
		pte->pte_lo |= (PTE_M | PTE_I | PTE_G);

	if ((prot & (PROT_READ | PROT_WRITE)) == 0)
		pte->pte_lo |= PTE_AC;
}

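/*
 * Insert a descriptor's PTE into the hash table: first look for a
 * free slot in the primary PTEG, then in the secondary one, and as
 * a last resort evict an existing entry.  Only unmanaged kernel
 * mappings are wired, so a replaceable (non-wired) slot is expected
 * to exist.
 */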
void
pte_insert(struct pte_desc *pted)
{
	struct pte *pte;
	vaddr_t va;
	uint64_t vsid, hash;
	int off, try, idx, i;
	int s;

	PMAP_HASH_LOCK(s);

	if ((pte = pmap_ptedinhash(pted)) != NULL)
		pte_zap(pte, pted);

	pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);

	va = pted->pted_va & ~PAGE_MASK;
	vsid = pted->pted_vsid;
	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
	idx = (hash & pmap_ptab_mask);

	/*
	 * Instead of starting at the beginning of each PTEG, the
	 * code should pick a random location within the primary and
	 * search all of the entries, then, if not yet found, do the
	 * same for the secondary.  This would reduce the
	 * frontloading of the PTEG.
	 */

	/* first just try fill of primary hash */
	pte = pmap_ptable + (idx * 8);
	for (i = 0; i < 8; i++) {
		if (pte[i].pte_hi & PTE_VALID)
			continue;

		pted->pted_va |= i;

		/* Add a Page Table Entry, section 5.10.1.1. */
		pte[i].pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
		pte[i].pte_lo = pted->pted_pte.pte_lo;
		eieio();	/* Order 1st PTE update before 2nd. */
		pte[i].pte_hi |= PTE_VALID;
		ptesync();	/* Ensure updates completed. */

		goto out;
	}

	/* try fill of secondary hash */
	pte = pmap_ptable + (idx ^ pmap_ptab_mask) * 8;
	for (i = 0; i < 8; i++) {
		if (pte[i].pte_hi & PTE_VALID)
			continue;

		pted->pted_va |= (i | PTED_VA_HID_M);

		/* Add a Page Table Entry, section 5.10.1.1. */
		pte[i].pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
		pte[i].pte_lo = pted->pted_pte.pte_lo;
		eieio();	/* Order 1st PTE update before 2nd. */
		pte[i].pte_hi |= (PTE_HID|PTE_VALID);
		ptesync();	/* Ensure updates completed. */

		goto out;
	}

	/* need decent replacement algorithm */
	off = mftb();

	for (try = 0; try < 16; try++) {
		pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);
		pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M);

		idx ^= (PTED_HID(pted) ? pmap_ptab_mask : 0);
		pte = pmap_ptable + (idx * 8);
		pte += PTED_PTEGIDX(pted); /* increment by index into pteg */

		if ((pte->pte_hi & PTE_WIRED) == 0)
			break;

		off++;
	}
	/*
	 * Since we only wire unmanaged kernel mappings, we should
	 * always find a slot that we can replace.
	 */
	KASSERT(try < 16);

	if (pte->pte_hi & PTE_VALID) {
		uint64_t avpn, vpn;

		avpn = pte->pte_hi & PTE_AVPN;
		vsid = avpn >> PTE_VSID_SHIFT;
		vpn = avpn << (ADDR_VSID_SHIFT - PTE_VSID_SHIFT - PAGE_SHIFT);

		idx ^= ((pte->pte_hi & PTE_HID) ? pmap_ptab_mask : 0);
		vpn |= ((idx ^ vsid) & (ADDR_PIDX >> ADDR_PIDX_SHIFT));

		pte_del(pte, vpn << PAGE_SHIFT);
	}

	/* Add a Page Table Entry, section 5.10.1.1. */
	pte->pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
	if (PTED_HID(pted))
		pte->pte_hi |= PTE_HID;
	pte->pte_lo = pted->pted_pte.pte_lo;
	eieio();	/* Order 1st PTE update before 2nd. */
	pte->pte_hi |= PTE_VALID;
	ptesync();	/* Ensure updates completed. */

out:
	PMAP_HASH_UNLOCK(s);
}

void
pmap_remove_pted(pmap_t pm, struct pte_desc *pted)
{
	struct pte *pte;
	int s;

	KASSERT(pm == pted->pted_pmap);
	PMAP_VP_ASSERT_LOCKED(pm);

	pm->pm_stats.resident_count--;

	if (PTED_WIRED(pted)) {
		pm->pm_stats.wired_count--;
		pted->pted_va &= ~PTED_VA_WIRED_M;
	}

	PMAP_HASH_LOCK(s);
	if ((pte = pmap_ptedinhash(pted)) != NULL)
		pte_zap(pte, pted);
	PMAP_HASH_UNLOCK(s);

	pted->pted_va &= ~PTED_VA_EXEC_M;
	pted->pted_pte.pte_hi &= ~PTE_VALID;

	if (PTED_MANAGED(pted))
		pmap_remove_pv(pted);

	pmap_vp_remove(pm, pted->pted_va);
	pool_put(&pmap_pted_pool, pted);
}

extern struct fdt_reg memreg[];
extern int nmemreg;

#ifdef DDB
extern struct fdt_reg initrd_reg;
#endif

void memreg_add(const struct fdt_reg *);
void memreg_remove(const struct fdt_reg *);

vaddr_t vmmap;
vaddr_t zero_page;
vaddr_t copy_src_page;
vaddr_t copy_dst_page;
vaddr_t virtual_avail = VM_MIN_KERNEL_ADDRESS;

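/*
 * Bootstrap-time physical memory allocator: before UVM is up, carve
 * a suitably aligned chunk straight out of the FDT memory region
 * list.  The returned pointer is a physical address, usable directly
 * since the kernel is identity mapped at this point.
 */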
void *
pmap_steal_avail(size_t size, size_t align)
{
	struct fdt_reg reg;
	uint64_t start, end;
	int i;

	for (i = 0; i < nmemreg; i++) {
		if (memreg[i].size > size) {
			start = (memreg[i].addr + (align - 1)) & ~(align - 1);
			end = start + size;
			if (end <= memreg[i].addr + memreg[i].size) {
				reg.addr = start;
				reg.size = end - start;
				memreg_remove(&reg);
				return (void *)start;
			}
		}
	}
	panic("can't allocate");
}

void
pmap_virtual_space(vaddr_t *start, vaddr_t *end)
{
	*start = virtual_avail;
	*end = VM_MAX_KERNEL_ADDRESS;
}

pmap_t
pmap_create(void)
{
	pmap_t pm;

	pm = pool_get(&pmap_pmap_pool, PR_WAITOK | PR_ZERO);
	pm->pm_refs = 1;
	PMAP_VP_LOCK_INIT(pm);
	LIST_INIT(&pm->pm_slbd);
	return pm;
}

/*
 * Add a reference to a given pmap.
 */
void
pmap_reference(pmap_t pm)
{
	atomic_inc_int(&pm->pm_refs);
}

/*
 * Retire the given pmap from service.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_destroy(pmap_t pm)
{
	int refs;

	refs = atomic_dec_int_nv(&pm->pm_refs);
	if (refs > 0)
		return;

	/*
	 * reference count is zero, free pmap resources and free pmap.
	 */
	pmap_release(pm);
	pool_put(&pmap_pmap_pool, pm);
}

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 */
void
pmap_release(pmap_t pm)
{
	pmap_vp_destroy(pm);
}

void
pmap_vp_destroy(pmap_t pm)
{
	struct slb_desc *slbd;
	struct pmapvp1 *vp1;
	struct pmapvp2 *vp2;
	struct pte_desc *pted;
	int i, j;

	while ((slbd = LIST_FIRST(&pm->pm_slbd))) {
		vp1 = slbd->slbd_vp;
		if (vp1) {
			for (i = 0; i < VP_IDX1_CNT; i++) {
				vp2 = vp1->vp[i];
				if (vp2 == NULL)
					continue;

				for (j = 0; j < VP_IDX2_CNT; j++) {
					pted = vp2->vp[j];
					if (pted == NULL)
						continue;

					pool_put(&pmap_pted_pool, pted);
				}
				pool_put(&pmap_vp_pool, vp2);
			}
			pool_put(&pmap_vp_pool, vp1);
		}

		LIST_REMOVE(slbd, slbd_list);
		pmap_free_vsid(slbd->slbd_vsid);
		pool_put(&pmap_slbd_pool, slbd);
	}
}

void
pmap_init(void)
{
	int i;

	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_VM, 0,
	    "pmap", &pool_allocator_single);
	pool_setlowat(&pmap_pmap_pool, 2);
	pool_init(&pmap_vp_pool, sizeof(struct pmapvp1), 0, IPL_VM, 0,
	    "vp", &pool_allocator_single);
	pool_setlowat(&pmap_vp_pool, 10);
	pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
	    "pted", NULL);
	pool_setlowat(&pmap_pted_pool, 20);
	pool_init(&pmap_slbd_pool, sizeof(struct slb_desc), 0, IPL_VM, 0,
	    "slbd", NULL);
	pool_setlowat(&pmap_slbd_pool, 5);

	LIST_INIT(&pmap_kernel()->pm_slbd);
	for (i = 0; i < nitems(kernel_slb_desc); i++) {
		LIST_INSERT_HEAD(&pmap_kernel()->pm_slbd,
		    &kernel_slb_desc[i], slbd_list);
	}

	pmap_initialized = 1;
}

int
pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
{
	struct pte_desc *pted;
	struct vm_page *pg;
	int cache = PMAP_CACHE_WB;
	int need_sync = 0;
	int error = 0;

	if (pa & PMAP_NOCACHE)
		cache = PMAP_CACHE_CI;
	pg = PHYS_TO_VM_PAGE(pa);
	if (!pmap_initialized)
		printf("%s\n", __func__);

	PMAP_VP_LOCK(pm);
	pted = pmap_vp_lookup(pm, va);
	if (pted && PTED_VALID(pted)) {
		pmap_remove_pted(pm, pted);
		pted = NULL;
	}

	pm->pm_stats.resident_count++;

	/* Do not have pted for this, get one and put it in VP */
	if (pted == NULL) {
		pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO);
		if (pted == NULL) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: failed to allocate pted", __func__);
			error = ENOMEM;
			goto out;
		}
		if (pmap_vp_enter(pm, va, pted, flags)) {
			if ((flags & PMAP_CANFAIL) == 0)
				panic("%s: failed to allocate L2/L3", __func__);
			error = ENOMEM;
			pool_put(&pmap_pted_pool, pted);
			goto out;
		}
	}

	if ((flags & PROT_WRITE) == 0)
		prot &= ~PROT_WRITE;

	pmap_fill_pte(pm, va, pa, pted, prot, cache);
	if (flags & PMAP_WIRED) {
		pted->pted_va |= PTED_VA_WIRED_M;
		pm->pm_stats.wired_count++;
	}

	if (pg != NULL) {
		pmap_enter_pv(pted, pg); /* only managed mem */

		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
		if (flags & PROT_WRITE)
			atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD);

		if ((pg->pg_flags & PG_DEV) == 0 && cache != PMAP_CACHE_WB)
			pmap_flush_page(pg);
	}

	pte_insert(pted);

	if (prot & PROT_EXEC) {
		if (pg != NULL) {
			need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
			if (prot & PROT_WRITE)
				atomic_clearbits_int(&pg->pg_flags,
				    PG_PMAP_EXE);
			else
				atomic_setbits_int(&pg->pg_flags,
				    PG_PMAP_EXE);
		} else
			need_sync = 1;
	} else {
		/*
		 * Should we be paranoid about writeable non-exec
		 * mappings ? if so, clear the exec tag
		 */
		if ((prot & PROT_WRITE) && (pg != NULL))
			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
	}

	if (need_sync)
		pmap_pted_syncicache(pted);

out:
	PMAP_VP_UNLOCK(pm);
	return error;
}

void
pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
{
	struct pte_desc *pted;
	vaddr_t va;

	PMAP_VP_LOCK(pm);
	for (va = sva; va < eva; va += PAGE_SIZE) {
		pted = pmap_vp_lookup(pm, va);
		if (pted && PTED_VALID(pted))
			pmap_remove_pted(pm, pted);
	}
	PMAP_VP_UNLOCK(pm);
}

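/*
 * Flush the instruction cache for a freshly written page.  User
 * pages are temporarily entered into the per-CPU window normally
 * used by pmap_zero_page() so that __syncicache() can reach them
 * through a kernel virtual address.
 */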
void
pmap_pted_syncicache(struct pte_desc *pted)
{
	paddr_t pa = pted->pted_pte.pte_lo & PTE_RPGN;
	vaddr_t va = pted->pted_va & ~PAGE_MASK;

	if (pted->pted_pmap != pmap_kernel()) {
		va = zero_page + cpu_number() * PAGE_SIZE;
		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	}

	__syncicache((void *)va, PAGE_SIZE);

	if (pted->pted_pmap != pmap_kernel())
		pmap_kremove(va, PAGE_SIZE);
}

void
pmap_pted_ro(struct pte_desc *pted, vm_prot_t prot)
{
	struct vm_page *pg;
	struct pte *pte;
	int s;

	pg = PHYS_TO_VM_PAGE(pted->pted_pte.pte_lo & PTE_RPGN);
	if (pg->pg_flags & PG_PMAP_EXE) {
		if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE)
			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
		else
			pmap_pted_syncicache(pted);
	}

	pted->pted_pte.pte_lo &= ~PTE_PP;
	pted->pted_pte.pte_lo |= PTE_RO;

	if ((prot & PROT_EXEC) == 0)
		pted->pted_pte.pte_lo |= PTE_N;

	if ((prot & (PROT_READ | PROT_WRITE)) == 0)
		pted->pted_pte.pte_lo |= PTE_AC;

	PMAP_HASH_LOCK(s);
	if ((pte = pmap_ptedinhash(pted)) != NULL) {
		pte_del(pte, pmap_pted2ava(pted));

		/* Add a Page Table Entry, section 5.10.1.1. */
		pte->pte_lo = pted->pted_pte.pte_lo;
		eieio();	/* Order 1st PTE update before 2nd. */
		pte->pte_hi |= PTE_VALID;
		ptesync();	/* Ensure updates completed. */
	}
	PMAP_HASH_UNLOCK(s);
}

/*
 * Lower the protection on the specified physical page.
 *
 * There are only two cases, either the protection is going to 0,
 * or it is going to read-only.
 */
void
pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
{
	struct pte_desc *pted;
	void *pte;
	pmap_t pm;
	int s;

	if (prot == PROT_NONE) {
		mtx_enter(&pg->mdpage.pv_mtx);
		while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
			pmap_reference(pted->pted_pmap);
			pm = pted->pted_pmap;
			mtx_leave(&pg->mdpage.pv_mtx);

			PMAP_VP_LOCK(pm);

			/*
			 * We dropped the pvlist lock before grabbing
			 * the pmap lock to avoid lock ordering
			 * problems.  This means we have to check the
			 * pvlist again since somebody else might have
			 * modified it.  All we care about is that the
			 * pvlist entry matches the pmap we just
			 * locked.  If it doesn't, unlock the pmap and
			 * try again.
			 */
			mtx_enter(&pg->mdpage.pv_mtx);
			if ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) == NULL ||
			    pted->pted_pmap != pm) {
				mtx_leave(&pg->mdpage.pv_mtx);
				PMAP_VP_UNLOCK(pm);
				pmap_destroy(pm);
				mtx_enter(&pg->mdpage.pv_mtx);
				continue;
			}

			PMAP_HASH_LOCK(s);
			if ((pte = pmap_ptedinhash(pted)) != NULL)
				pte_zap(pte, pted);
			PMAP_HASH_UNLOCK(s);

			pted->pted_va &= ~PTED_VA_MANAGED_M;
			LIST_REMOVE(pted, pted_pv_list);
			mtx_leave(&pg->mdpage.pv_mtx);

			pmap_remove_pted(pm, pted);

			PMAP_VP_UNLOCK(pm);
			pmap_destroy(pm);
			mtx_enter(&pg->mdpage.pv_mtx);
		}
		mtx_leave(&pg->mdpage.pv_mtx);
		/* page is being reclaimed, sync icache next use */
		atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
		return;
	}

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list)
		pmap_pted_ro(pted, prot);
	mtx_leave(&pg->mdpage.pv_mtx);
}

void
pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
{
	if (prot & (PROT_READ | PROT_EXEC)) {
		struct pte_desc *pted;

		PMAP_VP_LOCK(pm);
		while (sva < eva) {
			pted = pmap_vp_lookup(pm, sva);
			if (pted && PTED_VALID(pted))
				pmap_pted_ro(pted, prot);
			sva += PAGE_SIZE;
		}
		PMAP_VP_UNLOCK(pm);
		return;
	}
	pmap_remove(pm, sva, eva);
}

void
pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
{
	pmap_t pm = pmap_kernel();
	struct pte_desc pted;
	struct vm_page *pg;
	int cache = (pa & PMAP_NOCACHE) ? PMAP_CACHE_CI : PMAP_CACHE_WB;

	pm->pm_stats.resident_count++;

	if (prot & PROT_WRITE) {
		pg = PHYS_TO_VM_PAGE(pa);
		if (pg != NULL)
			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
	}

	/* Calculate PTE */
	pmap_fill_pte(pm, va, pa, &pted, prot, cache);
	pted.pted_pte.pte_hi |= PTE_WIRED;

	/* Insert into HTAB */
	pte_insert(&pted);
}

void
pmap_kremove(vaddr_t va, vsize_t len)
{
	pmap_t pm = pmap_kernel();
	vaddr_t eva = va + len;
	struct pte *pte;
	uint64_t vsid;
	int s;

	while (va < eva) {
		vsid = pmap_kernel_vsid(va >> ADDR_ESID_SHIFT);

		PMAP_HASH_LOCK(s);
		pte = pte_lookup(vsid, va);
		if (pte)
			pte_del(pte, pmap_ava(vsid, va));
		PMAP_HASH_UNLOCK(s);

		if (pte)
			pm->pm_stats.resident_count--;

		va += PAGE_SIZE;
	}
}

int
pmap_is_referenced(struct vm_page *pg)
{
	return ((pg->pg_flags & PG_PMAP_REF) != 0);
}

int
pmap_is_modified(struct vm_page *pg)
{
	return ((pg->pg_flags & PG_PMAP_MOD) != 0);
}

int
pmap_clear_reference(struct vm_page *pg)
{
	struct pte_desc *pted;
	int s;

	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
		struct pte *pte;

		PMAP_HASH_LOCK(s);
		if ((pte = pmap_ptedinhash(pted)) != NULL)
			pte_zap(pte, pted);
		PMAP_HASH_UNLOCK(s);
	}
	mtx_leave(&pg->mdpage.pv_mtx);

	return 0;
}

int
pmap_clear_modify(struct vm_page *pg)
{
	struct pte_desc *pted;
	int s;

	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);

	mtx_enter(&pg->mdpage.pv_mtx);
	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
		struct pte *pte;

		pted->pted_pte.pte_lo &= ~PTE_PP;
		pted->pted_pte.pte_lo |= PTE_RO;

		PMAP_HASH_LOCK(s);
		if ((pte = pmap_ptedinhash(pted)) != NULL) {
			pte_zap(pte, pted);

			/* Add a Page Table Entry, section 5.10.1.1. */
			pte->pte_lo = pted->pted_pte.pte_lo;
			eieio();	/* Order 1st PTE update before 2nd. */
			pte->pte_hi |= PTE_VALID;
			ptesync();	/* Ensure updates completed. */
		}
		PMAP_HASH_UNLOCK(s);
	}
	mtx_leave(&pg->mdpage.pv_mtx);

	return 0;
}

int
pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pa)
{
	struct pte *pte;
	uint64_t vsid;
	int s;

	if (pm == pmap_kernel() &&
	    va >= (vaddr_t)_start && va < (vaddr_t)_end) {
		*pa = va;
		return 1;
	}

	PMAP_VP_LOCK(pm);
	vsid = pmap_va2vsid(pm, va);
	PMAP_VP_UNLOCK(pm);
	if (vsid == 0)
		return 0;

	PMAP_HASH_LOCK(s);
	pte = pte_lookup(vsid, va);
	if (pte)
		*pa = (pte->pte_lo & PTE_RPGN) | (va & PAGE_MASK);
	PMAP_HASH_UNLOCK(s);

	return (pte != NULL);
}

void
pmap_activate(struct proc *p)
{
}

void
pmap_deactivate(struct proc *p)
{
}

void
pmap_unwire(pmap_t pm, vaddr_t va)
{
	struct pte_desc *pted;

	PMAP_VP_LOCK(pm);
	pted = pmap_vp_lookup(pm, va);
	if (pted && PTED_WIRED(pted)) {
		pm->pm_stats.wired_count--;
		pted->pted_va &= ~PTED_VA_WIRED_M;
	}
	PMAP_VP_UNLOCK(pm);
}

void
pmap_zero_page(struct vm_page *pg)
{
	paddr_t pa = VM_PAGE_TO_PHYS(pg);
	vaddr_t va = zero_page + cpu_number() * PAGE_SIZE;
	int offset;

	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	for (offset = 0; offset < PAGE_SIZE; offset += cacheline_size)
		__asm volatile ("dcbz 0, %0" :: "r"(va + offset));
	pmap_kremove(va, PAGE_SIZE);
}

void
pmap_flush_page(struct vm_page *pg)
{
	paddr_t pa = VM_PAGE_TO_PHYS(pg);
	vaddr_t va = zero_page + cpu_number() * PAGE_SIZE;
	int offset;

	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
	for (offset = 0; offset < PAGE_SIZE; offset += cacheline_size)
		__asm volatile ("dcbf 0, %0" :: "r"(va + offset));
	pmap_kremove(va, PAGE_SIZE);
}

void
pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
{
	paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
	paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
	vaddr_t srcva = copy_src_page + cpu_number() * PAGE_SIZE;
	vaddr_t dstva = copy_dst_page + cpu_number() * PAGE_SIZE;

	pmap_kenter_pa(srcva, srcpa, PROT_READ);
	pmap_kenter_pa(dstva, dstpa, PROT_READ | PROT_WRITE);
	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
	pmap_kremove(srcva, PAGE_SIZE);
	pmap_kremove(dstva, PAGE_SIZE);
}

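/*
 * Sync the instruction cache for a range of another process's
 * address space, one page at a time, by mapping each page into this
 * CPU's scratch window.
 */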
void
pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
{
	paddr_t pa;
	vaddr_t cva;
	vsize_t clen;

	while (len > 0) {
		/* add one to always round up to the next page */
		clen = round_page(va + 1) - va;
		if (clen > len)
			clen = len;

		if (pmap_extract(pr->ps_vmspace->vm_map.pmap, va, &pa)) {
			cva = zero_page + cpu_number() * PAGE_SIZE;
			pmap_kenter_pa(cva, pa, PROT_READ | PROT_WRITE);
			__syncicache((void *)cva, clen);
			pmap_kremove(cva, PAGE_SIZE);
		}

		len -= clen;
		va += clen;
	}
}

void
pmap_set_kernel_slb(vaddr_t va)
{
	uint64_t esid;
	int idx;

	esid = va >> ADDR_ESID_SHIFT;

	for (idx = 0; idx < nitems(kernel_slb_desc); idx++) {
		if (kernel_slb_desc[idx].slbd_vsid == 0)
			break;
		if (kernel_slb_desc[idx].slbd_esid == esid)
			return;
	}
	KASSERT(idx < nitems(kernel_slb_desc));

	kernel_slb_desc[idx].slbd_esid = esid;
	kernel_slb_desc[idx].slbd_vsid = pmap_kernel_vsid(esid);
}

/*
 * Handle SLB entry spills for the kernel.  This function runs without
 * belt and suspenders in real-mode on a small per-CPU stack.
 */
void
pmap_spill_kernel_slb(vaddr_t va)
{
	struct cpu_info *ci = curcpu();
	uint64_t esid;
	uint64_t slbe, slbv;
	int idx;

	esid = va >> ADDR_ESID_SHIFT;

	for (idx = 0; idx < 31; idx++) {
		if (ci->ci_kernel_slb[idx].slb_slbe == 0)
			break;
		slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
		if (ci->ci_kernel_slb[idx].slb_slbe == slbe)
			return;
	}

	/*
	 * If no free slot was found, randomly replace an entry in
	 * slot 15-30.
	 */
	if (idx == 31)
		idx = 15 + mftb() % 16;

	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
	slbv = pmap_kernel_vsid(esid) << SLBV_VSID_SHIFT;

	ci->ci_kernel_slb[idx].slb_slbe = slbe;
	ci->ci_kernel_slb[idx].slb_slbv = slbv;
}

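/*
 * Per-CPU MMU setup: wipe any stale SLB and TLB state, point the
 * hardware at the partition table (ISA 3.0 and later) or the hash
 * table (older CPUs), and load the kernel SLB entries collected in
 * kernel_slb_desc[].
 */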
void
pmap_bootstrap_cpu(void)
{
	struct cpu_info *ci = curcpu();
	uint64_t esid, vsid;
	uint64_t slbe, slbv;
	int idx;

	/* Clear SLB. */
	slbia();
	slbie(slbmfee(0));

	/* Clear TLB. */
	tlbia();

	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) {
		/* Set partition table. */
		mtptcr((paddr_t)pmap_pat | PATSIZE);
	} else {
		/* Set page table. */
		mtsdr1((paddr_t)pmap_ptable | HTABSIZE);
	}

	/* Load SLB. */
	for (idx = 0; idx < 31; idx++) {
		if (kernel_slb_desc[idx].slbd_vsid == 0)
			break;

		esid = kernel_slb_desc[idx].slbd_esid;
		vsid = kernel_slb_desc[idx].slbd_vsid;

		slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
		slbv = vsid << SLBV_VSID_SHIFT;
		slbmte(slbv, slbe);

		ci->ci_kernel_slb[idx].slb_slbe = slbe;
		ci->ci_kernel_slb[idx].slb_slbv = slbv;
	}
}

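/*
 * One-time MMU bootstrap on the boot CPU: size the hash table to the
 * amount of physical memory, steal physical pages for it and for the
 * partition table, identity map the kernel image and page tables,
 * prime the kernel SLB descriptors, and set aside per-CPU VA windows
 * for pmap_zero_page(), pmap_copy_page() and friends.
 */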
void
pmap_bootstrap(void)
{
	paddr_t start, end, pa;
	vm_prot_t prot;
	vaddr_t va;

#define HTABENTS 2048

	pmap_ptab_cnt = HTABENTS;
	while (pmap_ptab_cnt * 2 < physmem)
		pmap_ptab_cnt <<= 1;

	/* Make sure the page tables don't use more than 8 SLB entries. */
	while (HTABMEMSZ > 8 * SEGMENT_SIZE)
		pmap_ptab_cnt >>= 1;

	/*
	 * allocate suitably aligned memory for HTAB
	 */
	pmap_ptable = pmap_steal_avail(HTABMEMSZ, HTABMEMSZ);
	memset(pmap_ptable, 0, HTABMEMSZ);
	pmap_ptab_mask = pmap_ptab_cnt - 1;

	/* Map page tables. */
	start = (paddr_t)pmap_ptable;
	end = start + HTABMEMSZ;
	for (pa = start; pa < end; pa += PAGE_SIZE)
		pmap_kenter_pa(pa, pa, PROT_READ | PROT_WRITE);

	/* Map kernel. */
	start = (paddr_t)_start;
	end = (paddr_t)_end;
	for (pa = start; pa < end; pa += PAGE_SIZE) {
		if (pa < (paddr_t)_etext)
			prot = PROT_READ | PROT_EXEC;
		else if (pa < (paddr_t)_erodata)
			prot = PROT_READ;
		else
			prot = PROT_READ | PROT_WRITE;
		pmap_kenter_pa(pa, pa, prot);
	}

#ifdef DDB
	/* Map initrd. */
	start = initrd_reg.addr;
	end = initrd_reg.addr + initrd_reg.size;
	for (pa = start; pa < end; pa += PAGE_SIZE)
		pmap_kenter_pa(pa, pa, PROT_READ | PROT_WRITE);
#endif

	/* Allocate partition table. */
	pmap_pat = pmap_steal_avail(PATMEMSZ, PATMEMSZ);
	memset(pmap_pat, 0, PATMEMSZ);
	pmap_pat[0].pate_htab = (paddr_t)pmap_ptable | HTABSIZE;

	/* SLB entry for the kernel. */
	pmap_set_kernel_slb((vaddr_t)_start);

	/* SLB entries for the page tables. */
	for (va = (vaddr_t)pmap_ptable; va < (vaddr_t)pmap_ptable + HTABMEMSZ;
	    va += SEGMENT_SIZE)
		pmap_set_kernel_slb(va);

	/* SLB entries for kernel VA. */
	for (va = VM_MIN_KERNEL_ADDRESS; va < VM_MAX_KERNEL_ADDRESS;
	    va += SEGMENT_SIZE)
		pmap_set_kernel_slb(va);

	pmap_bootstrap_cpu();

	pmap_vsid[0] |= (1U << 0);
#if VSID_VRMA < NUM_VSID
	pmap_vsid[VSID_VRMA / 32] |= (1U << (VSID_VRMA % 32));
#endif

	vmmap = virtual_avail;
	virtual_avail += PAGE_SIZE;
	zero_page = virtual_avail;
	virtual_avail += MAXCPUS * PAGE_SIZE;
	copy_src_page = virtual_avail;
	virtual_avail += MAXCPUS * PAGE_SIZE;
	copy_dst_page = virtual_avail;
	virtual_avail += MAXCPUS * PAGE_SIZE;
}

#ifdef DDB
/*
 * DDB will edit the PTE to gain temporary write access to a page in
 * the read-only kernel text.
 */
struct pte *
pmap_get_kernel_pte(vaddr_t va)
{
	uint64_t vsid;

	vsid = pmap_kernel_vsid(va >> ADDR_ESID_SHIFT);
	return pte_lookup(vsid, va);
}
#endif