1 /*	$OpenBSD: pmap.c,v 1.62 2024/06/04 17:31:59 gkoehler Exp $ */
2 
3 /*
4  * Copyright (c) 2015 Martin Pieuchot
5  * Copyright (c) 2001, 2002, 2007 Dale Rahn.
6  * All rights reserved.
7  *
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * Effort sponsored in part by the Defense Advanced Research Projects
30  * Agency (DARPA) and Air Force Research Laboratory, Air Force
31  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
32  */
33 
34 /*
35  * Copyright (c) 2020 Mark Kettenis <kettenis@openbsd.org>
36  *
37  * Permission to use, copy, modify, and distribute this software for any
38  * purpose with or without fee is hereby granted, provided that the above
39  * copyright notice and this permission notice appear in all copies.
40  *
41  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
42  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
43  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
44  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
45  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
46  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
47  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
48  */
49 
50 #include <sys/param.h>
51 #include <sys/systm.h>
52 #include <sys/atomic.h>
53 #include <sys/pool.h>
54 #include <sys/proc.h>
55 #include <sys/user.h>
56 
57 #include <uvm/uvm_extern.h>
58 
59 #include <machine/cpufunc.h>
60 #include <machine/pcb.h>
61 #include <machine/pmap.h>
62 #include <machine/pte.h>
63 
64 #include <dev/ofw/fdt.h>
65 
66 extern char _start[], _etext[], _erodata[], _end[];
67 
68 #ifdef MULTIPROCESSOR
69 
70 struct mutex pmap_hash_lock = MUTEX_INITIALIZER(IPL_HIGH);
71 
72 #define	PMAP_HASH_LOCK(s)						\
73 do {									\
74 	(void)s;							\
75 	mtx_enter(&pmap_hash_lock);					\
76 } while (0)
77 
78 #define	PMAP_HASH_UNLOCK(s)						\
79 do {									\
80 	mtx_leave(&pmap_hash_lock);					\
81 } while (0)
82 
83 #define	PMAP_VP_LOCK_INIT(pm)	mtx_init(&pm->pm_mtx, IPL_VM)
84 
85 #define	PMAP_VP_LOCK(pm)						\
86 do {									\
87 	if (pm != pmap_kernel())					\
88 		mtx_enter(&pm->pm_mtx);					\
89 } while (0)
90 
91 #define	PMAP_VP_UNLOCK(pm)						\
92 do {									\
93 	if (pm != pmap_kernel())					\
94 		mtx_leave(&pm->pm_mtx);					\
95 } while (0)
96 
97 #define PMAP_VP_ASSERT_LOCKED(pm)					\
98 do {									\
99 	if (pm != pmap_kernel())					\
100 		MUTEX_ASSERT_LOCKED(&pm->pm_mtx);			\
101 } while (0)
102 
103 #else
104 
105 #define	PMAP_HASH_LOCK(s)		(void)s
106 #define	PMAP_HASH_UNLOCK(s)		/* nothing */
107 
108 #define	PMAP_VP_LOCK_INIT(pm)		/* nothing */
109 #define	PMAP_VP_LOCK(pm)		/* nothing */
110 #define	PMAP_VP_UNLOCK(pm)		/* nothing */
111 #define	PMAP_VP_ASSERT_LOCKED(pm)	/* nothing */
112 
113 #endif
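
/*
 * Locking sketch (as used throughout this file): the per-pmap pm_mtx
 * behind PMAP_VP_LOCK() protects a pmap's SLB descriptor list and VP
 * tables and is never taken for the kernel pmap; the global
 * pmap_hash_lock behind PMAP_HASH_LOCK() protects the hash table
 * itself and is always the innermost lock, taken after any VP lock or
 * pv_mtx.
 */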
114 
115 struct pmap kernel_pmap_store;
116 
117 struct pte *pmap_ptable;
118 int	pmap_ptab_cnt;
119 uint64_t pmap_ptab_mask;
120 
121 #define HTABMEMSZ	(pmap_ptab_cnt * 8 * sizeof(struct pte))
122 #define HTABSIZE	(ffs(pmap_ptab_cnt) - 12)
123 
124 struct pate *pmap_pat;
125 
126 #define PATMEMSZ	(64 * 1024)
127 #define PATSIZE		(ffs(PATMEMSZ) - 12)
128 
129 struct pte_desc {
130 	/* Linked list of phys -> virt entries */
131 	LIST_ENTRY(pte_desc) pted_pv_list;
132 	struct pte pted_pte;
133 	pmap_t pted_pmap;
134 	vaddr_t pted_va;
135 	uint64_t pted_vsid;
136 };
137 
138 #define PTED_VA_PTEGIDX_M	0x07
139 #define PTED_VA_HID_M		0x08
140 #define PTED_VA_MANAGED_M	0x10
141 #define PTED_VA_WIRED_M		0x20
142 #define PTED_VA_EXEC_M		0x40
143 
144 void	pmap_pted_syncicache(struct pte_desc *);
145 void	pmap_flush_page(struct vm_page *);
146 
147 struct slb_desc {
148 	LIST_ENTRY(slb_desc) slbd_list;
149 	uint64_t	slbd_esid;
150 	uint64_t	slbd_vsid;
151 	struct pmapvp1	*slbd_vp;
152 };
153 
154 /* Preallocated SLB entries for the kernel. */
155 struct slb_desc	kernel_slb_desc[16 + VM_KERNEL_SPACE_SIZE / SEGMENT_SIZE];
156 
157 struct slb_desc *pmap_slbd_lookup(pmap_t, vaddr_t);
158 
159 struct pmapvp1 {
160 	struct pmapvp2 *vp[VP_IDX1_CNT];
161 };
162 
163 struct pmapvp2 {
164 	struct pte_desc *vp[VP_IDX2_CNT];
165 };
166 
167 CTASSERT(sizeof(struct pmapvp1) == sizeof(struct pmapvp2));
168 
169 static inline int
170 VP_IDX1(vaddr_t va)
171 {
172 	return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
173 }
174 
175 static inline int
176 VP_IDX2(vaddr_t va)
177 {
178 	return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
179 }
180 
181 void	pmap_vp_destroy(pmap_t);
182 void	pmap_release(pmap_t);
183 
184 struct pool pmap_pmap_pool;
185 struct pool pmap_vp_pool;
186 struct pool pmap_pted_pool;
187 struct pool pmap_slbd_pool;
188 
189 int pmap_initialized = 0;
190 
191 /*
192  * We use only 4K pages and 256MB segments.  That means p = b = 12 and
193  * s = 28.
194  */
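
/*
 * Concretely: the low 12 bits of a virtual address are the page
 * offset, the next 16 bits select a page within a 256MB segment, and
 * the bits above bit 28 form the ESID that an SLB entry maps to a
 * VSID.
 */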
195 
196 #define KERNEL_VSID_BIT		0x0000001000000000ULL
197 #define VSID_HASH_MASK		0x0000007fffffffffULL
198 
199 static inline int
200 PTED_HID(struct pte_desc *pted)
201 {
202 	return !!(pted->pted_va & PTED_VA_HID_M);
203 }
204 
205 static inline int
206 PTED_PTEGIDX(struct pte_desc *pted)
207 {
208 	return (pted->pted_va & PTED_VA_PTEGIDX_M);
209 }
210 
211 static inline int
212 PTED_MANAGED(struct pte_desc *pted)
213 {
214 	return !!(pted->pted_va & PTED_VA_MANAGED_M);
215 }
216 
217 static inline int
218 PTED_WIRED(struct pte_desc *pted)
219 {
220 	return !!(pted->pted_va & PTED_VA_WIRED_M);
221 }
222 
223 static inline int
224 PTED_VALID(struct pte_desc *pted)
225 {
226 	return !!(pted->pted_pte.pte_hi & PTE_VALID);
227 }
228 
229 #define TLBIEL_MAX_SETS		4096
230 #define TLBIEL_SET_SHIFT	12
231 #define TLBIEL_INVAL_SET	(0x3 << 10)
232 
233 void
234 tlbia(void)
235 {
236 	int set;
237 
238 	for (set = 0; set < TLBIEL_MAX_SETS; set++)
239 		tlbiel((set << TLBIEL_SET_SHIFT) | TLBIEL_INVAL_SET);
240 }
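
/*
 * tlbiel only invalidates translations on the executing CPU, so the
 * loop above walks every congruence class (set) to flush the whole
 * local TLB without broadcasting tlbie to other processors.
 */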
241 
242 /*
243  * Return AVA for use with TLB invalidate instructions.
244  */
245 static inline uint64_t
246 pmap_ava(uint64_t vsid, vaddr_t va)
247 {
248 	return ((vsid << ADDR_VSID_SHIFT) | (va & ADDR_PIDX));
249 }
250 
251 /*
252  * Return AVA for a PTE descriptor.
253  */
254 static inline uint64_t
255 pmap_pted2ava(struct pte_desc *pted)
256 {
257 	return pmap_ava(pted->pted_vsid, pted->pted_va);
258 }
259 
260 /*
261  * Return the top 64 bits of the (80-bit) VPN for a PTE descriptor.
262  */
263 static inline uint64_t
264 pmap_pted2avpn(struct pte_desc *pted)
265 {
266 	return (pted->pted_vsid << (PTE_VSID_SHIFT) |
267 	    (pted->pted_va & ADDR_PIDX) >>
268 		(ADDR_VSID_SHIFT - PTE_VSID_SHIFT));
269 }
270 
271 static inline uint64_t
272 pmap_kernel_vsid(uint64_t esid)
273 {
274 	uint64_t vsid;
275 	vsid = (((esid << 8) | (esid > 28)) * 0x13bb) & (KERNEL_VSID_BIT - 1);
276 	return vsid | KERNEL_VSID_BIT;
277 }
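
/*
 * Kernel VSIDs are derived directly from the ESID and always have
 * KERNEL_VSID_BIT set, so they cannot collide with user VSIDs, which
 * are allocated from pmap_vsid[] below with that bit clear.
 */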
278 
279 static inline uint64_t
280 pmap_va2vsid(pmap_t pm, vaddr_t va)
281 {
282 	uint64_t esid = va >> ADDR_ESID_SHIFT;
283 	struct slb_desc *slbd;
284 
285 	if (pm == pmap_kernel())
286 		return pmap_kernel_vsid(esid);
287 
288 	slbd = pmap_slbd_lookup(pm, va);
289 	if (slbd)
290 		return slbd->slbd_vsid;
291 
292 	return 0;
293 }
294 
295 struct pte *
296 pmap_ptedinhash(struct pte_desc *pted)
297 {
298 	struct pte *pte;
299 	vaddr_t va;
300 	uint64_t vsid, hash;
301 	int idx;
302 
303 	va = pted->pted_va & ~PAGE_MASK;
304 	vsid = pted->pted_vsid;
305 	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
306 	idx = (hash & pmap_ptab_mask);
307 
308 	idx ^= (PTED_HID(pted) ? pmap_ptab_mask : 0);
309 	pte = pmap_ptable + (idx * 8);
310 	pte += PTED_PTEGIDX(pted); /* increment by index into pteg */
311 
312 	/*
313 	 * We now have the pointer to where it will be, if it is
314 	 * currently mapped. If the mapping was thrown away in
315 	 * exchange for another page mapping, then this page is not
316 	 * currently in the hash.
317 	 */
318 	if ((pted->pted_pte.pte_hi |
319 	     (PTED_HID(pted) ? PTE_HID : 0)) == pte->pte_hi)
320 		return pte;
321 
322 	return NULL;
323 }
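
/*
 * The hash computed in pmap_ptedinhash() above and pte_lookup() below
 * is the architected primary hash: the low VSID bits xor'ed with the
 * page index within the segment.  The primary PTEG lives at
 * (hash & pmap_ptab_mask), the secondary PTEG at that index xor'ed
 * with the full mask; PTED_VA_HID_M records which of the two a pted
 * went into and PTED_VA_PTEGIDX_M records the slot (0-7) within that
 * group of 8 PTEs.
 */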
324 
325 struct slb_desc *
326 pmap_slbd_lookup(pmap_t pm, vaddr_t va)
327 {
328 	uint64_t esid = va >> ADDR_ESID_SHIFT;
329 	struct slb_desc *slbd;
330 
331 	PMAP_VP_ASSERT_LOCKED(pm);
332 
333 	LIST_FOREACH(slbd, &pm->pm_slbd, slbd_list) {
334 		if (slbd->slbd_esid == esid)
335 			return slbd;
336 	}
337 
338 	return NULL;
339 }
340 
341 void
342 pmap_slbd_cache(pmap_t pm, struct slb_desc *slbd)
343 {
344 	struct pcb *pcb = &curproc->p_addr->u_pcb;
345 	uint64_t slbe, slbv;
346 	int idx;
347 
348 	KASSERT(curproc->p_vmspace->vm_map.pmap == pm);
349 
350 	for (idx = 0; idx < nitems(pcb->pcb_slb); idx++) {
351 		if (pcb->pcb_slb[idx].slb_slbe == 0)
352 			break;
353 	}
354 	if (idx == nitems(pcb->pcb_slb))
355 		idx = arc4random_uniform(nitems(pcb->pcb_slb));
356 
357 	slbe = (slbd->slbd_esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
358 	slbv = slbd->slbd_vsid << SLBV_VSID_SHIFT;
359 
360 	pcb->pcb_slb[idx].slb_slbe = slbe;
361 	pcb->pcb_slb[idx].slb_slbv = slbv;
362 }
363 
364 int
365 pmap_slbd_fault(pmap_t pm, vaddr_t va)
366 {
367 	struct slb_desc *slbd;
368 
369 	PMAP_VP_LOCK(pm);
370 	slbd = pmap_slbd_lookup(pm, va);
371 	if (slbd) {
372 		pmap_slbd_cache(pm, slbd);
373 		PMAP_VP_UNLOCK(pm);
374 		return 0;
375 	}
376 	PMAP_VP_UNLOCK(pm);
377 
378 	return EFAULT;
379 }
380 
381 #define NUM_VSID (1 << 20)
382 uint32_t pmap_vsid[NUM_VSID / 32];
383 
384 uint64_t
385 pmap_alloc_vsid(void)
386 {
387 	uint32_t bits;
388 	uint32_t vsid, bit;
389 
390 	for (;;) {
391 		do {
392 			vsid = arc4random() & (NUM_VSID - 1);
393 			bit = (vsid & (32 - 1));
394 			bits = pmap_vsid[vsid / 32];
395 		} while (bits & (1U << bit));
396 
397 		if (atomic_cas_uint(&pmap_vsid[vsid / 32], bits,
398 		    bits | (1U << bit)) == bits)
399 			return vsid;
400 	}
401 }
402 
403 void
404 pmap_free_vsid(uint64_t vsid)
405 {
406 	uint32_t bits;
407 	int bit;
408 
409 	KASSERT(vsid < NUM_VSID);
410 
411 	bit = (vsid & (32 - 1));
412 	for (;;) {
413 		bits = pmap_vsid[vsid / 32];
414 		if (atomic_cas_uint(&pmap_vsid[vsid / 32], bits,
415 		    bits & ~(1U << bit)) == bits)
416 			break;
417 	}
418 }
419 
420 struct slb_desc *
421 pmap_slbd_alloc(pmap_t pm, vaddr_t va)
422 {
423 	uint64_t esid = va >> ADDR_ESID_SHIFT;
424 	struct slb_desc *slbd;
425 
426 	KASSERT(pm != pmap_kernel());
427 	PMAP_VP_ASSERT_LOCKED(pm);
428 
429 	slbd = pool_get(&pmap_slbd_pool, PR_NOWAIT | PR_ZERO);
430 	if (slbd == NULL)
431 		return NULL;
432 
433 	slbd->slbd_esid = esid;
434 	slbd->slbd_vsid = pmap_alloc_vsid();
435 	KASSERT((slbd->slbd_vsid & KERNEL_VSID_BIT) == 0);
436 	LIST_INSERT_HEAD(&pm->pm_slbd, slbd, slbd_list);
437 
438 	/* We're almost certainly going to use it soon. */
439 	pmap_slbd_cache(pm, slbd);
440 
441 	return slbd;
442 }
443 
444 int
445 pmap_slbd_enter(pmap_t pm, vaddr_t va)
446 {
447 	struct slb_desc *slbd;
448 
449 	PMAP_VP_LOCK(pm);
450 	slbd = pmap_slbd_lookup(pm, va);
451 	if (slbd == NULL)
452 		slbd = pmap_slbd_alloc(pm, va);
453 	PMAP_VP_UNLOCK(pm);
454 
455 	return slbd ? 0 : EFAULT;
456 }
457 
458 int
459 pmap_set_user_slb(pmap_t pm, vaddr_t va, vaddr_t *kva, vsize_t *len)
460 {
461 	struct cpu_info *ci = curcpu();
462 	struct slb_desc *slbd;
463 	uint64_t slbe, slbv;
464 	uint64_t vsid;
465 
466 	KASSERT(pm != pmap_kernel());
467 
468 	PMAP_VP_LOCK(pm);
469 	slbd = pmap_slbd_lookup(pm, va);
470 	if (slbd == NULL) {
471 		slbd = pmap_slbd_alloc(pm, va);
472 		if (slbd == NULL) {
473 			PMAP_VP_UNLOCK(pm);
474 			return EFAULT;
475 		}
476 	}
477 	vsid = slbd->slbd_vsid;
478 	PMAP_VP_UNLOCK(pm);
479 
480 	/*
481 	 * We might get here while another process is sleeping while
482 	 * handling a page fault.  Kill their SLB entry before
483 	 * inserting our own.
484 	 */
485 	if (ci->ci_kernel_slb[31].slb_slbe != 0) {
486 		isync();
487 		slbie(ci->ci_kernel_slb[31].slb_slbe);
488 		isync();
489 	}
490 
491 	slbe = (USER_ESID << SLBE_ESID_SHIFT) | SLBE_VALID | 31;
492 	slbv = vsid << SLBV_VSID_SHIFT;
493 
494 	ci->ci_kernel_slb[31].slb_slbe = slbe;
495 	ci->ci_kernel_slb[31].slb_slbv = slbv;
496 
497 	isync();
498 	slbmte(slbv, slbe);
499 	isync();
500 
501 	curpcb->pcb_userva = (va & ~SEGMENT_MASK);
502 
503 	if (kva)
504 		*kva = USER_ADDR | (va & SEGMENT_MASK);
505 	if (len)
506 		*len = SEGMENT_SIZE - (va & SEGMENT_MASK);
507 
508 	return 0;
509 }
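
/*
 * pmap_set_user_slb() is what lets kernel code such as copyin() and
 * copyout() reach user memory: the user segment containing va is
 * temporarily aliased at USER_ADDR through SLB slot 31, and
 * pmap_unset_user_slb() tears the window down again.  The returned
 * kva/len describe how much of the request fits within that single
 * 256MB segment.
 */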
510 
511 void
512 pmap_clear_user_slb(void)
513 {
514 	struct cpu_info *ci = curcpu();
515 
516 	if (ci->ci_kernel_slb[31].slb_slbe != 0) {
517 		isync();
518 		slbie(ci->ci_kernel_slb[31].slb_slbe);
519 		isync();
520 	}
521 
522 	ci->ci_kernel_slb[31].slb_slbe = 0;
523 	ci->ci_kernel_slb[31].slb_slbv = 0;
524 }
525 
526 void
527 pmap_unset_user_slb(void)
528 {
529 	curpcb->pcb_userva = 0;
530 	pmap_clear_user_slb();
531 }
532 
533 /*
534  * VP routines, virtual to physical translation information.
535  * These data structures are based off of the pmap, per process.
536  */
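
/*
 * A lookup descends from the per-segment slb_desc to a two-level
 * tree: slbd_vp points at a pmapvp1, VP_IDX1(va) selects a pmapvp2
 * and VP_IDX2(va) selects the pte_desc, the two indices together
 * covering the 16 bits of page index within a 256MB segment.
 */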
537 
538 struct pte_desc *
539 pmap_vp_lookup(pmap_t pm, vaddr_t va)
540 {
541 	struct slb_desc *slbd;
542 	struct pmapvp1 *vp1;
543 	struct pmapvp2 *vp2;
544 
545 	slbd = pmap_slbd_lookup(pm, va);
546 	if (slbd == NULL)
547 		return NULL;
548 
549 	vp1 = slbd->slbd_vp;
550 	if (vp1 == NULL)
551 		return NULL;
552 
553 	vp2 = vp1->vp[VP_IDX1(va)];
554 	if (vp2 == NULL)
555 		return NULL;
556 
557 	return vp2->vp[VP_IDX2(va)];
558 }
559 
560 /*
561  * Remove, and return, pted at specified address, NULL if not present.
562  */
563 struct pte_desc *
564 pmap_vp_remove(pmap_t pm, vaddr_t va)
565 {
566 	struct slb_desc *slbd;
567 	struct pmapvp1 *vp1;
568 	struct pmapvp2 *vp2;
569 	struct pte_desc *pted;
570 
571 	slbd = pmap_slbd_lookup(pm, va);
572 	if (slbd == NULL)
573 		return NULL;
574 
575 	vp1 = slbd->slbd_vp;
576 	if (vp1 == NULL)
577 		return NULL;
578 
579 	vp2 = vp1->vp[VP_IDX1(va)];
580 	if (vp2 == NULL)
581 		return NULL;
582 
583 	pted = vp2->vp[VP_IDX2(va)];
584 	vp2->vp[VP_IDX2(va)] = NULL;
585 
586 	return pted;
587 }
588 
589 /*
590  * Create a V -> P mapping for the given pmap and virtual address
591  * with reference to the pte descriptor that is used to map the page.
592  * This code should track vp table allocations
593  * so they can be freed efficiently.
594  */
595 int
596 pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags)
597 {
598 	struct slb_desc *slbd;
599 	struct pmapvp1 *vp1;
600 	struct pmapvp2 *vp2;
601 
602 	slbd = pmap_slbd_lookup(pm, va);
603 	if (slbd == NULL) {
604 		slbd = pmap_slbd_alloc(pm, va);
605 		if (slbd == NULL) {
606 			if ((flags & PMAP_CANFAIL) == 0)
607 				panic("%s: unable to allocate slbd", __func__);
608 			return ENOMEM;
609 		}
610 	}
611 
612 	vp1 = slbd->slbd_vp;
613 	if (vp1 == NULL) {
614 		vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
615 		if (vp1 == NULL) {
616 			if ((flags & PMAP_CANFAIL) == 0)
617 				panic("%s: unable to allocate L1", __func__);
618 			return ENOMEM;
619 		}
620 		slbd->slbd_vp = vp1;
621 	}
622 
623 	vp2 = vp1->vp[VP_IDX1(va)];
624 	if (vp2 == NULL) {
625 		vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
626 		if (vp2 == NULL) {
627 			if ((flags & PMAP_CANFAIL) == 0)
628 				panic("%s: unable to allocate L2", __func__);
629 			return ENOMEM;
630 		}
631 		vp1->vp[VP_IDX1(va)] = vp2;
632 	}
633 
634 	vp2->vp[VP_IDX2(va)] = pted;
635 	return 0;
636 }
637 
638 void
639 pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg)
640 {
641 	mtx_enter(&pg->mdpage.pv_mtx);
642 	LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
643 	pted->pted_va |= PTED_VA_MANAGED_M;
644 	mtx_leave(&pg->mdpage.pv_mtx);
645 }
646 
647 void
648 pmap_remove_pv(struct pte_desc *pted)
649 {
650 	struct vm_page *pg = PHYS_TO_VM_PAGE(pted->pted_pte.pte_lo & PTE_RPGN);
651 
652 	mtx_enter(&pg->mdpage.pv_mtx);
653 	LIST_REMOVE(pted, pted_pv_list);
654 	mtx_leave(&pg->mdpage.pv_mtx);
655 }
656 
657 struct pte *
658 pte_lookup(uint64_t vsid, vaddr_t va)
659 {
660 	uint64_t hash, avpn, pte_hi;
661 	struct pte *pte;
662 	int idx, i;
663 
664 	/* Primary hash. */
665 	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
666 	idx = (hash & pmap_ptab_mask);
667 	pte = pmap_ptable + (idx * 8);
668 	avpn = (vsid << PTE_VSID_SHIFT) |
669 	    (va & ADDR_PIDX) >> (ADDR_VSID_SHIFT - PTE_VSID_SHIFT);
670 	pte_hi = (avpn & PTE_AVPN) | PTE_VALID;
671 
672 	for (i = 0; i < 8; i++) {
673 		if ((pte[i].pte_hi & ~PTE_WIRED) == pte_hi)
674 			return &pte[i];
675 	}
676 
677 	/* Secondary hash. */
678 	idx ^= pmap_ptab_mask;
679 	pte = pmap_ptable + (idx * 8);
680 	pte_hi |= PTE_HID;
681 
682 	for (i = 0; i < 8; i++) {
683 		if ((pte[i].pte_hi & ~PTE_WIRED) == pte_hi)
684 			return &pte[i];
685 	}
686 
687 	return NULL;
688 }
689 
690 /*
691  * Delete a Page Table Entry, section 5.10.1.3.
692  *
693  * Note: hash table must be locked.
694  */
695 void
696 pte_del(struct pte *pte, uint64_t ava)
697 {
698 	pte->pte_hi &= ~PTE_VALID;
699 	ptesync();	/* Ensure update completed. */
700 	tlbie(ava);	/* Invalidate old translation. */
701 	eieio();	/* Order tlbie before tlbsync. */
702 	tlbsync();	/* Ensure tlbie completed on all processors. */
703 	ptesync();	/* Ensure tlbsync and update completed. */
704 }
705 
706 void
707 pte_zap(struct pte *pte, struct pte_desc *pted)
708 {
709 	pte_del(pte, pmap_pted2ava(pted));
710 }
711 
712 void
713 pmap_fill_pte(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
714     vm_prot_t prot, int cache)
715 {
716 	struct pte *pte = &pted->pted_pte;
717 
718 	pted->pted_pmap = pm;
719 	pted->pted_va = va & ~PAGE_MASK;
720 	pted->pted_vsid = pmap_va2vsid(pm, va);
721 	KASSERT(pted->pted_vsid != 0);
722 
723 	pte->pte_hi = (pmap_pted2avpn(pted) & PTE_AVPN) | PTE_VALID;
724 	pte->pte_lo = (pa & PTE_RPGN);
725 
726 	if (pm == pmap_kernel())
727 		pte->pte_hi |= PTE_WIRED;
728 
729 	if (prot & PROT_WRITE)
730 		pte->pte_lo |= PTE_RW;
731 	else
732 		pte->pte_lo |= PTE_RO;
733 	if (prot & PROT_EXEC)
734 		pted->pted_va |= PTED_VA_EXEC_M;
735 	else
736 		pte->pte_lo |= PTE_N;
737 
738 	if (cache == PMAP_CACHE_WB)
739 		pte->pte_lo |= PTE_M;
740 	else
741 		pte->pte_lo |= (PTE_M | PTE_I | PTE_G);
742 
743 	if ((prot & (PROT_READ | PROT_WRITE)) == 0)
744 		pte->pte_lo |= PTE_AC;
745 }
746 
747 void
748 pte_insert(struct pte_desc *pted)
749 {
750 	struct pte *pte;
751 	vaddr_t va;
752 	uint64_t vsid, hash;
753 	int off, try, idx, i;
754 	int s;
755 
756 	PMAP_HASH_LOCK(s);
757 
758 	if ((pte = pmap_ptedinhash(pted)) != NULL)
759 		pte_zap(pte, pted);
760 
761 	pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);
762 
763 	va = pted->pted_va & ~PAGE_MASK;
764 	vsid = pted->pted_vsid;
765 	hash = (vsid & VSID_HASH_MASK) ^ ((va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
766 	idx = (hash & pmap_ptab_mask);
767 
768 	/*
769 	 * instead of starting at the beginning of each pteg,
770 	 * the code should pick a random location within the primary
771 	 * then search all of the entries, then if not yet found,
772 	 * do the same for the secondary.
773 	 * this would reduce the frontloading of the pteg.
774 	 */
775 
776 	/* first just try fill of primary hash */
777 	pte = pmap_ptable + (idx * 8);
778 	for (i = 0; i < 8; i++) {
779 		if (pte[i].pte_hi & PTE_VALID)
780 			continue;
781 
782 		pted->pted_va |= i;
783 
784 		/* Add a Page Table Entry, section 5.10.1.1. */
785 		pte[i].pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
786 		pte[i].pte_lo = pted->pted_pte.pte_lo;
787 		eieio();	/* Order 1st PTE update before 2nd. */
788 		pte[i].pte_hi |= PTE_VALID;
789 		ptesync();	/* Ensure updates completed. */
790 
791 		goto out;
792 	}
793 
794 	/* try fill of secondary hash */
795 	pte = pmap_ptable + (idx ^ pmap_ptab_mask) * 8;
796 	for (i = 0; i < 8; i++) {
797 		if (pte[i].pte_hi & PTE_VALID)
798 			continue;
799 
800 		pted->pted_va |= (i | PTED_VA_HID_M);
801 
802 		/* Add a Page Table Entry, section 5.10.1.1. */
803 		pte[i].pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
804 		pte[i].pte_lo = pted->pted_pte.pte_lo;
805 		eieio();	/* Order 1st PTE update before 2nd. */
806 		pte[i].pte_hi |= (PTE_HID|PTE_VALID);
807 		ptesync();	/* Ensure updates completed. */
808 
809 		goto out;
810 	}
811 
812 	/* need decent replacement algorithm */
813 	off = mftb();
814 
815 	for (try = 0; try < 16; try++) {
816 		pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);
817 		pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M);
818 
819 		idx ^= (PTED_HID(pted) ? pmap_ptab_mask : 0);
820 		pte = pmap_ptable + (idx * 8);
821 		pte += PTED_PTEGIDX(pted); /* increment by index into pteg */
822 
823 		if ((pte->pte_hi & PTE_WIRED) == 0)
824 			break;
825 
826 		off++;
827 	}
828 	/*
829 	 * Since we only wire unmanaged kernel mappings, we should
830 	 * always find a slot that we can replace.
831 	 */
832 	KASSERT(try < 16);
833 
834 	if (pte->pte_hi & PTE_VALID) {
835 		uint64_t avpn, vpn;
836 
837 		avpn = pte->pte_hi & PTE_AVPN;
838 		vsid = avpn >> PTE_VSID_SHIFT;
839 		vpn = avpn << (ADDR_VSID_SHIFT - PTE_VSID_SHIFT - PAGE_SHIFT);
840 
841 		idx ^= ((pte->pte_hi & PTE_HID) ? pmap_ptab_mask : 0);
842 		vpn |= ((idx ^ vsid) & (ADDR_PIDX >> ADDR_PIDX_SHIFT));
843 
844 		pte_del(pte, vpn << PAGE_SHIFT);
845 	}
846 
847 	/* Add a Page Table Entry, section 5.10.1.1. */
848 	pte->pte_hi = pted->pted_pte.pte_hi & ~PTE_VALID;
849 	if (PTED_HID(pted))
850 		pte->pte_hi |= PTE_HID;
851 	pte->pte_lo = pted->pted_pte.pte_lo;
852 	eieio();	/* Order 1st PTE update before 2nd. */
853 	pte->pte_hi |= PTE_VALID;
854 	ptesync();	/* Ensure updates completed. */
855 
856 out:
857 	PMAP_HASH_UNLOCK(s);
858 }
859 
860 void
861 pmap_remove_pted(pmap_t pm, struct pte_desc *pted)
862 {
863 	struct pte *pte;
864 	int s;
865 
866 	KASSERT(pm == pted->pted_pmap);
867 	PMAP_VP_ASSERT_LOCKED(pm);
868 
869 	pm->pm_stats.resident_count--;
870 
871 	if (PTED_WIRED(pted)) {
872 		pm->pm_stats.wired_count--;
873 		pted->pted_va &= ~PTED_VA_WIRED_M;
874 	}
875 
876 	PMAP_HASH_LOCK(s);
877 	if ((pte = pmap_ptedinhash(pted)) != NULL)
878 		pte_zap(pte, pted);
879 	PMAP_HASH_UNLOCK(s);
880 
881 	pted->pted_va &= ~PTED_VA_EXEC_M;
882 	pted->pted_pte.pte_hi &= ~PTE_VALID;
883 
884 	if (PTED_MANAGED(pted))
885 		pmap_remove_pv(pted);
886 
887 	pmap_vp_remove(pm, pted->pted_va);
888 	pool_put(&pmap_pted_pool, pted);
889 }
890 
891 extern struct fdt_reg memreg[];
892 extern int nmemreg;
893 
894 #ifdef DDB
895 extern struct fdt_reg initrd_reg;
896 #endif
897 
898 void memreg_add(const struct fdt_reg *);
899 void memreg_remove(const struct fdt_reg *);
900 
901 vaddr_t vmmap;
902 vaddr_t zero_page;
903 vaddr_t copy_src_page;
904 vaddr_t copy_dst_page;
905 vaddr_t virtual_avail = VM_MIN_KERNEL_ADDRESS;
906 
907 void *
908 pmap_steal_avail(size_t size, size_t align)
909 {
910 	struct fdt_reg reg;
911 	uint64_t start, end;
912 	int i;
913 
914 	for (i = 0; i < nmemreg; i++) {
915 		if (memreg[i].size > size) {
916 			start = (memreg[i].addr + (align - 1)) & ~(align - 1);
917 			end = start + size;
918 			if (end <= memreg[i].addr + memreg[i].size) {
919 				reg.addr = start;
920 				reg.size = end - start;
921 				memreg_remove(&reg);
922 				return (void *)start;
923 			}
924 		}
925 	}
926 	panic("can't allocate");
927 }
928 
929 void
930 pmap_virtual_space(vaddr_t *start, vaddr_t *end)
931 {
932 	*start = virtual_avail;
933 	*end = VM_MAX_KERNEL_ADDRESS;
934 }
935 
936 pmap_t
937 pmap_create(void)
938 {
939 	pmap_t pm;
940 
941 	pm = pool_get(&pmap_pmap_pool, PR_WAITOK | PR_ZERO);
942 	pm->pm_refs = 1;
943 	PMAP_VP_LOCK_INIT(pm);
944 	LIST_INIT(&pm->pm_slbd);
945 	return pm;
946 }
947 
948 /*
949  * Add a reference to a given pmap.
950  */
951 void
952 pmap_reference(pmap_t pm)
953 {
954 	atomic_inc_int(&pm->pm_refs);
955 }
956 
957 /*
958  * Retire the given pmap from service.
959  * Should only be called if the map contains no valid mappings.
960  */
961 void
962 pmap_destroy(pmap_t pm)
963 {
964 	int refs;
965 
966 	refs = atomic_dec_int_nv(&pm->pm_refs);
967 	if (refs > 0)
968 		return;
969 
970 	/*
971 	 * reference count is zero, free pmap resources and free pmap.
972 	 */
973 	pmap_release(pm);
974 	pool_put(&pmap_pmap_pool, pm);
975 }
976 
977 /*
978  * Release any resources held by the given physical map.
979  * Called when a pmap initialized by pmap_pinit is being released.
980  */
981 void
982 pmap_release(pmap_t pm)
983 {
984 	pmap_vp_destroy(pm);
985 }
986 
987 void
988 pmap_vp_destroy(pmap_t pm)
989 {
990 	struct slb_desc *slbd;
991 	struct pmapvp1 *vp1;
992 	struct pmapvp2 *vp2;
993 	struct pte_desc *pted;
994 	int i, j;
995 
996 	while ((slbd = LIST_FIRST(&pm->pm_slbd))) {
997 		vp1 = slbd->slbd_vp;
998 		if (vp1) {
999 			for (i = 0; i < VP_IDX1_CNT; i++) {
1000 				vp2 = vp1->vp[i];
1001 				if (vp2 == NULL)
1002 					continue;
1003 
1004 				for (j = 0; j < VP_IDX2_CNT; j++) {
1005 					pted = vp2->vp[j];
1006 					if (pted == NULL)
1007 						continue;
1008 
1009 					pool_put(&pmap_pted_pool, pted);
1010 				}
1011 				pool_put(&pmap_vp_pool, vp2);
1012 			}
1013 			pool_put(&pmap_vp_pool, vp1);
1014 		}
1015 
1016 		LIST_REMOVE(slbd, slbd_list);
1017 		pmap_free_vsid(slbd->slbd_vsid);
1018 		pool_put(&pmap_slbd_pool, slbd);
1019 	}
1020 }
1021 
1022 void
1023 pmap_init(void)
1024 {
1025 	int i;
1026 
1027 	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_VM, 0,
1028 	    "pmap", &pool_allocator_single);
1029 	pool_setlowat(&pmap_pmap_pool, 2);
1030 	pool_init(&pmap_vp_pool, sizeof(struct pmapvp1), 0, IPL_VM, 0,
1031 	    "vp", &pool_allocator_single);
1032 	pool_setlowat(&pmap_vp_pool, 10);
1033 	pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
1034 	    "pted", NULL);
1035 	pool_setlowat(&pmap_pted_pool, 20);
1036 	pool_init(&pmap_slbd_pool, sizeof(struct slb_desc), 0, IPL_VM, 0,
1037 	    "slbd", NULL);
1038 	pool_setlowat(&pmap_slbd_pool, 5);
1039 
1040 	LIST_INIT(&pmap_kernel()->pm_slbd);
1041 	for (i = 0; i < nitems(kernel_slb_desc); i++) {
1042 		LIST_INSERT_HEAD(&pmap_kernel()->pm_slbd,
1043 		    &kernel_slb_desc[i], slbd_list);
1044 	}
1045 
1046 	pmap_initialized = 1;
1047 }
1048 
1049 int
1050 pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1051 {
1052 	struct pte_desc *pted;
1053 	struct vm_page *pg;
1054 	int cache = PMAP_CACHE_WB;
1055 	int need_sync = 0;
1056 	int error = 0;
1057 
1058 	if (pa & PMAP_NOCACHE)
1059 		cache = PMAP_CACHE_CI;
1060 	pg = PHYS_TO_VM_PAGE(pa);
1061 	if (!pmap_initialized)
1062 		printf("%s\n", __func__);
1063 
1064 	PMAP_VP_LOCK(pm);
1065 	pted = pmap_vp_lookup(pm, va);
1066 	if (pted && PTED_VALID(pted)) {
1067 		pmap_remove_pted(pm, pted);
1068 		pted = NULL;
1069 	}
1070 
1071 	pm->pm_stats.resident_count++;
1072 
1073 	/* Do not have pted for this, get one and put it in VP */
1074 	if (pted == NULL) {
1075 		pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO);
1076 		if (pted == NULL) {
1077 			if ((flags & PMAP_CANFAIL) == 0)
1078 				panic("%s: failed to allocate pted", __func__);
1079 			error = ENOMEM;
1080 			goto out;
1081 		}
1082 		if (pmap_vp_enter(pm, va, pted, flags)) {
1083 			if ((flags & PMAP_CANFAIL) == 0)
1084 				panic("%s: failed to allocate L2/L3", __func__);
1085 			error = ENOMEM;
1086 			pool_put(&pmap_pted_pool, pted);
1087 			goto out;
1088 		}
1089 	}
1090 
1091 	if ((flags & PROT_WRITE) == 0)
1092 		prot &= ~PROT_WRITE;
1093 
1094 	pmap_fill_pte(pm, va, pa, pted, prot, cache);
1095 	if (flags & PMAP_WIRED) {
1096 		pted->pted_va |= PTED_VA_WIRED_M;
1097 		pm->pm_stats.wired_count++;
1098 	}
1099 
1100 	if (pg != NULL) {
1101 		pmap_enter_pv(pted, pg); /* only managed mem */
1102 
1103 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
1104 		if (flags & PROT_WRITE)
1105 			atomic_setbits_int(&pg->pg_flags, PG_PMAP_MOD);
1106 
1107 		if ((pg->pg_flags & PG_DEV) == 0 && cache != PMAP_CACHE_WB)
1108 			pmap_flush_page(pg);
1109 	}
1110 
1111 	pte_insert(pted);
1112 
1113 	if (prot & PROT_EXEC) {
1114 		if (pg != NULL) {
1115 			need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
1116 			if (prot & PROT_WRITE)
1117 				atomic_clearbits_int(&pg->pg_flags,
1118 				    PG_PMAP_EXE);
1119 			else
1120 				atomic_setbits_int(&pg->pg_flags,
1121 				    PG_PMAP_EXE);
1122 		} else
1123 			need_sync = 1;
1124 	} else {
1125 		/*
1126 		 * Should we be paranoid about writeable non-exec
1127 		 * mappings?  If so, clear the exec tag.
1128 		 */
1129 		if ((prot & PROT_WRITE) && (pg != NULL))
1130 			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
1131 	}
1132 
1133 	if (need_sync)
1134 		pmap_pted_syncicache(pted);
1135 
1136 out:
1137 	PMAP_VP_UNLOCK(pm);
1138 	return error;
1139 }
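
/*
 * Illustrative use only: a fault handler typically enters a managed,
 * pageable mapping along the lines of
 *
 *	error = pmap_enter(map->pmap, va, VM_PAGE_TO_PHYS(pg),
 *	    PROT_READ | PROT_WRITE, PROT_READ | PMAP_CANFAIL);
 *
 * where the flags carry the access type that caused the fault plus
 * optional PMAP_CANFAIL/PMAP_WIRED modifiers.  The access type is why
 * pmap_enter() strips PROT_WRITE from prot on a read fault, so that
 * the first write still faults and marks the page PG_PMAP_MOD.
 */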
1140 
1141 void
1142 pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
1143 {
1144 	struct pte_desc *pted;
1145 	vaddr_t va;
1146 
1147 	PMAP_VP_LOCK(pm);
1148 	for (va = sva; va < eva; va += PAGE_SIZE) {
1149 		pted = pmap_vp_lookup(pm, va);
1150 		if (pted && PTED_VALID(pted))
1151 			pmap_remove_pted(pm, pted);
1152 	}
1153 	PMAP_VP_UNLOCK(pm);
1154 }
1155 
1156 void
1157 pmap_pted_syncicache(struct pte_desc *pted)
1158 {
1159 	paddr_t pa = pted->pted_pte.pte_lo & PTE_RPGN;
1160 	vaddr_t va = pted->pted_va & ~PAGE_MASK;
1161 
1162 	if (pted->pted_pmap != pmap_kernel()) {
1163 		va = zero_page + cpu_number() * PAGE_SIZE;
1164 		pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
1165 	}
1166 
1167 	__syncicache((void *)va, PAGE_SIZE);
1168 
1169 	if (pted->pted_pmap != pmap_kernel())
1170 		pmap_kremove(va, PAGE_SIZE);
1171 }
1172 
1173 void
1174 pmap_pted_ro(struct pte_desc *pted, vm_prot_t prot)
1175 {
1176 	struct vm_page *pg;
1177 	struct pte *pte;
1178 	int s;
1179 
1180 	pg = PHYS_TO_VM_PAGE(pted->pted_pte.pte_lo & PTE_RPGN);
1181 	if (pg->pg_flags & PG_PMAP_EXE) {
1182 		if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE)
1183 			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
1184 		else
1185 			pmap_pted_syncicache(pted);
1186 	}
1187 
1188 	pted->pted_pte.pte_lo &= ~PTE_PP;
1189 	pted->pted_pte.pte_lo |= PTE_RO;
1190 
1191 	if ((prot & PROT_EXEC) == 0)
1192 		pted->pted_pte.pte_lo |= PTE_N;
1193 
1194 	if ((prot & (PROT_READ | PROT_WRITE)) == 0)
1195 		pted->pted_pte.pte_lo |= PTE_AC;
1196 
1197 	PMAP_HASH_LOCK(s);
1198 	if ((pte = pmap_ptedinhash(pted)) != NULL) {
1199 		pte_del(pte, pmap_pted2ava(pted));
1200 
1201 		/* Add a Page Table Entry, section 5.10.1.1. */
1202 		pte->pte_lo = pted->pted_pte.pte_lo;
1203 		eieio();	/* Order 1st PTE update before 2nd. */
1204 		pte->pte_hi |= PTE_VALID;
1205 		ptesync();	/* Ensure updates completed. */
1206 	}
1207 	PMAP_HASH_UNLOCK(s);
1208 }
1209 
1210 /*
1211  * Lower the protection on the specified physical page.
1212  *
1213  * There are only two cases, either the protection is going to 0,
1214  * or it is going to read-only.
1215  */
1216 void
1217 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1218 {
1219 	struct pte_desc *pted;
1220 	void *pte;
1221 	pmap_t pm;
1222 	int s;
1223 
1224 	if (prot == PROT_NONE) {
1225 		mtx_enter(&pg->mdpage.pv_mtx);
1226 		while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
1227 			pmap_reference(pted->pted_pmap);
1228 			pm = pted->pted_pmap;
1229 			mtx_leave(&pg->mdpage.pv_mtx);
1230 
1231 			PMAP_VP_LOCK(pm);
1232 
1233 			/*
1234 			 * We dropped the pvlist lock before grabbing
1235 			 * the pmap lock to avoid lock ordering
1236 			 * problems.  This means we have to check the
1237 			 * pvlist again since somebody else might have
1238 			 * modified it.  All we care about is that the
1239 			 * pvlist entry matches the pmap we just
1240 			 * locked.  If it doesn't, unlock the pmap and
1241 			 * try again.
1242 			 */
1243 			mtx_enter(&pg->mdpage.pv_mtx);
1244 			if ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) == NULL ||
1245 			    pted->pted_pmap != pm) {
1246 				mtx_leave(&pg->mdpage.pv_mtx);
1247 				PMAP_VP_UNLOCK(pm);
1248 				pmap_destroy(pm);
1249 				mtx_enter(&pg->mdpage.pv_mtx);
1250 				continue;
1251 			}
1252 
1253 			PMAP_HASH_LOCK(s);
1254 			if ((pte = pmap_ptedinhash(pted)) != NULL)
1255 				pte_zap(pte, pted);
1256 			PMAP_HASH_UNLOCK(s);
1257 
1258 			pted->pted_va &= ~PTED_VA_MANAGED_M;
1259 			LIST_REMOVE(pted, pted_pv_list);
1260 			mtx_leave(&pg->mdpage.pv_mtx);
1261 
1262 			pmap_remove_pted(pm, pted);
1263 
1264 			PMAP_VP_UNLOCK(pm);
1265 			pmap_destroy(pm);
1266 			mtx_enter(&pg->mdpage.pv_mtx);
1267 		}
1268 		mtx_leave(&pg->mdpage.pv_mtx);
1269 		/* page is being reclaimed, sync icache next use */
1270 		atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
1271 		return;
1272 	}
1273 
1274 	mtx_enter(&pg->mdpage.pv_mtx);
1275 	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list)
1276 		pmap_pted_ro(pted, prot);
1277 	mtx_leave(&pg->mdpage.pv_mtx);
1278 }
1279 
1280 void
1281 pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1282 {
1283 	if (prot & (PROT_READ | PROT_EXEC)) {
1284 		struct pte_desc *pted;
1285 
1286 		PMAP_VP_LOCK(pm);
1287 		while (sva < eva) {
1288 			pted = pmap_vp_lookup(pm, sva);
1289 			if (pted && PTED_VALID(pted))
1290 				pmap_pted_ro(pted, prot);
1291 			sva += PAGE_SIZE;
1292 		}
1293 		PMAP_VP_UNLOCK(pm);
1294 		return;
1295 	}
1296 	pmap_remove(pm, sva, eva);
1297 }
1298 
1299 void
1300 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1301 {
1302 	pmap_t pm = pmap_kernel();
1303 	struct pte_desc pted;
1304 	struct vm_page *pg;
1305 	int cache = (pa & PMAP_NOCACHE) ? PMAP_CACHE_CI : PMAP_CACHE_WB;
1306 
1307 	pm->pm_stats.resident_count++;
1308 
1309 	if (prot & PROT_WRITE) {
1310 		pg = PHYS_TO_VM_PAGE(pa);
1311 		if (pg != NULL)
1312 			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
1313 	}
1314 
1315 	/* Calculate PTE */
1316 	pmap_fill_pte(pm, va, pa, &pted, prot, cache);
1317 	pted.pted_pte.pte_hi |= PTE_WIRED;
1318 
1319 	/* Insert into HTAB */
1320 	pte_insert(&pted);
1321 }
1322 
1323 void
1324 pmap_kremove(vaddr_t va, vsize_t len)
1325 {
1326 	pmap_t pm = pmap_kernel();
1327 	vaddr_t eva = va + len;
1328 	struct pte *pte;
1329 	uint64_t vsid;
1330 	int s;
1331 
1332 	while (va < eva) {
1333 		vsid = pmap_kernel_vsid(va >> ADDR_ESID_SHIFT);
1334 
1335 		PMAP_HASH_LOCK(s);
1336 		pte = pte_lookup(vsid, va);
1337 		if (pte)
1338 			pte_del(pte, pmap_ava(vsid, va));
1339 		PMAP_HASH_UNLOCK(s);
1340 
1341 		if (pte)
1342 			pm->pm_stats.resident_count--;
1343 
1344 		va += PAGE_SIZE;
1345 	}
1346 }
1347 
1348 int
1349 pmap_is_referenced(struct vm_page *pg)
1350 {
1351 	return ((pg->pg_flags & PG_PMAP_REF) != 0);
1352 }
1353 
1354 int
1355 pmap_is_modified(struct vm_page *pg)
1356 {
1357 	return ((pg->pg_flags & PG_PMAP_MOD) != 0);
1358 }
1359 
1360 int
1361 pmap_clear_reference(struct vm_page *pg)
1362 {
1363 	struct pte_desc *pted;
1364 	int s;
1365 
1366 	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);
1367 
1368 	mtx_enter(&pg->mdpage.pv_mtx);
1369 	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1370 		struct pte *pte;
1371 
1372 		PMAP_HASH_LOCK(s);
1373 		if ((pte = pmap_ptedinhash(pted)) != NULL)
1374 			pte_zap(pte, pted);
1375 		PMAP_HASH_UNLOCK(s);
1376 	}
1377 	mtx_leave(&pg->mdpage.pv_mtx);
1378 
1379 	return 0;
1380 }
1381 
1382 int
1383 pmap_clear_modify(struct vm_page *pg)
1384 {
1385 	struct pte_desc *pted;
1386 	int s;
1387 
1388 	atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);
1389 
1390 	mtx_enter(&pg->mdpage.pv_mtx);
1391 	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1392 		struct pte *pte;
1393 
1394 		pted->pted_pte.pte_lo &= ~PTE_PP;
1395 		pted->pted_pte.pte_lo |= PTE_RO;
1396 
1397 		PMAP_HASH_LOCK(s);
1398 		if ((pte = pmap_ptedinhash(pted)) != NULL) {
1399 			pte_zap(pte, pted);
1400 
1401 			/* Add a Page Table Entry, section 5.10.1.1. */
1402 			pte->pte_lo = pted->pted_pte.pte_lo;
1403 			eieio();	/* Order 1st PTE update before 2nd. */
1404 			pte->pte_hi |= PTE_VALID;
1405 			ptesync();	/* Ensure updates completed. */
1406 		}
1407 		PMAP_HASH_UNLOCK(s);
1408 	}
1409 	mtx_leave(&pg->mdpage.pv_mtx);
1410 
1411 	return 0;
1412 }
1413 
1414 int
1415 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pa)
1416 {
1417 	struct pte *pte;
1418 	uint64_t vsid;
1419 	int s;
1420 
1421 	if (pm == pmap_kernel() &&
1422 	    va >= (vaddr_t)_start && va < (vaddr_t)_end) {
1423 		*pa = va;
1424 		return 1;
1425 	}
1426 
1427 	PMAP_VP_LOCK(pm);
1428 	vsid = pmap_va2vsid(pm, va);
1429 	PMAP_VP_UNLOCK(pm);
1430 	if (vsid == 0)
1431 		return 0;
1432 
1433 	PMAP_HASH_LOCK(s);
1434 	pte = pte_lookup(vsid, va);
1435 	if (pte)
1436 		*pa = (pte->pte_lo & PTE_RPGN) | (va & PAGE_MASK);
1437 	PMAP_HASH_UNLOCK(s);
1438 
1439 	return (pte != NULL);
1440 }
1441 
1442 void
1443 pmap_activate(struct proc *p)
1444 {
1445 }
1446 
1447 void
1448 pmap_deactivate(struct proc *p)
1449 {
1450 }
1451 
1452 void
1453 pmap_unwire(pmap_t pm, vaddr_t va)
1454 {
1455 	struct pte_desc *pted;
1456 
1457 	PMAP_VP_LOCK(pm);
1458 	pted = pmap_vp_lookup(pm, va);
1459 	if (pted && PTED_WIRED(pted)) {
1460 		pm->pm_stats.wired_count--;
1461 		pted->pted_va &= ~PTED_VA_WIRED_M;
1462 	}
1463 	PMAP_VP_UNLOCK(pm);
1464 }
1465 
1466 void
1467 pmap_zero_page(struct vm_page *pg)
1468 {
1469 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
1470 	paddr_t va = zero_page + cpu_number() * PAGE_SIZE;
1471 	int offset;
1472 
1473 	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
1474 	for (offset = 0; offset < PAGE_SIZE; offset += cacheline_size)
1475 		__asm volatile ("dcbz 0, %0" :: "r"(va + offset));
1476 	pmap_kremove(va, PAGE_SIZE);
1477 }
1478 
1479 void
1480 pmap_flush_page(struct vm_page *pg)
1481 {
1482 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
1483 	paddr_t va = zero_page + cpu_number() * PAGE_SIZE;
1484 	int offset;
1485 
1486 	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
1487 	for (offset = 0; offset < PAGE_SIZE; offset += cacheline_size)
1488 		__asm volatile ("dcbf 0, %0" :: "r"(va + offset));
1489 	pmap_kremove(va, PAGE_SIZE);
1490 }
1491 
1492 void
1493 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1494 {
1495 	paddr_t srcpa = VM_PAGE_TO_PHYS(srcpg);
1496 	paddr_t dstpa = VM_PAGE_TO_PHYS(dstpg);
1497 	vaddr_t srcva = copy_src_page + cpu_number() * PAGE_SIZE;
1498 	vaddr_t dstva = copy_dst_page + cpu_number() * PAGE_SIZE;
1499 
1500 	pmap_kenter_pa(srcva, srcpa, PROT_READ);
1501 	pmap_kenter_pa(dstva, dstpa, PROT_READ | PROT_WRITE);
1502 	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
1503 	pmap_kremove(srcva, PAGE_SIZE);
1504 	pmap_kremove(dstva, PAGE_SIZE);
1505 }
1506 
1507 void
1508 pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
1509 {
1510 	paddr_t pa;
1511 	vaddr_t cva;
1512 	vsize_t clen;
1513 
1514 	while (len > 0) {
1515 		/* add one to always round up to the next page */
1516 		clen = round_page(va + 1) - va;
1517 		if (clen > len)
1518 			clen = len;
1519 
1520 		if (pmap_extract(pr->ps_vmspace->vm_map.pmap, va, &pa)) {
1521 			cva = zero_page + cpu_number() * PAGE_SIZE;
1522 			pmap_kenter_pa(cva, pa, PROT_READ | PROT_WRITE);
1523 			__syncicache((void *)cva, clen);
1524 			pmap_kremove(cva, PAGE_SIZE);
1525 		}
1526 
1527 		len -= clen;
1528 		va += clen;
1529 	}
1530 }
1531 
1532 void
1533 pmap_set_kernel_slb(vaddr_t va)
1534 {
1535 	uint64_t esid;
1536 	int idx;
1537 
1538 	esid = va >> ADDR_ESID_SHIFT;
1539 
1540 	for (idx = 0; idx < nitems(kernel_slb_desc); idx++) {
1541 		if (kernel_slb_desc[idx].slbd_vsid == 0)
1542 			break;
1543 		if (kernel_slb_desc[idx].slbd_esid == esid)
1544 			return;
1545 	}
1546 	KASSERT(idx < nitems(kernel_slb_desc));
1547 
1548 	kernel_slb_desc[idx].slbd_esid = esid;
1549 	kernel_slb_desc[idx].slbd_vsid = pmap_kernel_vsid(esid);
1550 }
1551 
1552 /*
1553  * Handle SLB entry spills for the kernel.  This function runs without
1554  * belt and suspenders in real-mode on a small per-CPU stack.
1555  */
1556 void
1557 pmap_spill_kernel_slb(vaddr_t va)
1558 {
1559 	struct cpu_info *ci = curcpu();
1560 	uint64_t esid;
1561 	uint64_t slbe, slbv;
1562 	int idx;
1563 
1564 	esid = va >> ADDR_ESID_SHIFT;
1565 
1566 	for (idx = 0; idx < 31; idx++) {
1567 		if (ci->ci_kernel_slb[idx].slb_slbe == 0)
1568 			break;
1569 		slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
1570 		if (ci->ci_kernel_slb[idx].slb_slbe == slbe)
1571 			return;
1572 	}
1573 
1574 	/*
1575 	 * If no free slot was found, randomly replace an entry in
1576 	 * slot 15-30.
1577 	 */
1578 	if (idx == 31)
1579 		idx = 15 + mftb() % 16;
1580 
1581 	slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
1582 	slbv = pmap_kernel_vsid(esid) << SLBV_VSID_SHIFT;
1583 
1584 	ci->ci_kernel_slb[idx].slb_slbe = slbe;
1585 	ci->ci_kernel_slb[idx].slb_slbv = slbv;
1586 }
1587 
1588 void
1589 pmap_bootstrap_cpu(void)
1590 {
1591 	struct cpu_info *ci = curcpu();
1592 	uint64_t esid, vsid;
1593 	uint64_t slbe, slbv;
1594 	int idx;
1595 
1596 	/* Clear SLB. */
1597 	slbia();
1598 	slbie(slbmfee(0));
1599 
1600 	/* Clear TLB. */
1601 	tlbia();
1602 
1603 	if (cpu_features2 & PPC_FEATURE2_ARCH_3_00) {
1604 		/* Set partition table. */
1605 		mtptcr((paddr_t)pmap_pat | PATSIZE);
1606 	} else {
1607 		/* Set page table. */
1608 		mtsdr1((paddr_t)pmap_ptable | HTABSIZE);
1609 	}
1610 
1611 	/* Load SLB. */
1612 	for (idx = 0; idx < 31; idx++) {
1613 		if (kernel_slb_desc[idx].slbd_vsid == 0)
1614 			break;
1615 
1616 		esid = kernel_slb_desc[idx].slbd_esid;
1617 		vsid = kernel_slb_desc[idx].slbd_vsid;
1618 
1619 		slbe = (esid << SLBE_ESID_SHIFT) | SLBE_VALID | idx;
1620 		slbv = vsid << SLBV_VSID_SHIFT;
1621 		slbmte(slbv, slbe);
1622 
1623 		ci->ci_kernel_slb[idx].slb_slbe = slbe;
1624 		ci->ci_kernel_slb[idx].slb_slbv = slbv;
1625 	}
1626 }
1627 
1628 void
1629 pmap_bootstrap(void)
1630 {
1631 	paddr_t start, end, pa;
1632 	vm_prot_t prot;
1633 	vaddr_t va;
1634 
1635 #define HTABENTS 2048
1636 
1637 	pmap_ptab_cnt = HTABENTS;
1638 	while (pmap_ptab_cnt * 2 < physmem)
1639 		pmap_ptab_cnt <<= 1;
1640 
1641 	/* Make sure the page tables don't use more than 8 SLB entries. */
1642 	while (HTABMEMSZ > 8 * SEGMENT_SIZE)
1643 		pmap_ptab_cnt >>= 1;
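
	/*
	 * Illustrative numbers: with 8GB of RAM (physmem = 2^21 4K
	 * pages) the first loop above stops at pmap_ptab_cnt = 2^20
	 * PTEGs, i.e. a 128MB hash table, comfortably below the
	 * 8 * 256MB cap just enforced.
	 */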
1644 
1645 	/*
1646 	 * allocate suitably aligned memory for HTAB
1647 	 */
1648 	pmap_ptable = pmap_steal_avail(HTABMEMSZ, HTABMEMSZ);
1649 	memset(pmap_ptable, 0, HTABMEMSZ);
1650 	pmap_ptab_mask = pmap_ptab_cnt - 1;
1651 
1652 	/* Map page tables. */
1653 	start = (paddr_t)pmap_ptable;
1654 	end = start + HTABMEMSZ;
1655 	for (pa = start; pa < end; pa += PAGE_SIZE)
1656 		pmap_kenter_pa(pa, pa, PROT_READ | PROT_WRITE);
1657 
1658 	/* Map kernel. */
1659 	start = (paddr_t)_start;
1660 	end = (paddr_t)_end;
1661 	for (pa = start; pa < end; pa += PAGE_SIZE) {
1662 		if (pa < (paddr_t)_etext)
1663 			prot = PROT_READ | PROT_EXEC;
1664 		else if (pa < (paddr_t)_erodata)
1665 			prot = PROT_READ;
1666 		else
1667 			prot = PROT_READ | PROT_WRITE;
1668 		pmap_kenter_pa(pa, pa, prot);
1669 	}
1670 
1671 #ifdef DDB
1672 	/* Map initrd. */
1673 	start = initrd_reg.addr;
1674 	end = initrd_reg.addr + initrd_reg.size;
1675 	for (pa = start; pa < end; pa += PAGE_SIZE)
1676 		pmap_kenter_pa(pa, pa, PROT_READ | PROT_WRITE);
1677 #endif
1678 
1679 	/* Allocate partition table. */
1680 	pmap_pat = pmap_steal_avail(PATMEMSZ, PATMEMSZ);
1681 	memset(pmap_pat, 0, PATMEMSZ);
1682 	pmap_pat[0].pate_htab = (paddr_t)pmap_ptable | HTABSIZE;
1683 
1684 	/* SLB entry for the kernel. */
1685 	pmap_set_kernel_slb((vaddr_t)_start);
1686 
1687 	/* SLB entries for the page tables. */
1688 	for (va = (vaddr_t)pmap_ptable; va < (vaddr_t)pmap_ptable + HTABMEMSZ;
1689 	     va += SEGMENT_SIZE)
1690 		pmap_set_kernel_slb(va);
1691 
1692 	/* SLB entries for kernel VA. */
1693 	for (va = VM_MIN_KERNEL_ADDRESS; va < VM_MAX_KERNEL_ADDRESS;
1694 	     va += SEGMENT_SIZE)
1695 		pmap_set_kernel_slb(va);
1696 
1697 	pmap_bootstrap_cpu();
1698 
1699 	pmap_vsid[0] |= (1U << 0);
1700 #if VSID_VRMA < NUM_VSID
1701 	pmap_vsid[VSID_VRMA / 32] |= (1U << (VSID_VRMA % 32));
1702 #endif
1703 
1704 	vmmap = virtual_avail;
1705 	virtual_avail += PAGE_SIZE;
1706 	zero_page = virtual_avail;
1707 	virtual_avail += MAXCPUS * PAGE_SIZE;
1708 	copy_src_page = virtual_avail;
1709 	virtual_avail += MAXCPUS * PAGE_SIZE;
1710 	copy_dst_page = virtual_avail;
1711 	virtual_avail += MAXCPUS * PAGE_SIZE;
1712 }
1713 
1714 #ifdef DDB
1715 /*
1716  * DDB will edit the PTE to gain temporary write access to a page in
1717  * the read-only kernel text.
1718  */
1719 struct pte *
1720 pmap_get_kernel_pte(vaddr_t va)
1721 {
1722 	uint64_t vsid;
1723 
1724 	vsid = pmap_kernel_vsid(va >> ADDR_ESID_SHIFT);
1725 	return pte_lookup(vsid, va);
1726 }
1727 #endif
1728