1 /*	$OpenBSD: pmap.c,v 1.185 2024/09/06 10:54:08 jsg Exp $ */
2 
3 /*
4  * Copyright (c) 2015 Martin Pieuchot
5  * Copyright (c) 2001, 2002, 2007 Dale Rahn.
6  * All rights reserved.
7  *
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28  *
29  * Effort sponsored in part by the Defense Advanced Research Projects
30  * Agency (DARPA) and Air Force Research Laboratory, Air Force
31  * Materiel Command, USAF, under agreement number F30602-01-2-0537.
32  */
33 
34 /*
35  * powerpc lazy icache management.
36  * The icache does not snoop dcache accesses. The icache also will not load
37  * modified data from the dcache; it loads the unmodified data from ram.
38  * Before the icache is loaded, the dcache must be synced to ram to prevent
39  * the icache from loading stale data.
40  * pg->pg_flags PG_PMAP_EXE bit is used to track if the dcache is clean
41  * and the icache may have valid data in it.
42  * if the PG_PMAP_EXE bit is set (and the page is not currently RWX)
43  * the icache will only have valid code in it. If the bit is clear
44  * memory may not match the dcache contents or the icache may contain
45  * data from a previous page.
46  *
47  * pmap enter
48  * !E  NONE 	-> R	no action
49  * !E  NONE|R 	-> RW	no action
50  * !E  NONE|R 	-> RX	flush dcache, inval icache (that page only), set E
51  * !E  NONE|R 	-> RWX	flush dcache, inval icache (that page only), set E
52  * !E  NONE|RW 	-> RWX	flush dcache, inval icache (that page only), set E
53  *  E  NONE 	-> R	no action
54  *  E  NONE|R 	-> RW	clear PG_PMAP_EXE bit
55  *  E  NONE|R 	-> RX	no action
56  *  E  NONE|R 	-> RWX	no action
57  *  E  NONE|RW 	-> RWX	- invalid source state
58  *
59  * pmap_protect
60  *  E RW -> R	- invalid source state
61  * !E RW -> R	- no action
62  *  * RX -> R	- no action
63  *  * RWX -> R	- sync dcache, inval icache
64  *  * RWX -> RW	- clear PG_PMAP_EXE
65  *  * RWX -> RX	- sync dcache, inval icache
66  *  * * -> NONE	- no action
67  *
68  * pmap_page_protect (called with arg PROT_NONE if page is to be reused)
69  *  * RW -> R	- as pmap_protect
70  *  * RX -> R	- as pmap_protect
71  *  * RWX -> R	- as pmap_protect
72  *  * RWX -> RW	- as pmap_protect
73  *  * RWX -> RX	- as pmap_protect
74  *  * * -> NONE - clear PG_PMAP_EXE
75  *
76  */
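/*
 * Editor's sketch, illustrative only: the "pmap enter" rows above reduce
 * to roughly the following test, mirroring what pmap_enter() does below
 * with PG_PMAP_EXE and pmap_syncicache_user_virt():
 *
 *	if (prot & PROT_EXEC) {
 *		if ((pg->pg_flags & PG_PMAP_EXE) == 0)
 *			flush dcache and invalidate icache for the page;
 *		if (prot & PROT_WRITE)
 *			clear PG_PMAP_EXE (page becomes writable);
 *		else
 *			set PG_PMAP_EXE;
 *	}
 */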
77 
78 #include <sys/param.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/queue.h>
82 #include <sys/pool.h>
83 #include <sys/atomic.h>
84 #include <sys/user.h>
85 
86 #include <uvm/uvm_extern.h>
87 
88 #include <machine/pcb.h>
89 #include <powerpc/powerpc.h>
90 #include <powerpc/bat.h>
91 #include <machine/pmap.h>
92 
93 struct bat battable[16];
94 
95 struct dumpmem dumpmem[VM_PHYSSEG_MAX];
96 u_int ndumpmem;
97 
98 struct pmap kernel_pmap_;
99 static struct mem_region *pmap_mem, *pmap_avail;
100 struct mem_region pmap_allocated[10];
101 int pmap_cnt_avail;
102 int pmap_cnt_allocated;
103 
104 struct pte_64  *pmap_ptable64;
105 struct pte_32  *pmap_ptable32;
106 int	pmap_ptab_cnt;
107 u_int	pmap_ptab_mask;
108 
109 #define HTABSIZE_32	(pmap_ptab_cnt * 64)
110 #define HTABMEMSZ_64	(pmap_ptab_cnt * 8 * sizeof(struct pte_64))
111 #define HTABSIZE_64	(ffs(pmap_ptab_cnt) - 12)
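/*
 * Worked example (editor's note; assumes the usual 8-byte pte_32 and
 * 16-byte pte_64 layouts): with pmap_ptab_cnt == 2048 PTEGs,
 * HTABSIZE_32 is 2048 * 64 = 128KB, HTABMEMSZ_64 is 2048 * 8 * 16 =
 * 256KB, and HTABSIZE_64 is ffs(2048) - 12 = 0, the minimum hash table
 * size encoding that pmap_enable_mmu() writes into SDR1.
 */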
112 
113 static u_int usedsr[NPMAPS / sizeof(u_int) / 8];
114 
115 struct pte_desc {
116 	/* Linked list of phys -> virt entries */
117 	LIST_ENTRY(pte_desc) pted_pv_list;
118 	union {
119 		struct pte_32 pted_pte32;
120 		struct pte_64 pted_pte64;
121 	} p;
122 	pmap_t pted_pmap;
123 	vaddr_t pted_va;
124 };
125 
126 void pmap_attr_save(paddr_t pa, u_int32_t bits);
127 void pmap_pted_ro(struct pte_desc *, vm_prot_t);
128 void pmap_pted_ro64(struct pte_desc *, vm_prot_t);
129 void pmap_pted_ro32(struct pte_desc *, vm_prot_t);
130 
131 /*
132  * Some functions are called in real mode and cannot be profiled.
133  */
134 #define __noprof __attribute__((__no_instrument_function__))
135 
136 /* VP routines */
137 int pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags);
138 struct pte_desc *pmap_vp_remove(pmap_t pm, vaddr_t va);
139 void pmap_vp_destroy(pmap_t pm);
140 struct pte_desc *pmap_vp_lookup(pmap_t pm, vaddr_t va) __noprof;
141 
142 /* PV routines */
143 void pmap_enter_pv(struct pte_desc *pted, struct vm_page *);
144 void pmap_remove_pv(struct pte_desc *pted);
145 
146 
147 /* pte hash table routines */
148 static inline void *pmap_ptedinhash(struct pte_desc *);
149 void pte_insert32(struct pte_desc *) __noprof;
150 void pte_insert64(struct pte_desc *) __noprof;
151 void pmap_fill_pte64(pmap_t, vaddr_t, paddr_t, struct pte_desc *, vm_prot_t,
152     int) __noprof;
153 void pmap_fill_pte32(pmap_t, vaddr_t, paddr_t, struct pte_desc *, vm_prot_t,
154     int) __noprof;
155 
156 void pmap_syncicache_user_virt(pmap_t pm, vaddr_t va);
157 
158 void pmap_remove_pted(pmap_t, struct pte_desc *);
159 
160 /* setup/initialization functions */
161 void pmap_avail_setup(void);
162 void pmap_avail_fixup(void);
163 void pmap_remove_avail(paddr_t base, paddr_t end);
164 void *pmap_steal_avail(size_t size, int align);
165 
166 /* asm interface */
167 int pte_spill_r(u_int32_t, u_int32_t, u_int32_t, int) __noprof;
168 int pte_spill_v(pmap_t, u_int32_t, u_int32_t, int) __noprof;
169 
170 u_int32_t pmap_setusr(pmap_t pm, vaddr_t va);
171 void pmap_popusr(u_int32_t oldsr);
172 
173 /* pte invalidation */
174 void pte_del(void *, vaddr_t);
175 void pte_zap(void *, struct pte_desc *);
176 
177 /* XXX - panic on pool get failures? */
178 struct pool pmap_pmap_pool;
179 struct pool pmap_vp_pool;
180 struct pool pmap_pted_pool;
181 
182 int pmap_initialized = 0;
183 int physmem;
184 int physmaxaddr;
185 
186 #ifdef MULTIPROCESSOR
187 struct __ppc_lock pmap_hash_lock = PPC_LOCK_INITIALIZER;
188 
189 #define	PMAP_HASH_LOCK(s)						\
190 do {									\
191 	s = ppc_intr_disable();						\
192 	__ppc_lock(&pmap_hash_lock);					\
193 } while (0)
194 
195 #define	PMAP_HASH_UNLOCK(s)						\
196 do {									\
197 	__ppc_unlock(&pmap_hash_lock);					\
198 	ppc_intr_enable(s);						\
199 } while (0)
200 
201 #define	PMAP_VP_LOCK_INIT(pm)		mtx_init(&pm->pm_mtx, IPL_VM)
202 
203 #define	PMAP_VP_LOCK(pm)						\
204 do {									\
205 	if (pm != pmap_kernel())					\
206 		mtx_enter(&pm->pm_mtx);					\
207 } while (0)
208 
209 #define	PMAP_VP_UNLOCK(pm)						\
210 do {									\
211 	if (pm != pmap_kernel())					\
212 		mtx_leave(&pm->pm_mtx);					\
213 } while (0)
214 
215 #define PMAP_VP_ASSERT_LOCKED(pm)					\
216 do {									\
217 	if (pm != pmap_kernel())					\
218 		MUTEX_ASSERT_LOCKED(&pm->pm_mtx);			\
219 } while (0)
220 
221 #else /* ! MULTIPROCESSOR */
222 
223 #define	PMAP_HASH_LOCK(s)		(void)s
224 #define	PMAP_HASH_UNLOCK(s)		/* nothing */
225 
226 #define	PMAP_VP_LOCK_INIT(pm)		/* nothing */
227 #define	PMAP_VP_LOCK(pm)		/* nothing */
228 #define	PMAP_VP_UNLOCK(pm)		/* nothing */
229 #define	PMAP_VP_ASSERT_LOCKED(pm)	/* nothing */
230 #endif /* MULTIPROCESSOR */
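/*
 * Usage sketch (editor's note, matching callers such as
 * pmap_remove_pted() and pmap_test_attrs() below): the hash lock is
 * held, with interrupts disabled, around any access to the HTAB, and
 * the VP lock serializes vp-table updates of non-kernel pmaps:
 *
 *	int s;
 *
 *	PMAP_HASH_LOCK(s);
 *	if ((pte = pmap_ptedinhash(pted)) != NULL)
 *		...examine or modify the hash entry...
 *	PMAP_HASH_UNLOCK(s);
 */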
231 
232 /* virtual to physical helpers */
233 static inline int
234 VP_SR(vaddr_t va)
235 {
236 	return (va >>VP_SR_POS) & VP_SR_MASK;
237 }
238 
239 static inline int
240 VP_IDX1(vaddr_t va)
241 {
242 	return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
243 }
244 
245 static inline int
246 VP_IDX2(vaddr_t va)
247 {
248 	return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
249 }
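/*
 * Editor's sketch of the lookup these helpers drive (the exact shift
 * and mask values come from machine/pmap.h and are not repeated here):
 * a virtual address resolves in three steps, segment first, then two
 * index levels, as in pmap_vp_lookup() below:
 *
 *	pm->pm_vp[VP_SR(va)]		-> struct pmapvp (per segment)
 *	    ->vp[VP_IDX1(va)]		-> struct pmapvp (second level)
 *	        ->vp[VP_IDX2(va)]	-> struct pte_desc for the page
 */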
250 
251 #if VP_IDX1_SIZE != VP_IDX2_SIZE
252 #error pmap allocation code expects IDX1 and IDX2 size to be same
253 #endif
254 struct pmapvp {
255 	void *vp[VP_IDX1_SIZE];
256 };
257 
258 
259 /*
260  * VP routines, virtual to physical translation information.
261  * These data structures are based off of the pmap, per process.
262  */
263 
264 /*
265  * This is used for pmap_kernel() mappings; they are not to be removed
266  * from the vp table because they were statically initialized during
267  * the initial pmap setup. This is so that no memory allocation is
268  * necessary for pmap_kernel() mappings.
269  * Otherwise bad race conditions can appear.
270  */
271 struct pte_desc *
272 pmap_vp_lookup(pmap_t pm, vaddr_t va)
273 {
274 	struct pmapvp *vp1;
275 	struct pmapvp *vp2;
276 	struct pte_desc *pted;
277 
278 	PMAP_VP_ASSERT_LOCKED(pm);
279 
280 	vp1 = pm->pm_vp[VP_SR(va)];
281 	if (vp1 == NULL) {
282 		return NULL;
283 	}
284 
285 	vp2 = vp1->vp[VP_IDX1(va)];
286 	if (vp2 == NULL) {
287 		return NULL;
288 	}
289 
290 	pted = vp2->vp[VP_IDX2(va)];
291 
292 	return pted;
293 }
294 
295 /*
296  * Remove, and return, pted at specified address, NULL if not present
297  */
298 struct pte_desc *
299 pmap_vp_remove(pmap_t pm, vaddr_t va)
300 {
301 	struct pmapvp *vp1;
302 	struct pmapvp *vp2;
303 	struct pte_desc *pted;
304 
305 	PMAP_VP_ASSERT_LOCKED(pm);
306 
307 	vp1 = pm->pm_vp[VP_SR(va)];
308 	if (vp1 == NULL) {
309 		return NULL;
310 	}
311 
312 	vp2 = vp1->vp[VP_IDX1(va)];
313 	if (vp2 == NULL) {
314 		return NULL;
315 	}
316 
317 	pted = vp2->vp[VP_IDX2(va)];
318 	vp2->vp[VP_IDX2(va)] = NULL;
319 
320 	return pted;
321 }
322 
323 /*
324  * Create a V -> P mapping for the given pmap and virtual address
325  * with reference to the pte descriptor that is used to map the page.
326  * This code should track vp table allocations
327  * so they can be freed efficiently.
328  */
329 int
330 pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags)
331 {
332 	struct pmapvp *vp1;
333 	struct pmapvp *vp2;
334 
335 	PMAP_VP_ASSERT_LOCKED(pm);
336 
337 	vp1 = pm->pm_vp[VP_SR(va)];
338 	if (vp1 == NULL) {
339 		vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
340 		if (vp1 == NULL) {
341 			if ((flags & PMAP_CANFAIL) == 0)
342 				panic("pmap_vp_enter: failed to allocate vp1");
343 			return ENOMEM;
344 		}
345 		pm->pm_vp[VP_SR(va)] = vp1;
346 	}
347 
348 	vp2 = vp1->vp[VP_IDX1(va)];
349 	if (vp2 == NULL) {
350 		vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
351 		if (vp2 == NULL) {
352 			if ((flags & PMAP_CANFAIL) == 0)
353 				panic("pmap_vp_enter: failed to allocate vp2");
354 			return ENOMEM;
355 		}
356 		vp1->vp[VP_IDX1(va)] = vp2;
357 	}
358 
359 	vp2->vp[VP_IDX2(va)] = pted;
360 
361 	return 0;
362 }
363 
364 static inline void
365 tlbie(vaddr_t va)
366 {
367 	asm volatile ("tlbie %0" :: "r"(va & ~PAGE_MASK));
368 }
369 
370 static inline void
371 tlbsync(void)
372 {
373 	asm volatile ("tlbsync");
374 }
375 static inline void
376 eieio(void)
377 {
378 	asm volatile ("eieio");
379 }
380 
381 static inline void
382 sync(void)
383 {
384 	asm volatile ("sync");
385 }
386 
387 static inline void
388 tlbia(void)
389 {
390 	vaddr_t va;
391 
392 	sync();
393 	for (va = 0; va < 0x00040000; va += 0x00001000)
394 		tlbie(va);
395 	eieio();
396 	tlbsync();
397 	sync();
398 }
399 
400 static inline int
401 ptesr(sr_t *sr, vaddr_t va)
402 {
403 	return sr[(u_int)va >> ADDR_SR_SHIFT];
404 }
405 
406 static inline int
407 pteidx(sr_t sr, vaddr_t va)
408 {
409 	int hash;
410 	hash = (sr & SR_VSID) ^ (((u_int)va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
411 	return hash & pmap_ptab_mask;
412 }
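/*
 * Editorial note: this is the primary hash of the PowerPC hashed page
 * table; the secondary hash is its one's complement within
 * pmap_ptab_mask.  pmap_ptedinhash() below recomputes this index and
 * applies "idx ^ pmap_ptab_mask" when PTED_HID() indicates the entry
 * was inserted using the secondary hash.
 */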
413 
414 #define PTED_VA_PTEGIDX_M	0x07
415 #define PTED_VA_HID_M		0x08
416 #define PTED_VA_MANAGED_M	0x10
417 #define PTED_VA_WIRED_M		0x20
418 #define PTED_VA_EXEC_M		0x40
419 
420 static inline u_int32_t
421 PTED_HID(struct pte_desc *pted)
422 {
423 	return (pted->pted_va & PTED_VA_HID_M);
424 }
425 
426 static inline u_int32_t
427 PTED_PTEGIDX(struct pte_desc *pted)
428 {
429 	return (pted->pted_va & PTED_VA_PTEGIDX_M);
430 }
431 
432 static inline u_int32_t
433 PTED_MANAGED(struct pte_desc *pted)
434 {
435 	return (pted->pted_va & PTED_VA_MANAGED_M);
436 }
437 
438 static inline u_int32_t
439 PTED_VALID(struct pte_desc *pted)
440 {
441 	if (ppc_proc_is_64b)
442 		return (pted->p.pted_pte64.pte_hi & PTE_VALID_64);
443 	else
444 		return (pted->p.pted_pte32.pte_hi & PTE_VALID_32);
445 }
446 
447 /*
448  * PV entries -
449  * manipulate the physical to virtual translations for the entire system.
450  *
451  * QUESTION: should all mapped memory be stored in PV tables? Or
452  * is it alright to only store "ram" memory. Currently device mappings
453  * are not stored.
454  * It makes sense to pre-allocate mappings for all of "ram" memory, since
455  * it is likely that it will be mapped at some point, but would it also
456  * make sense to use a tree/table like the one used for pmap to store device
457  * mappings?
458  * Further notes: It seems that the PV table is only used for pmap_protect
459  * and other paging related operations. Given this, it is not necessary
460  * to store any pmap_kernel() entries in PV tables and does not make
461  * sense to store device mappings in PV either.
462  *
463  * Note: unlike other powerpc pmap designs, the array is only an array
464  * of pointers. Since the same structure is used for holding information
465  * in the VP table, in the PV table, and for the kernel's wired mappings,
466  * one data structure is allocated to hold all of the info instead of
467  * replicating it multiple times.
468  *
469  * One issue with making this a single data structure is that two pointers
470  * are wasted for every page which does not map ram (device mappings); this
471  * should be a low percentage of mapped pages in the system, so the
472  * unnecessary ram consumption should not be too noticeable.
473  */
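/*
 * A minimal sketch of how the PV list is consumed elsewhere in this
 * file (see pmap_test_attrs() and pmap_page_protect()); the list is
 * always walked with pg->mdpage.pv_mtx held:
 *
 *	mtx_enter(&pg->mdpage.pv_mtx);
 *	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
 *		...per-mapping work...
 *	}
 *	mtx_leave(&pg->mdpage.pv_mtx);
 */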
474 
475 void
476 pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg)
477 {
478 	if (__predict_false(!pmap_initialized)) {
479 		return;
480 	}
481 
482 	mtx_enter(&pg->mdpage.pv_mtx);
483 	LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
484 	pted->pted_va |= PTED_VA_MANAGED_M;
485 	mtx_leave(&pg->mdpage.pv_mtx);
486 }
487 
488 void
489 pmap_remove_pv(struct pte_desc *pted)
490 {
491 	struct vm_page *pg;
492 
493 	if (ppc_proc_is_64b)
494 		pg = PHYS_TO_VM_PAGE(pted->p.pted_pte64.pte_lo & PTE_RPGN_64);
495 	else
496 		pg = PHYS_TO_VM_PAGE(pted->p.pted_pte32.pte_lo & PTE_RPGN_32);
497 
498 	mtx_enter(&pg->mdpage.pv_mtx);
499 	pted->pted_va &= ~PTED_VA_MANAGED_M;
500 	LIST_REMOVE(pted, pted_pv_list);
501 	mtx_leave(&pg->mdpage.pv_mtx);
502 }
503 
504 
505 /* PTE_CHG_32 == PTE_CHG_64 */
506 /* PTE_REF_32 == PTE_REF_64 */
507 static __inline u_int
508 pmap_pte2flags(u_int32_t pte)
509 {
510 	return (((pte & PTE_REF_32) ? PG_PMAP_REF : 0) |
511 	    ((pte & PTE_CHG_32) ? PG_PMAP_MOD : 0));
512 }
513 
514 static __inline u_int
515 pmap_flags2pte(u_int32_t flags)
516 {
517 	return (((flags & PG_PMAP_REF) ? PTE_REF_32 : 0) |
518 	    ((flags & PG_PMAP_MOD) ? PTE_CHG_32 : 0));
519 }
520 
521 void
522 pmap_attr_save(paddr_t pa, u_int32_t bits)
523 {
524 	struct vm_page *pg;
525 
526 	pg = PHYS_TO_VM_PAGE(pa);
527 	if (pg == NULL)
528 		return;
529 
530 	atomic_setbits_int(&pg->pg_flags,  pmap_pte2flags(bits));
531 }
532 
533 int
534 pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
535 {
536 	struct pte_desc *pted;
537 	struct vm_page *pg;
538 	boolean_t nocache = (pa & PMAP_NOCACHE) != 0;
539 	boolean_t wt = (pa & PMAP_WT) != 0;
540 	int need_sync = 0;
541 	int cache, error = 0;
542 
543 	KASSERT(!(wt && nocache));
544 	pa &= PMAP_PA_MASK;
545 
546 	PMAP_VP_LOCK(pm);
547 	pted = pmap_vp_lookup(pm, va);
548 	if (pted && PTED_VALID(pted)) {
549 		pmap_remove_pted(pm, pted);
550 		/* we lost our pted if it was user */
551 		if (pm != pmap_kernel())
552 			pted = pmap_vp_lookup(pm, va);
553 	}
554 
555 	pm->pm_stats.resident_count++;
556 
557 	/* Do not have pted for this, get one and put it in VP */
558 	if (pted == NULL) {
559 		pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO);
560 		if (pted == NULL) {
561 			if ((flags & PMAP_CANFAIL) == 0) {
562 				error = ENOMEM;
563 				goto out;
564 			}
565 			panic("pmap_enter: failed to allocate pted");
566 		}
567 		error = pmap_vp_enter(pm, va, pted, flags);
568 		if (error) {
569 			pool_put(&pmap_pted_pool, pted);
570 			goto out;
571 		}
572 	}
573 
574 	pg = PHYS_TO_VM_PAGE(pa);
575 	if (pg != NULL && (pg->pg_flags & PG_PMAP_UC))
576 		nocache = TRUE;
577 	if (wt)
578 		cache = PMAP_CACHE_WT;
579 	else if (pg != NULL && !(pg->pg_flags & PG_DEV) && !nocache)
580 		cache = PMAP_CACHE_WB;
581 	else
582 		cache = PMAP_CACHE_CI;
583 
584 	/* Calculate PTE */
585 	if (ppc_proc_is_64b)
586 		pmap_fill_pte64(pm, va, pa, pted, prot, cache);
587 	else
588 		pmap_fill_pte32(pm, va, pa, pted, prot, cache);
589 
590 	if (pg != NULL) {
591 		pmap_enter_pv(pted, pg); /* only managed mem */
592 	}
593 
594 	/*
595 	 * Insert into HTAB
596 	 * We were told to map the page, probably called from vm_fault,
597 	 * so map the page!
598 	 */
599 	if (ppc_proc_is_64b)
600 		pte_insert64(pted);
601 	else
602 		pte_insert32(pted);
603 
604         if (prot & PROT_EXEC) {
605 		u_int sn = VP_SR(va);
606 
607         	pm->pm_exec[sn]++;
608 		if (pm->pm_sr[sn] & SR_NOEXEC)
609 			pm->pm_sr[sn] &= ~SR_NOEXEC;
610 
611 		if (pg != NULL) {
612 			need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
613 			if (prot & PROT_WRITE)
614 				atomic_clearbits_int(&pg->pg_flags,
615 				    PG_PMAP_EXE);
616 			else
617 				atomic_setbits_int(&pg->pg_flags,
618 				    PG_PMAP_EXE);
619 		} else
620 			need_sync = 1;
621 	} else {
622 		/*
623 		 * Should we be paranoid about writeable non-exec
624 		 * mappings? If so, clear the exec tag.
625 		 */
626 		if ((prot & PROT_WRITE) && (pg != NULL))
627 			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
628 	}
629 
630 	/* only instruction sync executable pages */
631 	if (need_sync)
632 		pmap_syncicache_user_virt(pm, va);
633 
634 out:
635 	PMAP_VP_UNLOCK(pm);
636 	return (error);
637 }
638 
639 /*
640  * Remove the given range of mapping entries.
641  */
642 void
643 pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
644 {
645 	struct pte_desc *pted;
646 	vaddr_t va;
647 
648 	PMAP_VP_LOCK(pm);
649 	for (va = sva; va < eva; va += PAGE_SIZE) {
650 		pted = pmap_vp_lookup(pm, va);
651 		if (pted && PTED_VALID(pted))
652 			pmap_remove_pted(pm, pted);
653 	}
654 	PMAP_VP_UNLOCK(pm);
655 }
656 
657 /*
658  * remove a single mapping, notice that this code is O(1)
659  */
660 void
661 pmap_remove_pted(pmap_t pm, struct pte_desc *pted)
662 {
663 	void *pte;
664 	int s;
665 
666 	KASSERT(pm == pted->pted_pmap);
667 	PMAP_VP_ASSERT_LOCKED(pm);
668 
669 	pm->pm_stats.resident_count--;
670 
671 	PMAP_HASH_LOCK(s);
672 	if ((pte = pmap_ptedinhash(pted)) != NULL)
673 		pte_zap(pte, pted);
674 	PMAP_HASH_UNLOCK(s);
675 
676 	if (pted->pted_va & PTED_VA_EXEC_M) {
677 		u_int sn = VP_SR(pted->pted_va);
678 
679 		pted->pted_va &= ~PTED_VA_EXEC_M;
680 		pm->pm_exec[sn]--;
681 		if (pm->pm_exec[sn] == 0)
682 			pm->pm_sr[sn] |= SR_NOEXEC;
683 	}
684 
685 	if (ppc_proc_is_64b)
686 		pted->p.pted_pte64.pte_hi &= ~PTE_VALID_64;
687 	else
688 		pted->p.pted_pte32.pte_hi &= ~PTE_VALID_32;
689 
690 	if (PTED_MANAGED(pted))
691 		pmap_remove_pv(pted);
692 
693 	if (pm != pmap_kernel()) {
694 		(void)pmap_vp_remove(pm, pted->pted_va);
695 		pool_put(&pmap_pted_pool, pted);
696 	}
697 }
698 
699 /*
700  * Enter a kernel mapping for the given page.
701  * kernel mappings have a larger set of prerequisites than normal mappings.
702  *
703  * 1. no memory should be allocated to create a kernel mapping.
704  * 2. a vp mapping should already exist, even if invalid. (see 1)
705  * 3. all vp tree mappings should already exist (see 1)
706  *
707  */
708 void
709 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
710 {
711 	struct pte_desc *pted;
712 	struct vm_page *pg;
713 	boolean_t nocache = (pa & PMAP_NOCACHE) != 0;
714 	boolean_t wt = (pa & PMAP_WT) != 0;
715 	pmap_t pm;
716 	int cache;
717 
718 	KASSERT(!(wt && nocache));
719 	pa &= PMAP_PA_MASK;
720 
721 	pm = pmap_kernel();
722 
723 	pted = pmap_vp_lookup(pm, va);
724 	if (pted && PTED_VALID(pted))
725 		pmap_remove_pted(pm, pted); /* pted is reused */
726 
727 	pm->pm_stats.resident_count++;
728 
729 	if (prot & PROT_WRITE) {
730 		pg = PHYS_TO_VM_PAGE(pa);
731 		if (pg != NULL)
732 			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
733 	}
734 
735 	/* Do not have pted for this, get one and put it in VP */
736 	if (pted == NULL) {
737 		panic("pted not preallocated in pmap_kernel() va %lx pa %lx",
738 		    va, pa);
739 	}
740 
741 	pg = PHYS_TO_VM_PAGE(pa);
742 	if (wt)
743 		cache = PMAP_CACHE_WT;
744 	else if (pg != NULL && !(pg->pg_flags & PG_DEV) && !nocache)
745 		cache = PMAP_CACHE_WB;
746 	else
747 		cache = PMAP_CACHE_CI;
748 
749 	/* Calculate PTE */
750 	if (ppc_proc_is_64b)
751 		pmap_fill_pte64(pm, va, pa, pted, prot, cache);
752 	else
753 		pmap_fill_pte32(pm, va, pa, pted, prot, cache);
754 
755 	/*
756 	 * Insert into HTAB
757 	 * We were told to map the page, probably called from vm_fault,
758 	 * so map the page!
759 	 */
760 	if (ppc_proc_is_64b)
761 		pte_insert64(pted);
762 	else
763 		pte_insert32(pted);
764 
765 	pted->pted_va |= PTED_VA_WIRED_M;
766 
767         if (prot & PROT_EXEC) {
768 		u_int sn = VP_SR(va);
769 
770         	pm->pm_exec[sn]++;
771 		if (pm->pm_sr[sn] & SR_NOEXEC)
772 			pm->pm_sr[sn] &= ~SR_NOEXEC;
773 	}
774 }
775 
776 /*
777  * remove kernel (pmap_kernel()) mappings
778  */
779 void
780 pmap_kremove(vaddr_t va, vsize_t len)
781 {
782 	struct pte_desc *pted;
783 
784 	for (len >>= PAGE_SHIFT; len > 0; len--, va += PAGE_SIZE) {
785 		pted = pmap_vp_lookup(pmap_kernel(), va);
786 		if (pted && PTED_VALID(pted))
787 			pmap_remove_pted(pmap_kernel(), pted);
788 	}
789 }
790 
791 static inline void *
792 pmap_ptedinhash(struct pte_desc *pted)
793 {
794 	vaddr_t va = pted->pted_va & ~PAGE_MASK;
795 	pmap_t pm = pted->pted_pmap;
796 	int sr, idx;
797 
798 	sr = ptesr(pm->pm_sr, va);
799 	idx = pteidx(sr, va);
800 
801 	if (ppc_proc_is_64b) {
802 		struct pte_64 *pte = pmap_ptable64;
803 
804 		pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8;
805 		pte += PTED_PTEGIDX(pted);
806 
807 		/*
808 		 * We now have the pointer to where it will be, if it is
809 		 * currently mapped. If the mapping was thrown away in
810 		 * exchange for another page mapping, then this page is
811 		 * not currently in the HASH.
812 		 */
813 		if ((pted->p.pted_pte64.pte_hi |
814 		    (PTED_HID(pted) ? PTE_HID_64 : 0)) == pte->pte_hi)
815 			return (pte);
816 	} else {
817 		struct pte_32 *pte = pmap_ptable32;
818 
819 		pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8;
820 		pte += PTED_PTEGIDX(pted);
821 
822 		/*
823 		 * We now have the pointer to where it will be, if it is
824 		 * currently mapped. If the mapping was thrown away in
825 		 * exchange for another page mapping, then this page is
826 		 * not currently in the HASH.
827 		 */
828 		if ((pted->p.pted_pte32.pte_hi |
829 		    (PTED_HID(pted) ? PTE_HID_32 : 0)) == pte->pte_hi)
830 			return (pte);
831 	}
832 
833 	return (NULL);
834 }
835 
836 /*
837  * Delete a Page Table Entry, section 7.6.3.3.
838  *
839  * Note: pte must be locked.
840  */
841 void
842 pte_del(void *pte, vaddr_t va)
843 {
844 	if (ppc_proc_is_64b)
845 		((struct pte_64 *)pte)->pte_hi &= ~PTE_VALID_64;
846 	else
847 		((struct pte_32 *)pte)->pte_hi &= ~PTE_VALID_32;
848 
849 	sync();		/* Ensure update completed. */
850 	tlbie(va);	/* Invalidate old translation. */
851 	eieio();	/* Order tlbie before tlbsync. */
852 	tlbsync();	/* Ensure tlbie completed on all processors. */
853 	sync();		/* Ensure tlbsync and update completed. */
854 }
855 
856 void
857 pte_zap(void *pte, struct pte_desc *pted)
858 {
859 	pte_del(pte, pted->pted_va);
860 
861 	if (!PTED_MANAGED(pted))
862 		return;
863 
864 	if (ppc_proc_is_64b) {
865 		pmap_attr_save(pted->p.pted_pte64.pte_lo & PTE_RPGN_64,
866 		    ((struct pte_64 *)pte)->pte_lo & (PTE_REF_64|PTE_CHG_64));
867 	} else {
868 		pmap_attr_save(pted->p.pted_pte32.pte_lo & PTE_RPGN_32,
869 		    ((struct pte_32 *)pte)->pte_lo & (PTE_REF_32|PTE_CHG_32));
870 	}
871 }
872 
873 /*
874  * What about execution control? Even at only a segment granularity.
875  */
876 void
877 pmap_fill_pte64(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
878 	vm_prot_t prot, int cache)
879 {
880 	sr_t sr;
881 	struct pte_64 *pte64;
882 
883 	sr = ptesr(pm->pm_sr, va);
884 	pte64 = &pted->p.pted_pte64;
885 
886 	pte64->pte_hi = (((u_int64_t)sr & SR_VSID) <<
887 	   PTE_VSID_SHIFT_64) |
888 	    ((va >> ADDR_API_SHIFT_64) & PTE_API_64) | PTE_VALID_64;
889 	pte64->pte_lo = (pa & PTE_RPGN_64);
890 
891 
892 	if (cache == PMAP_CACHE_WB)
893 		pte64->pte_lo |= PTE_M_64;
894 	else if (cache == PMAP_CACHE_WT)
895 		pte64->pte_lo |= (PTE_W_64 | PTE_M_64);
896 	else
897 		pte64->pte_lo |= (PTE_M_64 | PTE_I_64 | PTE_G_64);
898 
899 	if ((prot & (PROT_READ | PROT_WRITE)) == 0)
900 		pte64->pte_lo |= PTE_AC_64;
901 
902 	if (prot & PROT_WRITE)
903 		pte64->pte_lo |= PTE_RW_64;
904 	else
905 		pte64->pte_lo |= PTE_RO_64;
906 
907 	pted->pted_va = va & ~PAGE_MASK;
908 
909 	if (prot & PROT_EXEC)
910 		pted->pted_va  |= PTED_VA_EXEC_M;
911 	else
912 		pte64->pte_lo |= PTE_N_64;
913 
914 	pted->pted_pmap = pm;
915 }
916 
917 /*
918  * What about execution control? Even at only a segment granularity.
919  */
920 void
921 pmap_fill_pte32(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
922 	vm_prot_t prot, int cache)
923 {
924 	sr_t sr;
925 	struct pte_32 *pte32;
926 
927 	sr = ptesr(pm->pm_sr, va);
928 	pte32 = &pted->p.pted_pte32;
929 
930 	pte32->pte_hi = ((sr & SR_VSID) << PTE_VSID_SHIFT_32) |
931 	    ((va >> ADDR_API_SHIFT_32) & PTE_API_32) | PTE_VALID_32;
932 	pte32->pte_lo = (pa & PTE_RPGN_32);
933 
934 	if (cache == PMAP_CACHE_WB)
935 		pte32->pte_lo |= PTE_M_32;
936 	else if (cache == PMAP_CACHE_WT)
937 		pte32->pte_lo |= (PTE_W_32 | PTE_M_32);
938 	else
939 		pte32->pte_lo |= (PTE_M_32 | PTE_I_32 | PTE_G_32);
940 
941 	if (prot & PROT_WRITE)
942 		pte32->pte_lo |= PTE_RW_32;
943 	else
944 		pte32->pte_lo |= PTE_RO_32;
945 
946 	pted->pted_va = va & ~PAGE_MASK;
947 
948 	/* XXX Per-page execution control. */
949 	if (prot & PROT_EXEC)
950 		pted->pted_va  |= PTED_VA_EXEC_M;
951 
952 	pted->pted_pmap = pm;
953 }
954 
955 int
956 pmap_test_attrs(struct vm_page *pg, u_int flagbit)
957 {
958 	u_int bits;
959 	struct pte_desc *pted;
960 	u_int ptebit = pmap_flags2pte(flagbit);
961 	int s;
962 
963 	/* PTE_CHG_32 == PTE_CHG_64 */
964 	/* PTE_REF_32 == PTE_REF_64 */
965 
966 	bits = pg->pg_flags & flagbit;
967 	if (bits == flagbit)
968 		return bits;
969 
970 	mtx_enter(&pg->mdpage.pv_mtx);
971 	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
972 		void *pte;
973 
974 		PMAP_HASH_LOCK(s);
975 		if ((pte = pmap_ptedinhash(pted)) != NULL) {
976 			if (ppc_proc_is_64b) {
977 				struct pte_64 *ptp64 = pte;
978 				bits |=	pmap_pte2flags(ptp64->pte_lo & ptebit);
979 			} else {
980 				struct pte_32 *ptp32 = pte;
981 				bits |=	pmap_pte2flags(ptp32->pte_lo & ptebit);
982 			}
983 		}
984 		PMAP_HASH_UNLOCK(s);
985 
986 		if (bits == flagbit)
987 			break;
988 	}
989 	mtx_leave(&pg->mdpage.pv_mtx);
990 
991 	atomic_setbits_int(&pg->pg_flags,  bits);
992 
993 	return bits;
994 }
995 
996 int
997 pmap_clear_attrs(struct vm_page *pg, u_int flagbit)
998 {
999 	u_int bits;
1000 	struct pte_desc *pted;
1001 	u_int ptebit = pmap_flags2pte(flagbit);
1002 	int s;
1003 
1004 	/* PTE_CHG_32 == PTE_CHG_64 */
1005 	/* PTE_REF_32 == PTE_REF_64 */
1006 
1007 	bits = pg->pg_flags & flagbit;
1008 
1009 	mtx_enter(&pg->mdpage.pv_mtx);
1010 	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1011 		void *pte;
1012 
1013 		PMAP_HASH_LOCK(s);
1014 		if ((pte = pmap_ptedinhash(pted)) != NULL) {
1015 			if (ppc_proc_is_64b) {
1016 				struct pte_64 *ptp64 = pte;
1017 
1018 				bits |=	pmap_pte2flags(ptp64->pte_lo & ptebit);
1019 
1020 				pte_del(ptp64, pted->pted_va);
1021 
1022 				ptp64->pte_lo &= ~ptebit;
1023 				eieio();
1024 				ptp64->pte_hi |= PTE_VALID_64;
1025 				sync();
1026 			} else {
1027 				struct pte_32 *ptp32 = pte;
1028 
1029 				bits |=	pmap_pte2flags(ptp32->pte_lo & ptebit);
1030 
1031 				pte_del(ptp32, pted->pted_va);
1032 
1033 				ptp32->pte_lo &= ~ptebit;
1034 				eieio();
1035 				ptp32->pte_hi |= PTE_VALID_32;
1036 				sync();
1037 			}
1038 		}
1039 		PMAP_HASH_UNLOCK(s);
1040 	}
1041 	mtx_leave(&pg->mdpage.pv_mtx);
1042 
1043 	/*
1044 	 * this is done a second time, because while walking the list
1045 	 * a bit could have been promoted via pmap_attr_save()
1046 	 */
1047 	bits |= pg->pg_flags & flagbit;
1048 	atomic_clearbits_int(&pg->pg_flags,  flagbit);
1049 
1050 	return bits;
1051 }
1052 
1053 /*
1054  * Fill the given physical page with zeros.
1055  */
1056 void
1057 pmap_zero_page(struct vm_page *pg)
1058 {
1059 	vaddr_t va = pmap_map_direct(pg);
1060 	int i;
1061 
1062 	/*
1063 	 * Loop over & zero cache lines.  This code assumes that 64-bit
1064 	 * CPUs have 128-byte cache lines.  We explicitly use ``dcbzl''
1065 	 * here because we do not clear the DCBZ_SIZE bit of the HID5
1066 	 * register in order to be compatible with code using ``dcbz''
1067 	 * and assuming that cache line size is 32.
1068 	 */
1069 	if (ppc_proc_is_64b) {
1070 		for (i = 0; i < PAGE_SIZE; i += 128)
1071 			asm volatile ("dcbzl 0,%0" :: "r"(va + i));
1072 		return;
1073 	}
1074 
1075 	for (i = 0; i < PAGE_SIZE; i += CACHELINESIZE)
1076 		asm volatile ("dcbz 0,%0" :: "r"(va + i));
1077 }
1078 
1079 /*
1080  * Copy a page.
1081  */
1082 void
1083 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1084 {
1085 	vaddr_t srcva = pmap_map_direct(srcpg);
1086 	vaddr_t dstva = pmap_map_direct(dstpg);
1087 
1088 	memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
1089 }
1090 
1091 int pmap_id_avail = 0;
1092 
1093 pmap_t
1094 pmap_create(void)
1095 {
1096 	u_int bits;
1097 	int first, i, k, try, tblidx, tbloff;
1098 	int seg;
1099 	pmap_t pm;
1100 
1101 	pm = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO);
1102 
1103 	pmap_reference(pm);
1104 	PMAP_VP_LOCK_INIT(pm);
1105 
1106 	/*
1107 	 * Allocate segment registers for this pmap.
1108 	 * Try not to reuse pmap ids, to spread the hash table usage.
1109 	 */
1110 	first = pmap_id_avail;
1111 again:
1112 	for (i = 0; i < NPMAPS; i++) {
1113 		try = first + i;
1114 		try = try % NPMAPS; /* truncate back into bounds */
1115 		tblidx = try / (8 * sizeof usedsr[0]);
1116 		tbloff = try % (8 * sizeof usedsr[0]);
1117 		bits = usedsr[tblidx];
1118 		if ((bits & (1U << tbloff)) == 0) {
1119 			if (atomic_cas_uint(&usedsr[tblidx], bits,
1120 			    bits | (1U << tbloff)) != bits) {
1121 				first = try;
1122 				goto again;
1123 			}
1124 			pmap_id_avail = try + 1;
1125 
1126 			seg = try << 4;
1127 			for (k = 0; k < 16; k++)
1128 				pm->pm_sr[k] = (seg + k) | SR_NOEXEC;
1129 			return (pm);
1130 		}
1131 	}
1132 	panic("out of pmap slots");
1133 }
1134 
1135 /*
1136  * Add a reference to a given pmap.
1137  */
1138 void
1139 pmap_reference(pmap_t pm)
1140 {
1141 	atomic_inc_int(&pm->pm_refs);
1142 }
1143 
1144 /*
1145  * Retire the given pmap from service.
1146  * Should only be called if the map contains no valid mappings.
1147  */
1148 void
1149 pmap_destroy(pmap_t pm)
1150 {
1151 	int refs;
1152 
1153 	refs = atomic_dec_int_nv(&pm->pm_refs);
1154 	if (refs == -1)
1155 		panic("re-entering pmap_destroy");
1156 	if (refs > 0)
1157 		return;
1158 
1159 	/*
1160 	 * reference count is zero, free pmap resources and free pmap.
1161 	 */
1162 	pmap_release(pm);
1163 	pool_put(&pmap_pmap_pool, pm);
1164 }
1165 
1166 /*
1167  * Release any resources held by the given physical map.
1168  * Called when a pmap initialized by pmap_pinit is being released.
1169  */
1170 void
1171 pmap_release(pmap_t pm)
1172 {
1173 	int i, tblidx, tbloff;
1174 
1175 	pmap_vp_destroy(pm);
1176 	i = (pm->pm_sr[0] & SR_VSID) >> 4;
1177 	tblidx = i / (8  * sizeof usedsr[0]);
1178 	tbloff = i % (8  * sizeof usedsr[0]);
1179 
1180 	/* powerpc can do atomic cas, clearbits on same word. */
1181 	atomic_clearbits_int(&usedsr[tblidx], 1U << tbloff);
1182 }
1183 
1184 void
1185 pmap_vp_destroy(pmap_t pm)
1186 {
1187 	int i, j;
1188 	struct pmapvp *vp1;
1189 	struct pmapvp *vp2;
1190 
1191 	for (i = 0; i < VP_SR_SIZE; i++) {
1192 		vp1 = pm->pm_vp[i];
1193 		if (vp1 == NULL)
1194 			continue;
1195 
1196 		for (j = 0; j < VP_IDX1_SIZE; j++) {
1197 			vp2 = vp1->vp[j];
1198 			if (vp2 == NULL)
1199 				continue;
1200 
1201 			pool_put(&pmap_vp_pool, vp2);
1202 		}
1203 		pm->pm_vp[i] = NULL;
1204 		pool_put(&pmap_vp_pool, vp1);
1205 	}
1206 }
1207 
1208 void
1209 pmap_avail_setup(void)
1210 {
1211 	struct mem_region *mp;
1212 
1213 	ppc_mem_regions(&pmap_mem, &pmap_avail);
1214 
1215 	for (mp = pmap_mem; mp->size !=0; mp++, ndumpmem++) {
1216 		physmem += atop(mp->size);
1217 		dumpmem[ndumpmem].start = atop(mp->start);
1218 		dumpmem[ndumpmem].end = atop(mp->start + mp->size);
1219 	}
1220 
1221 	for (mp = pmap_avail; mp->size !=0 ; mp++) {
1222 		if (physmaxaddr <  mp->start + mp->size)
1223 			physmaxaddr = mp->start + mp->size;
1224 	}
1225 
1226 	for (mp = pmap_avail; mp->size !=0; mp++)
1227 		pmap_cnt_avail += 1;
1228 }
1229 
1230 void
1231 pmap_avail_fixup(void)
1232 {
1233 	struct mem_region *mp;
1234 	u_int32_t align;
1235 	u_int32_t end;
1236 
1237 	mp = pmap_avail;
1238 	while(mp->size !=0) {
1239 		align = round_page(mp->start);
1240 		if (mp->start != align) {
1241 			pmap_remove_avail(mp->start, align);
1242 			mp = pmap_avail;
1243 			continue;
1244 		}
1245 		end = mp->start+mp->size;
1246 		align = trunc_page(end);
1247 		if (end != align) {
1248 			pmap_remove_avail(align, end);
1249 			mp = pmap_avail;
1250 			continue;
1251 		}
1252 		mp++;
1253 	}
1254 }
1255 
1256 /* remove a given region from avail memory */
1257 void
1258 pmap_remove_avail(paddr_t base, paddr_t end)
1259 {
1260 	struct mem_region *mp;
1261 	int i;
1262 	int mpend;
1263 
1264 	/* remove given region from available */
1265 	for (mp = pmap_avail; mp->size; mp++) {
1266 		/*
1267 		 * Check if this region holds all of the region
1268 		 */
1269 		mpend = mp->start + mp->size;
1270 		if (base > mpend) {
1271 			continue;
1272 		}
1273 		if (base <= mp->start) {
1274 			if (end <= mp->start)
1275 				break; /* region not present -??? */
1276 
1277 			if (end >= mpend) {
1278 				/* covers whole region */
1279 				/* shorten */
1280 				for (i = mp - pmap_avail;
1281 				    i < pmap_cnt_avail;
1282 				    i++) {
1283 					pmap_avail[i] = pmap_avail[i+1];
1284 				}
1285 				pmap_cnt_avail--;
1286 				pmap_avail[pmap_cnt_avail].size = 0;
1287 			} else {
1288 				mp->start = end;
1289 				mp->size = mpend - end;
1290 			}
1291 		} else {
1292 			/* start after the beginning */
1293 			if (end >= mpend) {
1294 				/* just truncate */
1295 				mp->size = base - mp->start;
1296 			} else {
1297 				/* split */
1298 				for (i = pmap_cnt_avail;
1299 				    i > (mp - pmap_avail);
1300 				    i--) {
1301 					pmap_avail[i] = pmap_avail[i - 1];
1302 				}
1303 				pmap_cnt_avail++;
1304 				mp->size = base - mp->start;
1305 				mp++;
1306 				mp->start = end;
1307 				mp->size = mpend - end;
1308 			}
1309 		}
1310 	}
1311 	for (mp = pmap_allocated; mp->size != 0; mp++) {
1312 		if (base < mp->start) {
1313 			if (end == mp->start) {
1314 				mp->start = base;
1315 				mp->size += end - base;
1316 				break;
1317 			}
1318 			/* lengthen */
1319 			for (i = pmap_cnt_allocated; i > (mp - pmap_allocated);
1320 			    i--) {
1321 				pmap_allocated[i] = pmap_allocated[i - 1];
1322 			}
1323 			pmap_cnt_allocated++;
1324 			mp->start = base;
1325 			mp->size = end - base;
1326 			return;
1327 		}
1328 		if (base == (mp->start + mp->size)) {
1329 			mp->size += end - base;
1330 			return;
1331 		}
1332 	}
1333 	if (mp->size == 0) {
1334 		mp->start = base;
1335 		mp->size  = end - base;
1336 		pmap_cnt_allocated++;
1337 	}
1338 }
1339 
1340 void *
1341 pmap_steal_avail(size_t size, int align)
1342 {
1343 	struct mem_region *mp;
1344 	int start;
1345 	int remsize;
1346 
1347 	for (mp = pmap_avail; mp->size; mp++) {
1348 		if (mp->size > size) {
1349 			start = (mp->start + (align -1)) & ~(align -1);
1350 			remsize = mp->size - (start - mp->start);
1351 			if (remsize >= 0) {
1352 				pmap_remove_avail(start, start+size);
1353 				return (void *)start;
1354 			}
1355 		}
1356 	}
1357 	panic ("unable to allocate region with size %zx align %x",
1358 	    size, align);
1359 }
1360 
1361 /*
1362  * Similar to pmap_steal_avail, but operating on vm_physmem since
1363  * uvm_page_physload() has been called.
1364  */
1365 vaddr_t
1366 pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end)
1367 {
1368 	int segno;
1369 	u_int npg;
1370 	vaddr_t va;
1371 	paddr_t pa;
1372 	struct vm_physseg *seg;
1373 
1374 	size = round_page(size);
1375 	npg = atop(size);
1376 
1377 	for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) {
1378 		if (seg->avail_end - seg->avail_start < npg)
1379 			continue;
1380 		/*
1381 		 * We can only steal at an ``unused'' segment boundary,
1382 		 * i.e. either at the start or at the end.
1383 		 */
1384 		if (seg->avail_start == seg->start ||
1385 		    seg->avail_end == seg->end)
1386 			break;
1387 	}
1388 	if (segno == vm_nphysseg)
1389 		va = 0;
1390 	else {
1391 		if (seg->avail_start == seg->start) {
1392 			pa = ptoa(seg->avail_start);
1393 			seg->avail_start += npg;
1394 			seg->start += npg;
1395 		} else {
1396 			pa = ptoa(seg->avail_end) - size;
1397 			seg->avail_end -= npg;
1398 			seg->end -= npg;
1399 		}
1400 		/*
1401 		 * If all the segment has been consumed now, remove it.
1402 		 * Note that the crash dump code still knows about it
1403 		 * and will dump it correctly.
1404 		 */
1405 		if (seg->start == seg->end) {
1406 			if (vm_nphysseg-- == 1)
1407 				panic("pmap_steal_memory: out of memory");
1408 			while (segno < vm_nphysseg) {
1409 				seg[0] = seg[1]; /* struct copy */
1410 				seg++;
1411 				segno++;
1412 			}
1413 		}
1414 
1415 		va = (vaddr_t)pa;	/* 1:1 mapping */
1416 		bzero((void *)va, size);
1417 	}
1418 
1419 	if (start != NULL)
1420 		*start = VM_MIN_KERNEL_ADDRESS;
1421 	if (end != NULL)
1422 		*end = VM_MAX_KERNEL_ADDRESS;
1423 
1424 	return (va);
1425 }
1426 
1427 void *msgbuf_addr;
1428 
1429 /*
1430  * Initialize pmap setup.
1431  * ALL of the code which deals with avail needs to be rewritten as an
1432  * actual memory allocation.
1433  */
1434 void
1435 pmap_bootstrap(u_int kernelstart, u_int kernelend)
1436 {
1437 	struct mem_region *mp;
1438 	int i, k;
1439 	struct pmapvp *vp1;
1440 	struct pmapvp *vp2;
1441 	extern vaddr_t ppc_kvm_stolen;
1442 
1443 	/*
1444 	 * set the page size (default value is 4K which is ok)
1445 	 */
1446 	uvm_setpagesize();
1447 
1448 	/*
1449 	 * Get memory.
1450 	 */
1451 	pmap_avail_setup();
1452 
1453 	/*
1454 	 * Page align all regions.
1455 	 * Non-page memory isn't very interesting to us.
1456 	 * Also, sort the entries for ascending addresses.
1457 	 */
1458 	kernelstart = trunc_page(kernelstart);
1459 	kernelend = round_page(kernelend);
1460 	pmap_remove_avail(kernelstart, kernelend);
1461 
1462 	msgbuf_addr = pmap_steal_avail(MSGBUFSIZE,4);
1463 
1464 #ifdef DEBUG
1465 	for (mp = pmap_avail; mp->size; mp++) {
1466 		bzero((void *)mp->start, mp->size);
1467 	}
1468 #endif
1469 
1470 #define HTABENTS_32 1024
1471 #define HTABENTS_64 2048
1472 
1473 	if (ppc_proc_is_64b) {
1474 		pmap_ptab_cnt = HTABENTS_64;
1475 		while (pmap_ptab_cnt * 2 < physmem)
1476 			pmap_ptab_cnt <<= 1;
1477 	} else {
1478 		pmap_ptab_cnt = HTABENTS_32;
1479 		while (HTABSIZE_32 < (ptoa(physmem) >> 7))
1480 			pmap_ptab_cnt <<= 1;
1481 	}
1482 	/*
1483 	 * allocate suitably aligned memory for HTAB
1484 	 */
1485 	if (ppc_proc_is_64b) {
1486 		pmap_ptable64 = pmap_steal_avail(HTABMEMSZ_64, HTABMEMSZ_64);
1487 		bzero((void *)pmap_ptable64, HTABMEMSZ_64);
1488 		pmap_ptab_mask = pmap_ptab_cnt - 1;
1489 	} else {
1490 		pmap_ptable32 = pmap_steal_avail(HTABSIZE_32, HTABSIZE_32);
1491 		bzero((void *)pmap_ptable32, HTABSIZE_32);
1492 		pmap_ptab_mask = pmap_ptab_cnt - 1;
1493 	}
1494 
1495 	/* allocate v->p mappings for pmap_kernel() */
1496 	for (i = 0; i < VP_SR_SIZE; i++) {
1497 		pmap_kernel()->pm_vp[i] = NULL;
1498 	}
1499 	vp1 = pmap_steal_avail(sizeof (struct pmapvp), 4);
1500 	bzero (vp1, sizeof(struct pmapvp));
1501 	pmap_kernel()->pm_vp[PPC_KERNEL_SR] = vp1;
1502 	for (i = 0; i < VP_IDX1_SIZE; i++) {
1503 		vp2 = vp1->vp[i] = pmap_steal_avail(sizeof (struct pmapvp), 4);
1504 		bzero (vp2, sizeof(struct pmapvp));
1505 		for (k = 0; k < VP_IDX2_SIZE; k++) {
1506 			struct pte_desc *pted;
1507 			pted = pmap_steal_avail(sizeof (struct pte_desc), 4);
1508 			bzero (pted, sizeof (struct pte_desc));
1509 			vp2->vp[k] = pted;
1510 		}
1511 	}
1512 
1513 	/*
1514 	 * Initialize kernel pmap and hardware.
1515 	 */
1516 #if NPMAPS >= PPC_KERNEL_SEGMENT / 16
1517 	usedsr[PPC_KERNEL_SEGMENT / 16 / (sizeof usedsr[0] * 8)]
1518 		|= 1 << ((PPC_KERNEL_SEGMENT / 16) % (sizeof usedsr[0] * 8));
1519 #endif
1520 	for (i = 0; i < 16; i++)
1521 		pmap_kernel()->pm_sr[i] = (PPC_KERNEL_SEG0 + i) | SR_NOEXEC;
1522 
1523 	if (ppc_nobat) {
1524 		vp1 = pmap_steal_avail(sizeof (struct pmapvp), 4);
1525 		bzero (vp1, sizeof(struct pmapvp));
1526 		pmap_kernel()->pm_vp[0] = vp1;
1527 		for (i = 0; i < VP_IDX1_SIZE; i++) {
1528 			vp2 = vp1->vp[i] =
1529 			    pmap_steal_avail(sizeof (struct pmapvp), 4);
1530 			bzero (vp2, sizeof(struct pmapvp));
1531 			for (k = 0; k < VP_IDX2_SIZE; k++) {
1532 				struct pte_desc *pted;
1533 				pted = pmap_steal_avail(sizeof (struct pte_desc), 4);
1534 				bzero (pted, sizeof (struct pte_desc));
1535 				vp2->vp[k] = pted;
1536 			}
1537 		}
1538 
1539 		/* first segment contains executable pages */
1540 		pmap_kernel()->pm_exec[0]++;
1541 		pmap_kernel()->pm_sr[0] &= ~SR_NOEXEC;
1542 	} else {
1543 		/*
1544 		 * Setup fixed BAT registers.
1545 		 *
1546 		 * Note that we still run in real mode, and the BAT
1547 		 * registers were cleared in cpu_bootstrap().
1548 		 */
1549 		battable[0].batl = BATL(0x00000000, BAT_M);
1550 		if (physmem > atop(0x08000000))
1551 			battable[0].batu = BATU(0x00000000, BAT_BL_256M);
1552 		else
1553 			battable[0].batu = BATU(0x00000000, BAT_BL_128M);
1554 
1555 		/* Map physical memory with BATs. */
1556 		if (physmem > atop(0x10000000)) {
1557 			battable[0x1].batl = BATL(0x10000000, BAT_M);
1558 			battable[0x1].batu = BATU(0x10000000, BAT_BL_256M);
1559 		}
1560 		if (physmem > atop(0x20000000)) {
1561 			battable[0x2].batl = BATL(0x20000000, BAT_M);
1562 			battable[0x2].batu = BATU(0x20000000, BAT_BL_256M);
1563 		}
1564 		if (physmem > atop(0x30000000)) {
1565 			battable[0x3].batl = BATL(0x30000000, BAT_M);
1566 			battable[0x3].batu = BATU(0x30000000, BAT_BL_256M);
1567 		}
1568 		if (physmem > atop(0x40000000)) {
1569 			battable[0x4].batl = BATL(0x40000000, BAT_M);
1570 			battable[0x4].batu = BATU(0x40000000, BAT_BL_256M);
1571 		}
1572 		if (physmem > atop(0x50000000)) {
1573 			battable[0x5].batl = BATL(0x50000000, BAT_M);
1574 			battable[0x5].batu = BATU(0x50000000, BAT_BL_256M);
1575 		}
1576 		if (physmem > atop(0x60000000)) {
1577 			battable[0x6].batl = BATL(0x60000000, BAT_M);
1578 			battable[0x6].batu = BATU(0x60000000, BAT_BL_256M);
1579 		}
1580 		if (physmem > atop(0x70000000)) {
1581 			battable[0x7].batl = BATL(0x70000000, BAT_M);
1582 			battable[0x7].batu = BATU(0x70000000, BAT_BL_256M);
1583 		}
1584 	}
1585 
1586 	ppc_kvm_stolen += reserve_dumppages( (caddr_t)(VM_MIN_KERNEL_ADDRESS +
1587 	    ppc_kvm_stolen));
1588 
1589 	pmap_avail_fixup();
1590 	for (mp = pmap_avail; mp->size; mp++) {
1591 		if (mp->start > 0x80000000)
1592 			continue;
1593 		if (mp->start + mp->size > 0x80000000)
1594 			mp->size = 0x80000000 - mp->start;
1595 		uvm_page_physload(atop(mp->start), atop(mp->start+mp->size),
1596 		    atop(mp->start), atop(mp->start+mp->size), 0);
1597 	}
1598 }
1599 
1600 void
1601 pmap_enable_mmu(void)
1602 {
1603 	uint32_t scratch, sdr1;
1604 	int i;
1605 
1606 	/*
1607 	 * For the PowerPC 970, ACCR = 3 inhibits loads and stores to
1608 	 * pages with PTE_AC_64.  This is for execute-only mappings.
1609 	 */
1610 	if (ppc_proc_is_64b)
1611 		asm volatile ("mtspr 29, %0" :: "r" (3));
1612 
1613 	if (!ppc_nobat) {
1614 		extern caddr_t etext;
1615 
1616 		/* DBAT0 used for initial segment */
1617 		ppc_mtdbat0l(battable[0].batl);
1618 		ppc_mtdbat0u(battable[0].batu);
1619 
1620 		/* IBAT0 only covering the kernel .text */
1621 		ppc_mtibat0l(battable[0].batl);
1622 		if (round_page((vaddr_t)&etext) < 8*1024*1024)
1623 			ppc_mtibat0u(BATU(0x00000000, BAT_BL_8M));
1624 		else
1625 			ppc_mtibat0u(BATU(0x00000000, BAT_BL_16M));
1626 	}
1627 
1628 	for (i = 0; i < 16; i++)
1629 		ppc_mtsrin(PPC_KERNEL_SEG0 + i, i << ADDR_SR_SHIFT);
1630 
1631 	if (ppc_proc_is_64b)
1632 		sdr1 = (uint32_t)pmap_ptable64 | HTABSIZE_64;
1633 	else
1634 		sdr1 = (uint32_t)pmap_ptable32 | (pmap_ptab_mask >> 10);
1635 
1636 	asm volatile ("sync; mtsdr1 %0; isync" :: "r"(sdr1));
1637 	tlbia();
1638 
1639 	asm volatile ("eieio; mfmsr %0; ori %0,%0,%1; mtmsr %0; sync; isync"
1640 	    : "=r"(scratch) : "K"(PSL_IR|PSL_DR|PSL_ME|PSL_RI));
1641 }
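/*
 * Editor's note on the SDR1 value built above (SDR1 layout per the
 * PowerPC OEA, stated as background rather than taken from this file):
 * on 32-bit CPUs SDR1 is the physical HTAB origin or'ed with HTABMASK,
 * so with the default pmap_ptab_cnt of 1024 the "pmap_ptab_mask >> 10"
 * term is 0, growing by one bit each time pmap_bootstrap() doubles the
 * table; on 64-bit CPUs the low bits hold the HTABSIZE_64 encoding.
 */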
1642 
1643 /*
1644  * activate a pmap entry
1645  * All PTE entries exist in the same hash table.
1646  * Segment registers are filled on exit to user mode.
1647  */
1648 void
1649 pmap_activate(struct proc *p)
1650 {
1651 	struct pcb *pcb = &p->p_addr->u_pcb;
1652 
1653 	/* Set the current pmap. */
1654 	pcb->pcb_pm = p->p_vmspace->vm_map.pmap;
1655 	pmap_extract(pmap_kernel(),
1656 	    (vaddr_t)pcb->pcb_pm, (paddr_t *)&pcb->pcb_pmreal);
1657 	curcpu()->ci_curpm = pcb->pcb_pmreal;
1658 }
1659 
1660 /*
1661  * deactivate a pmap entry
1662  * NOOP on powerpc
1663  */
1664 void
1665 pmap_deactivate(struct proc *p)
1666 {
1667 }
1668 
1669 /*
1670  * pmap_extract: extract a PA for the given VA
1671  */
1672 
1673 boolean_t
1674 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pa)
1675 {
1676 	struct pte_desc *pted;
1677 
1678 	if (pm == pmap_kernel() && va < physmaxaddr) {
1679 		*pa = va;
1680 		return TRUE;
1681 	}
1682 
1683 	PMAP_VP_LOCK(pm);
1684 	pted = pmap_vp_lookup(pm, va);
1685 	if (pted == NULL || !PTED_VALID(pted)) {
1686 		PMAP_VP_UNLOCK(pm);
1687 		return FALSE;
1688 	}
1689 
1690 	if (ppc_proc_is_64b)
1691 		*pa = (pted->p.pted_pte64.pte_lo & PTE_RPGN_64) |
1692 		    (va & ~PTE_RPGN_64);
1693 	else
1694 		*pa = (pted->p.pted_pte32.pte_lo & PTE_RPGN_32) |
1695 		    (va & ~PTE_RPGN_32);
1696 
1697 	PMAP_VP_UNLOCK(pm);
1698 	return TRUE;
1699 }
1700 
1701 #ifdef ALTIVEC
1702 /*
1703  * Read an instruction from a given virtual memory address.
1704  * Execute-only protection is bypassed.
1705  */
1706 int
1707 pmap_copyinsn(pmap_t pm, vaddr_t va, uint32_t *insn)
1708 {
1709 	struct pte_desc *pted;
1710 	paddr_t pa;
1711 
1712 	/* Assume pm != pmap_kernel(). */
1713 	if (ppc_proc_is_64b) {
1714 		/* inline pmap_extract */
1715 		PMAP_VP_LOCK(pm);
1716 		pted = pmap_vp_lookup(pm, va);
1717 		if (pted == NULL || !PTED_VALID(pted)) {
1718 			PMAP_VP_UNLOCK(pm);
1719 			return EFAULT;
1720 		}
1721 		pa = (pted->p.pted_pte64.pte_lo & PTE_RPGN_64) |
1722 		    (va & ~PTE_RPGN_64);
1723 		PMAP_VP_UNLOCK(pm);
1724 
1725 		if (pa > physmaxaddr - sizeof(*insn))
1726 			return EFAULT;
1727 		*insn = *(uint32_t *)pa;
1728 		return 0;
1729 	} else
1730 		return copyin32((void *)va, insn);
1731 }
1732 #endif
1733 
1734 u_int32_t
1735 pmap_setusr(pmap_t pm, vaddr_t va)
1736 {
1737 	u_int32_t sr;
1738 	u_int32_t oldsr;
1739 
1740 	sr = ptesr(pm->pm_sr, va);
1741 
1742 	/* user address range lock?? */
1743 	asm volatile ("mfsr %0,%1" : "=r" (oldsr): "n"(PPC_USER_SR));
1744 	asm volatile ("isync; mtsr %0,%1; isync" :: "n"(PPC_USER_SR), "r"(sr));
1745 	return oldsr;
1746 }
1747 
1748 void
1749 pmap_popusr(u_int32_t sr)
1750 {
1751 	asm volatile ("isync; mtsr %0,%1; isync"
1752 	    :: "n"(PPC_USER_SR), "r"(sr));
1753 }
1754 
1755 int
1756 _copyin(const void *udaddr, void *kaddr, size_t len)
1757 {
1758 	void *p;
1759 	size_t l;
1760 	u_int32_t oldsr;
1761 	faultbuf env;
1762 	void *oldh = curpcb->pcb_onfault;
1763 
1764 	while (len > 0) {
1765 		p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK);
1766 		l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p;
1767 		if (l > len)
1768 			l = len;
1769 		oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr);
1770 		if (setfault(&env)) {
1771 			pmap_popusr(oldsr);
1772 			curpcb->pcb_onfault = oldh;
1773 			return EFAULT;
1774 		}
1775 		bcopy(p, kaddr, l);
1776 		pmap_popusr(oldsr);
1777 		udaddr += l;
1778 		kaddr += l;
1779 		len -= l;
1780 	}
1781 	curpcb->pcb_onfault = oldh;
1782 	return 0;
1783 }
1784 
1785 int
1786 copyout(const void *kaddr, void *udaddr, size_t len)
1787 {
1788 	void *p;
1789 	size_t l;
1790 	u_int32_t oldsr;
1791 	faultbuf env;
1792 	void *oldh = curpcb->pcb_onfault;
1793 
1794 	while (len > 0) {
1795 		p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK);
1796 		l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p;
1797 		if (l > len)
1798 			l = len;
1799 		oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr);
1800 		if (setfault(&env)) {
1801 			pmap_popusr(oldsr);
1802 			curpcb->pcb_onfault = oldh;
1803 			return EFAULT;
1804 		}
1805 
1806 		bcopy(kaddr, p, l);
1807 		pmap_popusr(oldsr);
1808 		udaddr += l;
1809 		kaddr += l;
1810 		len -= l;
1811 	}
1812 	curpcb->pcb_onfault = oldh;
1813 	return 0;
1814 }
1815 
1816 int
1817 copyin32(const uint32_t *udaddr, uint32_t *kaddr)
1818 {
1819 	volatile uint32_t *p;
1820 	u_int32_t oldsr;
1821 	faultbuf env;
1822 	void *oldh = curpcb->pcb_onfault;
1823 
1824 	if ((u_int)udaddr & 0x3)
1825 		return EFAULT;
1826 
1827 	p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK);
1828 	oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr);
1829 	if (setfault(&env)) {
1830 		pmap_popusr(oldsr);
1831 		curpcb->pcb_onfault = oldh;
1832 		return EFAULT;
1833 	}
1834 	*kaddr = *p;
1835 	pmap_popusr(oldsr);
1836 	curpcb->pcb_onfault = oldh;
1837 	return 0;
1838 }
1839 
1840 int
1841 _copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done)
1842 {
1843 	const u_char *uaddr = udaddr;
1844 	u_char *kp    = kaddr;
1845 	u_char *up;
1846 	u_char c;
1847 	void   *p;
1848 	size_t	 l;
1849 	u_int32_t oldsr;
1850 	int cnt = 0;
1851 	faultbuf env;
1852 	void *oldh = curpcb->pcb_onfault;
1853 
1854 	while (len > 0) {
1855 		p = PPC_USER_ADDR + ((u_int)uaddr & ~PPC_SEGMENT_MASK);
1856 		l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p;
1857 		up = p;
1858 		if (l > len)
1859 			l = len;
1860 		len -= l;
1861 		oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)uaddr);
1862 		if (setfault(&env)) {
1863 			if (done != NULL)
1864 				*done =  cnt;
1865 
1866 			curpcb->pcb_onfault = oldh;
1867 			pmap_popusr(oldsr);
1868 			return EFAULT;
1869 		}
1870 		while (l > 0) {
1871 			c = *up;
1872 			*kp = c;
1873 			if (c == 0) {
1874 				if (done != NULL)
1875 					*done = cnt + 1;
1876 
1877 				curpcb->pcb_onfault = oldh;
1878 				pmap_popusr(oldsr);
1879 				return 0;
1880 			}
1881 			up++;
1882 			kp++;
1883 			l--;
1884 			cnt++;
1885 			uaddr++;
1886 		}
1887 		pmap_popusr(oldsr);
1888 	}
1889 	curpcb->pcb_onfault = oldh;
1890 	if (done != NULL)
1891 		*done = cnt;
1892 
1893 	return ENAMETOOLONG;
1894 }
1895 
1896 int
1897 copyoutstr(const void *kaddr, void *udaddr, size_t len, size_t *done)
1898 {
1899 	u_char *uaddr = (void *)udaddr;
1900 	const u_char *kp    = kaddr;
1901 	u_char *up;
1902 	u_char c;
1903 	void   *p;
1904 	size_t	 l;
1905 	u_int32_t oldsr;
1906 	int cnt = 0;
1907 	faultbuf env;
1908 	void *oldh = curpcb->pcb_onfault;
1909 
1910 	while (len > 0) {
1911 		p = PPC_USER_ADDR + ((u_int)uaddr & ~PPC_SEGMENT_MASK);
1912 		l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p;
1913 		up = p;
1914 		if (l > len)
1915 			l = len;
1916 		len -= l;
1917 		oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)uaddr);
1918 		if (setfault(&env)) {
1919 			if (done != NULL)
1920 				*done =  cnt;
1921 
1922 			curpcb->pcb_onfault = oldh;
1923 			pmap_popusr(oldsr);
1924 			return EFAULT;
1925 		}
1926 		while (l > 0) {
1927 			c = *kp;
1928 			*up = c;
1929 			if (c == 0) {
1930 				if (done != NULL)
1931 					*done = cnt + 1;
1932 
1933 				curpcb->pcb_onfault = oldh;
1934 				pmap_popusr(oldsr);
1935 				return 0;
1936 			}
1937 			up++;
1938 			kp++;
1939 			l--;
1940 			cnt++;
1941 			uaddr++;
1942 		}
1943 		pmap_popusr(oldsr);
1944 	}
1945 	curpcb->pcb_onfault = oldh;
1946 	if (done != NULL)
1947 		*done = cnt;
1948 
1949 	return ENAMETOOLONG;
1950 }
1951 
1952 /*
1953  * sync instruction cache for user virtual address.
1954  * The address WAS JUST MAPPED, so we have a VALID USERSPACE mapping
1955  */
1956 void
1957 pmap_syncicache_user_virt(pmap_t pm, vaddr_t va)
1958 {
1959 	vaddr_t start;
1960 	int oldsr;
1961 
1962 	if (pm != pmap_kernel()) {
1963 		start = ((u_int)PPC_USER_ADDR + ((u_int)va &
1964 		    ~PPC_SEGMENT_MASK));
1965 		/* will only ever be page size, will not cross segments */
1966 
1967 		/* USER SEGMENT LOCK - MPXXX */
1968 		oldsr = pmap_setusr(pm, va);
1969 	} else {
1970 		start = va; /* flush mapped page */
1971 	}
1972 
1973 	syncicache((void *)start, PAGE_SIZE);
1974 
1975 	if (pm != pmap_kernel()) {
1976 		pmap_popusr(oldsr);
1977 		/* USER SEGMENT UNLOCK -MPXXX */
1978 	}
1979 }
1980 
1981 void
1982 pmap_pted_ro(struct pte_desc *pted, vm_prot_t prot)
1983 {
1984 	if (ppc_proc_is_64b)
1985 		pmap_pted_ro64(pted, prot);
1986 	else
1987 		pmap_pted_ro32(pted, prot);
1988 }
1989 
1990 void
1991 pmap_pted_ro64(struct pte_desc *pted, vm_prot_t prot)
1992 {
1993 	pmap_t pm = pted->pted_pmap;
1994 	vaddr_t va = pted->pted_va & ~PAGE_MASK;
1995 	struct vm_page *pg;
1996 	void *pte;
1997 	int s;
1998 
1999 	pg = PHYS_TO_VM_PAGE(pted->p.pted_pte64.pte_lo & PTE_RPGN_64);
2000 	if (pg->pg_flags & PG_PMAP_EXE) {
2001 		if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
2002 			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
2003 		} else {
2004 			pmap_syncicache_user_virt(pm, va);
2005 		}
2006 	}
2007 
2008 	pted->p.pted_pte64.pte_lo &= ~PTE_PP_64;
2009 	pted->p.pted_pte64.pte_lo |= PTE_RO_64;
2010 
2011 	if ((prot & PROT_EXEC) == 0)
2012 		pted->p.pted_pte64.pte_lo |= PTE_N_64;
2013 
2014 	if ((prot & (PROT_READ | PROT_WRITE)) == 0)
2015 		pted->p.pted_pte64.pte_lo |= PTE_AC_64;
2016 
2017 	PMAP_HASH_LOCK(s);
2018 	if ((pte = pmap_ptedinhash(pted)) != NULL) {
2019 		struct pte_64 *ptp64 = pte;
2020 
2021 		pte_del(ptp64, va);
2022 
2023 		if (PTED_MANAGED(pted)) { /* XXX */
2024 			pmap_attr_save(ptp64->pte_lo & PTE_RPGN_64,
2025 			    ptp64->pte_lo & (PTE_REF_64|PTE_CHG_64));
2026 		}
2027 
2028 		/* Add a Page Table Entry, section 7.6.3.1. */
2029 		ptp64->pte_lo = pted->p.pted_pte64.pte_lo;
2030 		eieio();	/* Order 1st PTE update before 2nd. */
2031 		ptp64->pte_hi |= PTE_VALID_64;
2032 		sync();		/* Ensure updates completed. */
2033 	}
2034 	PMAP_HASH_UNLOCK(s);
2035 }
2036 
2037 void
2038 pmap_pted_ro32(struct pte_desc *pted, vm_prot_t prot)
2039 {
2040 	pmap_t pm = pted->pted_pmap;
2041 	vaddr_t va = pted->pted_va & ~PAGE_MASK;
2042 	struct vm_page *pg;
2043 	void *pte;
2044 	int s;
2045 
2046 	pg = PHYS_TO_VM_PAGE(pted->p.pted_pte32.pte_lo & PTE_RPGN_32);
2047 	if (pg->pg_flags & PG_PMAP_EXE) {
2048 		if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
2049 			atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
2050 		} else {
2051 			pmap_syncicache_user_virt(pm, va);
2052 		}
2053 	}
2054 
2055 	pted->p.pted_pte32.pte_lo &= ~PTE_PP_32;
2056 	pted->p.pted_pte32.pte_lo |= PTE_RO_32;
2057 
2058 	PMAP_HASH_LOCK(s);
2059 	if ((pte = pmap_ptedinhash(pted)) != NULL) {
2060 		struct pte_32 *ptp32 = pte;
2061 
2062 		pte_del(ptp32, va);
2063 
2064 		if (PTED_MANAGED(pted)) { /* XXX */
2065 			pmap_attr_save(ptp32->pte_lo & PTE_RPGN_32,
2066 			    ptp32->pte_lo & (PTE_REF_32|PTE_CHG_32));
2067 		}
2068 
2069 		/* Add a Page Table Entry, section 7.6.3.1. */
2070 		ptp32->pte_lo &= ~(PTE_CHG_32|PTE_PP_32);
2071 		ptp32->pte_lo |= PTE_RO_32;
2072 		eieio();	/* Order 1st PTE update before 2nd. */
2073 		ptp32->pte_hi |= PTE_VALID_32;
2074 		sync();		/* Ensure updates completed. */
2075 	}
2076 	PMAP_HASH_UNLOCK(s);
2077 }
2078 
2079 /*
2080  * Lower the protection on the specified physical page.
2081  *
2082  * There are only two cases: either the protection is going to 0,
2083  * or it is going to read-only.
2084  */
2085 void
2086 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2087 {
2088 	struct pte_desc *pted;
2089 	void *pte;
2090 	pmap_t pm;
2091 	int s;
2092 
2093 	if (prot == PROT_NONE) {
2094 		mtx_enter(&pg->mdpage.pv_mtx);
2095 		while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
2096 			pmap_reference(pted->pted_pmap);
2097 			pm = pted->pted_pmap;
2098 			mtx_leave(&pg->mdpage.pv_mtx);
2099 
2100 			PMAP_VP_LOCK(pm);
2101 
2102 			/*
2103 			 * We dropped the pvlist lock before grabbing
2104 			 * the pmap lock to avoid lock ordering
2105 			 * problems.  This means we have to check the
2106 			 * pvlist again since somebody else might have
2107 			 * modified it.  All we care about is that the
2108 			 * pvlist entry matches the pmap we just
2109 			 * locked.  If it doesn't, unlock the pmap and
2110 			 * try again.
2111 			 */
2112 			mtx_enter(&pg->mdpage.pv_mtx);
2113 			if ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) == NULL ||
2114 			    pted->pted_pmap != pm) {
2115 				mtx_leave(&pg->mdpage.pv_mtx);
2116 				PMAP_VP_UNLOCK(pm);
2117 				pmap_destroy(pm);
2118 				mtx_enter(&pg->mdpage.pv_mtx);
2119 				continue;
2120 			}
2121 
2122 			PMAP_HASH_LOCK(s);
2123 			if ((pte = pmap_ptedinhash(pted)) != NULL)
2124 				pte_zap(pte, pted);
2125 			PMAP_HASH_UNLOCK(s);
2126 
2127 			pted->pted_va &= ~PTED_VA_MANAGED_M;
2128 			LIST_REMOVE(pted, pted_pv_list);
2129 			mtx_leave(&pg->mdpage.pv_mtx);
2130 
2131 			pmap_remove_pted(pm, pted);
2132 
2133 			PMAP_VP_UNLOCK(pm);
2134 			pmap_destroy(pm);
2135 			mtx_enter(&pg->mdpage.pv_mtx);
2136 		}
2137 		mtx_leave(&pg->mdpage.pv_mtx);
2138 		/* page is being reclaimed, sync icache next use */
2139 		atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
2140 		return;
2141 	}
2142 
2143 	mtx_enter(&pg->mdpage.pv_mtx);
2144 	LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list)
2145 		pmap_pted_ro(pted, prot);
2146 	mtx_leave(&pg->mdpage.pv_mtx);
2147 }
2148 
2149 void
2150 pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
2151 {
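	/*
	 * Keeping read and/or execute access only downgrades the existing
	 * mappings to read-only; dropping all access is the same as
	 * removing the mappings entirely.
	 */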
2152 	if (prot & (PROT_READ | PROT_EXEC)) {
2153 		struct pte_desc *pted;
2154 
2155 		PMAP_VP_LOCK(pm);
2156 		while (sva < eva) {
2157 			pted = pmap_vp_lookup(pm, sva);
2158 			if (pted && PTED_VALID(pted))
2159 				pmap_pted_ro(pted, prot);
2160 			sva += PAGE_SIZE;
2161 		}
2162 		PMAP_VP_UNLOCK(pm);
2163 		return;
2164 	}
2165 	pmap_remove(pm, sva, eva);
2166 }
2167 
2168 /*
2169  * Restrict the given range to available physical memory.
2170  */
2171 void
2172 pmap_real_memory(paddr_t *start, vsize_t *size)
2173 {
2174 	struct mem_region *mp;
2175 
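	/*
	 * Clip the range to the first physical memory region it overlaps;
	 * if it overlaps none, the resulting size is 0.
	 */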
2176 	for (mp = pmap_mem; mp->size; mp++) {
2177 		if (((*start + *size) > mp->start)
2178 			&& (*start < (mp->start + mp->size)))
2179 		{
2180 			if (*start < mp->start) {
2181 				*size -= mp->start - *start;
2182 				*start = mp->start;
2183 			}
2184 			if ((*start + *size) > (mp->start + mp->size))
2185 				*size = mp->start + mp->size - *start;
2186 			return;
2187 		}
2188 	}
2189 	*size = 0;
2190 }
2191 
2192 void
2193 pmap_init()
2194 {
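	/*
	 * Set up the pools backing pmap, virtual-page table and PTE
	 * descriptor allocations; the low watermarks keep a small reserve
	 * of each available.
	 */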
2195 	pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
2196 	    "pmap", NULL);
2197 	pool_setlowat(&pmap_pmap_pool, 2);
2198 	pool_init(&pmap_vp_pool, sizeof(struct pmapvp), 0, IPL_VM, 0,
2199 	    "vp", &pool_allocator_single);
2200 	pool_setlowat(&pmap_vp_pool, 10);
2201 	pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
2202 	    "pted", NULL);
2203 	pool_setlowat(&pmap_pted_pool, 20);
2204 
2205 	pmap_initialized = 1;
2206 }
2207 
2208 void
2209 pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
2210 {
2211 	paddr_t pa;
2212 	vsize_t clen;
2213 
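	/*
	 * Walk the range page by page; resolve each virtual address to its
	 * physical address and sync the icache through the kernel's 1:1
	 * mapping of physical memory.
	 */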
2214 	while (len > 0) {
2215 		/* add one to always round up to the next page */
2216 		clen = round_page(va + 1) - va;
2217 		if (clen > len)
2218 			clen = len;
2219 
2220 		if (pmap_extract(pr->ps_vmspace->vm_map.pmap, va, &pa)) {
2221 			syncicache((void *)pa, clen);
2222 		}
2223 
2224 		len -= clen;
2225 		va += clen;
2226 	}
2227 }
2228 
2229 /*
2230  * There are two routines, pte_spill_r and pte_spill_v.  The _r version
2231  * only handles kernel faults that are not user accesses.  The _v version
2232  * handles all user faults as well as kernel copyin/copyout "user"
2233  * accesses.
2234  */
2235 int
2236 pte_spill_r(u_int32_t va, u_int32_t msr, u_int32_t dsisr, int exec_fault)
2237 {
2238 	pmap_t pm;
2239 	struct pte_desc *pted;
2240 	struct pte_desc pted_store;
2241 
2242 	/* The lookup is done using physical addresses to avoid taking further faults. */
2243 
2244 	/*
2245 	 * This function only handles kernel faults, not supervisor copyins.
2246 	 */
2247 	if (msr & PSL_PR)
2248 		return 0;
2249 
2250 	/* if copyin, throw to full exception handler */
2251 	if (VP_SR(va) == PPC_USER_SR)
2252 		return 0;
2253 
2254 	pm = pmap_kernel();
2255 
2256 	/* 0 - physmaxaddr mapped 1-1 */
2257 	if (va < physmaxaddr) {
2258 		u_int32_t aligned_va;
2259 		vm_prot_t prot = PROT_READ | PROT_WRITE;
2260 		extern caddr_t kernel_text;
2261 		extern caddr_t etext;
2262 
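		/*
		 * Build a throwaway PTE descriptor on the stack for the
		 * 1:1 mapping; it is spilled straight into the hash table
		 * and never linked into the kernel pmap.
		 */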
2263 		pted = &pted_store;
2264 
2265 		if (va >= trunc_page((vaddr_t)&kernel_text) &&
2266 		    va < round_page((vaddr_t)&etext)) {
2267 			prot |= PROT_EXEC;
2268 		}
2269 
2270 		aligned_va = trunc_page(va);
2271 		if (ppc_proc_is_64b) {
2272 			pmap_fill_pte64(pm, aligned_va, aligned_va,
2273 			    pted, prot, PMAP_CACHE_WB);
2274 			pte_insert64(pted);
2275 		} else {
2276 			pmap_fill_pte32(pm, aligned_va, aligned_va,
2277 			    pted, prot, PMAP_CACHE_WB);
2278 			pte_insert32(pted);
2279 		}
2280 		return 1;
2281 	}
2282 
2283 	return pte_spill_v(pm, va, dsisr, exec_fault);
2284 }
2285 
2286 int
2287 pte_spill_v(pmap_t pm, u_int32_t va, u_int32_t dsisr, int exec_fault)
2288 {
2289 	struct pte_desc *pted;
2290 	int inserted = 0;
2291 
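	/*
	 * Re-validate the faulting access against the software PTE; if it
	 * is permitted, reinsert the entry into the hash table and
	 * return 1.
	 */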
2292 	/*
2293 	 * DSISR_DABR is set if the PowerPC 970 attempted to read or
2294 	 * write an execute-only page.
2295 	 */
2296 	if (dsisr & DSISR_DABR)
2297 		return 0;
2298 
2299 	/*
2300 	 * If the current mapping is read-only and the access was a write,
2301 	 * we return 0.
2302 	 */
2303 	PMAP_VP_LOCK(pm);
2304 	pted = pmap_vp_lookup(pm, va);
2305 	if (pted == NULL || !PTED_VALID(pted))
2306 		goto out;
2307 
2308 	/* Attempted to write a read-only page. */
2309 	if (dsisr & DSISR_STORE) {
2310 		if (ppc_proc_is_64b) {
2311 			if ((pted->p.pted_pte64.pte_lo & PTE_PP_64) ==
2312 			    PTE_RO_64)
2313 				goto out;
2314 		} else {
2315 			if ((pted->p.pted_pte32.pte_lo & PTE_PP_32) ==
2316 			    PTE_RO_32)
2317 				goto out;
2318 		}
2319 	}
2320 
2321 	/* Attempted to execute non-executable page. */
2322 	if ((exec_fault != 0) && ((pted->pted_va & PTED_VA_EXEC_M) == 0))
2323 		goto out;
2324 
2325 	inserted = 1;
2326 	if (ppc_proc_is_64b)
2327 		pte_insert64(pted);
2328 	else
2329 		pte_insert32(pted);
2330 
2331 out:
2332 	PMAP_VP_UNLOCK(pm);
2333 	return (inserted);
2334 }
2335 
2336 
2337 /*
2338  * should pte_insert code avoid wired mappings?
2339  * is the stack safe?
2340  * is the pted safe? (physical)
2341  * -ugh
2342  */
2343 void
2344 pte_insert64(struct pte_desc *pted)
2345 {
2346 	struct pte_64 *ptp64;
2347 	int off, secondary;
2348 	int sr, idx, i;
2349 	void *pte;
2350 	int s;
2351 
2352 	PMAP_HASH_LOCK(s);
2353 	if ((pte = pmap_ptedinhash(pted)) != NULL)
2354 		pte_zap(pte, pted);
2355 
2356 	pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);
2357 
2358 	sr = ptesr(pted->pted_pmap->pm_sr, pted->pted_va);
2359 	idx = pteidx(sr, pted->pted_va);
2360 
2361 	/*
2362 	 * Instead of starting at the beginning of each PTEG, the code
2363 	 * should pick a random slot within the primary group and search
2364 	 * all of its entries, then, if nothing is found yet, do the
2365 	 * same for the secondary group.  This would reduce the
2366 	 * frontloading of the PTEG.
2367 	 */
2368 
2369 	/* first just try fill of primary hash */
2370 	ptp64 = pmap_ptable64 + (idx) * 8;
2371 	for (i = 0; i < 8; i++) {
2372 		if (ptp64[i].pte_hi & PTE_VALID_64)
2373 			continue;
2374 
2375 		pted->pted_va |= i;
2376 
2377 		/* Add a Page Table Entry, section 7.6.3.1. */
2378 		ptp64[i].pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64;
2379 		ptp64[i].pte_lo = pted->p.pted_pte64.pte_lo;
2380 		eieio();	/* Order 1st PTE update before 2nd. */
2381 		ptp64[i].pte_hi |= PTE_VALID_64;
2382 		sync();		/* Ensure updates completed. */
2383 
2384 		goto out;
2385 	}
2386 
2387 	/* try fill of secondary hash */
2388 	ptp64 = pmap_ptable64 + (idx ^ pmap_ptab_mask) * 8;
2389 	for (i = 0; i < 8; i++) {
2390 		if (ptp64[i].pte_hi & PTE_VALID_64)
2391 			continue;
2392 
2393 		pted->pted_va |= (i | PTED_VA_HID_M);
2394 
2395 		/* Add a Page Table Entry, section 7.6.3.1. */
2396 		ptp64[i].pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64;
2397 		ptp64[i].pte_lo = pted->p.pted_pte64.pte_lo;
2398 		eieio();	/* Order 1st PTE update before 2nd. */
2399 		ptp64[i].pte_hi |= (PTE_HID_64|PTE_VALID_64);
2400 		sync();		/* Ensure updates completed. */
2401 
2402 		goto out;
2403 	}
2404 
2405 	/* need decent replacement algorithm */
2406 	off = ppc_mftb();
2407 	secondary = off & 8;
2408 
2409 
2410 	pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M);
2411 
2412 	idx = (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0));
2413 
2414 	ptp64 = pmap_ptable64 + (idx * 8);
2415 	ptp64 += PTED_PTEGIDX(pted); /* increment by index into pteg */
2416 
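	/*
	 * If the victim slot is still valid, reconstruct its virtual
	 * address from pte_hi so the entry can be invalidated and its
	 * referenced/changed bits saved before it is overwritten.
	 */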
2417 	if (ptp64->pte_hi & PTE_VALID_64) {
2418 		vaddr_t va;
2419 
2420 		/* Bits 9-19 */
2421 		idx = (idx ^ ((ptp64->pte_hi & PTE_HID_64) ?
2422 		    pmap_ptab_mask : 0));
2423 		va = (ptp64->pte_hi >> PTE_VSID_SHIFT_64) ^ idx;
2424 		va <<= ADDR_PIDX_SHIFT;
2425 		/* Bits 4-8 */
2426 		va |= (ptp64->pte_hi & PTE_API_64) << ADDR_API_SHIFT_32;
2427 		/* Bits 0-3 */
2428 		va |= (ptp64->pte_hi >> PTE_VSID_SHIFT_64)
2429 		    << ADDR_SR_SHIFT;
2430 
2431 		pte_del(ptp64, va);
2432 
2433 		pmap_attr_save(ptp64->pte_lo & PTE_RPGN_64,
2434 		    ptp64->pte_lo & (PTE_REF_64|PTE_CHG_64));
2435 	}
2436 
2437 	/* Add a Page Table Entry, section 7.6.3.1. */
2438 	ptp64->pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64;
2439 	if (secondary)
2440 		ptp64->pte_hi |= PTE_HID_64;
2441 	ptp64->pte_lo = pted->p.pted_pte64.pte_lo;
2442 	eieio();	/* Order 1st PTE update before 2nd. */
2443 	ptp64->pte_hi |= PTE_VALID_64;
2444 	sync();		/* Ensure updates completed. */
2445 
2446 out:
2447 	PMAP_HASH_UNLOCK(s);
2448 }
2449 
2450 void
2451 pte_insert32(struct pte_desc *pted)
2452 {
2453 	struct pte_32 *ptp32;
2454 	int off, secondary;
2455 	int sr, idx, i;
2456 	void *pte;
2457 	int s;
2458 
2459 	PMAP_HASH_LOCK(s);
2460 	if ((pte = pmap_ptedinhash(pted)) != NULL)
2461 		pte_zap(pte, pted);
2462 
2463 	pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);
2464 
2465 	sr = ptesr(pted->pted_pmap->pm_sr, pted->pted_va);
2466 	idx = pteidx(sr, pted->pted_va);
2467 
2468 	/*
2469 	 * Instead of starting at the beginning of each PTEG, the code
2470 	 * should pick a random slot within the primary group and search
2471 	 * all of its entries, then, if nothing is found yet, do the
2472 	 * same for the secondary group.  This would reduce the
2473 	 * frontloading of the PTEG.
2474 	 */
2475 
2476 	/* first just try fill of primary hash */
2477 	ptp32 = pmap_ptable32 + (idx) * 8;
2478 	for (i = 0; i < 8; i++) {
2479 		if (ptp32[i].pte_hi & PTE_VALID_32)
2480 			continue;
2481 
2482 		pted->pted_va |= i;
2483 
2484 		/* Add a Page Table Entry, section 7.6.3.1. */
2485 		ptp32[i].pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32;
2486 		ptp32[i].pte_lo = pted->p.pted_pte32.pte_lo;
2487 		eieio();	/* Order 1st PTE update before 2nd. */
2488 		ptp32[i].pte_hi |= PTE_VALID_32;
2489 		sync();		/* Ensure updates completed. */
2490 
2491 		goto out;
2492 	}
2493 
2494 	/* try fill of secondary hash */
2495 	ptp32 = pmap_ptable32 + (idx ^ pmap_ptab_mask) * 8;
2496 	for (i = 0; i < 8; i++) {
2497 		if (ptp32[i].pte_hi & PTE_VALID_32)
2498 			continue;
2499 
2500 		pted->pted_va |= (i | PTED_VA_HID_M);
2501 
2502 		/* Add a Page Table Entry, section 7.6.3.1. */
2503 		ptp32[i].pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32;
2504 		ptp32[i].pte_lo = pted->p.pted_pte32.pte_lo;
2505 		eieio();	/* Order 1st PTE update before 2nd. */
2506 		ptp32[i].pte_hi |= (PTE_HID_32|PTE_VALID_32);
2507 		sync();		/* Ensure updates completed. */
2508 
2509 		goto out;
2510 	}
2511 
2512 	/* need decent replacement algorithm */
2513 	off = ppc_mftb();
2514 	secondary = off & 8;
2515 
2516 	pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M);
2517 
2518 	idx = (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0));
2519 
2520 	ptp32 = pmap_ptable32 + (idx * 8);
2521 	ptp32 += PTED_PTEGIDX(pted); /* increment by index into pteg */
2522 
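	/*
	 * As in pte_insert64(): recover the victim's virtual address from
	 * pte_hi, invalidate the entry and save its referenced/changed
	 * bits before reusing the slot.
	 */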
2523 	if (ptp32->pte_hi & PTE_VALID_32) {
2524 		vaddr_t va;
2525 
2526 		va = ((ptp32->pte_hi & PTE_API_32) << ADDR_API_SHIFT_32) |
2527 		     ((((ptp32->pte_hi >> PTE_VSID_SHIFT_32) & SR_VSID)
2528 			^(idx ^ ((ptp32->pte_hi & PTE_HID_32) ? 0x3ff : 0)))
2529 			    & 0x3ff) << PAGE_SHIFT;
2530 
2531 		pte_del(ptp32, va);
2532 
2533 		pmap_attr_save(ptp32->pte_lo & PTE_RPGN_32,
2534 		    ptp32->pte_lo & (PTE_REF_32|PTE_CHG_32));
2535 	}
2536 
2537 	/* Add a Page Table Entry, section 7.6.3.1. */
2538 	ptp32->pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32;
2539 	if (secondary)
2540 		ptp32->pte_hi |= PTE_HID_32;
2541 	ptp32->pte_lo = pted->p.pted_pte32.pte_lo;
2542 	eieio();	/* Order 1st PTE update before 2nd. */
2543 	ptp32->pte_hi |= PTE_VALID_32;
2544 	sync();		/* Ensure updates completed. */
2545 
2546 out:
2547 	PMAP_HASH_UNLOCK(s);
2548 }
2549