1 /* $OpenBSD: pmap.c,v 1.185 2024/09/06 10:54:08 jsg Exp $ */
2
3 /*
4 * Copyright (c) 2015 Martin Pieuchot
5 * Copyright (c) 2001, 2002, 2007 Dale Rahn.
6 * All rights reserved.
7 *
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
19 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
20 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
21 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
22 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
23 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
24 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
25 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
26 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
27 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 *
29 * Effort sponsored in part by the Defense Advanced Research Projects
30 * Agency (DARPA) and Air Force Research Laboratory, Air Force
31 * Materiel Command, USAF, under agreement number F30602-01-2-0537.
32 */
33
34 /*
35 * powerpc lazy icache management.
36 * The icache does not snoop dcache accesses. The icache also will not load
37  * modified data from the dcache; it loads the unmodified (stale) data from ram.
38 * Before the icache is loaded, the dcache must be synced to ram to prevent
39 * the icache from loading stale data.
40 * pg->pg_flags PG_PMAP_EXE bit is used to track if the dcache is clean
41 * and the icache may have valid data in it.
42  * If the PG_PMAP_EXE bit is set (and the page is not currently RWX),
43  * the icache will only have valid code in it. If the bit is clear,
44  * memory may not match the dcache contents, or the icache may contain
45 * data from a previous page.
46 *
47 * pmap enter
48 * !E NONE -> R no action
49 * !E NONE|R -> RW no action
50 * !E NONE|R -> RX flush dcache, inval icache (that page only), set E
51 * !E NONE|R -> RWX flush dcache, inval icache (that page only), set E
52 * !E NONE|RW -> RWX flush dcache, inval icache (that page only), set E
53 * E NONE -> R no action
54 * E NONE|R -> RW clear PG_PMAP_EXE bit
55 * E NONE|R -> RX no action
56 * E NONE|R -> RWX no action
57 * E NONE|RW -> RWX -invalid source state
58 *
59  * pmap_protect
60 * E RW -> R - invalid source state
61 * !E RW -> R - no action
62 * * RX -> R - no action
63 * * RWX -> R - sync dcache, inval icache
64 * * RWX -> RW - clear PG_PMAP_EXE
65 * * RWX -> RX - sync dcache, inval icache
66 * * * -> NONE - no action
67 *
68 * pmap_page_protect (called with arg PROT_NONE if page is to be reused)
69 * * RW -> R - as pmap_protect
70 * * RX -> R - as pmap_protect
71 * * RWX -> R - as pmap_protect
72 * * RWX -> RW - as pmap_protect
73 * * RWX -> RX - as pmap_protect
74 * * * -> NONE - clear PG_PMAP_EXE
75 *
76 */
77
78 #include <sys/param.h>
79 #include <sys/systm.h>
80 #include <sys/proc.h>
81 #include <sys/queue.h>
82 #include <sys/pool.h>
83 #include <sys/atomic.h>
84 #include <sys/user.h>
85
86 #include <uvm/uvm_extern.h>
87
88 #include <machine/pcb.h>
89 #include <powerpc/powerpc.h>
90 #include <powerpc/bat.h>
91 #include <machine/pmap.h>
92
93 struct bat battable[16];
94
95 struct dumpmem dumpmem[VM_PHYSSEG_MAX];
96 u_int ndumpmem;
97
98 struct pmap kernel_pmap_;
99 static struct mem_region *pmap_mem, *pmap_avail;
100 struct mem_region pmap_allocated[10];
101 int pmap_cnt_avail;
102 int pmap_cnt_allocated;
103
104 struct pte_64 *pmap_ptable64;
105 struct pte_32 *pmap_ptable32;
106 int pmap_ptab_cnt;
107 u_int pmap_ptab_mask;
108
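/*
 * Hash table geometry: pmap_ptab_cnt is the number of PTE groups (PTEGs),
 * each holding 8 PTEs.  A 32-bit PTE is 8 bytes, so a 32-bit PTEG is 64
 * bytes; a 64-bit PTE is 16 bytes, so a 64-bit PTEG is 128 bytes.
 * HTABSIZE_64 is the size encoding loaded into SDR1; assuming
 * pmap_ptab_cnt is a power of two, it equals log2(pmap_ptab_cnt) - 11
 * (e.g. 2048 PTEGs encode as 0).
 */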
109 #define HTABSIZE_32 (pmap_ptab_cnt * 64)
110 #define HTABMEMSZ_64 (pmap_ptab_cnt * 8 * sizeof(struct pte_64))
111 #define HTABSIZE_64 (ffs(pmap_ptab_cnt) - 12)
112
113 static u_int usedsr[NPMAPS / sizeof(u_int) / 8];
114
115 struct pte_desc {
116 /* Linked list of phys -> virt entries */
117 LIST_ENTRY(pte_desc) pted_pv_list;
118 union {
119 struct pte_32 pted_pte32;
120 struct pte_64 pted_pte64;
121 } p;
122 pmap_t pted_pmap;
123 vaddr_t pted_va;
124 };
125
126 void pmap_attr_save(paddr_t pa, u_int32_t bits);
127 void pmap_pted_ro(struct pte_desc *, vm_prot_t);
128 void pmap_pted_ro64(struct pte_desc *, vm_prot_t);
129 void pmap_pted_ro32(struct pte_desc *, vm_prot_t);
130
131 /*
132 * Some functions are called in real mode and cannot be profiled.
133 */
134 #define __noprof __attribute__((__no_instrument_function__))
135
136 /* VP routines */
137 int pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags);
138 struct pte_desc *pmap_vp_remove(pmap_t pm, vaddr_t va);
139 void pmap_vp_destroy(pmap_t pm);
140 struct pte_desc *pmap_vp_lookup(pmap_t pm, vaddr_t va) __noprof;
141
142 /* PV routines */
143 void pmap_enter_pv(struct pte_desc *pted, struct vm_page *);
144 void pmap_remove_pv(struct pte_desc *pted);
145
146
147 /* pte hash table routines */
148 static inline void *pmap_ptedinhash(struct pte_desc *);
149 void pte_insert32(struct pte_desc *) __noprof;
150 void pte_insert64(struct pte_desc *) __noprof;
151 void pmap_fill_pte64(pmap_t, vaddr_t, paddr_t, struct pte_desc *, vm_prot_t,
152 int) __noprof;
153 void pmap_fill_pte32(pmap_t, vaddr_t, paddr_t, struct pte_desc *, vm_prot_t,
154 int) __noprof;
155
156 void pmap_syncicache_user_virt(pmap_t pm, vaddr_t va);
157
158 void pmap_remove_pted(pmap_t, struct pte_desc *);
159
160 /* setup/initialization functions */
161 void pmap_avail_setup(void);
162 void pmap_avail_fixup(void);
163 void pmap_remove_avail(paddr_t base, paddr_t end);
164 void *pmap_steal_avail(size_t size, int align);
165
166 /* asm interface */
167 int pte_spill_r(u_int32_t, u_int32_t, u_int32_t, int) __noprof;
168 int pte_spill_v(pmap_t, u_int32_t, u_int32_t, int) __noprof;
169
170 u_int32_t pmap_setusr(pmap_t pm, vaddr_t va);
171 void pmap_popusr(u_int32_t oldsr);
172
173 /* pte invalidation */
174 void pte_del(void *, vaddr_t);
175 void pte_zap(void *, struct pte_desc *);
176
177 /* XXX - panic on pool get failures? */
178 struct pool pmap_pmap_pool;
179 struct pool pmap_vp_pool;
180 struct pool pmap_pted_pool;
181
182 int pmap_initialized = 0;
183 int physmem;
184 int physmaxaddr;
185
186 #ifdef MULTIPROCESSOR
187 struct __ppc_lock pmap_hash_lock = PPC_LOCK_INITIALIZER;
188
189 #define PMAP_HASH_LOCK(s) \
190 do { \
191 s = ppc_intr_disable(); \
192 __ppc_lock(&pmap_hash_lock); \
193 } while (0)
194
195 #define PMAP_HASH_UNLOCK(s) \
196 do { \
197 __ppc_unlock(&pmap_hash_lock); \
198 ppc_intr_enable(s); \
199 } while (0)
200
201 #define PMAP_VP_LOCK_INIT(pm) mtx_init(&pm->pm_mtx, IPL_VM)
202
203 #define PMAP_VP_LOCK(pm) \
204 do { \
205 if (pm != pmap_kernel()) \
206 mtx_enter(&pm->pm_mtx); \
207 } while (0)
208
209 #define PMAP_VP_UNLOCK(pm) \
210 do { \
211 if (pm != pmap_kernel()) \
212 mtx_leave(&pm->pm_mtx); \
213 } while (0)
214
215 #define PMAP_VP_ASSERT_LOCKED(pm) \
216 do { \
217 if (pm != pmap_kernel()) \
218 MUTEX_ASSERT_LOCKED(&pm->pm_mtx); \
219 } while (0)
220
221 #else /* ! MULTIPROCESSOR */
222
223 #define PMAP_HASH_LOCK(s) (void)s
224 #define PMAP_HASH_UNLOCK(s) /* nothing */
225
226 #define PMAP_VP_LOCK_INIT(pm) /* nothing */
227 #define PMAP_VP_LOCK(pm) /* nothing */
228 #define PMAP_VP_UNLOCK(pm) /* nothing */
229 #define PMAP_VP_ASSERT_LOCKED(pm) /* nothing */
230 #endif /* MULTIPROCESSOR */
231
232 /* virtual to physical helpers */
233 static inline int
234 VP_SR(vaddr_t va)
235 {
236 	return (va >> VP_SR_POS) & VP_SR_MASK;
237 }
238
239 static inline int
240 VP_IDX1(vaddr_t va)
241 {
242 return (va >> VP_IDX1_POS) & VP_IDX1_MASK;
243 }
244
245 static inline int
246 VP_IDX2(vaddr_t va)
247 {
248 return (va >> VP_IDX2_POS) & VP_IDX2_MASK;
249 }
250
251 #if VP_IDX1_SIZE != VP_IDX2_SIZE
252 #error pmap allocation code expects IDX1 and IDX2 size to be same
253 #endif
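/*
 * Per-pmap virtual-to-physical lookup uses a two-level table per 256MB
 * segment: pm_vp[VP_SR(va)] points at a first-level pmapvp whose
 * vp[VP_IDX1(va)] points at a second-level pmapvp whose vp[VP_IDX2(va)]
 * is the pte_desc for the page, or NULL if nothing is mapped there.
 */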
254 struct pmapvp {
255 void *vp[VP_IDX1_SIZE];
256 };
257
258
259 /*
260 * VP routines, virtual to physical translation information.
261 * These data structures are based off of the pmap, per process.
262 */
263
264 /*
265  * This is used for pmap_kernel() mappings; they are not to be removed
266  * from the vp table because they were statically initialized at the
267  * initial pmap initialization. This is so that no memory allocation
268  * is necessary for pmap_kernel() mappings.
269 * Otherwise bad race conditions can appear.
270 */
271 struct pte_desc *
272 pmap_vp_lookup(pmap_t pm, vaddr_t va)
273 {
274 struct pmapvp *vp1;
275 struct pmapvp *vp2;
276 struct pte_desc *pted;
277
278 PMAP_VP_ASSERT_LOCKED(pm);
279
280 vp1 = pm->pm_vp[VP_SR(va)];
281 if (vp1 == NULL) {
282 return NULL;
283 }
284
285 vp2 = vp1->vp[VP_IDX1(va)];
286 if (vp2 == NULL) {
287 return NULL;
288 }
289
290 pted = vp2->vp[VP_IDX2(va)];
291
292 return pted;
293 }
294
295 /*
296 * Remove, and return, pted at specified address, NULL if not present
297 */
298 struct pte_desc *
299 pmap_vp_remove(pmap_t pm, vaddr_t va)
300 {
301 struct pmapvp *vp1;
302 struct pmapvp *vp2;
303 struct pte_desc *pted;
304
305 PMAP_VP_ASSERT_LOCKED(pm);
306
307 vp1 = pm->pm_vp[VP_SR(va)];
308 if (vp1 == NULL) {
309 return NULL;
310 }
311
312 vp2 = vp1->vp[VP_IDX1(va)];
313 if (vp2 == NULL) {
314 return NULL;
315 }
316
317 pted = vp2->vp[VP_IDX2(va)];
318 vp2->vp[VP_IDX2(va)] = NULL;
319
320 return pted;
321 }
322
323 /*
324 * Create a V -> P mapping for the given pmap and virtual address
325 * with reference to the pte descriptor that is used to map the page.
326  * This code should track vp table allocations
327 * so they can be freed efficiently.
328 */
329 int
330 pmap_vp_enter(pmap_t pm, vaddr_t va, struct pte_desc *pted, int flags)
331 {
332 struct pmapvp *vp1;
333 struct pmapvp *vp2;
334
335 PMAP_VP_ASSERT_LOCKED(pm);
336
337 vp1 = pm->pm_vp[VP_SR(va)];
338 if (vp1 == NULL) {
339 vp1 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
340 if (vp1 == NULL) {
341 if ((flags & PMAP_CANFAIL) == 0)
342 panic("pmap_vp_enter: failed to allocate vp1");
343 return ENOMEM;
344 }
345 pm->pm_vp[VP_SR(va)] = vp1;
346 }
347
348 vp2 = vp1->vp[VP_IDX1(va)];
349 if (vp2 == NULL) {
350 vp2 = pool_get(&pmap_vp_pool, PR_NOWAIT | PR_ZERO);
351 if (vp2 == NULL) {
352 if ((flags & PMAP_CANFAIL) == 0)
353 panic("pmap_vp_enter: failed to allocate vp2");
354 return ENOMEM;
355 }
356 vp1->vp[VP_IDX1(va)] = vp2;
357 }
358
359 vp2->vp[VP_IDX2(va)] = pted;
360
361 return 0;
362 }
363
364 static inline void
365 tlbie(vaddr_t va)
366 {
367 asm volatile ("tlbie %0" :: "r"(va & ~PAGE_MASK));
368 }
369
370 static inline void
371 tlbsync(void)
372 {
373 asm volatile ("tlbsync");
374 }
375 static inline void
376 eieio(void)
377 {
378 asm volatile ("eieio");
379 }
380
381 static inline void
382 sync(void)
383 {
384 asm volatile ("sync");
385 }
386
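/*
 * Invalidate the entire TLB.  The TLB is indexed by low-order effective
 * address bits, so issuing tlbie on 64 consecutive pages (0x00040000
 * bytes) should cover every congruence class on the CPUs this pmap
 * supports.
 */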
387 static inline void
388 tlbia(void)
389 {
390 vaddr_t va;
391
392 sync();
393 for (va = 0; va < 0x00040000; va += 0x00001000)
394 tlbie(va);
395 eieio();
396 tlbsync();
397 sync();
398 }
399
400 static inline int
401 ptesr(sr_t *sr, vaddr_t va)
402 {
403 return sr[(u_int)va >> ADDR_SR_SHIFT];
404 }
405
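/*
 * Primary hash into the page table, as defined by the PowerPC hashed
 * page table architecture: the low VSID bits XORed with the page index
 * within the segment, masked down to the number of PTEGs.  The
 * secondary hash used elsewhere is the complement of this value,
 * idx ^ pmap_ptab_mask (e.g. with pmap_ptab_mask == 0x7fff, index
 * 0x0123 has secondary index 0x7edc).
 */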
406 static inline int
407 pteidx(sr_t sr, vaddr_t va)
408 {
409 int hash;
410 hash = (sr & SR_VSID) ^ (((u_int)va & ADDR_PIDX) >> ADDR_PIDX_SHIFT);
411 return hash & pmap_ptab_mask;
412 }
413
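/*
 * pted_va holds a page-aligned va, so its low bits are reused as flags:
 * which of the 8 slots within the PTEG the PTE occupies, whether the
 * secondary hash (HID) was used, and the managed/wired/exec state.
 */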
414 #define PTED_VA_PTEGIDX_M 0x07
415 #define PTED_VA_HID_M 0x08
416 #define PTED_VA_MANAGED_M 0x10
417 #define PTED_VA_WIRED_M 0x20
418 #define PTED_VA_EXEC_M 0x40
419
420 static inline u_int32_t
421 PTED_HID(struct pte_desc *pted)
422 {
423 return (pted->pted_va & PTED_VA_HID_M);
424 }
425
426 static inline u_int32_t
427 PTED_PTEGIDX(struct pte_desc *pted)
428 {
429 return (pted->pted_va & PTED_VA_PTEGIDX_M);
430 }
431
432 static inline u_int32_t
433 PTED_MANAGED(struct pte_desc *pted)
434 {
435 return (pted->pted_va & PTED_VA_MANAGED_M);
436 }
437
438 static inline u_int32_t
439 PTED_VALID(struct pte_desc *pted)
440 {
441 if (ppc_proc_is_64b)
442 return (pted->p.pted_pte64.pte_hi & PTE_VALID_64);
443 else
444 return (pted->p.pted_pte32.pte_hi & PTE_VALID_32);
445 }
446
447 /*
448 * PV entries -
449 * manipulate the physical to virtual translations for the entire system.
450 *
451 * QUESTION: should all mapped memory be stored in PV tables? Or
452  * is it all right to only store "ram" memory? Currently device mappings
453 * are not stored.
454 * It makes sense to pre-allocate mappings for all of "ram" memory, since
455 * it is likely that it will be mapped at some point, but would it also
456  * make sense to use a tree/table like the one used for the pmap to store device
457 * mappings?
458 * Further notes: It seems that the PV table is only used for pmap_protect
459 * and other paging related operations. Given this, it is not necessary
460  * to store any pmap_kernel() entries in PV tables, nor does it make
461 * sense to store device mappings in PV either.
462 *
463 * Note: unlike other powerpc pmap designs, the array is only an array
464 * of pointers. Since the same structure is used for holding information
465  * in the VP table, the PV table, and the wired kernel mappings, one data
466  * structure is allocated to hold all of the info, instead of replicating
467 * it multiple times.
468 *
469 * One issue of making this a single data structure is that two pointers are
470  * wasted for every page which does not map ram (device mappings). This
471  * should be a low percentage of mapped pages in the system, so it should not
472  * cause too noticeable an amount of unnecessary ram consumption.
473 */
474
475 void
476 pmap_enter_pv(struct pte_desc *pted, struct vm_page *pg)
477 {
478 if (__predict_false(!pmap_initialized)) {
479 return;
480 }
481
482 mtx_enter(&pg->mdpage.pv_mtx);
483 LIST_INSERT_HEAD(&(pg->mdpage.pv_list), pted, pted_pv_list);
484 pted->pted_va |= PTED_VA_MANAGED_M;
485 mtx_leave(&pg->mdpage.pv_mtx);
486 }
487
488 void
489 pmap_remove_pv(struct pte_desc *pted)
490 {
491 struct vm_page *pg;
492
493 if (ppc_proc_is_64b)
494 pg = PHYS_TO_VM_PAGE(pted->p.pted_pte64.pte_lo & PTE_RPGN_64);
495 else
496 pg = PHYS_TO_VM_PAGE(pted->p.pted_pte32.pte_lo & PTE_RPGN_32);
497
498 mtx_enter(&pg->mdpage.pv_mtx);
499 pted->pted_va &= ~PTED_VA_MANAGED_M;
500 LIST_REMOVE(pted, pted_pv_list);
501 mtx_leave(&pg->mdpage.pv_mtx);
502 }
503
504
505 /* PTE_CHG_32 == PTE_CHG_64 */
506 /* PTE_REF_32 == PTE_REF_64 */
507 static __inline u_int
508 pmap_pte2flags(u_int32_t pte)
509 {
510 return (((pte & PTE_REF_32) ? PG_PMAP_REF : 0) |
511 ((pte & PTE_CHG_32) ? PG_PMAP_MOD : 0));
512 }
513
514 static __inline u_int
515 pmap_flags2pte(u_int32_t flags)
516 {
517 return (((flags & PG_PMAP_REF) ? PTE_REF_32 : 0) |
518 ((flags & PG_PMAP_MOD) ? PTE_CHG_32 : 0));
519 }
520
521 void
522 pmap_attr_save(paddr_t pa, u_int32_t bits)
523 {
524 struct vm_page *pg;
525
526 pg = PHYS_TO_VM_PAGE(pa);
527 if (pg == NULL)
528 return;
529
530 atomic_setbits_int(&pg->pg_flags, pmap_pte2flags(bits));
531 }
532
533 int
534 pmap_enter(pmap_t pm, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
535 {
536 struct pte_desc *pted;
537 struct vm_page *pg;
538 boolean_t nocache = (pa & PMAP_NOCACHE) != 0;
539 boolean_t wt = (pa & PMAP_WT) != 0;
540 int need_sync = 0;
541 int cache, error = 0;
542
543 KASSERT(!(wt && nocache));
544 pa &= PMAP_PA_MASK;
545
546 PMAP_VP_LOCK(pm);
547 pted = pmap_vp_lookup(pm, va);
548 if (pted && PTED_VALID(pted)) {
549 pmap_remove_pted(pm, pted);
550 /* we lost our pted if it was user */
551 if (pm != pmap_kernel())
552 pted = pmap_vp_lookup(pm, va);
553 }
554
555 pm->pm_stats.resident_count++;
556
557 /* Do not have pted for this, get one and put it in VP */
558 if (pted == NULL) {
559 pted = pool_get(&pmap_pted_pool, PR_NOWAIT | PR_ZERO);
560 if (pted == NULL) {
561 if ((flags & PMAP_CANFAIL) == 0) {
562 error = ENOMEM;
563 goto out;
564 }
565 panic("pmap_enter: failed to allocate pted");
566 }
567 error = pmap_vp_enter(pm, va, pted, flags);
568 if (error) {
569 pool_put(&pmap_pted_pool, pted);
570 goto out;
571 }
572 }
573
574 pg = PHYS_TO_VM_PAGE(pa);
575 if (pg != NULL && (pg->pg_flags & PG_PMAP_UC))
576 nocache = TRUE;
577 if (wt)
578 cache = PMAP_CACHE_WT;
579 else if (pg != NULL && !(pg->pg_flags & PG_DEV) && !nocache)
580 cache = PMAP_CACHE_WB;
581 else
582 cache = PMAP_CACHE_CI;
583
584 /* Calculate PTE */
585 if (ppc_proc_is_64b)
586 pmap_fill_pte64(pm, va, pa, pted, prot, cache);
587 else
588 pmap_fill_pte32(pm, va, pa, pted, prot, cache);
589
590 if (pg != NULL) {
591 pmap_enter_pv(pted, pg); /* only managed mem */
592 }
593
594 /*
595 * Insert into HTAB
596 * We were told to map the page, probably called from vm_fault,
597 * so map the page!
598 */
599 if (ppc_proc_is_64b)
600 pte_insert64(pted);
601 else
602 pte_insert32(pted);
603
604 if (prot & PROT_EXEC) {
605 u_int sn = VP_SR(va);
606
607 pm->pm_exec[sn]++;
608 if (pm->pm_sr[sn] & SR_NOEXEC)
609 pm->pm_sr[sn] &= ~SR_NOEXEC;
610
611 if (pg != NULL) {
612 need_sync = ((pg->pg_flags & PG_PMAP_EXE) == 0);
613 if (prot & PROT_WRITE)
614 atomic_clearbits_int(&pg->pg_flags,
615 PG_PMAP_EXE);
616 else
617 atomic_setbits_int(&pg->pg_flags,
618 PG_PMAP_EXE);
619 } else
620 need_sync = 1;
621 } else {
622 /*
623 * Should we be paranoid about writeable non-exec
624 	 * mappings? If so, clear the exec tag
625 */
626 if ((prot & PROT_WRITE) && (pg != NULL))
627 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
628 }
629
630 /* only instruction sync executable pages */
631 if (need_sync)
632 pmap_syncicache_user_virt(pm, va);
633
634 out:
635 PMAP_VP_UNLOCK(pm);
636 return (error);
637 }
638
639 /*
640 * Remove the given range of mapping entries.
641 */
642 void
643 pmap_remove(pmap_t pm, vaddr_t sva, vaddr_t eva)
644 {
645 struct pte_desc *pted;
646 vaddr_t va;
647
648 PMAP_VP_LOCK(pm);
649 for (va = sva; va < eva; va += PAGE_SIZE) {
650 pted = pmap_vp_lookup(pm, va);
651 if (pted && PTED_VALID(pted))
652 pmap_remove_pted(pm, pted);
653 }
654 PMAP_VP_UNLOCK(pm);
655 }
656
657 /*
658  * remove a single mapping; note that this code is O(1)
659 */
660 void
661 pmap_remove_pted(pmap_t pm, struct pte_desc *pted)
662 {
663 void *pte;
664 int s;
665
666 KASSERT(pm == pted->pted_pmap);
667 PMAP_VP_ASSERT_LOCKED(pm);
668
669 pm->pm_stats.resident_count--;
670
671 PMAP_HASH_LOCK(s);
672 if ((pte = pmap_ptedinhash(pted)) != NULL)
673 pte_zap(pte, pted);
674 PMAP_HASH_UNLOCK(s);
675
676 if (pted->pted_va & PTED_VA_EXEC_M) {
677 u_int sn = VP_SR(pted->pted_va);
678
679 pted->pted_va &= ~PTED_VA_EXEC_M;
680 pm->pm_exec[sn]--;
681 if (pm->pm_exec[sn] == 0)
682 pm->pm_sr[sn] |= SR_NOEXEC;
683 }
684
685 if (ppc_proc_is_64b)
686 pted->p.pted_pte64.pte_hi &= ~PTE_VALID_64;
687 else
688 pted->p.pted_pte32.pte_hi &= ~PTE_VALID_32;
689
690 if (PTED_MANAGED(pted))
691 pmap_remove_pv(pted);
692
693 if (pm != pmap_kernel()) {
694 (void)pmap_vp_remove(pm, pted->pted_va);
695 pool_put(&pmap_pted_pool, pted);
696 }
697 }
698
699 /*
700 * Enter a kernel mapping for the given page.
701 * kernel mappings have a larger set of prerequisites than normal mappings.
702 *
703 * 1. no memory should be allocated to create a kernel mapping.
704 * 2. a vp mapping should already exist, even if invalid. (see 1)
705 * 3. all vp tree mappings should already exist (see 1)
706 *
707 */
708 void
709 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
710 {
711 struct pte_desc *pted;
712 struct vm_page *pg;
713 boolean_t nocache = (pa & PMAP_NOCACHE) != 0;
714 boolean_t wt = (pa & PMAP_WT) != 0;
715 pmap_t pm;
716 int cache;
717
718 KASSERT(!(wt && nocache));
719 pa &= PMAP_PA_MASK;
720
721 pm = pmap_kernel();
722
723 pted = pmap_vp_lookup(pm, va);
724 if (pted && PTED_VALID(pted))
725 pmap_remove_pted(pm, pted); /* pted is reused */
726
727 pm->pm_stats.resident_count++;
728
729 if (prot & PROT_WRITE) {
730 pg = PHYS_TO_VM_PAGE(pa);
731 if (pg != NULL)
732 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
733 }
734
735 /* Do not have pted for this, get one and put it in VP */
736 if (pted == NULL) {
737 panic("pted not preallocated in pmap_kernel() va %lx pa %lx",
738 va, pa);
739 }
740
741 pg = PHYS_TO_VM_PAGE(pa);
742 if (wt)
743 cache = PMAP_CACHE_WT;
744 else if (pg != NULL && !(pg->pg_flags & PG_DEV) && !nocache)
745 cache = PMAP_CACHE_WB;
746 else
747 cache = PMAP_CACHE_CI;
748
749 /* Calculate PTE */
750 if (ppc_proc_is_64b)
751 pmap_fill_pte64(pm, va, pa, pted, prot, cache);
752 else
753 pmap_fill_pte32(pm, va, pa, pted, prot, cache);
754
755 /*
756 * Insert into HTAB
757 * We were told to map the page, probably called from vm_fault,
758 * so map the page!
759 */
760 if (ppc_proc_is_64b)
761 pte_insert64(pted);
762 else
763 pte_insert32(pted);
764
765 pted->pted_va |= PTED_VA_WIRED_M;
766
767 if (prot & PROT_EXEC) {
768 u_int sn = VP_SR(va);
769
770 pm->pm_exec[sn]++;
771 if (pm->pm_sr[sn] & SR_NOEXEC)
772 pm->pm_sr[sn] &= ~SR_NOEXEC;
773 }
774 }
775
776 /*
777 * remove kernel (pmap_kernel()) mappings
778 */
779 void
780 pmap_kremove(vaddr_t va, vsize_t len)
781 {
782 struct pte_desc *pted;
783
784 for (len >>= PAGE_SHIFT; len > 0; len--, va += PAGE_SIZE) {
785 pted = pmap_vp_lookup(pmap_kernel(), va);
786 if (pted && PTED_VALID(pted))
787 pmap_remove_pted(pmap_kernel(), pted);
788 }
789 }
790
791 static inline void *
792 pmap_ptedinhash(struct pte_desc *pted)
793 {
794 vaddr_t va = pted->pted_va & ~PAGE_MASK;
795 pmap_t pm = pted->pted_pmap;
796 int sr, idx;
797
798 sr = ptesr(pm->pm_sr, va);
799 idx = pteidx(sr, va);
800
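	/*
	 * Each PTEG holds 8 PTEs, hence the "* 8" below; an entry inserted
	 * via the secondary hash (HID set) lives in the complementary PTEG,
	 * idx ^ pmap_ptab_mask.
	 */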
801 if (ppc_proc_is_64b) {
802 struct pte_64 *pte = pmap_ptable64;
803
804 pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8;
805 pte += PTED_PTEGIDX(pted);
806
807 /*
808 * We now have the pointer to where it will be, if it is
809 * currently mapped. If the mapping was thrown away in
810 * exchange for another page mapping, then this page is
811 * not currently in the HASH.
812 */
813 if ((pted->p.pted_pte64.pte_hi |
814 (PTED_HID(pted) ? PTE_HID_64 : 0)) == pte->pte_hi)
815 return (pte);
816 } else {
817 struct pte_32 *pte = pmap_ptable32;
818
819 pte += (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0)) * 8;
820 pte += PTED_PTEGIDX(pted);
821
822 /*
823 * We now have the pointer to where it will be, if it is
824 * currently mapped. If the mapping was thrown away in
825 * exchange for another page mapping, then this page is
826 * not currently in the HASH.
827 */
828 if ((pted->p.pted_pte32.pte_hi |
829 (PTED_HID(pted) ? PTE_HID_32 : 0)) == pte->pte_hi)
830 return (pte);
831 }
832
833 return (NULL);
834 }
835
836 /*
837 * Delete a Page Table Entry, section 7.6.3.3.
838 *
839 * Note: pte must be locked.
840 */
841 void
842 pte_del(void *pte, vaddr_t va)
843 {
844 if (ppc_proc_is_64b)
845 ((struct pte_64 *)pte)->pte_hi &= ~PTE_VALID_64;
846 else
847 ((struct pte_32 *)pte)->pte_hi &= ~PTE_VALID_32;
848
849 sync(); /* Ensure update completed. */
850 tlbie(va); /* Invalidate old translation. */
851 eieio(); /* Order tlbie before tlbsync. */
852 tlbsync(); /* Ensure tlbie completed on all processors. */
853 sync(); /* Ensure tlbsync and update completed. */
854 }
855
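/*
 * Like pte_del(), but for managed mappings also salvage the
 * referenced/changed bits from the hardware PTE into the vm_page flags
 * before the hash slot is reused.
 */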
856 void
857 pte_zap(void *pte, struct pte_desc *pted)
858 {
859 pte_del(pte, pted->pted_va);
860
861 if (!PTED_MANAGED(pted))
862 return;
863
864 if (ppc_proc_is_64b) {
865 pmap_attr_save(pted->p.pted_pte64.pte_lo & PTE_RPGN_64,
866 ((struct pte_64 *)pte)->pte_lo & (PTE_REF_64|PTE_CHG_64));
867 } else {
868 pmap_attr_save(pted->p.pted_pte32.pte_lo & PTE_RPGN_32,
869 ((struct pte_32 *)pte)->pte_lo & (PTE_REF_32|PTE_CHG_32));
870 }
871 }
872
873 /*
874 * What about execution control? Even at only a segment granularity.
875 */
876 void
877 pmap_fill_pte64(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
878 vm_prot_t prot, int cache)
879 {
880 sr_t sr;
881 struct pte_64 *pte64;
882
883 sr = ptesr(pm->pm_sr, va);
884 pte64 = &pted->p.pted_pte64;
885
886 pte64->pte_hi = (((u_int64_t)sr & SR_VSID) <<
887 PTE_VSID_SHIFT_64) |
888 ((va >> ADDR_API_SHIFT_64) & PTE_API_64) | PTE_VALID_64;
889 pte64->pte_lo = (pa & PTE_RPGN_64);
890
891
892 if (cache == PMAP_CACHE_WB)
893 pte64->pte_lo |= PTE_M_64;
894 else if (cache == PMAP_CACHE_WT)
895 pte64->pte_lo |= (PTE_W_64 | PTE_M_64);
896 else
897 pte64->pte_lo |= (PTE_M_64 | PTE_I_64 | PTE_G_64);
898
899 if ((prot & (PROT_READ | PROT_WRITE)) == 0)
900 pte64->pte_lo |= PTE_AC_64;
901
902 if (prot & PROT_WRITE)
903 pte64->pte_lo |= PTE_RW_64;
904 else
905 pte64->pte_lo |= PTE_RO_64;
906
907 pted->pted_va = va & ~PAGE_MASK;
908
909 if (prot & PROT_EXEC)
910 pted->pted_va |= PTED_VA_EXEC_M;
911 else
912 pte64->pte_lo |= PTE_N_64;
913
914 pted->pted_pmap = pm;
915 }
916
917 /*
918 * What about execution control? Even at only a segment granularity.
919 */
920 void
921 pmap_fill_pte32(pmap_t pm, vaddr_t va, paddr_t pa, struct pte_desc *pted,
922 vm_prot_t prot, int cache)
923 {
924 sr_t sr;
925 struct pte_32 *pte32;
926
927 sr = ptesr(pm->pm_sr, va);
928 pte32 = &pted->p.pted_pte32;
929
930 pte32->pte_hi = ((sr & SR_VSID) << PTE_VSID_SHIFT_32) |
931 ((va >> ADDR_API_SHIFT_32) & PTE_API_32) | PTE_VALID_32;
932 pte32->pte_lo = (pa & PTE_RPGN_32);
933
934 if (cache == PMAP_CACHE_WB)
935 pte32->pte_lo |= PTE_M_32;
936 else if (cache == PMAP_CACHE_WT)
937 pte32->pte_lo |= (PTE_W_32 | PTE_M_32);
938 else
939 pte32->pte_lo |= (PTE_M_32 | PTE_I_32 | PTE_G_32);
940
941 if (prot & PROT_WRITE)
942 pte32->pte_lo |= PTE_RW_32;
943 else
944 pte32->pte_lo |= PTE_RO_32;
945
946 pted->pted_va = va & ~PAGE_MASK;
947
948 /* XXX Per-page execution control. */
949 if (prot & PROT_EXEC)
950 pted->pted_va |= PTED_VA_EXEC_M;
951
952 pted->pted_pmap = pm;
953 }
954
955 int
956 pmap_test_attrs(struct vm_page *pg, u_int flagbit)
957 {
958 u_int bits;
959 struct pte_desc *pted;
960 u_int ptebit = pmap_flags2pte(flagbit);
961 int s;
962
963 /* PTE_CHG_32 == PTE_CHG_64 */
964 /* PTE_REF_32 == PTE_REF_64 */
965
966 bits = pg->pg_flags & flagbit;
967 if (bits == flagbit)
968 return bits;
969
970 mtx_enter(&pg->mdpage.pv_mtx);
971 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
972 void *pte;
973
974 PMAP_HASH_LOCK(s);
975 if ((pte = pmap_ptedinhash(pted)) != NULL) {
976 if (ppc_proc_is_64b) {
977 struct pte_64 *ptp64 = pte;
978 bits |= pmap_pte2flags(ptp64->pte_lo & ptebit);
979 } else {
980 struct pte_32 *ptp32 = pte;
981 bits |= pmap_pte2flags(ptp32->pte_lo & ptebit);
982 }
983 }
984 PMAP_HASH_UNLOCK(s);
985
986 if (bits == flagbit)
987 break;
988 }
989 mtx_leave(&pg->mdpage.pv_mtx);
990
991 atomic_setbits_int(&pg->pg_flags, bits);
992
993 return bits;
994 }
995
996 int
997 pmap_clear_attrs(struct vm_page *pg, u_int flagbit)
998 {
999 u_int bits;
1000 struct pte_desc *pted;
1001 u_int ptebit = pmap_flags2pte(flagbit);
1002 int s;
1003
1004 /* PTE_CHG_32 == PTE_CHG_64 */
1005 /* PTE_REF_32 == PTE_REF_64 */
1006
1007 bits = pg->pg_flags & flagbit;
1008
1009 mtx_enter(&pg->mdpage.pv_mtx);
1010 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list) {
1011 void *pte;
1012
1013 PMAP_HASH_LOCK(s);
1014 if ((pte = pmap_ptedinhash(pted)) != NULL) {
1015 if (ppc_proc_is_64b) {
1016 struct pte_64 *ptp64 = pte;
1017
1018 bits |= pmap_pte2flags(ptp64->pte_lo & ptebit);
1019
1020 pte_del(ptp64, pted->pted_va);
1021
1022 ptp64->pte_lo &= ~ptebit;
1023 eieio();
1024 ptp64->pte_hi |= PTE_VALID_64;
1025 sync();
1026 } else {
1027 struct pte_32 *ptp32 = pte;
1028
1029 bits |= pmap_pte2flags(ptp32->pte_lo & ptebit);
1030
1031 pte_del(ptp32, pted->pted_va);
1032
1033 ptp32->pte_lo &= ~ptebit;
1034 eieio();
1035 ptp32->pte_hi |= PTE_VALID_32;
1036 sync();
1037 }
1038 }
1039 PMAP_HASH_UNLOCK(s);
1040 }
1041 mtx_leave(&pg->mdpage.pv_mtx);
1042
1043 /*
1044 * this is done a second time, because while walking the list
1045 * a bit could have been promoted via pmap_attr_save()
1046 */
1047 bits |= pg->pg_flags & flagbit;
1048 atomic_clearbits_int(&pg->pg_flags, flagbit);
1049
1050 return bits;
1051 }
1052
1053 /*
1054 * Fill the given physical page with zeros.
1055 */
1056 void
1057 pmap_zero_page(struct vm_page *pg)
1058 {
1059 vaddr_t va = pmap_map_direct(pg);
1060 int i;
1061
1062 /*
1063 * Loop over & zero cache lines. This code assumes that 64-bit
1064 * CPUs have 128-byte cache lines. We explicitly use ``dcbzl''
1065 * here because we do not clear the DCBZ_SIZE bit of the HID5
1066 * register in order to be compatible with code using ``dcbz''
1067 * and assuming that cache line size is 32.
1068 */
1069 if (ppc_proc_is_64b) {
1070 for (i = 0; i < PAGE_SIZE; i += 128)
1071 asm volatile ("dcbzl 0,%0" :: "r"(va + i));
1072 return;
1073 }
1074
1075 for (i = 0; i < PAGE_SIZE; i += CACHELINESIZE)
1076 asm volatile ("dcbz 0,%0" :: "r"(va + i));
1077 }
1078
1079 /*
1080 * Copy a page.
1081 */
1082 void
1083 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
1084 {
1085 vaddr_t srcva = pmap_map_direct(srcpg);
1086 vaddr_t dstva = pmap_map_direct(dstpg);
1087
1088 memcpy((void *)dstva, (void *)srcva, PAGE_SIZE);
1089 }
1090
1091 int pmap_id_avail = 0;
1092
1093 pmap_t
1094 pmap_create(void)
1095 {
1096 u_int bits;
1097 int first, i, k, try, tblidx, tbloff;
1098 int seg;
1099 pmap_t pm;
1100
1101 pm = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO);
1102
1103 pmap_reference(pm);
1104 PMAP_VP_LOCK_INIT(pm);
1105
1106 /*
1107 * Allocate segment registers for this pmap.
1108 * Try not to reuse pmap ids, to spread the hash table usage.
1109 */
1110 first = pmap_id_avail;
1111 again:
1112 for (i = 0; i < NPMAPS; i++) {
1113 try = first + i;
1114 try = try % NPMAPS; /* truncate back into bounds */
1115 tblidx = try / (8 * sizeof usedsr[0]);
1116 tbloff = try % (8 * sizeof usedsr[0]);
1117 bits = usedsr[tblidx];
1118 if ((bits & (1U << tbloff)) == 0) {
1119 if (atomic_cas_uint(&usedsr[tblidx], bits,
1120 bits | (1U << tbloff)) != bits) {
1121 first = try;
1122 goto again;
1123 }
1124 pmap_id_avail = try + 1;
1125
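			/*
			 * Each pmap owns 16 consecutive VSIDs, one per
			 * segment register, derived from its slot in the
			 * usedsr bitmap.
			 */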
1126 seg = try << 4;
1127 for (k = 0; k < 16; k++)
1128 pm->pm_sr[k] = (seg + k) | SR_NOEXEC;
1129 return (pm);
1130 }
1131 }
1132 panic("out of pmap slots");
1133 }
1134
1135 /*
1136 * Add a reference to a given pmap.
1137 */
1138 void
1139 pmap_reference(pmap_t pm)
1140 {
1141 atomic_inc_int(&pm->pm_refs);
1142 }
1143
1144 /*
1145 * Retire the given pmap from service.
1146 * Should only be called if the map contains no valid mappings.
1147 */
1148 void
1149 pmap_destroy(pmap_t pm)
1150 {
1151 int refs;
1152
1153 refs = atomic_dec_int_nv(&pm->pm_refs);
1154 if (refs == -1)
1155 panic("re-entering pmap_destroy");
1156 if (refs > 0)
1157 return;
1158
1159 /*
1160 * reference count is zero, free pmap resources and free pmap.
1161 */
1162 pmap_release(pm);
1163 pool_put(&pmap_pmap_pool, pm);
1164 }
1165
1166 /*
1167 * Release any resources held by the given physical map.
1168 * Called when a pmap initialized by pmap_pinit is being released.
1169 */
1170 void
1171 pmap_release(pmap_t pm)
1172 {
1173 int i, tblidx, tbloff;
1174
1175 pmap_vp_destroy(pm);
1176 i = (pm->pm_sr[0] & SR_VSID) >> 4;
1177 tblidx = i / (8 * sizeof usedsr[0]);
1178 tbloff = i % (8 * sizeof usedsr[0]);
1179
1180 /* powerpc can do atomic cas, clearbits on same word. */
1181 atomic_clearbits_int(&usedsr[tblidx], 1U << tbloff);
1182 }
1183
1184 void
1185 pmap_vp_destroy(pmap_t pm)
1186 {
1187 int i, j;
1188 struct pmapvp *vp1;
1189 struct pmapvp *vp2;
1190
1191 for (i = 0; i < VP_SR_SIZE; i++) {
1192 vp1 = pm->pm_vp[i];
1193 if (vp1 == NULL)
1194 continue;
1195
1196 for (j = 0; j < VP_IDX1_SIZE; j++) {
1197 vp2 = vp1->vp[j];
1198 if (vp2 == NULL)
1199 continue;
1200
1201 pool_put(&pmap_vp_pool, vp2);
1202 }
1203 pm->pm_vp[i] = NULL;
1204 pool_put(&pmap_vp_pool, vp1);
1205 }
1206 }
1207
1208 void
1209 pmap_avail_setup(void)
1210 {
1211 struct mem_region *mp;
1212
1213 ppc_mem_regions(&pmap_mem, &pmap_avail);
1214
1215 for (mp = pmap_mem; mp->size !=0; mp++, ndumpmem++) {
1216 physmem += atop(mp->size);
1217 dumpmem[ndumpmem].start = atop(mp->start);
1218 dumpmem[ndumpmem].end = atop(mp->start + mp->size);
1219 }
1220
1221 for (mp = pmap_avail; mp->size !=0 ; mp++) {
1222 if (physmaxaddr < mp->start + mp->size)
1223 physmaxaddr = mp->start + mp->size;
1224 }
1225
1226 for (mp = pmap_avail; mp->size !=0; mp++)
1227 pmap_cnt_avail += 1;
1228 }
1229
1230 void
1231 pmap_avail_fixup(void)
1232 {
1233 struct mem_region *mp;
1234 u_int32_t align;
1235 u_int32_t end;
1236
1237 mp = pmap_avail;
1238 while(mp->size !=0) {
1239 align = round_page(mp->start);
1240 if (mp->start != align) {
1241 pmap_remove_avail(mp->start, align);
1242 mp = pmap_avail;
1243 continue;
1244 }
1245 end = mp->start+mp->size;
1246 align = trunc_page(end);
1247 if (end != align) {
1248 pmap_remove_avail(align, end);
1249 mp = pmap_avail;
1250 continue;
1251 }
1252 mp++;
1253 }
1254 }
1255
1256 /* remove a given region from avail memory */
1257 void
1258 pmap_remove_avail(paddr_t base, paddr_t end)
1259 {
1260 struct mem_region *mp;
1261 int i;
1262 int mpend;
1263
1264 /* remove given region from available */
1265 for (mp = pmap_avail; mp->size; mp++) {
1266 /*
1267 		 * Check whether this chunk contains all or part of the region.
1268 */
1269 mpend = mp->start + mp->size;
1270 if (base > mpend) {
1271 continue;
1272 }
1273 if (base <= mp->start) {
1274 if (end <= mp->start)
1275 break; /* region not present -??? */
1276
1277 if (end >= mpend) {
1278 /* covers whole region */
1279 /* shorten */
1280 for (i = mp - pmap_avail;
1281 i < pmap_cnt_avail;
1282 i++) {
1283 pmap_avail[i] = pmap_avail[i+1];
1284 }
1285 pmap_cnt_avail--;
1286 pmap_avail[pmap_cnt_avail].size = 0;
1287 } else {
1288 mp->start = end;
1289 mp->size = mpend - end;
1290 }
1291 } else {
1292 /* start after the beginning */
1293 if (end >= mpend) {
1294 /* just truncate */
1295 mp->size = base - mp->start;
1296 } else {
1297 /* split */
1298 for (i = pmap_cnt_avail;
1299 i > (mp - pmap_avail);
1300 i--) {
1301 pmap_avail[i] = pmap_avail[i - 1];
1302 }
1303 pmap_cnt_avail++;
1304 mp->size = base - mp->start;
1305 mp++;
1306 mp->start = end;
1307 mp->size = mpend - end;
1308 }
1309 }
1310 }
1311 for (mp = pmap_allocated; mp->size != 0; mp++) {
1312 if (base < mp->start) {
1313 if (end == mp->start) {
1314 mp->start = base;
1315 mp->size += end - base;
1316 break;
1317 }
1318 /* lengthen */
1319 for (i = pmap_cnt_allocated; i > (mp - pmap_allocated);
1320 i--) {
1321 pmap_allocated[i] = pmap_allocated[i - 1];
1322 }
1323 pmap_cnt_allocated++;
1324 mp->start = base;
1325 mp->size = end - base;
1326 return;
1327 }
1328 if (base == (mp->start + mp->size)) {
1329 mp->size += end - base;
1330 return;
1331 }
1332 }
1333 if (mp->size == 0) {
1334 mp->start = base;
1335 mp->size = end - base;
1336 pmap_cnt_allocated++;
1337 }
1338 }
1339
1340 void *
1341 pmap_steal_avail(size_t size, int align)
1342 {
1343 struct mem_region *mp;
1344 int start;
1345 int remsize;
1346
1347 for (mp = pmap_avail; mp->size; mp++) {
1348 if (mp->size > size) {
1349 start = (mp->start + (align -1)) & ~(align -1);
1350 remsize = mp->size - (start - mp->start);
1351 if (remsize >= 0) {
1352 pmap_remove_avail(start, start+size);
1353 return (void *)start;
1354 }
1355 }
1356 }
1357 panic ("unable to allocate region with size %zx align %x",
1358 size, align);
1359 }
1360
1361 /*
1362 * Similar to pmap_steal_avail, but operating on vm_physmem since
1363 * uvm_page_physload() has been called.
1364 */
1365 vaddr_t
1366 pmap_steal_memory(vsize_t size, vaddr_t *start, vaddr_t *end)
1367 {
1368 int segno;
1369 u_int npg;
1370 vaddr_t va;
1371 paddr_t pa;
1372 struct vm_physseg *seg;
1373
1374 size = round_page(size);
1375 npg = atop(size);
1376
1377 for (segno = 0, seg = vm_physmem; segno < vm_nphysseg; segno++, seg++) {
1378 if (seg->avail_end - seg->avail_start < npg)
1379 continue;
1380 /*
1381 * We can only steal at an ``unused'' segment boundary,
1382 * i.e. either at the start or at the end.
1383 */
1384 if (seg->avail_start == seg->start ||
1385 seg->avail_end == seg->end)
1386 break;
1387 }
1388 if (segno == vm_nphysseg)
1389 va = 0;
1390 else {
1391 if (seg->avail_start == seg->start) {
1392 pa = ptoa(seg->avail_start);
1393 seg->avail_start += npg;
1394 seg->start += npg;
1395 } else {
1396 pa = ptoa(seg->avail_end) - size;
1397 seg->avail_end -= npg;
1398 seg->end -= npg;
1399 }
1400 /*
1401 		 * If the whole segment has been consumed now, remove it.
1402 * Note that the crash dump code still knows about it
1403 * and will dump it correctly.
1404 */
1405 if (seg->start == seg->end) {
1406 if (vm_nphysseg-- == 1)
1407 panic("pmap_steal_memory: out of memory");
1408 while (segno < vm_nphysseg) {
1409 seg[0] = seg[1]; /* struct copy */
1410 seg++;
1411 segno++;
1412 }
1413 }
1414
1415 va = (vaddr_t)pa; /* 1:1 mapping */
1416 bzero((void *)va, size);
1417 }
1418
1419 if (start != NULL)
1420 *start = VM_MIN_KERNEL_ADDRESS;
1421 if (end != NULL)
1422 *end = VM_MAX_KERNEL_ADDRESS;
1423
1424 return (va);
1425 }
1426
1427 void *msgbuf_addr;
1428
1429 /*
1430 * Initialize pmap setup.
1431  * ALL of the code which deals with avail needs to be rewritten as an actual
1432 * memory allocation.
1433 */
1434 void
1435 pmap_bootstrap(u_int kernelstart, u_int kernelend)
1436 {
1437 struct mem_region *mp;
1438 int i, k;
1439 struct pmapvp *vp1;
1440 struct pmapvp *vp2;
1441 extern vaddr_t ppc_kvm_stolen;
1442
1443 /*
1444 * set the page size (default value is 4K which is ok)
1445 */
1446 uvm_setpagesize();
1447
1448 /*
1449 * Get memory.
1450 */
1451 pmap_avail_setup();
1452
1453 /*
1454 * Page align all regions.
1455 	 * Non-page-aligned memory isn't very interesting to us.
1456 * Also, sort the entries for ascending addresses.
1457 */
1458 kernelstart = trunc_page(kernelstart);
1459 kernelend = round_page(kernelend);
1460 pmap_remove_avail(kernelstart, kernelend);
1461
1462 msgbuf_addr = pmap_steal_avail(MSGBUFSIZE,4);
1463
1464 #ifdef DEBUG
1465 for (mp = pmap_avail; mp->size; mp++) {
1466 bzero((void *)mp->start, mp->size);
1467 }
1468 #endif
1469
1470 #define HTABENTS_32 1024
1471 #define HTABENTS_64 2048
1472
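	/*
	 * Size the hash table: start from a minimum PTEG count and keep
	 * doubling it until, on 64-bit CPUs, there is at least one PTEG
	 * per two physical pages, or, on 32-bit CPUs, the table is at
	 * least 1/128th the size of physical memory.
	 */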
1473 if (ppc_proc_is_64b) {
1474 pmap_ptab_cnt = HTABENTS_64;
1475 while (pmap_ptab_cnt * 2 < physmem)
1476 pmap_ptab_cnt <<= 1;
1477 } else {
1478 pmap_ptab_cnt = HTABENTS_32;
1479 while (HTABSIZE_32 < (ptoa(physmem) >> 7))
1480 pmap_ptab_cnt <<= 1;
1481 }
1482 /*
1483 * allocate suitably aligned memory for HTAB
1484 */
1485 if (ppc_proc_is_64b) {
1486 pmap_ptable64 = pmap_steal_avail(HTABMEMSZ_64, HTABMEMSZ_64);
1487 bzero((void *)pmap_ptable64, HTABMEMSZ_64);
1488 pmap_ptab_mask = pmap_ptab_cnt - 1;
1489 } else {
1490 pmap_ptable32 = pmap_steal_avail(HTABSIZE_32, HTABSIZE_32);
1491 bzero((void *)pmap_ptable32, HTABSIZE_32);
1492 pmap_ptab_mask = pmap_ptab_cnt - 1;
1493 }
1494
1495 /* allocate v->p mappings for pmap_kernel() */
1496 for (i = 0; i < VP_SR_SIZE; i++) {
1497 pmap_kernel()->pm_vp[i] = NULL;
1498 }
1499 vp1 = pmap_steal_avail(sizeof (struct pmapvp), 4);
1500 bzero (vp1, sizeof(struct pmapvp));
1501 pmap_kernel()->pm_vp[PPC_KERNEL_SR] = vp1;
1502 for (i = 0; i < VP_IDX1_SIZE; i++) {
1503 vp2 = vp1->vp[i] = pmap_steal_avail(sizeof (struct pmapvp), 4);
1504 bzero (vp2, sizeof(struct pmapvp));
1505 for (k = 0; k < VP_IDX2_SIZE; k++) {
1506 struct pte_desc *pted;
1507 pted = pmap_steal_avail(sizeof (struct pte_desc), 4);
1508 bzero (pted, sizeof (struct pte_desc));
1509 vp2->vp[k] = pted;
1510 }
1511 }
1512
1513 /*
1514 * Initialize kernel pmap and hardware.
1515 */
1516 #if NPMAPS >= PPC_KERNEL_SEGMENT / 16
1517 usedsr[PPC_KERNEL_SEGMENT / 16 / (sizeof usedsr[0] * 8)]
1518 |= 1 << ((PPC_KERNEL_SEGMENT / 16) % (sizeof usedsr[0] * 8));
1519 #endif
1520 for (i = 0; i < 16; i++)
1521 pmap_kernel()->pm_sr[i] = (PPC_KERNEL_SEG0 + i) | SR_NOEXEC;
1522
1523 if (ppc_nobat) {
1524 vp1 = pmap_steal_avail(sizeof (struct pmapvp), 4);
1525 bzero (vp1, sizeof(struct pmapvp));
1526 pmap_kernel()->pm_vp[0] = vp1;
1527 for (i = 0; i < VP_IDX1_SIZE; i++) {
1528 vp2 = vp1->vp[i] =
1529 pmap_steal_avail(sizeof (struct pmapvp), 4);
1530 bzero (vp2, sizeof(struct pmapvp));
1531 for (k = 0; k < VP_IDX2_SIZE; k++) {
1532 struct pte_desc *pted;
1533 pted = pmap_steal_avail(sizeof (struct pte_desc), 4);
1534 bzero (pted, sizeof (struct pte_desc));
1535 vp2->vp[k] = pted;
1536 }
1537 }
1538
1539 /* first segment contains executable pages */
1540 pmap_kernel()->pm_exec[0]++;
1541 pmap_kernel()->pm_sr[0] &= ~SR_NOEXEC;
1542 } else {
1543 /*
1544 * Setup fixed BAT registers.
1545 *
1546 * Note that we still run in real mode, and the BAT
1547 * registers were cleared in cpu_bootstrap().
1548 */
1549 battable[0].batl = BATL(0x00000000, BAT_M);
1550 if (physmem > atop(0x08000000))
1551 battable[0].batu = BATU(0x00000000, BAT_BL_256M);
1552 else
1553 battable[0].batu = BATU(0x00000000, BAT_BL_128M);
1554
1555 /* Map physical memory with BATs. */
1556 if (physmem > atop(0x10000000)) {
1557 battable[0x1].batl = BATL(0x10000000, BAT_M);
1558 battable[0x1].batu = BATU(0x10000000, BAT_BL_256M);
1559 }
1560 if (physmem > atop(0x20000000)) {
1561 battable[0x2].batl = BATL(0x20000000, BAT_M);
1562 battable[0x2].batu = BATU(0x20000000, BAT_BL_256M);
1563 }
1564 if (physmem > atop(0x30000000)) {
1565 battable[0x3].batl = BATL(0x30000000, BAT_M);
1566 battable[0x3].batu = BATU(0x30000000, BAT_BL_256M);
1567 }
1568 if (physmem > atop(0x40000000)) {
1569 battable[0x4].batl = BATL(0x40000000, BAT_M);
1570 battable[0x4].batu = BATU(0x40000000, BAT_BL_256M);
1571 }
1572 if (physmem > atop(0x50000000)) {
1573 battable[0x5].batl = BATL(0x50000000, BAT_M);
1574 battable[0x5].batu = BATU(0x50000000, BAT_BL_256M);
1575 }
1576 if (physmem > atop(0x60000000)) {
1577 battable[0x6].batl = BATL(0x60000000, BAT_M);
1578 battable[0x6].batu = BATU(0x60000000, BAT_BL_256M);
1579 }
1580 if (physmem > atop(0x70000000)) {
1581 battable[0x7].batl = BATL(0x70000000, BAT_M);
1582 battable[0x7].batu = BATU(0x70000000, BAT_BL_256M);
1583 }
1584 }
1585
1586 ppc_kvm_stolen += reserve_dumppages( (caddr_t)(VM_MIN_KERNEL_ADDRESS +
1587 ppc_kvm_stolen));
1588
1589 pmap_avail_fixup();
1590 for (mp = pmap_avail; mp->size; mp++) {
1591 if (mp->start > 0x80000000)
1592 continue;
1593 if (mp->start + mp->size > 0x80000000)
1594 mp->size = 0x80000000 - mp->start;
1595 uvm_page_physload(atop(mp->start), atop(mp->start+mp->size),
1596 atop(mp->start), atop(mp->start+mp->size), 0);
1597 }
1598 }
1599
1600 void
1601 pmap_enable_mmu(void)
1602 {
1603 uint32_t scratch, sdr1;
1604 int i;
1605
1606 /*
1607 * For the PowerPC 970, ACCR = 3 inhibits loads and stores to
1608 * pages with PTE_AC_64. This is for execute-only mappings.
1609 */
1610 if (ppc_proc_is_64b)
1611 asm volatile ("mtspr 29, %0" :: "r" (3));
1612
1613 if (!ppc_nobat) {
1614 extern caddr_t etext;
1615
1616 /* DBAT0 used for initial segment */
1617 ppc_mtdbat0l(battable[0].batl);
1618 ppc_mtdbat0u(battable[0].batu);
1619
1620 /* IBAT0 only covering the kernel .text */
1621 ppc_mtibat0l(battable[0].batl);
1622 if (round_page((vaddr_t)&etext) < 8*1024*1024)
1623 ppc_mtibat0u(BATU(0x00000000, BAT_BL_8M));
1624 else
1625 ppc_mtibat0u(BATU(0x00000000, BAT_BL_16M));
1626 }
1627
1628 for (i = 0; i < 16; i++)
1629 ppc_mtsrin(PPC_KERNEL_SEG0 + i, i << ADDR_SR_SHIFT);
1630
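	/*
	 * SDR1 is the physical base of the hash table ORed with its size
	 * encoding: the log2-style HTABSIZE field on 64-bit CPUs, and on
	 * 32-bit CPUs a HTABMASK covering the hash bits above the minimum
	 * 1024 PTEGs (hence pmap_ptab_mask >> 10).
	 */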
1631 if (ppc_proc_is_64b)
1632 sdr1 = (uint32_t)pmap_ptable64 | HTABSIZE_64;
1633 else
1634 sdr1 = (uint32_t)pmap_ptable32 | (pmap_ptab_mask >> 10);
1635
1636 asm volatile ("sync; mtsdr1 %0; isync" :: "r"(sdr1));
1637 tlbia();
1638
1639 asm volatile ("eieio; mfmsr %0; ori %0,%0,%1; mtmsr %0; sync; isync"
1640 : "=r"(scratch) : "K"(PSL_IR|PSL_DR|PSL_ME|PSL_RI));
1641 }
1642
1643 /*
1644 * activate a pmap entry
1645 * All PTE entries exist in the same hash table.
1646 * Segment registers are filled on exit to user mode.
1647 */
1648 void
1649 pmap_activate(struct proc *p)
1650 {
1651 struct pcb *pcb = &p->p_addr->u_pcb;
1652
1653 /* Set the current pmap. */
1654 pcb->pcb_pm = p->p_vmspace->vm_map.pmap;
1655 pmap_extract(pmap_kernel(),
1656 (vaddr_t)pcb->pcb_pm, (paddr_t *)&pcb->pcb_pmreal);
1657 curcpu()->ci_curpm = pcb->pcb_pmreal;
1658 }
1659
1660 /*
1661 * deactivate a pmap entry
1662 * NOOP on powerpc
1663 */
1664 void
1665 pmap_deactivate(struct proc *p)
1666 {
1667 }
1668
1669 /*
1670 * pmap_extract: extract a PA for the given VA
1671 */
1672
1673 boolean_t
1674 pmap_extract(pmap_t pm, vaddr_t va, paddr_t *pa)
1675 {
1676 struct pte_desc *pted;
1677
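	/*
	 * Kernel addresses below the top of physical memory are mapped
	 * 1:1 (by the BATs, or by the page table mappings set up at
	 * bootstrap when BATs are not used), so no lookup is needed.
	 */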
1678 if (pm == pmap_kernel() && va < physmaxaddr) {
1679 *pa = va;
1680 return TRUE;
1681 }
1682
1683 PMAP_VP_LOCK(pm);
1684 pted = pmap_vp_lookup(pm, va);
1685 if (pted == NULL || !PTED_VALID(pted)) {
1686 PMAP_VP_UNLOCK(pm);
1687 return FALSE;
1688 }
1689
1690 if (ppc_proc_is_64b)
1691 *pa = (pted->p.pted_pte64.pte_lo & PTE_RPGN_64) |
1692 (va & ~PTE_RPGN_64);
1693 else
1694 *pa = (pted->p.pted_pte32.pte_lo & PTE_RPGN_32) |
1695 (va & ~PTE_RPGN_32);
1696
1697 PMAP_VP_UNLOCK(pm);
1698 return TRUE;
1699 }
1700
1701 #ifdef ALTIVEC
1702 /*
1703 * Read an instruction from a given virtual memory address.
1704 * Execute-only protection is bypassed.
1705 */
1706 int
1707 pmap_copyinsn(pmap_t pm, vaddr_t va, uint32_t *insn)
1708 {
1709 struct pte_desc *pted;
1710 paddr_t pa;
1711
1712 /* Assume pm != pmap_kernel(). */
1713 if (ppc_proc_is_64b) {
1714 /* inline pmap_extract */
1715 PMAP_VP_LOCK(pm);
1716 pted = pmap_vp_lookup(pm, va);
1717 if (pted == NULL || !PTED_VALID(pted)) {
1718 PMAP_VP_UNLOCK(pm);
1719 return EFAULT;
1720 }
1721 pa = (pted->p.pted_pte64.pte_lo & PTE_RPGN_64) |
1722 (va & ~PTE_RPGN_64);
1723 PMAP_VP_UNLOCK(pm);
1724
1725 if (pa > physmaxaddr - sizeof(*insn))
1726 return EFAULT;
1727 *insn = *(uint32_t *)pa;
1728 return 0;
1729 } else
1730 return copyin32((void *)va, insn);
1731 }
1732 #endif
1733
1734 u_int32_t
1735 pmap_setusr(pmap_t pm, vaddr_t va)
1736 {
1737 u_int32_t sr;
1738 u_int32_t oldsr;
1739
1740 sr = ptesr(pm->pm_sr, va);
1741
1742 /* user address range lock?? */
1743 asm volatile ("mfsr %0,%1" : "=r" (oldsr): "n"(PPC_USER_SR));
1744 asm volatile ("isync; mtsr %0,%1; isync" :: "n"(PPC_USER_SR), "r"(sr));
1745 return oldsr;
1746 }
1747
1748 void
1749 pmap_popusr(u_int32_t sr)
1750 {
1751 asm volatile ("isync; mtsr %0,%1; isync"
1752 :: "n"(PPC_USER_SR), "r"(sr));
1753 }
1754
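/*
 * Kernel access to user memory goes through a dedicated segment window:
 * pmap_setusr() points segment register PPC_USER_SR at the user segment
 * containing the address, making it visible at PPC_USER_ADDR, while
 * setfault() catches any fault during the copy.  Copies are chunked so
 * that they never cross a segment boundary.
 */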
1755 int
1756 _copyin(const void *udaddr, void *kaddr, size_t len)
1757 {
1758 void *p;
1759 size_t l;
1760 u_int32_t oldsr;
1761 faultbuf env;
1762 void *oldh = curpcb->pcb_onfault;
1763
1764 while (len > 0) {
1765 p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK);
1766 l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p;
1767 if (l > len)
1768 l = len;
1769 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr);
1770 if (setfault(&env)) {
1771 pmap_popusr(oldsr);
1772 curpcb->pcb_onfault = oldh;
1773 return EFAULT;
1774 }
1775 bcopy(p, kaddr, l);
1776 pmap_popusr(oldsr);
1777 udaddr += l;
1778 kaddr += l;
1779 len -= l;
1780 }
1781 curpcb->pcb_onfault = oldh;
1782 return 0;
1783 }
1784
1785 int
1786 copyout(const void *kaddr, void *udaddr, size_t len)
1787 {
1788 void *p;
1789 size_t l;
1790 u_int32_t oldsr;
1791 faultbuf env;
1792 void *oldh = curpcb->pcb_onfault;
1793
1794 while (len > 0) {
1795 p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK);
1796 l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p;
1797 if (l > len)
1798 l = len;
1799 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr);
1800 if (setfault(&env)) {
1801 pmap_popusr(oldsr);
1802 curpcb->pcb_onfault = oldh;
1803 return EFAULT;
1804 }
1805
1806 bcopy(kaddr, p, l);
1807 pmap_popusr(oldsr);
1808 udaddr += l;
1809 kaddr += l;
1810 len -= l;
1811 }
1812 curpcb->pcb_onfault = oldh;
1813 return 0;
1814 }
1815
1816 int
1817 copyin32(const uint32_t *udaddr, uint32_t *kaddr)
1818 {
1819 volatile uint32_t *p;
1820 u_int32_t oldsr;
1821 faultbuf env;
1822 void *oldh = curpcb->pcb_onfault;
1823
1824 if ((u_int)udaddr & 0x3)
1825 return EFAULT;
1826
1827 p = PPC_USER_ADDR + ((u_int)udaddr & ~PPC_SEGMENT_MASK);
1828 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)udaddr);
1829 if (setfault(&env)) {
1830 pmap_popusr(oldsr);
1831 curpcb->pcb_onfault = oldh;
1832 return EFAULT;
1833 }
1834 *kaddr = *p;
1835 pmap_popusr(oldsr);
1836 curpcb->pcb_onfault = oldh;
1837 return 0;
1838 }
1839
1840 int
1841 _copyinstr(const void *udaddr, void *kaddr, size_t len, size_t *done)
1842 {
1843 const u_char *uaddr = udaddr;
1844 u_char *kp = kaddr;
1845 u_char *up;
1846 u_char c;
1847 void *p;
1848 size_t l;
1849 u_int32_t oldsr;
1850 int cnt = 0;
1851 faultbuf env;
1852 void *oldh = curpcb->pcb_onfault;
1853
1854 while (len > 0) {
1855 p = PPC_USER_ADDR + ((u_int)uaddr & ~PPC_SEGMENT_MASK);
1856 l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p;
1857 up = p;
1858 if (l > len)
1859 l = len;
1860 len -= l;
1861 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)uaddr);
1862 if (setfault(&env)) {
1863 if (done != NULL)
1864 *done = cnt;
1865
1866 curpcb->pcb_onfault = oldh;
1867 pmap_popusr(oldsr);
1868 return EFAULT;
1869 }
1870 while (l > 0) {
1871 c = *up;
1872 *kp = c;
1873 if (c == 0) {
1874 if (done != NULL)
1875 *done = cnt + 1;
1876
1877 curpcb->pcb_onfault = oldh;
1878 pmap_popusr(oldsr);
1879 return 0;
1880 }
1881 up++;
1882 kp++;
1883 l--;
1884 cnt++;
1885 uaddr++;
1886 }
1887 pmap_popusr(oldsr);
1888 }
1889 curpcb->pcb_onfault = oldh;
1890 if (done != NULL)
1891 *done = cnt;
1892
1893 return ENAMETOOLONG;
1894 }
1895
1896 int
1897 copyoutstr(const void *kaddr, void *udaddr, size_t len, size_t *done)
1898 {
1899 u_char *uaddr = (void *)udaddr;
1900 const u_char *kp = kaddr;
1901 u_char *up;
1902 u_char c;
1903 void *p;
1904 size_t l;
1905 u_int32_t oldsr;
1906 int cnt = 0;
1907 faultbuf env;
1908 void *oldh = curpcb->pcb_onfault;
1909
1910 while (len > 0) {
1911 p = PPC_USER_ADDR + ((u_int)uaddr & ~PPC_SEGMENT_MASK);
1912 l = (PPC_USER_ADDR + PPC_SEGMENT_LENGTH) - p;
1913 up = p;
1914 if (l > len)
1915 l = len;
1916 len -= l;
1917 oldsr = pmap_setusr(curpcb->pcb_pm, (vaddr_t)uaddr);
1918 if (setfault(&env)) {
1919 if (done != NULL)
1920 *done = cnt;
1921
1922 curpcb->pcb_onfault = oldh;
1923 pmap_popusr(oldsr);
1924 return EFAULT;
1925 }
1926 while (l > 0) {
1927 c = *kp;
1928 *up = c;
1929 if (c == 0) {
1930 if (done != NULL)
1931 *done = cnt + 1;
1932
1933 curpcb->pcb_onfault = oldh;
1934 pmap_popusr(oldsr);
1935 return 0;
1936 }
1937 up++;
1938 kp++;
1939 l--;
1940 cnt++;
1941 uaddr++;
1942 }
1943 pmap_popusr(oldsr);
1944 }
1945 curpcb->pcb_onfault = oldh;
1946 if (done != NULL)
1947 *done = cnt;
1948
1949 return ENAMETOOLONG;
1950 }
1951
1952 /*
1953 * sync instruction cache for user virtual address.
1954 * The address WAS JUST MAPPED, so we have a VALID USERSPACE mapping
1955 */
1956 void
1957 pmap_syncicache_user_virt(pmap_t pm, vaddr_t va)
1958 {
1959 vaddr_t start;
1960 int oldsr;
1961
1962 if (pm != pmap_kernel()) {
1963 start = ((u_int)PPC_USER_ADDR + ((u_int)va &
1964 ~PPC_SEGMENT_MASK));
1965 /* will only ever be page size, will not cross segments */
1966
1967 /* USER SEGMENT LOCK - MPXXX */
1968 oldsr = pmap_setusr(pm, va);
1969 } else {
1970 start = va; /* flush mapped page */
1971 }
1972
1973 syncicache((void *)start, PAGE_SIZE);
1974
1975 if (pm != pmap_kernel()) {
1976 pmap_popusr(oldsr);
1977 /* USER SEGMENT UNLOCK -MPXXX */
1978 }
1979 }
1980
1981 void
1982 pmap_pted_ro(struct pte_desc *pted, vm_prot_t prot)
1983 {
1984 if (ppc_proc_is_64b)
1985 pmap_pted_ro64(pted, prot);
1986 else
1987 pmap_pted_ro32(pted, prot);
1988 }
1989
1990 void
1991 pmap_pted_ro64(struct pte_desc *pted, vm_prot_t prot)
1992 {
1993 pmap_t pm = pted->pted_pmap;
1994 vaddr_t va = pted->pted_va & ~PAGE_MASK;
1995 struct vm_page *pg;
1996 void *pte;
1997 int s;
1998
1999 pg = PHYS_TO_VM_PAGE(pted->p.pted_pte64.pte_lo & PTE_RPGN_64);
2000 if (pg->pg_flags & PG_PMAP_EXE) {
2001 if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
2002 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
2003 } else {
2004 pmap_syncicache_user_virt(pm, va);
2005 }
2006 }
2007
2008 pted->p.pted_pte64.pte_lo &= ~PTE_PP_64;
2009 pted->p.pted_pte64.pte_lo |= PTE_RO_64;
2010
2011 if ((prot & PROT_EXEC) == 0)
2012 pted->p.pted_pte64.pte_lo |= PTE_N_64;
2013
2014 if ((prot & (PROT_READ | PROT_WRITE)) == 0)
2015 pted->p.pted_pte64.pte_lo |= PTE_AC_64;
2016
2017 PMAP_HASH_LOCK(s);
2018 if ((pte = pmap_ptedinhash(pted)) != NULL) {
2019 struct pte_64 *ptp64 = pte;
2020
2021 pte_del(ptp64, va);
2022
2023 if (PTED_MANAGED(pted)) { /* XXX */
2024 pmap_attr_save(ptp64->pte_lo & PTE_RPGN_64,
2025 ptp64->pte_lo & (PTE_REF_64|PTE_CHG_64));
2026 }
2027
2028 /* Add a Page Table Entry, section 7.6.3.1. */
2029 ptp64->pte_lo = pted->p.pted_pte64.pte_lo;
2030 eieio(); /* Order 1st PTE update before 2nd. */
2031 ptp64->pte_hi |= PTE_VALID_64;
2032 sync(); /* Ensure updates completed. */
2033 }
2034 PMAP_HASH_UNLOCK(s);
2035 }
2036
2037 void
2038 pmap_pted_ro32(struct pte_desc *pted, vm_prot_t prot)
2039 {
2040 pmap_t pm = pted->pted_pmap;
2041 vaddr_t va = pted->pted_va & ~PAGE_MASK;
2042 struct vm_page *pg;
2043 void *pte;
2044 int s;
2045
2046 pg = PHYS_TO_VM_PAGE(pted->p.pted_pte32.pte_lo & PTE_RPGN_32);
2047 if (pg->pg_flags & PG_PMAP_EXE) {
2048 if ((prot & (PROT_WRITE | PROT_EXEC)) == PROT_WRITE) {
2049 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
2050 } else {
2051 pmap_syncicache_user_virt(pm, va);
2052 }
2053 }
2054
2055 pted->p.pted_pte32.pte_lo &= ~PTE_PP_32;
2056 pted->p.pted_pte32.pte_lo |= PTE_RO_32;
2057
2058 PMAP_HASH_LOCK(s);
2059 if ((pte = pmap_ptedinhash(pted)) != NULL) {
2060 struct pte_32 *ptp32 = pte;
2061
2062 pte_del(ptp32, va);
2063
2064 if (PTED_MANAGED(pted)) { /* XXX */
2065 pmap_attr_save(ptp32->pte_lo & PTE_RPGN_32,
2066 ptp32->pte_lo & (PTE_REF_32|PTE_CHG_32));
2067 }
2068
2069 /* Add a Page Table Entry, section 7.6.3.1. */
2070 ptp32->pte_lo &= ~(PTE_CHG_32|PTE_PP_32);
2071 ptp32->pte_lo |= PTE_RO_32;
2072 eieio(); /* Order 1st PTE update before 2nd. */
2073 ptp32->pte_hi |= PTE_VALID_32;
2074 sync(); /* Ensure updates completed. */
2075 }
2076 PMAP_HASH_UNLOCK(s);
2077 }
2078
2079 /*
2080 * Lower the protection on the specified physical page.
2081 *
2082  * There are only two cases: either the protection is going to 0,
2083 * or it is going to read-only.
2084 */
2085 void
2086 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
2087 {
2088 struct pte_desc *pted;
2089 void *pte;
2090 pmap_t pm;
2091 int s;
2092
2093 if (prot == PROT_NONE) {
2094 mtx_enter(&pg->mdpage.pv_mtx);
2095 while ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) != NULL) {
2096 pmap_reference(pted->pted_pmap);
2097 pm = pted->pted_pmap;
2098 mtx_leave(&pg->mdpage.pv_mtx);
2099
2100 PMAP_VP_LOCK(pm);
2101
2102 /*
2103 * We dropped the pvlist lock before grabbing
2104 * the pmap lock to avoid lock ordering
2105 * problems. This means we have to check the
2106 * pvlist again since somebody else might have
2107 * modified it. All we care about is that the
2108 * pvlist entry matches the pmap we just
2109 * locked. If it doesn't, unlock the pmap and
2110 * try again.
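			 *
			 * The reference taken with pmap_reference() above
			 * keeps the pmap from being freed while the pvlist
			 * lock is dropped; it is released again with
			 * pmap_destroy().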
2111 */
2112 mtx_enter(&pg->mdpage.pv_mtx);
2113 if ((pted = LIST_FIRST(&(pg->mdpage.pv_list))) == NULL ||
2114 pted->pted_pmap != pm) {
2115 mtx_leave(&pg->mdpage.pv_mtx);
2116 PMAP_VP_UNLOCK(pm);
2117 pmap_destroy(pm);
2118 mtx_enter(&pg->mdpage.pv_mtx);
2119 continue;
2120 }
2121
2122 PMAP_HASH_LOCK(s);
2123 if ((pte = pmap_ptedinhash(pted)) != NULL)
2124 pte_zap(pte, pted);
2125 PMAP_HASH_UNLOCK(s);
2126
2127 pted->pted_va &= ~PTED_VA_MANAGED_M;
2128 LIST_REMOVE(pted, pted_pv_list);
2129 mtx_leave(&pg->mdpage.pv_mtx);
2130
2131 pmap_remove_pted(pm, pted);
2132
2133 PMAP_VP_UNLOCK(pm);
2134 pmap_destroy(pm);
2135 mtx_enter(&pg->mdpage.pv_mtx);
2136 }
2137 mtx_leave(&pg->mdpage.pv_mtx);
2138 /* page is being reclaimed, sync icache next use */
2139 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_EXE);
2140 return;
2141 }
2142
2143 mtx_enter(&pg->mdpage.pv_mtx);
2144 LIST_FOREACH(pted, &(pg->mdpage.pv_list), pted_pv_list)
2145 pmap_pted_ro(pted, prot);
2146 mtx_leave(&pg->mdpage.pv_mtx);
2147 }
2148
2149 void
2150 pmap_protect(pmap_t pm, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
2151 {
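	/*
	 * If any access remains, downgrade each valid mapping in the
	 * range to read-only; removing all access is just pmap_remove().
	 */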
2152 if (prot & (PROT_READ | PROT_EXEC)) {
2153 struct pte_desc *pted;
2154
2155 PMAP_VP_LOCK(pm);
2156 while (sva < eva) {
2157 pted = pmap_vp_lookup(pm, sva);
2158 if (pted && PTED_VALID(pted))
2159 pmap_pted_ro(pted, prot);
2160 sva += PAGE_SIZE;
2161 }
2162 PMAP_VP_UNLOCK(pm);
2163 return;
2164 }
2165 pmap_remove(pm, sva, eva);
2166 }
2167
2168 /*
2169 * Restrict given range to physical memory
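 *
 * For example, with a single region [0x1000, 0x5000), a request of
 * *start = 0x0800, *size = 0x2000 is clipped to *start = 0x1000,
 * *size = 0x1800; a range that overlaps no region comes back with
 * *size = 0.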
2170 */
2171 void
2172 pmap_real_memory(paddr_t *start, vsize_t *size)
2173 {
2174 struct mem_region *mp;
2175
2176 for (mp = pmap_mem; mp->size; mp++) {
2177 if (((*start + *size) > mp->start)
2178 && (*start < (mp->start + mp->size)))
2179 {
2180 if (*start < mp->start) {
2181 *size -= mp->start - *start;
2182 *start = mp->start;
2183 }
2184 if ((*start + *size) > (mp->start + mp->size))
2185 *size = mp->start + mp->size - *start;
2186 return;
2187 }
2188 }
2189 *size = 0;
2190 }
2191
2192 void
2193 pmap_init()
2194 {
2195 pool_init(&pmap_pmap_pool, sizeof(struct pmap), 0, IPL_NONE, 0,
2196 "pmap", NULL);
2197 pool_setlowat(&pmap_pmap_pool, 2);
2198 pool_init(&pmap_vp_pool, sizeof(struct pmapvp), 0, IPL_VM, 0,
2199 "vp", &pool_allocator_single);
2200 pool_setlowat(&pmap_vp_pool, 10);
2201 pool_init(&pmap_pted_pool, sizeof(struct pte_desc), 0, IPL_VM, 0,
2202 "pted", NULL);
2203 pool_setlowat(&pmap_pted_pool, 20);
2204
2205 pmap_initialized = 1;
2206 }
2207
2208 void
2209 pmap_proc_iflush(struct process *pr, vaddr_t va, vsize_t len)
2210 {
2211 paddr_t pa;
2212 vsize_t clen;
2213
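	/*
	 * Sync the icache at most one page at a time; syncicache() is
	 * handed the physical address, which works because physical
	 * memory is mapped 1:1 by the kernel.
	 */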
2214 while (len > 0) {
2215 /* add one to always round up to the next page */
2216 clen = round_page(va + 1) - va;
2217 if (clen > len)
2218 clen = len;
2219
2220 if (pmap_extract(pr->ps_vmspace->vm_map.pmap, va, &pa)) {
2221 syncicache((void *)pa, clen);
2222 }
2223
2224 len -= clen;
2225 va += clen;
2226 }
2227 }
2228
2229 /*
2230  * There are two routines, pte_spill_r and pte_spill_v.
2231  * The _r version only handles kernel faults that are not user
2232  * accesses.  The _v version handles all user faults and kernel
2233  * copyin/copyout "user" accesses.
2234 */
2235 int
2236 pte_spill_r(u_int32_t va, u_int32_t msr, u_int32_t dsisr, int exec_fault)
2237 {
2238 pmap_t pm;
2239 struct pte_desc *pted;
2240 struct pte_desc pted_store;
2241
2242 	/* lookup is done physically to prevent faults */
2243
2244 /*
2245 * This function only handles kernel faults, not supervisor copyins.
2246 */
2247 if (msr & PSL_PR)
2248 return 0;
2249
2250 /* if copyin, throw to full exception handler */
2251 if (VP_SR(va) == PPC_USER_SR)
2252 return 0;
2253
2254 pm = pmap_kernel();
2255
2256 /* 0 - physmaxaddr mapped 1-1 */
2257 if (va < physmaxaddr) {
2258 u_int32_t aligned_va;
2259 vm_prot_t prot = PROT_READ | PROT_WRITE;
2260 extern caddr_t kernel_text;
2261 extern caddr_t etext;
2262
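		/*
		 * A throw-away pte_desc on the stack is used here: the
		 * hash entry for the 1:1 region is rebuilt on demand by
		 * this spill handler, so nothing needs to persist once
		 * pte_insert64()/pte_insert32() has run.
		 */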
2263 pted = &pted_store;
2264
2265 if (va >= trunc_page((vaddr_t)&kernel_text) &&
2266 va < round_page((vaddr_t)&etext)) {
2267 prot |= PROT_EXEC;
2268 }
2269
2270 aligned_va = trunc_page(va);
2271 if (ppc_proc_is_64b) {
2272 pmap_fill_pte64(pm, aligned_va, aligned_va,
2273 pted, prot, PMAP_CACHE_WB);
2274 pte_insert64(pted);
2275 } else {
2276 pmap_fill_pte32(pm, aligned_va, aligned_va,
2277 pted, prot, PMAP_CACHE_WB);
2278 pte_insert32(pted);
2279 }
2280 return 1;
2281 }
2282
2283 return pte_spill_v(pm, va, dsisr, exec_fault);
2284 }
2285
2286 int
2287 pte_spill_v(pmap_t pm, u_int32_t va, u_int32_t dsisr, int exec_fault)
2288 {
2289 struct pte_desc *pted;
2290 int inserted = 0;
2291
2292 /*
2293 * DSISR_DABR is set if the PowerPC 970 attempted to read or
2294 * write an execute-only page.
2295 */
2296 if (dsisr & DSISR_DABR)
2297 return 0;
2298
2299 /*
2300 	 * If the current mapping is RO and the access was a write,
2301 	 * return 0.
2302 */
2303 PMAP_VP_LOCK(pm);
2304 pted = pmap_vp_lookup(pm, va);
2305 if (pted == NULL || !PTED_VALID(pted))
2306 goto out;
2307
2308 /* Attempted to write a read-only page. */
2309 if (dsisr & DSISR_STORE) {
2310 if (ppc_proc_is_64b) {
2311 if ((pted->p.pted_pte64.pte_lo & PTE_PP_64) ==
2312 PTE_RO_64)
2313 goto out;
2314 } else {
2315 if ((pted->p.pted_pte32.pte_lo & PTE_PP_32) ==
2316 PTE_RO_32)
2317 goto out;
2318 }
2319 }
2320
2321 /* Attempted to execute non-executable page. */
2322 if ((exec_fault != 0) && ((pted->pted_va & PTED_VA_EXEC_M) == 0))
2323 goto out;
2324
2325 inserted = 1;
2326 if (ppc_proc_is_64b)
2327 pte_insert64(pted);
2328 else
2329 pte_insert32(pted);
2330
2331 out:
2332 PMAP_VP_UNLOCK(pm);
2333 return (inserted);
2334 }
2335
2336
2337 /*
2338 * should pte_insert code avoid wired mappings?
2339 * is the stack safe?
2340 * is the pted safe? (physical)
2341 * -ugh
2342 */
2343 void
2344 pte_insert64(struct pte_desc *pted)
2345 {
2346 struct pte_64 *ptp64;
2347 int off, secondary;
2348 int sr, idx, i;
2349 void *pte;
2350 int s;
2351
2352 PMAP_HASH_LOCK(s);
2353 if ((pte = pmap_ptedinhash(pted)) != NULL)
2354 pte_zap(pte, pted);
2355
2356 pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);
2357
2358 sr = ptesr(pted->pted_pmap->pm_sr, pted->pted_va);
2359 idx = pteidx(sr, pted->pted_va);
2360
2361 /*
2362 	 * Instead of starting at the beginning of each pteg, the code
2363 	 * should pick a random location within the primary pteg and
2364 	 * search all of its entries, then, if nothing free is found,
2365 	 * do the same for the secondary pteg.  This would reduce the
2366 	 * frontloading of the pteg.
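	 *
	 * A sketch of that idea, using hypothetical locals (this is not
	 * what the code below does):
	 *
	 *	start = ppc_mftb() & 7;
	 *	for (j = 0; j < 8; j++) {
	 *		i = (start + j) & 7;
	 *		if ((ptp64[i].pte_hi & PTE_VALID_64) == 0)
	 *			break;
	 *	}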
2367 */
2368
2369 /* first just try fill of primary hash */
2370 ptp64 = pmap_ptable64 + (idx) * 8;
2371 for (i = 0; i < 8; i++) {
2372 if (ptp64[i].pte_hi & PTE_VALID_64)
2373 continue;
2374
2375 pted->pted_va |= i;
2376
2377 /* Add a Page Table Entry, section 7.6.3.1. */
2378 ptp64[i].pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64;
2379 ptp64[i].pte_lo = pted->p.pted_pte64.pte_lo;
2380 eieio(); /* Order 1st PTE update before 2nd. */
2381 ptp64[i].pte_hi |= PTE_VALID_64;
2382 sync(); /* Ensure updates completed. */
2383
2384 goto out;
2385 }
2386
2387 /* try fill of secondary hash */
2388 ptp64 = pmap_ptable64 + (idx ^ pmap_ptab_mask) * 8;
2389 for (i = 0; i < 8; i++) {
2390 if (ptp64[i].pte_hi & PTE_VALID_64)
2391 continue;
2392
2393 pted->pted_va |= (i | PTED_VA_HID_M);
2394
2395 /* Add a Page Table Entry, section 7.6.3.1. */
2396 ptp64[i].pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64;
2397 ptp64[i].pte_lo = pted->p.pted_pte64.pte_lo;
2398 eieio(); /* Order 1st PTE update before 2nd. */
2399 ptp64[i].pte_hi |= (PTE_HID_64|PTE_VALID_64);
2400 sync(); /* Ensure updates completed. */
2401
2402 goto out;
2403 }
2404
2405 /* need decent replacement algorithm */
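	/*
	 * Use the low timebase bits as a pseudo-random victim choice:
	 * bit 3 (PTED_VA_HID_M) picks primary vs. secondary hash and
	 * the low bits (PTED_VA_PTEGIDX_M) pick the slot in the pteg.
	 */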
2406 off = ppc_mftb();
2407 secondary = off & 8;
2408
2409
2410 pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M);
2411
2412 idx = (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0));
2413
2414 ptp64 = pmap_ptable64 + (idx * 8);
2415 ptp64 += PTED_PTEGIDX(pted); /* increment by index into pteg */
2416
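	/*
	 * If the chosen slot is already valid, reconstruct the victim's
	 * effective address from its PTE so pte_del() can invalidate
	 * the matching TLB entry, and save its REF/CHG bits before the
	 * entry is overwritten.
	 */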
2417 if (ptp64->pte_hi & PTE_VALID_64) {
2418 vaddr_t va;
2419
2420 /* Bits 9-19 */
2421 idx = (idx ^ ((ptp64->pte_hi & PTE_HID_64) ?
2422 pmap_ptab_mask : 0));
2423 va = (ptp64->pte_hi >> PTE_VSID_SHIFT_64) ^ idx;
2424 va <<= ADDR_PIDX_SHIFT;
2425 /* Bits 4-8 */
2426 va |= (ptp64->pte_hi & PTE_API_64) << ADDR_API_SHIFT_32;
2427 /* Bits 0-3 */
2428 va |= (ptp64->pte_hi >> PTE_VSID_SHIFT_64)
2429 << ADDR_SR_SHIFT;
2430
2431 pte_del(ptp64, va);
2432
2433 pmap_attr_save(ptp64->pte_lo & PTE_RPGN_64,
2434 ptp64->pte_lo & (PTE_REF_64|PTE_CHG_64));
2435 }
2436
2437 /* Add a Page Table Entry, section 7.6.3.1. */
2438 ptp64->pte_hi = pted->p.pted_pte64.pte_hi & ~PTE_VALID_64;
2439 if (secondary)
2440 ptp64->pte_hi |= PTE_HID_64;
2441 ptp64->pte_lo = pted->p.pted_pte64.pte_lo;
2442 eieio(); /* Order 1st PTE update before 2nd. */
2443 ptp64->pte_hi |= PTE_VALID_64;
2444 sync(); /* Ensure updates completed. */
2445
2446 out:
2447 PMAP_HASH_UNLOCK(s);
2448 }
2449
2450 void
2451 pte_insert32(struct pte_desc *pted)
2452 {
2453 struct pte_32 *ptp32;
2454 int off, secondary;
2455 int sr, idx, i;
2456 void *pte;
2457 int s;
2458
2459 PMAP_HASH_LOCK(s);
2460 if ((pte = pmap_ptedinhash(pted)) != NULL)
2461 pte_zap(pte, pted);
2462
2463 pted->pted_va &= ~(PTED_VA_HID_M|PTED_VA_PTEGIDX_M);
2464
2465 sr = ptesr(pted->pted_pmap->pm_sr, pted->pted_va);
2466 idx = pteidx(sr, pted->pted_va);
2467
2468 /*
2469 	 * Instead of starting at the beginning of each pteg, the code
2470 	 * should pick a random location within the primary pteg and
2471 	 * search all of its entries, then, if nothing free is found,
2472 	 * do the same for the secondary pteg.  This would reduce the
2473 	 * frontloading of the pteg.
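	 *
	 * (A sketch of this idea is in pte_insert64 above.)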
2474 */
2475
2476 /* first just try fill of primary hash */
2477 ptp32 = pmap_ptable32 + (idx) * 8;
2478 for (i = 0; i < 8; i++) {
2479 if (ptp32[i].pte_hi & PTE_VALID_32)
2480 continue;
2481
2482 pted->pted_va |= i;
2483
2484 /* Add a Page Table Entry, section 7.6.3.1. */
2485 ptp32[i].pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32;
2486 ptp32[i].pte_lo = pted->p.pted_pte32.pte_lo;
2487 eieio(); /* Order 1st PTE update before 2nd. */
2488 ptp32[i].pte_hi |= PTE_VALID_32;
2489 sync(); /* Ensure updates completed. */
2490
2491 goto out;
2492 }
2493
2494 /* try fill of secondary hash */
2495 ptp32 = pmap_ptable32 + (idx ^ pmap_ptab_mask) * 8;
2496 for (i = 0; i < 8; i++) {
2497 if (ptp32[i].pte_hi & PTE_VALID_32)
2498 continue;
2499
2500 pted->pted_va |= (i | PTED_VA_HID_M);
2501
2502 /* Add a Page Table Entry, section 7.6.3.1. */
2503 ptp32[i].pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32;
2504 ptp32[i].pte_lo = pted->p.pted_pte32.pte_lo;
2505 eieio(); /* Order 1st PTE update before 2nd. */
2506 ptp32[i].pte_hi |= (PTE_HID_32|PTE_VALID_32);
2507 sync(); /* Ensure updates completed. */
2508
2509 goto out;
2510 }
2511
2512 /* need decent replacement algorithm */
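	/* Same pseudo-random victim choice as in pte_insert64 above. */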
2513 off = ppc_mftb();
2514 secondary = off & 8;
2515
2516 pted->pted_va |= off & (PTED_VA_PTEGIDX_M|PTED_VA_HID_M);
2517
2518 idx = (idx ^ (PTED_HID(pted) ? pmap_ptab_mask : 0));
2519
2520 ptp32 = pmap_ptable32 + (idx * 8);
2521 ptp32 += PTED_PTEGIDX(pted); /* increment by index into pteg */
2522
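	/*
	 * Evict the current occupant: reconstruct its effective address
	 * for pte_del() and save its REF/CHG bits, as in pte_insert64.
	 */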
2523 if (ptp32->pte_hi & PTE_VALID_32) {
2524 vaddr_t va;
2525
2526 va = ((ptp32->pte_hi & PTE_API_32) << ADDR_API_SHIFT_32) |
2527 ((((ptp32->pte_hi >> PTE_VSID_SHIFT_32) & SR_VSID)
2528 ^(idx ^ ((ptp32->pte_hi & PTE_HID_32) ? 0x3ff : 0)))
2529 & 0x3ff) << PAGE_SHIFT;
2530
2531 pte_del(ptp32, va);
2532
2533 pmap_attr_save(ptp32->pte_lo & PTE_RPGN_32,
2534 ptp32->pte_lo & (PTE_REF_32|PTE_CHG_32));
2535 }
2536
2537 /* Add a Page Table Entry, section 7.6.3.1. */
2538 ptp32->pte_hi = pted->p.pted_pte32.pte_hi & ~PTE_VALID_32;
2539 if (secondary)
2540 ptp32->pte_hi |= PTE_HID_32;
2541 ptp32->pte_lo = pted->p.pted_pte32.pte_lo;
2542 eieio(); /* Order 1st PTE update before 2nd. */
2543 ptp32->pte_hi |= PTE_VALID_32;
2544 sync(); /* Ensure updates completed. */
2545
2546 out:
2547 PMAP_HASH_UNLOCK(s);
2548 }
2549