1 /* $OpenBSD: pmap.c,v 1.92 2024/08/23 15:14:45 miod Exp $ */
2 /* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */
3
4 /*-
5 * Copyright (c) 1998, 1999, 2000 The NetBSD Foundation, Inc.
6 * All rights reserved.
7 *
8 * This code is derived from software contributed to The NetBSD Foundation
9 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10 * NASA Ames Research Center and by Chris G. Demetriou.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31 * POSSIBILITY OF SUCH DAMAGE.
32 */
33
34 /*
35 * Copyright (c) 1991, 1993
36 * The Regents of the University of California. All rights reserved.
37 *
38 * This code is derived from software contributed to Berkeley by
39 * the Systems Programming Group of the University of Utah Computer
40 * Science Department.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)pmap.c 8.6 (Berkeley) 5/27/94
67 */
68
69 /*
70 * DEC Alpha physical map management code.
71 *
72 * History:
73 *
74 * This pmap started life as a Motorola 68851/68030 pmap,
75 * written by Mike Hibler at the University of Utah.
76 *
77 * It was modified for the DEC Alpha by Chris Demetriou
78 * at Carnegie Mellon University.
79 *
80 * Support for non-contiguous physical memory was added by
81 * Jason R. Thorpe of the Numerical Aerospace Simulation
82 * Facility, NASA Ames Research Center and Chris Demetriou.
83 *
84 * Page table management and a major cleanup were undertaken
85 * by Jason R. Thorpe, with lots of help from Ross Harvey of
86 * Avalon Computer Systems and from Chris Demetriou.
87 *
88 * Support for the new UVM pmap interface was written by
89 * Jason R. Thorpe.
90 *
91 * Support for ASNs was written by Jason R. Thorpe, again
92 * with help from Chris Demetriou and Ross Harvey.
93 *
94 * The locking protocol was written by Jason R. Thorpe,
95 * using Chuck Cranor's i386 pmap for UVM as a model.
96 *
97 * TLB shootdown code was written by Jason R. Thorpe.
98 *
99 * Notes:
100 *
101 * All page table access is done via K0SEG. The one exception
102 * to this is for kernel mappings. Since all kernel page
103 * tables are pre-allocated, we can use the Virtual Page Table
104 * to access PTEs that map K1SEG addresses.
105 *
106 * Kernel page table pages are statically allocated in
107 * pmap_bootstrap(), and are never freed. In the future,
108 * support for dynamically adding additional kernel page
109 * table pages may be added. User page table pages are
110 * dynamically allocated and freed.
111 *
112 * Bugs/misfeatures:
113 *
114 * - Some things could be optimized.
115 */
116
117 /*
118 * Manages physical address maps.
119 *
120 * Since the information managed by this module is
121 * also stored by the logical address mapping module,
122 * this module may throw away valid virtual-to-physical
123 * mappings at almost any time. However, invalidations
124 * of virtual-to-physical mappings must be done as
125 * requested.
126 *
127 * In order to cope with hardware architectures which
128 * make virtual-to-physical map invalidates expensive,
129 * this module may delay invalidation or protection-reduction
130 * operations until such time as they are actually
131 * necessary. This module is given full information as
132 * to which processors are currently using which maps,
133 * and to when physical maps must be made correct.
134 */
135
136 #include <sys/param.h>
137 #include <sys/systm.h>
138 #include <sys/proc.h>
139 #include <sys/malloc.h>
140 #include <sys/pool.h>
141 #include <sys/user.h>
142 #include <sys/buf.h>
143 #include <sys/atomic.h>
144 #ifdef SYSVSHM
145 #include <sys/shm.h>
146 #endif
147
148 #include <uvm/uvm.h>
149
150 #include <machine/atomic.h>
151 #include <machine/cpu.h>
152 #if defined(MULTIPROCESSOR)
153 #include <machine/rpb.h>
154 #endif
155
156 #ifdef DEBUG
157 #define PDB_FOLLOW 0x0001
158 #define PDB_INIT 0x0002
159 #define PDB_ENTER 0x0004
160 #define PDB_REMOVE 0x0008
161 #define PDB_CREATE 0x0010
162 #define PDB_PTPAGE 0x0020
163 #define PDB_ASN 0x0040
164 #define PDB_BITS 0x0080
165 #define PDB_COLLECT 0x0100
166 #define PDB_PROTECT 0x0200
167 #define PDB_BOOTSTRAP 0x1000
168 #define PDB_PARANOIA 0x2000
169 #define PDB_WIRING 0x4000
170 #define PDB_PVDUMP 0x8000
171
172 int debugmap = 0;
173 int pmapdebug = PDB_PARANOIA|PDB_FOLLOW|PDB_ENTER;
174 #endif
175
176 /*
177 * Given a map and a machine independent protection code,
178 * convert to an alpha protection code.
179 */
180 #define pte_prot(m, p) (protection_codes[m == pmap_kernel() ? 0 : 1][p])
181 int protection_codes[2][8];
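/*
 * For example (illustrative only), a lookup such as
 *
 *	pt_entry_t bits = pte_prot(pmap_kernel(), PROT_READ);
 *
 * indexes protection_codes[0][PROT_READ], while the same lookup on a
 * user pmap uses row 1; both rows are filled in by
 * alpha_protection_init() at pmap_init() time.
 */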
182
183 /*
184 * kernel_lev1map:
185 *
186 * Kernel level 1 page table. This maps all kernel level 2
187 * page table pages, and is used as a template for all user
188 * pmap level 1 page tables. When a new user level 1 page
189 * table is allocated, all kernel_lev1map PTEs for kernel
190 * addresses are copied to the new map.
191 *
192 * The kernel also has an initial set of kernel level 2 page
193 * table pages. These map the kernel level 3 page table pages.
194 * As kernel level 3 page table pages are added, more level 2
195 * page table pages may be added to map them. These pages are
196 * never freed.
197 *
198 * Finally, the kernel also has an initial set of kernel level
199 * 3 page table pages. These map pages in K1SEG. More level
200 * 3 page table pages may be added at run-time if additional
201 * K1SEG address space is required. These pages are never freed.
202 *
203 * NOTE: When mappings are inserted into the kernel pmap, all
204 * level 2 and level 3 page table pages must already be allocated
205 * and mapped into the parent page table.
206 */
207 pt_entry_t *kernel_lev1map;
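/*
 * For scale (illustrative, assuming the usual 8 KB page size and 8-byte
 * PTEs, i.e. NPTEPG == 1024): one level 3 page table page maps
 * 1024 * 8 KB = 8 MB of KVA, one level 2 page maps 1024 such level 3
 * pages (8 GB), and the single level 1 page spans 1024 * 8 GB = 8 TB.
 */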
208
209 /*
210 * Virtual Page Table.
211 */
212 pt_entry_t *VPT;
213
214 struct pmap kernel_pmap_store
215 [(PMAP_SIZEOF(ALPHA_MAXPROCS) + sizeof(struct pmap) - 1)
216 / sizeof(struct pmap)];
217
218 paddr_t avail_start; /* PA of first available physical page */
219 paddr_t avail_end; /* PA of last available physical page */
220 vaddr_t pmap_maxkvaddr; /* VA of last avail page (pmap_growkernel) */
221
222 boolean_t pmap_initialized; /* Has pmap_init completed? */
223
224 u_long pmap_pages_stolen; /* instrumentation */
225
226 /*
227 * This variable contains the number of CPU IDs we need to allocate
228 * space for when allocating the pmap structure. It is used to
229 * size a per-CPU array of ASN and ASN Generation number.
230 */
231 u_long pmap_ncpuids;
232
233 #ifndef PMAP_PV_LOWAT
234 #define PMAP_PV_LOWAT 16
235 #endif
236 int pmap_pv_lowat = PMAP_PV_LOWAT;
237
238 /*
239 * List of all pmaps, used to update them when e.g. additional kernel
240 * page tables are allocated. This list is kept LRU-ordered by
241 * pmap_activate().
242 */
243 TAILQ_HEAD(, pmap) pmap_all_pmaps;
244
245 /*
246 * The pools from which pmap structures and sub-structures are allocated.
247 */
248 struct pool pmap_pmap_pool;
249 struct pool pmap_l1pt_pool;
250 struct pool pmap_pv_pool;
251
252 /*
253 * Address Space Numbers.
254 *
255 * On many implementations of the Alpha architecture, the TLB entries and
256 * I-cache blocks are tagged with a unique number within an implementation-
257 * specified range. When a process context becomes active, the ASN is used
258 * to match TLB entries; if a TLB entry for a particular VA does not match
259 * the current ASN, it is ignored (one could think of the processor as
260 * having a collection of <max ASN> separate TLBs). This allows operating
261 * system software to skip the TLB flush that would otherwise be necessary
262 * at context switch time.
263 *
264 * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
265 * causes TLB entries to match any ASN. The PALcode also provides
266 * a TBI (Translation Buffer Invalidate) operation that flushes all
267 * TLB entries that _do not_ have PG_ASM. We use this bit for kernel
268 * mappings, so that invalidation of all user mappings does not invalidate
269 * kernel mappings (which are consistent across all processes).
270 *
271 * pma_asn always indicates the next ASN to use. When
272 * pma_asn exceeds pmap_max_asn, we start a new ASN generation.
273 *
274 * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
275 * TLB entries and the I-cache are flushed, the generation number is bumped,
276 * and pma_asn is changed to indicate the first non-reserved ASN.
277 *
278 * We reserve ASN #0 for pmaps that use the global kernel_lev1map. This
279 * prevents the following scenario:
280 *
281 * * New ASN generation starts, and process A is given ASN #0.
282 *
283 * * A new process B (and thus new pmap) is created. The ASN,
284 * for lack of a better value, is initialized to 0.
285 *
286 * * Process B runs. It is now using the TLB entries tagged
287 * by process A. *poof*
288 *
289 * In the scenario above, in addition to the processor using incorrect
290 * TLB entries, the PALcode might use incorrect information to service a
291 * TLB miss. (The PALcode uses the recursively mapped Virtual Page Table
292 * to locate the PTE for a faulting address, and tagged TLB entries exist
293 * for the Virtual Page Table addresses in order to speed up this procedure,
294 * as well.)
295 *
296 * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
297 * new pmaps will initially run with no TLB entries for user addresses
298 * or VPT mappings that map user page tables. Since kernel_lev1map only
299 * contains mappings for kernel addresses, and since those mappings
300 * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
301 * safe (since PG_ASM mappings match any ASN).
302 *
303 * On processors that do not support ASNs, the PALcode invalidates
304 * the TLB and I-cache automatically on swpctx. We still go
305 * through the motions of assigning an ASN (really, just refreshing
306 * the ASN generation in this particular case) to keep the logic sane
307 * in other parts of the code.
308 */
309 u_int pmap_max_asn; /* max ASN supported by the system */
310 /* next ASN and current ASN generation */
311 struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
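#if 0
/*
 * Illustrative sketch (not compiled) of the generation scheme described
 * above, for a pmap that has its own level 1 map; the real logic lives
 * in pmap_asn_alloc().  Names mirror the declarations above.
 */
static void
pmap_asn_alloc_sketch(pmap_t pmap, cpuid_t cpu_id)
{
	struct pmap_asn_info *pma = &pmap_asn_info[cpu_id];

	if (pma->pma_asn > pmap_max_asn) {
		/* Start a new generation: flush non-ASM TLB entries. */
		ALPHA_TBIAP();
		alpha_pal_imb();
		pma->pma_asngen++;
		pma->pma_asn = 1;	/* ASN 0 remains reserved */
	}
	pmap->pm_asni[cpu_id].pma_asn = pma->pma_asn++;
	pmap->pm_asni[cpu_id].pma_asngen = pma->pma_asngen;
}
#endif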
312
313 /*
314 * Locking:
315 *
316 * * pm_mtx (per-pmap) - This lock protects all of the members
317 * of the pmap structure itself.
318 *
319 * * pvh_mtx (per-page) - This lock protects the list of mappings
320 * of a (managed) physical page.
321 *
322 * * pmap_all_pmaps_mtx - This lock protects the global list of
323 * all pmaps. Note that a pmap's pm_mtx must never be held while this
324 * lock is held.
325 *
326 * * pmap_growkernel_mtx - This lock protects pmap_growkernel()
327 * and the pmap_maxkvaddr variable.
328 *
329 * There is a lock ordering constraint for pmap_growkernel_mtx.
330 * pmap_growkernel() acquires the locks in the following order:
331 *
332 * pmap_growkernel_mtx -> pmap_all_pmaps_mtx ->
333 * pmap->pm_mtx
334 *
335 * Address space number management (global ASN counters and per-pmap
336 * ASN state) is not locked; it uses arrays of values indexed
337 * per-processor.
338 *
339 * All internal functions which operate on a pmap are called
340 * with the pmap already locked by the caller (which will be
341 * an interface function).
342 */
343 struct mutex pmap_all_pmaps_mtx;
344 struct mutex pmap_growkernel_mtx;
345
346 #define PMAP_LOCK(pmap) mtx_enter(&pmap->pm_mtx)
347 #define PMAP_UNLOCK(pmap) mtx_leave(&pmap->pm_mtx)
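#if 0
/*
 * Illustrative sketch (not compiled) of the acquisition order documented
 * above: pmap_growkernel()-style updates take pmap_growkernel_mtx first,
 * then pmap_all_pmaps_mtx while walking the pmap list, and a pmap's own
 * pm_mtx last.
 */
static void
pmap_lock_order_sketch(void)
{
	struct pmap *pm;

	mtx_enter(&pmap_growkernel_mtx);
	mtx_enter(&pmap_all_pmaps_mtx);
	TAILQ_FOREACH(pm, &pmap_all_pmaps, pm_list) {
		/* A pmap's own pm_mtx, when needed, is taken last. */
		PMAP_LOCK(pm);
		/* ... propagate new kernel level 1 PTEs into pm ... */
		PMAP_UNLOCK(pm);
	}
	mtx_leave(&pmap_all_pmaps_mtx);
	mtx_leave(&pmap_growkernel_mtx);
}
#endif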
348
349 #if defined(MULTIPROCESSOR)
350 /*
351 * TLB Shootdown:
352 *
353 * When a mapping is changed in a pmap, the TLB entry corresponding to
354 * the virtual address must be invalidated on all processors. In order
355 * to accomplish this on systems with multiple processors, messages are
356 * sent from the processor which performs the mapping change to all
357 * processors on which the pmap is active. For other processors, the
358 * ASN generation number for that processor is invalidated, so that
359 * the next time the pmap is activated on that processor, a new ASN
360 * will be allocated (which implicitly invalidates all TLB entries).
361 *
362 * Note, we can use the pool allocator to allocate job entries
363 * since pool pages are mapped with K0SEG, not with the TLB.
364 */
365 struct pmap_tlb_shootdown_job {
366 TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
367 vaddr_t pj_va; /* virtual address */
368 pmap_t pj_pmap; /* the pmap which maps the address */
369 pt_entry_t pj_pte; /* the PTE bits */
370 };
371
372 /* If we have more pending jobs than this, we just nail the whole TLB. */
373 #define PMAP_TLB_SHOOTDOWN_MAXJOBS 6
374
375 struct pmap_tlb_shootdown_q {
376 TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
377 TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_free;
378 int pq_pte; /* aggregate low PTE bits */
379 int pq_tbia; /* pending global flush */
380 struct mutex pq_mtx; /* queue lock */
381 struct pmap_tlb_shootdown_job pq_jobs[PMAP_TLB_SHOOTDOWN_MAXJOBS];
382 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS];
383
384 #define PSJQ_LOCK(pq, s) mtx_enter(&(pq)->pq_mtx)
385 #define PSJQ_UNLOCK(pq, s) mtx_leave(&(pq)->pq_mtx)
386
387 void pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
388 struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get
389 (struct pmap_tlb_shootdown_q *);
390 void pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
391 struct pmap_tlb_shootdown_job *);
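#if 0
/*
 * Illustrative sketch (not compiled) of how a shootdown job would be
 * queued for another processor using the structures above; the real
 * work is done by the pmap_tlb_shootdown*() routines.
 * pmap_tlb_shootdown_job_get() is assumed to return NULL once
 * PMAP_TLB_SHOOTDOWN_MAXJOBS entries are in use.
 */
static void
pmap_tlb_shootdown_enqueue_sketch(struct pmap_tlb_shootdown_q *pq,
    pmap_t pmap, vaddr_t va, pt_entry_t pte)
{
	struct pmap_tlb_shootdown_job *pj;

	mtx_enter(&pq->pq_mtx);
	pj = pmap_tlb_shootdown_job_get(pq);
	if (pj == NULL) {
		/* Queue exhausted; just nail the whole TLB instead. */
		pq->pq_tbia = 1;
	} else {
		pj->pj_pmap = pmap;
		pj->pj_va = va;
		pj->pj_pte = pte;
		pq->pq_pte |= pte;	/* aggregate the low PTE bits */
		TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
	}
	mtx_leave(&pq->pq_mtx);
	/* ...then send an IPI so the target processor drains its queue. */
}
#endif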
392 #endif /* MULTIPROCESSOR */
393
394 #define PAGE_IS_MANAGED(pa) (vm_physseg_find(atop(pa), NULL) != -1)
395
396 /*
397 * Internal routines
398 */
399 void alpha_protection_init(void);
400 void pmap_do_remove(pmap_t, vaddr_t, vaddr_t, boolean_t);
401 boolean_t pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *,
402 boolean_t, cpuid_t);
403 void pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, cpuid_t);
404
405 /*
406 * PT page management functions.
407 */
408 int pmap_lev1map_create(pmap_t, cpuid_t);
409 void pmap_lev1map_destroy(pmap_t);
410 int pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
411 void pmap_ptpage_free(pmap_t, pt_entry_t *);
412 void pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, cpuid_t);
413 void pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *);
414 void pmap_l1pt_delref(pmap_t, pt_entry_t *);
415
416 void *pmap_l1pt_alloc(struct pool *, int, int *);
417 void pmap_l1pt_free(struct pool *, void *);
418
419 struct pool_allocator pmap_l1pt_allocator = {
420 pmap_l1pt_alloc, pmap_l1pt_free, 0,
421 };
422
423 void pmap_l1pt_ctor(pt_entry_t *);
424
425 /*
426 * PV table management functions.
427 */
428 int pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
429 boolean_t);
430 void pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, boolean_t);
431 void *pmap_pv_page_alloc(struct pool *, int, int *);
432 void pmap_pv_page_free(struct pool *, void *);
433
434 struct pool_allocator pmap_pv_page_allocator = {
435 pmap_pv_page_alloc, pmap_pv_page_free, 0,
436 };
437
438 #ifdef DEBUG
439 void pmap_pv_dump(paddr_t);
440 #endif
441
442 #define pmap_pv_alloc() pool_get(&pmap_pv_pool, PR_NOWAIT)
443 #define pmap_pv_free(pv) pool_put(&pmap_pv_pool, (pv))
444
445 /*
446 * ASN management functions.
447 */
448 void pmap_asn_alloc(pmap_t, cpuid_t);
449
450 /*
451 * Misc. functions.
452 */
453 boolean_t pmap_physpage_alloc(int, paddr_t *);
454 void pmap_physpage_free(paddr_t);
455 int pmap_physpage_addref(void *);
456 int pmap_physpage_delref(void *);
457
458 /* pmap_physpage_alloc() page usage */
459 #define PGU_NORMAL 0 /* free or normal use */
460 #define PGU_PVENT 1 /* PV entries */
461 #define PGU_L1PT 2 /* level 1 page table */
462 #define PGU_L2PT 3 /* level 2 page table */
463 #define PGU_L3PT 4 /* level 3 page table */
464
465 /*
466 * PMAP_ISACTIVE{,_TEST}:
467 *
468 * Check to see if a pmap is active on the current processor.
469 */
470 #define PMAP_ISACTIVE_TEST(pm, cpu_id) \
471 (((pm)->pm_cpus & (1UL << (cpu_id))) != 0)
472
473 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
474 #define PMAP_ISACTIVE(pm, cpu_id) \
475 ({ \
476 /* \
477 * XXX This test is not MP-safe. \
478 */ \
479 int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id); \
480 \
481 if (curproc != NULL && curproc->p_vmspace != NULL && \
482 (pm) != pmap_kernel() && \
483 (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap))) \
484 panic("PMAP_ISACTIVE, isa: %d pm: %p curpm:%p", \
485 isactive_, (pm), curproc->p_vmspace->vm_map.pmap); \
486 (isactive_); \
487 })
488 #else
489 #define PMAP_ISACTIVE(pm, cpu_id) PMAP_ISACTIVE_TEST(pm, cpu_id)
490 #endif /* DEBUG && !MULTIPROCESSOR */
491
492 /*
493 * PMAP_ACTIVATE_ASN_SANITY:
494 *
495 * DEBUG sanity checks for ASNs within PMAP_ACTIVATE.
496 */
497 #ifdef DEBUG
498 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) \
499 do { \
500 struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)]; \
501 struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)]; \
502 \
503 if ((pmap)->pm_lev1map == kernel_lev1map) { \
504 /* \
505 * This pmap implementation also ensures that pmaps \
506 * referencing kernel_lev1map use a reserved ASN \
507 * to prevent the PALcode from servicing a TLB \
508 * miss with the wrong PTE. \
509 */ \
510 if (__pma->pma_asn != PMAP_ASN_RESERVED) { \
511 printf("kernel_lev1map with non-reserved ASN " \
512 "(line %d)\n", __LINE__); \
513 panic("PMAP_ACTIVATE_ASN_SANITY"); \
514 } \
515 } else { \
516 if (__pma->pma_asngen != __cpma->pma_asngen) { \
517 /* \
518 * ASN generation number isn't valid! \
519 */ \
520 printf("pmap asngen %lu, current %lu " \
521 "(line %d)\n", \
522 __pma->pma_asngen, \
523 __cpma->pma_asngen, \
524 __LINE__); \
525 panic("PMAP_ACTIVATE_ASN_SANITY"); \
526 } \
527 if (__pma->pma_asn == PMAP_ASN_RESERVED) { \
528 /* \
529 * DANGER WILL ROBINSON! We're going to \
530 * pollute the VPT TLB entries! \
531 */ \
532 printf("Using reserved ASN! (line %d)\n", \
533 __LINE__); \
534 panic("PMAP_ACTIVATE_ASN_SANITY"); \
535 } \
536 } \
537 } while (0)
538 #else
539 #define PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id) /* nothing */
540 #endif
541
542 /*
543 * PMAP_ACTIVATE:
544 *
545 * This is essentially the guts of pmap_activate(), without
546 * ASN allocation. This is used by pmap_activate(),
547 * pmap_lev1map_create(), and pmap_lev1map_destroy().
548 *
549 * This is called only when it is known that a pmap is "active"
550 * on the current processor; the ASN must already be valid.
551 */
552 #define PMAP_ACTIVATE(pmap, p, cpu_id) \
553 do { \
554 PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id); \
555 \
556 (p)->p_addr->u_pcb.pcb_hw.apcb_ptbr = \
557 ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \
558 (p)->p_addr->u_pcb.pcb_hw.apcb_asn = \
559 (pmap)->pm_asni[(cpu_id)].pma_asn; \
560 \
561 if ((p) == curproc) { \
562 /* \
563 * Page table base register has changed; switch to \
564 * our own context again so that it will take effect. \
565 */ \
566 (void) alpha_pal_swpctx((u_long)p->p_md.md_pcbpaddr); \
567 } \
568 } while (0)
569
570 /*
571 * PMAP_SET_NEEDISYNC:
572 *
573 * Mark that a user pmap needs an I-stream synch on its
574 * way back out to userspace.
575 */
576 #define PMAP_SET_NEEDISYNC(pmap) (pmap)->pm_needisync = ~0UL
577
578 /*
579 * PMAP_SYNC_ISTREAM:
580 *
581 * Synchronize the I-stream for the specified pmap. For user
582 * pmaps, this is deferred until a process using the pmap returns
583 * to userspace.
584 */
585 #if defined(MULTIPROCESSOR)
586 #define PMAP_SYNC_ISTREAM_KERNEL() \
587 do { \
588 alpha_pal_imb(); \
589 alpha_broadcast_ipi(ALPHA_IPI_IMB); \
590 } while (0)
591
592 #define PMAP_SYNC_ISTREAM_USER(pmap) \
593 do { \
594 alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST); \
595 /* for curcpu, do it before userret() */ \
596 } while (0)
597 #else
598 #define PMAP_SYNC_ISTREAM_KERNEL() alpha_pal_imb()
599 #define PMAP_SYNC_ISTREAM_USER(pmap) /* done before userret() */
600 #endif /* MULTIPROCESSOR */
601
602 #define PMAP_SYNC_ISTREAM(pmap) \
603 do { \
604 if ((pmap) == pmap_kernel()) \
605 PMAP_SYNC_ISTREAM_KERNEL(); \
606 else \
607 PMAP_SYNC_ISTREAM_USER(pmap); \
608 } while (0)
609
610 /*
611 * PMAP_INVALIDATE_ASN:
612 *
613 * Invalidate the specified pmap's ASN, so as to force allocation
614 * of a new one the next time pmap_asn_alloc() is called.
615 *
616 * NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING
617 * CONDITIONS IS TRUE:
618 *
619 * (1) The pmap references the global kernel_lev1map.
620 *
621 * (2) The pmap is not active on the current processor.
622 */
623 #define PMAP_INVALIDATE_ASN(pmap, cpu_id) \
624 do { \
625 (pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED; \
626 } while (0)
627
628 /*
629 * PMAP_INVALIDATE_TLB:
630 *
631 * Invalidate the TLB entry for the pmap/va pair.
632 */
633 #define PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id) \
634 do { \
635 if ((hadasm) || (isactive)) { \
636 /* \
637 * Simply invalidating the TLB entry and I-cache \
638 * works in this case. \
639 */ \
640 ALPHA_TBIS((va)); \
641 } else if ((pmap)->pm_asni[(cpu_id)].pma_asngen == \
642 pmap_asn_info[(cpu_id)].pma_asngen) { \
643 /* \
644 * We can't directly invalidate the TLB entry \
645 * in this case, so we have to force allocation \
646 * of a new ASN the next time this pmap becomes \
647 * active. \
648 */ \
649 PMAP_INVALIDATE_ASN((pmap), (cpu_id)); \
650 } \
651 /* \
652 * Nothing to do in this case; the next time the \
653 * pmap becomes active on this processor, a new \
654 * ASN will be allocated anyway. \
655 */ \
656 } while (0)
657
658 /*
659 * PMAP_KERNEL_PTE:
660 *
661 * Get a kernel PTE.
662 *
663 * If debugging, do a table walk. If not debugging, just use
664 * the Virtual Page Table, since all kernel page tables are
665 * pre-allocated and mapped in.
666 */
667 #ifdef DEBUG
668 #define PMAP_KERNEL_PTE(va) \
669 ({ \
670 pt_entry_t *l1pte_, *l2pte_; \
671 \
672 l1pte_ = pmap_l1pte(pmap_kernel(), va); \
673 if (pmap_pte_v(l1pte_) == 0) { \
674 printf("kernel level 1 PTE not valid, va 0x%lx " \
675 "(line %d)\n", (va), __LINE__); \
676 panic("PMAP_KERNEL_PTE"); \
677 } \
678 l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_); \
679 if (pmap_pte_v(l2pte_) == 0) { \
680 printf("kernel level 2 PTE not valid, va 0x%lx " \
681 "(line %d)\n", (va), __LINE__); \
682 panic("PMAP_KERNEL_PTE"); \
683 } \
684 pmap_l3pte(pmap_kernel(), va, l2pte_); \
685 })
686 #else
687 #define PMAP_KERNEL_PTE(va) (&VPT[VPT_INDEX((va))])
688 #endif
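#if 0
/*
 * Illustrative sketch (not compiled): for a kernel VA, the VPT form of
 * the macro above and an explicit three-level walk are equivalent,
 * because all kernel page table pages are pre-allocated and mapped
 * (see the notes at the top of this file).
 */
static pt_entry_t *
pmap_kernel_pte_sketch(vaddr_t va)
{
	pt_entry_t *l1pte, *l2pte;

	l1pte = pmap_l1pte(pmap_kernel(), va);
	l2pte = pmap_l2pte(pmap_kernel(), va, l1pte);
	return (pmap_l3pte(pmap_kernel(), va, l2pte));	/* == &VPT[VPT_INDEX(va)] */
}
#endif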
689
690 /*
691 * PMAP_SET_PTE:
692 *
693 * Set a PTE to a specified value.
694 */
695 #define PMAP_SET_PTE(ptep, val) *(ptep) = (val)
696
697 /*
698 * PMAP_STAT_{INCR,DECR}:
699 *
700 * Increment or decrement a pmap statistic.
701 */
702 #define PMAP_STAT_INCR(s, v) atomic_add_ulong((unsigned long *)(&(s)), (v))
703 #define PMAP_STAT_DECR(s, v) atomic_sub_ulong((unsigned long *)(&(s)), (v))
704
705 /*
706 * pmap_bootstrap:
707 *
708 * Bootstrap the system to run with virtual memory.
709 *
710 * Note: no locking is necessary in this function.
711 */
712 void
713 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
714 {
715 vsize_t lev2mapsize, lev3mapsize;
716 pt_entry_t *lev2map, *lev3map;
717 pt_entry_t pte;
718 int i;
719 #ifdef MULTIPROCESSOR
720 int j;
721 #endif
722
723 #ifdef DEBUG
724 if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
725 printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
726 #endif
727
728 /*
729 * Compute the number of pages kmem_map will have.
730 */
731 kmeminit_nkmempages();
732
733 /*
734 * Figure out how many PTEs are necessary to map the kernel.
735 */
736 lev3mapsize = (VM_PHYS_SIZE + 16 * NCARGS + PAGER_MAP_SIZE) /
737 PAGE_SIZE + (maxthread * UPAGES) + nkmempages;
738
739 #ifdef SYSVSHM
740 lev3mapsize += shminfo.shmall;
741 #endif
742 lev3mapsize = roundup(lev3mapsize, NPTEPG);
743
744 /*
745 * Allocate a level 1 PTE table for the kernel.
746 * This is always one page long.
747 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
748 */
749 kernel_lev1map = (pt_entry_t *)
750 pmap_steal_memory(sizeof(pt_entry_t) * NPTEPG, NULL, NULL);
751
752 /*
753 * Allocate a level 2 PTE table for the kernel.
754 * These must map all of the level3 PTEs.
755 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
756 */
757 lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
758 lev2map = (pt_entry_t *)
759 pmap_steal_memory(sizeof(pt_entry_t) * lev2mapsize, NULL, NULL);
760
761 /*
762 * Allocate a level 3 PTE table for the kernel.
763 * Contains lev3mapsize PTEs.
764 */
765 lev3map = (pt_entry_t *)
766 pmap_steal_memory(sizeof(pt_entry_t) * lev3mapsize, NULL, NULL);
767
768 /*
769 * Set up level 1 page table
770 */
771
772 /* Map all of the level 2 pte pages */
773 for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
774 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
775 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
776 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
777 kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
778 (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
779 }
780
781 /* Map the virtual page table */
782 pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
783 << PG_SHIFT;
784 pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
785 kernel_lev1map[l1pte_index(VPTBASE)] = pte;
786 VPT = (pt_entry_t *)VPTBASE;
787
788 /*
789 * Set up level 2 page table.
790 */
791 /* Map all of the level 3 pte pages */
792 for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
793 pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
794 (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
795 pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
796 lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+
797 (i*PAGE_SIZE*NPTEPG))] = pte;
798 }
799
800 /* Initialize the pmap_growkernel_mtx. */
801 mtx_init(&pmap_growkernel_mtx, IPL_NONE);
802
803 /*
804 * Set up level three page table (lev3map)
805 */
806 /* Nothing to do; it's already zeroed */
807
808 /*
809 * Initialize `FYI' variables. Note we're relying on
810 * the fact that BSEARCH sorts the vm_physmem[] array
811 * for us.
812 */
813 avail_start = ptoa(vm_physmem[0].start);
814 avail_end = ptoa(vm_physmem[vm_nphysseg - 1].end);
815
816 pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
817
818 #if 0
819 printf("avail_start = 0x%lx\n", avail_start);
820 printf("avail_end = 0x%lx\n", avail_end);
821 #endif
822
823 /*
824 * Initialize the pmap pools and list.
825 */
826 pmap_ncpuids = ncpuids;
827 pool_init(&pmap_pmap_pool, PMAP_SIZEOF(pmap_ncpuids), 0, IPL_NONE, 0,
828 "pmappl", &pool_allocator_single);
829 pool_init(&pmap_l1pt_pool, PAGE_SIZE, 0, IPL_VM, 0,
830 "l1ptpl", &pmap_l1pt_allocator);
831 pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0,
832 "pvpl", &pmap_pv_page_allocator);
833
834 TAILQ_INIT(&pmap_all_pmaps);
835
836 /*
837 * Initialize the ASN logic.
838 */
839 pmap_max_asn = maxasn;
840 for (i = 0; i < ALPHA_MAXPROCS; i++) {
841 pmap_asn_info[i].pma_asn = 1;
842 pmap_asn_info[i].pma_asngen = 0;
843 }
844
845 /*
846 * Initialize the locks.
847 */
848 mtx_init(&pmap_all_pmaps_mtx, IPL_NONE);
849
850 /*
851 * Initialize kernel pmap. Note that all kernel mappings
852 * have PG_ASM set, so the ASN doesn't really matter for
853 * the kernel pmap. Also, since the kernel pmap always
854 * references kernel_lev1map, it always has an invalid ASN
855 * generation.
856 */
857 memset(pmap_kernel(), 0, sizeof(struct pmap));
858 pmap_kernel()->pm_lev1map = kernel_lev1map;
859 pmap_kernel()->pm_count = 1;
860 for (i = 0; i < ALPHA_MAXPROCS; i++) {
861 pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
862 pmap_kernel()->pm_asni[i].pma_asngen =
863 pmap_asn_info[i].pma_asngen;
864 }
865 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
866 mtx_init(&pmap_kernel()->pm_mtx, IPL_VM);
867
868 #if defined(MULTIPROCESSOR)
869 /*
870 * Initialize the TLB shootdown queues.
871 */
872 for (i = 0; i < ALPHA_MAXPROCS; i++) {
873 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
874 TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_free);
875 for (j = 0; j < PMAP_TLB_SHOOTDOWN_MAXJOBS; j++)
876 TAILQ_INSERT_TAIL(&pmap_tlb_shootdown_q[i].pq_free,
877 &pmap_tlb_shootdown_q[i].pq_jobs[j], pj_list);
878 mtx_init(&pmap_tlb_shootdown_q[i].pq_mtx, IPL_IPI);
879 }
880 #endif
881
882 /*
883 * Set up proc0's PCB such that the ptbr points to the right place
884 * and has the kernel pmap's (really unused) ASN.
885 */
886 proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr =
887 ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
888 proc0.p_addr->u_pcb.pcb_hw.apcb_asn =
889 pmap_kernel()->pm_asni[cpu_number()].pma_asn;
890
891 /*
892 * Mark the kernel pmap `active' on this processor.
893 */
894 atomic_setbits_ulong(&pmap_kernel()->pm_cpus,
895 (1UL << cpu_number()));
896 }
897
898 /*
899 * pmap_steal_memory: [ INTERFACE ]
900 *
901 * Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
902 * This function allows for early dynamic memory allocation until the
903 * virtual memory system has been bootstrapped. After that point, either
904 * kmem_alloc or malloc should be used. This function works by stealing
905 * pages from the (to be) managed page pool, then implicitly mapping the
906 * pages (by using their k0seg addresses) and zeroing them.
907 *
908 * It may be used once the physical memory segments have been pre-loaded
909 * into the vm_physmem[] array. Early memory allocation MUST use this
910 * interface! This cannot be used after vm_page_startup(), and will
911 * generate a panic if tried.
912 *
913 * Note that this memory will never be freed, and in essence it is wired
914 * down.
915 *
916 * Note: no locking is necessary in this function.
917 */
918 vaddr_t
919 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
920 {
921 int bank, npgs, x;
922 vaddr_t va;
923 paddr_t pa;
924
925 size = round_page(size);
926 npgs = atop(size);
927
928 #if 0
929 printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
930 #endif
931
932 for (bank = 0; bank < vm_nphysseg; bank++) {
933 if (uvm.page_init_done == TRUE)
934 panic("pmap_steal_memory: called _after_ bootstrap");
935
936 #if 0
937 printf(" bank %d: avail_start 0x%lx, start 0x%lx, "
938 "avail_end 0x%lx\n", bank, vm_physmem[bank].avail_start,
939 vm_physmem[bank].start, vm_physmem[bank].avail_end);
940 #endif
941
942 if (vm_physmem[bank].avail_start != vm_physmem[bank].start ||
943 vm_physmem[bank].avail_start >= vm_physmem[bank].avail_end)
944 continue;
945
946 #if 0
947 printf(" avail_end - avail_start = 0x%lx\n",
948 vm_physmem[bank].avail_end - vm_physmem[bank].avail_start);
949 #endif
950
951 if ((vm_physmem[bank].avail_end - vm_physmem[bank].avail_start)
952 < npgs)
953 continue;
954
955 /*
956 * There are enough pages here; steal them!
957 */
958 pa = ptoa(vm_physmem[bank].avail_start);
959 vm_physmem[bank].avail_start += npgs;
960 vm_physmem[bank].start += npgs;
961
962 /*
963 * Have we used up this segment?
964 */
965 if (vm_physmem[bank].avail_start == vm_physmem[bank].end) {
966 if (vm_nphysseg == 1)
967 panic("pmap_steal_memory: out of memory!");
968
969 /* Remove this segment from the list. */
970 vm_nphysseg--;
971 for (x = bank; x < vm_nphysseg; x++) {
972 /* structure copy */
973 vm_physmem[x] = vm_physmem[x + 1];
974 }
975 }
976
977 /*
978 * Fill these in for the caller; we don't modify them,
979 * but the upper layers still want to know.
980 */
981 if (vstartp)
982 *vstartp = VM_MIN_KERNEL_ADDRESS;
983 if (vendp)
984 *vendp = VM_MAX_KERNEL_ADDRESS;
985
986 va = ALPHA_PHYS_TO_K0SEG(pa);
987 memset((caddr_t)va, 0, size);
988 pmap_pages_stolen += npgs;
989 return (va);
990 }
991
992 /*
993 * If we got here, there was no memory left.
994 */
995 panic("pmap_steal_memory: no memory to steal");
996 }
997
998 /*
999 * pmap_init: [ INTERFACE ]
1000 *
1001 * Initialize the pmap module. Called by uvm_init(), to initialize any
1002 * structures that the pmap system needs to map virtual memory.
1003 *
1004 * Note: no locking is necessary in this function.
1005 */
1006 void
1007 pmap_init(void)
1008 {
1009
1010 #ifdef DEBUG
1011 if (pmapdebug & PDB_FOLLOW)
1012 printf("pmap_init()\n");
1013 #endif
1014
1015 /* initialize protection array */
1016 alpha_protection_init();
1017
1018 /*
1019 * Set a low water mark on the pv_entry pool, so that we are
1020 * more likely to have these around even in extreme memory
1021 * starvation.
1022 */
1023 pool_setlowat(&pmap_pv_pool, pmap_pv_lowat);
1024
1025 /*
1026 * Now it is safe to enable pv entry recording.
1027 */
1028 pmap_initialized = TRUE;
1029
1030 #if 0
1031 for (bank = 0; bank < vm_nphysseg; bank++) {
1032 printf("bank %d\n", bank);
1033 printf("\tstart = 0x%x\n", ptoa(vm_physmem[bank].start));
1034 printf("\tend = 0x%x\n", ptoa(vm_physmem[bank].end));
1035 printf("\tavail_start = 0x%x\n",
1036 ptoa(vm_physmem[bank].avail_start));
1037 printf("\tavail_end = 0x%x\n",
1038 ptoa(vm_physmem[bank].avail_end));
1039 }
1040 #endif
1041 }
1042
1043 /*
1044 * pmap_create: [ INTERFACE ]
1045 *
1046 * Create and return a physical map.
1047 */
1048 pmap_t
1049 pmap_create(void)
1050 {
1051 pmap_t pmap;
1052 int i;
1053
1054 #ifdef DEBUG
1055 if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
1056 printf("pmap_create()\n");
1057 #endif
1058
1059 pmap = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO);
1060
1061 pmap->pm_count = 1;
1062 for (i = 0; i < pmap_ncpuids; i++) {
1063 pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
1064 /* XXX Locking? */
1065 pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
1066 }
1067 mtx_init(&pmap->pm_mtx, IPL_VM);
1068
1069 for (;;) {
1070 mtx_enter(&pmap_growkernel_mtx);
1071 i = pmap_lev1map_create(pmap, cpu_number());
1072 mtx_leave(&pmap_growkernel_mtx);
1073 if (i == 0)
1074 break;
1075 uvm_wait(__func__);
1076 }
1077
1078 mtx_enter(&pmap_all_pmaps_mtx);
1079 TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
1080 mtx_leave(&pmap_all_pmaps_mtx);
1081
1082 return (pmap);
1083 }
1084
1085 /*
1086 * pmap_destroy: [ INTERFACE ]
1087 *
1088 * Drop the reference count on the specified pmap, releasing
1089 * all resources if the reference count drops to zero.
1090 */
1091 void
1092 pmap_destroy(pmap_t pmap)
1093 {
1094 int refs;
1095
1096 #ifdef DEBUG
1097 if (pmapdebug & PDB_FOLLOW)
1098 printf("pmap_destroy(%p)\n", pmap);
1099 #endif
1100
1101 refs = atomic_dec_int_nv(&pmap->pm_count);
1102 if (refs > 0)
1103 return;
1104
1105 /*
1106 * Remove it from the global list of all pmaps.
1107 */
1108 mtx_enter(&pmap_all_pmaps_mtx);
1109 TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
1110 mtx_leave(&pmap_all_pmaps_mtx);
1111
1112 mtx_enter(&pmap_growkernel_mtx);
1113 pmap_lev1map_destroy(pmap);
1114 mtx_leave(&pmap_growkernel_mtx);
1115
1116 pool_put(&pmap_pmap_pool, pmap);
1117 }
1118
1119 /*
1120 * pmap_reference: [ INTERFACE ]
1121 *
1122 * Add a reference to the specified pmap.
1123 */
1124 void
1125 pmap_reference(pmap_t pmap)
1126 {
1127
1128 #ifdef DEBUG
1129 if (pmapdebug & PDB_FOLLOW)
1130 printf("pmap_reference(%p)\n", pmap);
1131 #endif
1132
1133 atomic_inc_int(&pmap->pm_count);
1134 }
1135
1136 /*
1137 * pmap_remove: [ INTERFACE ]
1138 *
1139 * Remove the given range of addresses from the specified map.
1140 *
1141 * It is assumed that the start and end are properly
1142 * rounded to the page size.
1143 */
1144 void
1145 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1146 {
1147
1148 #ifdef DEBUG
1149 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1150 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1151 #endif
1152
1153 pmap_do_remove(pmap, sva, eva, TRUE);
1154 }
1155
1156 /*
1157 * pmap_do_remove:
1158 *
1159 * This actually removes the range of addresses from the
1160 * specified map. It is used by pmap_collect() (does not
1161 * want to remove wired mappings) and pmap_remove() (does
1162 * want to remove wired mappings).
1163 */
1164 void
1165 pmap_do_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, boolean_t dowired)
1166 {
1167 pt_entry_t *l1pte, *l2pte, *l3pte;
1168 pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte;
1169 vaddr_t l1eva, l2eva, vptva;
1170 boolean_t needisync = FALSE;
1171 cpuid_t cpu_id = cpu_number();
1172
1173 #ifdef DEBUG
1174 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1175 printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1176 #endif
1177
1178 /*
1179 * If this is the kernel pmap, we can use a faster method
1180 * for accessing the PTEs (since the PT pages are always
1181 * resident).
1182 *
1183 * Note that this routine should NEVER be called from an
1184 * interrupt context; pmap_kremove() is used for that.
1185 */
1186 if (pmap == pmap_kernel()) {
1187 PMAP_LOCK(pmap);
1188
1189 KASSERT(dowired == TRUE);
1190
1191 while (sva < eva) {
1192 l3pte = PMAP_KERNEL_PTE(sva);
1193 if (pmap_pte_v(l3pte)) {
1194 #ifdef DIAGNOSTIC
1195 if (PAGE_IS_MANAGED(pmap_pte_pa(l3pte)) &&
1196 pmap_pte_pv(l3pte) == 0)
1197 panic("pmap_remove: managed page "
1198 "without PG_PVLIST for 0x%lx",
1199 sva);
1200 #endif
1201 needisync |= pmap_remove_mapping(pmap, sva,
1202 l3pte, TRUE, cpu_id);
1203 }
1204 sva += PAGE_SIZE;
1205 }
1206
1207 PMAP_UNLOCK(pmap);
1208
1209 if (needisync)
1210 PMAP_SYNC_ISTREAM_KERNEL();
1211 return;
1212 }
1213
1214 #ifdef DIAGNOSTIC
1215 if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS)
1216 panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel "
1217 "address range", sva, eva);
1218 #endif
1219
1220 PMAP_LOCK(pmap);
1221
1222 /*
1223 * If we're already referencing the kernel_lev1map, there
1224 * is no work for us to do.
1225 */
1226 if (pmap->pm_lev1map == kernel_lev1map)
1227 goto out;
1228
1229 saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
1230
1231 /*
1232 * Add a reference to the L1 table so it won't get
1233 * removed from under us.
1234 */
1235 pmap_physpage_addref(saved_l1pte);
1236
1237 for (; sva < eva; sva = l1eva, l1pte++) {
1238 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1239 if (pmap_pte_v(l1pte)) {
1240 saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte);
1241
1242 /*
1243 * Add a reference to the L2 table so it won't
1244 * get removed from under us.
1245 */
1246 pmap_physpage_addref(saved_l2pte);
1247
1248 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1249 l2eva =
1250 alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1251 if (pmap_pte_v(l2pte)) {
1252 saved_l3pte = l3pte =
1253 pmap_l3pte(pmap, sva, l2pte);
1254
1255 /*
1256 * Add a reference to the L3 table so
1257 * it won't get removed from under us.
1258 */
1259 pmap_physpage_addref(saved_l3pte);
1260
1261 /*
1262 * Remember this sva; if the L3 table
1263 * gets removed, we need to invalidate
1264 * the VPT TLB entry for it.
1265 */
1266 vptva = sva;
1267
1268 for (; sva < l2eva && sva < eva;
1269 sva += PAGE_SIZE, l3pte++) {
1270 if (pmap_pte_v(l3pte) &&
1271 (dowired == TRUE ||
1272 pmap_pte_w(l3pte) == 0)) {
1273 needisync |=
1274 pmap_remove_mapping(
1275 pmap, sva,
1276 l3pte, TRUE,
1277 cpu_id);
1278 }
1279 }
1280
1281 /*
1282 * Remove the reference to the L3
1283 * table that we added above. This
1284 * may free the L3 table.
1285 */
1286 pmap_l3pt_delref(pmap, vptva,
1287 saved_l3pte, cpu_id);
1288 }
1289 }
1290
1291 /*
1292 * Remove the reference to the L2 table that we
1293 * added above. This may free the L2 table.
1294 */
1295 pmap_l2pt_delref(pmap, l1pte, saved_l2pte);
1296 }
1297 }
1298
1299 /*
1300 * Remove the reference to the L1 table that we added above.
1301 * This may free the L1 table.
1302 */
1303 pmap_l1pt_delref(pmap, saved_l1pte);
1304
1305 if (needisync)
1306 PMAP_SYNC_ISTREAM_USER(pmap);
1307
1308 out:
1309 PMAP_UNLOCK(pmap);
1310 }
1311
1312 /*
1313 * pmap_page_protect: [ INTERFACE ]
1314 *
1315 * Lower the permission for all mappings to a given page to
1316 * the permissions specified.
1317 */
1318 void
1319 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1320 {
1321 pmap_t pmap;
1322 pv_entry_t pv;
1323 boolean_t needkisync = FALSE;
1324 cpuid_t cpu_id = cpu_number();
1325 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1326
1327 #ifdef DEBUG
1328 if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1329 (prot == PROT_NONE && (pmapdebug & PDB_REMOVE)))
1330 printf("pmap_page_protect(%p, %x)\n", pg, prot);
1331 #endif
1332
1333 switch (prot) {
1334 case PROT_READ | PROT_WRITE | PROT_EXEC:
1335 case PROT_READ | PROT_WRITE:
1336 return;
1337
1338 /* copy_on_write */
1339 case PROT_READ | PROT_EXEC:
1340 case PROT_READ:
1341 mtx_enter(&pg->mdpage.pvh_mtx);
1342 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
1343 if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
1344 *pv->pv_pte &= ~(PG_KWE | PG_UWE);
1345 PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va,
1346 pmap_pte_asm(pv->pv_pte),
1347 PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id);
1348 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va,
1349 pmap_pte_asm(pv->pv_pte));
1350 }
1351 }
1352 mtx_leave(&pg->mdpage.pvh_mtx);
1353 PMAP_TLB_SHOOTNOW();
1354 return;
1355
1356 /* remove_all */
1357 default:
1358 break;
1359 }
1360
1361 mtx_enter(&pg->mdpage.pvh_mtx);
1362 while ((pv = pg->mdpage.pvh_list) != NULL) {
1363 pmap_reference(pv->pv_pmap);
1364 pmap = pv->pv_pmap;
1365 mtx_leave(&pg->mdpage.pvh_mtx);
1366
1367 PMAP_LOCK(pmap);
1368
1369 /*
1370 * We dropped the pvlist lock before grabbing the pmap
1371 * lock to avoid lock ordering problems. This means
1372 * we have to check the pvlist again since somebody
1373 * else might have modified it. All we care about is
1374 * that the pvlist entry matches the pmap we just
1375 * locked. If it doesn't, unlock the pmap and try
1376 * again.
1377 */
1378 mtx_enter(&pg->mdpage.pvh_mtx);
1379 if ((pv = pg->mdpage.pvh_list) == NULL ||
1380 pv->pv_pmap != pmap) {
1381 mtx_leave(&pg->mdpage.pvh_mtx);
1382 PMAP_UNLOCK(pmap);
1383 pmap_destroy(pmap);
1384 mtx_enter(&pg->mdpage.pvh_mtx);
1385 continue;
1386 }
1387
1388 #ifdef DEBUG
1389 if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
1390 pmap_pte_pa(pv->pv_pte) != VM_PAGE_TO_PHYS(pg))
1391 panic("pmap_page_protect: bad mapping");
1392 #endif
1393 if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte,
1394 FALSE, cpu_id) == TRUE) {
1395 if (pmap == pmap_kernel())
1396 needkisync |= TRUE;
1397 else
1398 PMAP_SYNC_ISTREAM_USER(pmap);
1399 }
1400 mtx_leave(&pg->mdpage.pvh_mtx);
1401 PMAP_UNLOCK(pmap);
1402 pmap_destroy(pmap);
1403 mtx_enter(&pg->mdpage.pvh_mtx);
1404 }
1405 mtx_leave(&pg->mdpage.pvh_mtx);
1406
1407 if (needkisync)
1408 PMAP_SYNC_ISTREAM_KERNEL();
1409 }
1410
1411 /*
1412 * pmap_protect: [ INTERFACE ]
1413 *
1414 * Set the physical protection on the specified range of this map
1415 * as requested.
1416 */
1417 void
1418 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1419 {
1420 pt_entry_t *l1pte, *l2pte, *l3pte, bits;
1421 boolean_t isactive;
1422 boolean_t hadasm;
1423 vaddr_t l1eva, l2eva;
1424 cpuid_t cpu_id = cpu_number();
1425 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1426
1427 #ifdef DEBUG
1428 if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1429 printf("pmap_protect(%p, %lx, %lx, %x)\n",
1430 pmap, sva, eva, prot);
1431 #endif
1432
1433 if ((prot & PROT_READ) == PROT_NONE) {
1434 pmap_remove(pmap, sva, eva);
1435 return;
1436 }
1437
1438 PMAP_LOCK(pmap);
1439
1440 bits = pte_prot(pmap, prot);
1441 isactive = PMAP_ISACTIVE(pmap, cpu_id);
1442
1443 l1pte = pmap_l1pte(pmap, sva);
1444 for (; sva < eva; sva = l1eva, l1pte++) {
1445 l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1446 if (!pmap_pte_v(l1pte))
1447 continue;
1448
1449 l2pte = pmap_l2pte(pmap, sva, l1pte);
1450 for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1451 l2eva = alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1452 if (!pmap_pte_v(l2pte))
1453 continue;
1454
1455 l3pte = pmap_l3pte(pmap, sva, l2pte);
1456 for (; sva < l2eva && sva < eva;
1457 sva += PAGE_SIZE, l3pte++) {
1458 if (!pmap_pte_v(l3pte))
1459 continue;
1460
1461 if (pmap_pte_prot_chg(l3pte, bits)) {
1462 hadasm = (pmap_pte_asm(l3pte) != 0);
1463 pmap_pte_set_prot(l3pte, bits);
1464 PMAP_INVALIDATE_TLB(pmap, sva, hadasm,
1465 isactive, cpu_id);
1466 PMAP_TLB_SHOOTDOWN(pmap, sva,
1467 hadasm ? PG_ASM : 0);
1468 }
1469 }
1470 }
1471 }
1472
1473 PMAP_TLB_SHOOTNOW();
1474
1475 if (prot & PROT_EXEC)
1476 PMAP_SYNC_ISTREAM(pmap);
1477
1478 PMAP_UNLOCK(pmap);
1479 }
1480
1481 /*
1482 * pmap_enter: [ INTERFACE ]
1483 *
1484 * Insert the given physical page (p) at
1485 * the specified virtual address (v) in the
1486 * target physical map with the protection requested.
1487 *
1488 * If specified, the page will be wired down, meaning
1489 * that the related pte can not be reclaimed.
1490 *
1491 * Note: This is the only routine which MAY NOT lazy-evaluate
1492 * or lose information. That is, this routine must actually
1493 * insert this page into the given map NOW.
1494 */
1495 int
1496 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1497 {
1498 struct vm_page *pg;
1499 pt_entry_t *pte, npte, opte;
1500 paddr_t opa;
1501 boolean_t tflush = TRUE;
1502 boolean_t hadasm = FALSE; /* XXX gcc -Wuninitialized */
1503 boolean_t needisync = FALSE;
1504 boolean_t setisync = FALSE;
1505 boolean_t isactive;
1506 boolean_t wired;
1507 cpuid_t cpu_id = cpu_number();
1508 int error = 0;
1509 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1510
1511 #ifdef DEBUG
1512 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1513 printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
1514 pmap, va, pa, prot, flags);
1515 #endif
1516 pg = PHYS_TO_VM_PAGE(pa);
1517 isactive = PMAP_ISACTIVE(pmap, cpu_id);
1518 wired = (flags & PMAP_WIRED) != 0;
1519
1520 /*
1521 * Determine what we need to do about the I-stream. If
1522 * PROT_EXEC is set, we mark a user pmap as needing
1523 * an I-sync on the way back out to userspace. We always
1524 * need an immediate I-sync for the kernel pmap.
1525 */
1526 if (prot & PROT_EXEC) {
1527 if (pmap == pmap_kernel())
1528 needisync = TRUE;
1529 else {
1530 setisync = TRUE;
1531 needisync = (pmap->pm_cpus != 0);
1532 }
1533 }
1534
1535 PMAP_LOCK(pmap);
1536
1537 if (pmap == pmap_kernel()) {
1538 #ifdef DIAGNOSTIC
1539 /*
1540 * Sanity check the virtual address.
1541 */
1542 if (va < VM_MIN_KERNEL_ADDRESS)
1543 panic("pmap_enter: kernel pmap, invalid va 0x%lx", va);
1544 #endif
1545 pte = PMAP_KERNEL_PTE(va);
1546 } else {
1547 pt_entry_t *l1pte, *l2pte;
1548
1549 #ifdef DIAGNOSTIC
1550 /*
1551 * Sanity check the virtual address.
1552 */
1553 if (va >= VM_MAXUSER_ADDRESS)
1554 panic("pmap_enter: user pmap, invalid va 0x%lx", va);
1555 #endif
1556
1557 KASSERT(pmap->pm_lev1map != kernel_lev1map);
1558
1559 /*
1560 * Check to see if the level 1 PTE is valid, and
1561 * allocate a new level 2 page table page if it's not.
1562 * A reference will be added to the level 2 table when
1563 * the level 3 table is created.
1564 */
1565 l1pte = pmap_l1pte(pmap, va);
1566 if (pmap_pte_v(l1pte) == 0) {
1567 pmap_physpage_addref(l1pte);
1568 error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
1569 if (error) {
1570 pmap_l1pt_delref(pmap, l1pte);
1571 if (flags & PMAP_CANFAIL)
1572 goto out;
1573 panic("pmap_enter: unable to create L2 PT "
1574 "page");
1575 }
1576 #ifdef DEBUG
1577 if (pmapdebug & PDB_PTPAGE)
1578 printf("pmap_enter: new level 2 table at "
1579 "0x%lx\n", pmap_pte_pa(l1pte));
1580 #endif
1581 }
1582
1583 /*
1584 * Check to see if the level 2 PTE is valid, and
1585 * allocate a new level 3 page table page if it's not.
1586 * A reference will be added to the level 3 table when
1587 * the mapping is validated.
1588 */
1589 l2pte = pmap_l2pte(pmap, va, l1pte);
1590 if (pmap_pte_v(l2pte) == 0) {
1591 pmap_physpage_addref(l2pte);
1592 error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
1593 if (error) {
1594 pmap_l2pt_delref(pmap, l1pte, l2pte);
1595 if (flags & PMAP_CANFAIL)
1596 goto out;
1597 panic("pmap_enter: unable to create L3 PT "
1598 "page");
1599 }
1600 #ifdef DEBUG
1601 if (pmapdebug & PDB_PTPAGE)
1602 printf("pmap_enter: new level 3 table at "
1603 "0x%lx\n", pmap_pte_pa(l2pte));
1604 #endif
1605 }
1606
1607 /*
1608 * Get the PTE that will map the page.
1609 */
1610 pte = pmap_l3pte(pmap, va, l2pte);
1611 }
1612
1613 /* Remember all of the old PTE; used for TBI check later. */
1614 opte = *pte;
1615
1616 /*
1617 * Check to see if the old mapping is valid. If not, validate the
1618 * new one immediately.
1619 */
1620 if (pmap_pte_v(pte) == 0) {
1621 /*
1622 * No need to invalidate the TLB in this case; an invalid
1623 * mapping won't be in the TLB, and a previously valid
1624 * mapping would have been flushed when it was invalidated.
1625 */
1626 tflush = FALSE;
1627
1628 /*
1629 * No need to synchronize the I-stream, either, for basically
1630 * the same reason.
1631 */
1632 setisync = needisync = FALSE;
1633
1634 if (pmap != pmap_kernel()) {
1635 /*
1636 * New mappings gain a reference on the level 3
1637 * table.
1638 */
1639 pmap_physpage_addref(pte);
1640 }
1641 goto validate_enterpv;
1642 }
1643
1644 opa = pmap_pte_pa(pte);
1645 hadasm = (pmap_pte_asm(pte) != 0);
1646
1647 if (opa == pa) {
1648 /*
1649 * Mapping has not changed; must be a protection or
1650 * wiring change.
1651 */
1652 if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
1653 #ifdef DEBUG
1654 if (pmapdebug & PDB_ENTER)
1655 printf("pmap_enter: wiring change -> %d\n",
1656 wired);
1657 #endif
1658 /*
1659 * Adjust the wiring count.
1660 */
1661 if (wired)
1662 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1663 else
1664 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1665 }
1666
1667 /*
1668 * Set the PTE.
1669 */
1670 goto validate;
1671 }
1672
1673 /*
1674 * The mapping has changed. We need to invalidate the
1675 * old mapping before creating the new one.
1676 */
1677 #ifdef DEBUG
1678 if (pmapdebug & PDB_ENTER)
1679 printf("pmap_enter: removing old mapping 0x%lx\n", va);
1680 #endif
1681 if (pmap != pmap_kernel()) {
1682 /*
1683 * Gain an extra reference on the level 3 table.
1684 * pmap_remove_mapping() will delete a reference,
1685 * and we don't want the table to be erroneously
1686 * freed.
1687 */
1688 pmap_physpage_addref(pte);
1689 }
1690 needisync |= pmap_remove_mapping(pmap, va, pte, TRUE, cpu_id);
1691
1692 validate_enterpv:
1693 /*
1694 * Enter the mapping into the pv_table if appropriate.
1695 */
1696 if (pg != NULL) {
1697 error = pmap_pv_enter(pmap, pg, va, pte, TRUE);
1698 if (error) {
1699 pmap_l3pt_delref(pmap, va, pte, cpu_id);
1700 if (flags & PMAP_CANFAIL)
1701 goto out;
1702 panic("pmap_enter: unable to enter mapping in PV "
1703 "table");
1704 }
1705 }
1706
1707 /*
1708 * Increment counters.
1709 */
1710 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1711 if (wired)
1712 PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1713
1714 validate:
1715 /*
1716 * Build the new PTE.
1717 */
1718 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
1719 if (pg != NULL) {
1720 int attrs;
1721
1722 #ifdef DIAGNOSTIC
1723 if ((flags & PROT_MASK) & ~prot)
1724 panic("pmap_enter: access type exceeds prot");
1725 #endif
1726 if (flags & PROT_WRITE)
1727 atomic_setbits_int(&pg->pg_flags,
1728 PG_PMAP_REF | PG_PMAP_MOD);
1729 else if (flags & PROT_MASK)
1730 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
1731
1732 /*
1733 * Set up referenced/modified emulation for new mapping.
1734 */
1735 attrs = pg->pg_flags;
1736 if ((attrs & PG_PMAP_REF) == 0)
1737 npte |= PG_FOR | PG_FOW | PG_FOE;
1738 else if ((attrs & PG_PMAP_MOD) == 0)
1739 npte |= PG_FOW;
1740
1741 /*
1742 * Mapping was entered on PV list.
1743 */
1744 npte |= PG_PVLIST;
1745 }
1746 if (wired)
1747 npte |= PG_WIRED;
1748 #ifdef DEBUG
1749 if (pmapdebug & PDB_ENTER)
1750 printf("pmap_enter: new pte = 0x%lx\n", npte);
1751 #endif
1752
1753 /*
1754 * If the PALcode portion of the new PTE is the same as the
1755 * old PTE, no TBI is necessary.
1756 */
1757 if (PG_PALCODE(opte) == PG_PALCODE(npte))
1758 tflush = FALSE;
1759
1760 /*
1761 * Set the new PTE.
1762 */
1763 PMAP_SET_PTE(pte, npte);
1764
1765 /*
1766 * Invalidate the TLB entry for this VA and any appropriate
1767 * caches.
1768 */
1769 if (tflush) {
1770 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
1771 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
1772 PMAP_TLB_SHOOTNOW();
1773 }
1774 if (setisync)
1775 PMAP_SET_NEEDISYNC(pmap);
1776 if (needisync)
1777 PMAP_SYNC_ISTREAM(pmap);
1778
1779 out:
1780 PMAP_UNLOCK(pmap);
1781
1782 return error;
1783 }
1784
1785 /*
1786 * pmap_kenter_pa: [ INTERFACE ]
1787 *
1788 * Enter a va -> pa mapping into the kernel pmap without any
1789 * physical->virtual tracking.
1790 *
1791 * Note: no locking is necessary in this function.
1792 */
1793 void
1794 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1795 {
1796 pt_entry_t *pte, npte;
1797 cpuid_t cpu_id = cpu_number();
1798 boolean_t needisync = FALSE;
1799 pmap_t pmap = pmap_kernel();
1800 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1801
1802 #ifdef DEBUG
1803 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1804 printf("pmap_kenter_pa(%lx, %lx, %x)\n",
1805 va, pa, prot);
1806 #endif
1807
1808 #ifdef DIAGNOSTIC
1809 /*
1810 * Sanity check the virtual address.
1811 */
1812 if (va < VM_MIN_KERNEL_ADDRESS)
1813 panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1814 #endif
1815
1816 pte = PMAP_KERNEL_PTE(va);
1817
1818 if (pmap_pte_v(pte) == 0)
1819 PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1820 if (pmap_pte_w(pte) == 0)
1821 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1822
1823 if ((prot & PROT_EXEC) != 0 || pmap_pte_exec(pte))
1824 needisync = TRUE;
1825
1826 /*
1827 * Build the new PTE.
1828 */
1829 npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1830 PG_V | PG_WIRED;
1831
1832 /*
1833 * Set the new PTE.
1834 */
1835 PMAP_SET_PTE(pte, npte);
1836 #if defined(MULTIPROCESSOR)
1837 alpha_mb(); /* XXX alpha_wmb()? */
1838 #endif
1839
1840 /*
1841 * Invalidate the TLB entry for this VA and any appropriate
1842 * caches.
1843 */
1844 PMAP_INVALIDATE_TLB(pmap, va, TRUE, TRUE, cpu_id);
1845 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1846 PMAP_TLB_SHOOTNOW();
1847
1848 if (needisync)
1849 PMAP_SYNC_ISTREAM_KERNEL();
1850 }
1851
1852 /*
1853 * pmap_kremove: [ INTERFACE ]
1854 *
1855 * Remove a mapping entered with pmap_kenter_pa() starting at va,
1856 * for size bytes (assumed to be page rounded).
1857 */
1858 void
1859 pmap_kremove(vaddr_t va, vsize_t size)
1860 {
1861 pt_entry_t *pte;
1862 boolean_t needisync = FALSE;
1863 cpuid_t cpu_id = cpu_number();
1864 pmap_t pmap = pmap_kernel();
1865 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1866
1867 #ifdef DEBUG
1868 if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1869 printf("pmap_kremove(%lx, %lx)\n",
1870 va, size);
1871 #endif
1872
1873 #ifdef DIAGNOSTIC
1874 if (va < VM_MIN_KERNEL_ADDRESS)
1875 panic("pmap_kremove: user address");
1876 #endif
1877
1878 for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
1879 pte = PMAP_KERNEL_PTE(va);
1880 if (pmap_pte_v(pte)) {
1881 #ifdef DIAGNOSTIC
1882 if (pmap_pte_pv(pte))
1883 panic("pmap_kremove: PG_PVLIST mapping for "
1884 "0x%lx", va);
1885 #endif
1886 if (pmap_pte_exec(pte))
1887 needisync = TRUE;
1888
1889 /* Zap the mapping. */
1890 PMAP_SET_PTE(pte, PG_NV);
1891 #if defined(MULTIPROCESSOR)
1892 alpha_mb(); /* XXX alpha_wmb()? */
1893 #endif
1894 PMAP_INVALIDATE_TLB(pmap, va, TRUE, TRUE, cpu_id);
1895 PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1896
1897 /* Update stats. */
1898 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
1899 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1900 }
1901 }
1902
1903 PMAP_TLB_SHOOTNOW();
1904
1905 if (needisync)
1906 PMAP_SYNC_ISTREAM_KERNEL();
1907 }
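
/*
 * Sketch of a typical pmap_kenter_pa()/pmap_kremove() pairing (the
 * variables below are hypothetical; the caller supplies a page-aligned
 * kernel VA and a physical page):
 *
 *	vaddr_t va = ...;	page-aligned kernel virtual address
 *	paddr_t pa = ...;	physical page to map
 *
 *	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
 *	...use the mapping through va...
 *	pmap_kremove(va, PAGE_SIZE);
 */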
1908
1909 /*
1910 * pmap_unwire: [ INTERFACE ]
1911 *
1912 * Clear the wired attribute for a map/virtual-address pair.
1913 *
1914 * The mapping must already exist in the pmap.
1915 */
1916 void
1917 pmap_unwire(pmap_t pmap, vaddr_t va)
1918 {
1919 pt_entry_t *pte;
1920
1921 #ifdef DEBUG
1922 if (pmapdebug & PDB_FOLLOW)
1923 printf("pmap_unwire(%p, %lx)\n", pmap, va);
1924 #endif
1925
1926 PMAP_LOCK(pmap);
1927
1928 pte = pmap_l3pte(pmap, va, NULL);
1929 #ifdef DIAGNOSTIC
1930 if (pte == NULL || pmap_pte_v(pte) == 0)
1931 panic("pmap_unwire");
1932 #endif
1933
1934 /*
1935 * If wiring actually changed (always?) clear the wire bit and
1936 * update the wire count. Note that wiring is not a hardware
1937 * characteristic so there is no need to invalidate the TLB.
1938 */
1939 if (pmap_pte_w_chg(pte, 0)) {
1940 pmap_pte_set_w(pte, FALSE);
1941 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1942 }
1943 #ifdef DIAGNOSTIC
1944 else {
1945 printf("pmap_unwire: wiring for pmap %p va 0x%lx "
1946 "didn't change!\n", pmap, va);
1947 }
1948 #endif
1949
1950 PMAP_UNLOCK(pmap);
1951 }
1952
1953 /*
1954 * pmap_extract: [ INTERFACE ]
1955 *
1956 * Extract the physical address associated with the given
1957 * pmap/virtual address pair.
1958 */
1959 boolean_t
1960 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
1961 {
1962 pt_entry_t *l1pte, *l2pte, *l3pte;
1963 boolean_t rv = FALSE;
1964 paddr_t pa;
1965
1966 #ifdef DEBUG
1967 if (pmapdebug & PDB_FOLLOW)
1968 printf("pmap_extract(%p, %lx) -> ", pmap, va);
1969 #endif
1970
1971 if (pmap == pmap_kernel()) {
1972 if (va < ALPHA_K0SEG_BASE) {
1973 /* nothing */
1974 } else if (va <= ALPHA_K0SEG_END) {
1975 pa = ALPHA_K0SEG_TO_PHYS(va);
1976 *pap = pa;
1977 rv = TRUE;
1978 } else {
1979 l3pte = PMAP_KERNEL_PTE(va);
1980 if (pmap_pte_v(l3pte)) {
1981 pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
1982 *pap = pa;
1983 rv = TRUE;
1984 }
1985 }
1986 goto out_nolock;
1987 }
1988
1989 PMAP_LOCK(pmap);
1990
1991 l1pte = pmap_l1pte(pmap, va);
1992 if (pmap_pte_v(l1pte) == 0)
1993 goto out;
1994
1995 l2pte = pmap_l2pte(pmap, va, l1pte);
1996 if (pmap_pte_v(l2pte) == 0)
1997 goto out;
1998
1999 l3pte = pmap_l3pte(pmap, va, l2pte);
2000 if (pmap_pte_v(l3pte) == 0)
2001 goto out;
2002
2003 pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
2004 *pap = pa;
2005 rv = TRUE;
2006 out:
2007 PMAP_UNLOCK(pmap);
2008 out_nolock:
2009 #ifdef DEBUG
2010 if (pmapdebug & PDB_FOLLOW) {
2011 if (rv)
2012 printf("0x%lx\n", pa);
2013 else
2014 printf("failed\n");
2015 }
2016 #endif
2017 return (rv);
2018 }
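
/*
 * Illustrative use of pmap_extract() (sketch only; "va" is a
 * hypothetical virtual address and the printf is just for show):
 *
 *	paddr_t pa;
 *
 *	if (pmap_extract(pmap_kernel(), va, &pa))
 *		printf("va 0x%lx -> pa 0x%lx\n", va, pa);
 *	else
 *		printf("va 0x%lx is not mapped\n", va);
 */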
2019
2020 /*
2021 * pmap_collect: [ INTERFACE ]
2022 *
2023 * Garbage collects the physical map system for pages which are no
2024 * longer used. Success need not be guaranteed -- that is, there
2025 * may well be pages which are not referenced, but others may be
2026 * collected.
2027 *
2028 * Called by the pageout daemon when pages are scarce.
2029 */
2030 void
2031 pmap_collect(pmap_t pmap)
2032 {
2033
2034 #ifdef DEBUG
2035 if (pmapdebug & PDB_FOLLOW)
2036 printf("pmap_collect(%p)\n", pmap);
2037 #endif
2038
2039 /*
2040 * If called for the kernel pmap, just return. We
2041 * handle this case in the event that we ever want
2042 * to have swappable kernel threads.
2043 */
2044 if (pmap == pmap_kernel())
2045 return;
2046
2047 /*
2048 * This process is about to be swapped out; free all of
2049 * the PT pages by removing the physical mappings for its
2050 * entire address space. Note: pmap_do_remove() performs
2051 * all necessary locking.
2052 */
2053 pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, FALSE);
2054 }
2055
2056 /*
2057 * pmap_activate: [ INTERFACE ]
2058 *
2059 * Activate the pmap used by the specified process. This includes
2060 * reloading the MMU context if this is the current process, and marking
2061 * the pmap in use by the processor.
2062 *
2063 * Note: We may use only spin locks here, since we are called
2064 * by a critical section in cpu_switch()!
2065 */
2066 void
2067 pmap_activate(struct proc *p)
2068 {
2069 struct pmap *pmap = p->p_vmspace->vm_map.pmap;
2070 cpuid_t cpu_id = cpu_number();
2071
2072 #ifdef DEBUG
2073 if (pmapdebug & PDB_FOLLOW)
2074 printf("pmap_activate(%p)\n", p);
2075 #endif
2076
2077 /* Mark the pmap in use by this processor. */
2078 atomic_setbits_ulong(&pmap->pm_cpus, (1UL << cpu_id));
2079
2080 /* Allocate an ASN. */
2081 pmap_asn_alloc(pmap, cpu_id);
2082
2083 PMAP_ACTIVATE(pmap, p, cpu_id);
2084 }
2085
2086 /*
2087 * pmap_deactivate: [ INTERFACE ]
2088 *
2089 * Mark that the pmap used by the specified process is no longer
2090 * in use by the processor.
2091 *
2092 * The comment above pmap_activate() wrt. locking applies here,
2093 * as well. Note that we use only a single `atomic' operation,
2094 * so no locking is necessary.
2095 */
2096 void
2097 pmap_deactivate(struct proc *p)
2098 {
2099 struct pmap *pmap = p->p_vmspace->vm_map.pmap;
2100
2101 #ifdef DEBUG
2102 if (pmapdebug & PDB_FOLLOW)
2103 printf("pmap_deactivate(%p)\n", p);
2104 #endif
2105
2106 /*
2107 * Mark the pmap no longer in use by this processor.
2108 */
2109 atomic_clearbits_ulong(&pmap->pm_cpus, (1UL << cpu_number()));
2110 }
2111
2112 /*
2113 * pmap_zero_page: [ INTERFACE ]
2114 *
2115 * Zero the specified (machine independent) page by mapping the page
2116 * into virtual memory and clearing its contents, one machine dependent
2117 * page at a time.
2118 *
2119 * Note: no locking is necessary in this function.
2120 */
2121 void
2122 pmap_zero_page(struct vm_page *pg)
2123 {
2124 paddr_t phys = VM_PAGE_TO_PHYS(pg);
2125 u_long *p0, *p1, *pend;
2126
2127 #ifdef DEBUG
2128 if (pmapdebug & PDB_FOLLOW)
2129 printf("pmap_zero_page(%lx)\n", phys);
2130 #endif
2131
2132 p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2133 p1 = NULL;
2134 pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2135
2136 /*
2137 * Unroll the loop a bit, doing 16 quadwords per iteration.
2138 * Do only 8 back-to-back stores, and alternate registers.
2139 */
2140 do {
2141 __asm volatile(
2142 "# BEGIN loop body\n"
2143 " addq %2, (8 * 8), %1 \n"
2144 " stq $31, (0 * 8)(%0) \n"
2145 " stq $31, (1 * 8)(%0) \n"
2146 " stq $31, (2 * 8)(%0) \n"
2147 " stq $31, (3 * 8)(%0) \n"
2148 " stq $31, (4 * 8)(%0) \n"
2149 " stq $31, (5 * 8)(%0) \n"
2150 " stq $31, (6 * 8)(%0) \n"
2151 " stq $31, (7 * 8)(%0) \n"
2152 " \n"
2153 " addq %3, (8 * 8), %0 \n"
2154 " stq $31, (0 * 8)(%1) \n"
2155 " stq $31, (1 * 8)(%1) \n"
2156 " stq $31, (2 * 8)(%1) \n"
2157 " stq $31, (3 * 8)(%1) \n"
2158 " stq $31, (4 * 8)(%1) \n"
2159 " stq $31, (5 * 8)(%1) \n"
2160 " stq $31, (6 * 8)(%1) \n"
2161 " stq $31, (7 * 8)(%1) \n"
2162 " # END loop body"
2163 : "=r" (p0), "=r" (p1)
2164 : "0" (p0), "1" (p1)
2165 : "memory");
2166 } while (p0 < pend);
2167 }
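
/*
 * For reference, the unrolled assembly above is equivalent to the
 * following plain C loop (sketch; the hand-scheduled version exists
 * to keep the store pipeline busy by alternating base registers):
 *
 *	u_long *p = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
 *	int i;
 *
 *	for (i = 0; i < PAGE_SIZE / sizeof(u_long); i++)
 *		p[i] = 0;
 */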
2168
2169 /*
2170 * pmap_copy_page: [ INTERFACE ]
2171 *
2172 * Copy the specified (machine independent) page by mapping the page
2173 * into virtual memory and using memcpy to copy the page, one machine
2174 * dependent page at a time.
2175 *
2176 * Note: no locking is necessary in this function.
2177 */
2178 void
2179 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
2180 {
2181 paddr_t src = VM_PAGE_TO_PHYS(srcpg);
2182 paddr_t dst = VM_PAGE_TO_PHYS(dstpg);
2183 caddr_t s, d;
2184
2185 #ifdef DEBUG
2186 if (pmapdebug & PDB_FOLLOW)
2187 printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2188 #endif
2189 s = (caddr_t)ALPHA_PHYS_TO_K0SEG(src);
2190 d = (caddr_t)ALPHA_PHYS_TO_K0SEG(dst);
2191 memcpy(d, s, PAGE_SIZE);
2192 }
2193
2194 /*
2195 * pmap_clear_modify: [ INTERFACE ]
2196 *
2197 * Clear the modify bits on the specified physical page.
2198 */
2199 boolean_t
2200 pmap_clear_modify(struct vm_page *pg)
2201 {
2202 boolean_t rv = FALSE;
2203 cpuid_t cpu_id = cpu_number();
2204
2205 #ifdef DEBUG
2206 if (pmapdebug & PDB_FOLLOW)
2207 printf("pmap_clear_modify(%p)\n", pg);
2208 #endif
2209
2210 mtx_enter(&pg->mdpage.pvh_mtx);
2211 if (pg->pg_flags & PG_PMAP_MOD) {
2212 rv = TRUE;
2213 pmap_changebit(pg, PG_FOW, ~0, cpu_id);
2214 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);
2215 }
2216 mtx_leave(&pg->mdpage.pvh_mtx);
2217
2218 return (rv);
2219 }
2220
2221 /*
2222 * pmap_clear_reference: [ INTERFACE ]
2223 *
2224 * Clear the reference bit on the specified physical page.
2225 */
2226 boolean_t
2227 pmap_clear_reference(struct vm_page *pg)
2228 {
2229 boolean_t rv = FALSE;
2230 cpuid_t cpu_id = cpu_number();
2231
2232 #ifdef DEBUG
2233 if (pmapdebug & PDB_FOLLOW)
2234 printf("pmap_clear_reference(%p)\n", pg);
2235 #endif
2236
2237 mtx_enter(&pg->mdpage.pvh_mtx);
2238 if (pg->pg_flags & PG_PMAP_REF) {
2239 rv = TRUE;
2240 pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
2241 atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);
2242 }
2243 mtx_leave(&pg->mdpage.pvh_mtx);
2244
2245 return (rv);
2246 }
2247
2248 /*
2249 * pmap_is_referenced: [ INTERFACE ]
2250 *
2251 * Return whether or not the specified physical page is referenced
2252 * by any physical maps.
2253 */
2254 boolean_t
2255 pmap_is_referenced(struct vm_page *pg)
2256 {
2257 boolean_t rv;
2258
2259 rv = ((pg->pg_flags & PG_PMAP_REF) != 0);
2260 #ifdef DEBUG
2261 if (pmapdebug & PDB_FOLLOW) {
2262 printf("pmap_is_referenced(%p) -> %c\n", pg, "FT"[rv]);
2263 }
2264 #endif
2265 return (rv);
2266 }
2267
2268 /*
2269 * pmap_is_modified: [ INTERFACE ]
2270 *
2271 * Return whether or not the specified physical page is modified
2272 * by any physical maps.
2273 */
2274 boolean_t
2275 pmap_is_modified(struct vm_page *pg)
2276 {
2277 boolean_t rv;
2278
2279 rv = ((pg->pg_flags & PG_PMAP_MOD) != 0);
2280 #ifdef DEBUG
2281 if (pmapdebug & PDB_FOLLOW) {
2282 printf("pmap_is_modified(%p) -> %c\n", pg, "FT"[rv]);
2283 }
2284 #endif
2285 return (rv);
2286 }
2287
2288 /*
2289 * Miscellaneous support routines follow
2290 */
2291
2292 /*
2293 * alpha_protection_init:
2294 *
2295 * Initialize Alpha protection code array.
2296 *
2297 * Note: no locking is necessary in this function.
2298 */
2299 void
2300 alpha_protection_init(void)
2301 {
2302 int prot, *kp, *up;
2303
2304 kp = protection_codes[0];
2305 up = protection_codes[1];
2306
2307 for (prot = 0; prot < 8; prot++) {
2308 kp[prot] = PG_ASM;
2309 up[prot] = 0;
2310
2311 if (prot & PROT_READ) {
2312 kp[prot] |= PG_KRE;
2313 up[prot] |= PG_KRE | PG_URE;
2314 }
2315 if (prot & PROT_WRITE) {
2316 kp[prot] |= PG_KWE;
2317 up[prot] |= PG_KWE | PG_UWE;
2318 }
2319 if (prot & PROT_EXEC) {
2320 kp[prot] |= PG_EXEC | PG_KRE;
2321 up[prot] |= PG_EXEC | PG_KRE | PG_URE;
2322 } else {
2323 kp[prot] |= PG_FOE;
2324 up[prot] |= PG_FOE;
2325 }
2326 }
2327 }
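
/*
 * Example of the resulting table entries, derived from the loop above
 * (illustrative, not an exhaustive list):
 *
 *	kernel, PROT_READ | PROT_WRITE:
 *		PG_ASM | PG_KRE | PG_KWE | PG_FOE
 *	user, PROT_READ | PROT_EXEC:
 *		PG_KRE | PG_URE | PG_EXEC
 *
 * Non-executable mappings carry PG_FOE so that an instruction fetch
 * faults and can be rejected in pmap_emulate_reference().
 */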
2328
2329 /*
2330 * pmap_remove_mapping:
2331 *
2332 * Invalidate a single page denoted by pmap/va.
2333 *
2334 * If (pte != NULL), it is the already computed PTE for the page.
2335 *
2336 * Note: locking in this function is complicated by the fact
2337 * that we can be called when the PV list is already locked.
2338 * (pmap_page_protect()). In this case, the caller must be
2339 * careful to get the next PV entry while we remove this entry
2340 * from beneath it. We assume that the pmap itself is already
2341 * locked; dolock applies only to the PV list.
2342 *
2343 * Returns TRUE or FALSE, indicating if an I-stream sync needs
2344 * to be initiated (for this CPU or for other CPUs).
2345 */
2346 boolean_t
2347 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2348 boolean_t dolock, cpuid_t cpu_id)
2349 {
2350 paddr_t pa;
2351 struct vm_page *pg;
2352 boolean_t onpv;
2353 boolean_t hadasm;
2354 boolean_t isactive;
2355 boolean_t needisync = FALSE;
2356 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2357
2358 #ifdef DEBUG
2359 if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2360 printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2361 pmap, va, pte, dolock, cpu_id);
2362 #endif
2363
2364 /*
2365 * PTE not provided, compute it from pmap and va.
2366 */
2367 if (pte == PT_ENTRY_NULL) {
2368 pte = pmap_l3pte(pmap, va, NULL);
2369 if (pmap_pte_v(pte) == 0)
2370 return (FALSE);
2371 }
2372
2373 pa = pmap_pte_pa(pte);
2374 onpv = (pmap_pte_pv(pte) != 0);
2375 if (onpv) {
2376 /*
2377 * Remove it from the PV table such that nobody will
2378 * attempt to modify the PTE behind our back.
2379 */
2380 pg = PHYS_TO_VM_PAGE(pa);
2381 KASSERT(pg != NULL);
2382 pmap_pv_remove(pmap, pg, va, dolock);
2383 }
2384
2385 hadasm = (pmap_pte_asm(pte) != 0);
2386 isactive = PMAP_ISACTIVE(pmap, cpu_id);
2387
2388 /*
2389 * Determine what we need to do about the I-stream. If
2390 * PG_EXEC was set, we mark a user pmap as needing an
2391 * I-sync on the way out to userspace. We always need
2392 * an immediate I-sync for the kernel pmap.
2393 */
2394 if (pmap_pte_exec(pte)) {
2395 if (pmap == pmap_kernel())
2396 needisync = TRUE;
2397 else {
2398 PMAP_SET_NEEDISYNC(pmap);
2399 needisync = (pmap->pm_cpus != 0);
2400 }
2401 }
2402
2403 /*
2404 * Update statistics
2405 */
2406 if (pmap_pte_w(pte))
2407 PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2408 PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2409
2410 /*
2411 * Invalidate the PTE after saving the reference modify info.
2412 */
2413 #ifdef DEBUG
2414 if (pmapdebug & PDB_REMOVE)
2415 printf("remove: invalidating pte at %p\n", pte);
2416 #endif
2417 PMAP_SET_PTE(pte, PG_NV);
2418
2419 PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2420 PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2421 PMAP_TLB_SHOOTNOW();
2422
2423 /*
2424 * If we're removing a user mapping, check to see if we
2425 * can free page table pages.
2426 */
2427 if (pmap != pmap_kernel()) {
2428 /*
2429 * Delete the reference on the level 3 table. It will
2430 * delete references on the level 2 and 1 tables as
2431 * appropriate.
2432 */
2433 pmap_l3pt_delref(pmap, va, pte, cpu_id);
2434 }
2435
2436 return (needisync);
2437 }
2438
2439 /*
2440 * pmap_changebit:
2441 *
2442 * Set or clear the specified PTE bits for all mappings on the
2443 * specified page.
2444 *
2445 * Note: we assume that the pvlist is already locked. There is no
2446 * need to lock the pmap itself as a mapping cannot be removed while
2447 * we are holding the pvlist lock.
2448 */
2449 void
2450 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, cpuid_t cpu_id)
2451 {
2452 pv_entry_t pv;
2453 pt_entry_t *pte, npte;
2454 vaddr_t va;
2455 boolean_t hadasm, isactive;
2456 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2457
2458 #ifdef DEBUG
2459 if (pmapdebug & PDB_BITS)
2460 printf("pmap_changebit(0x%lx, 0x%lx, 0x%lx)\n",
2461 VM_PAGE_TO_PHYS(pg), set, mask);
2462 #endif
2463
2464 MUTEX_ASSERT_LOCKED(&pg->mdpage.pvh_mtx);
2465
2466 /*
2467 * Loop over all current mappings setting/clearing as appropriate.
2468 */
2469 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
2470 va = pv->pv_va;
2471
2472 pte = pv->pv_pte;
2473 npte = (*pte | set) & mask;
2474 if (*pte != npte) {
2475 hadasm = (pmap_pte_asm(pte) != 0);
2476 isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id);
2477 PMAP_SET_PTE(pte, npte);
2478 PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive,
2479 cpu_id);
2480 PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va,
2481 hadasm ? PG_ASM : 0);
2482 }
2483 }
2484
2485 PMAP_TLB_SHOOTNOW();
2486 }
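
/*
 * Callers in this file use pmap_changebit() both to set and to clear
 * fault-on bits across all mappings of a page, e.g. (sketch):
 *
 *	pmap_changebit(pg, PG_FOW, ~0, cpu_id);
 *		re-arm fault-on-write (pmap_clear_modify())
 *	pmap_changebit(pg, 0, ~(PG_FOR | PG_FOW), cpu_id);
 *		clear FOR/FOW after a write fault
 *		(pmap_emulate_reference())
 */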
2487
2488 /*
2489 * pmap_emulate_reference:
2490 *
2491 * Emulate reference and/or modified bit hits.
2492 * Return non-zero if this was an execute fault on a non-exec mapping,
2493 * otherwise return 0.
2494 */
2495 int
2496 pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type)
2497 {
2498 struct pmap *pmap;
2499 pt_entry_t faultoff, *pte;
2500 struct vm_page *pg;
2501 paddr_t pa;
2502 boolean_t didlock = FALSE;
2503 boolean_t exec = FALSE;
2504 cpuid_t cpu_id = cpu_number();
2505
2506 #ifdef DEBUG
2507 if (pmapdebug & PDB_FOLLOW)
2508 printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
2509 p, v, user, type);
2510 #endif
2511
2512 /*
2513 * Convert process and virtual address to physical address.
2514 */
2515 if (v >= VM_MIN_KERNEL_ADDRESS) {
2516 if (user)
2517 panic("pmap_emulate_reference: user ref to kernel");
2518 /*
2519 * No need to lock here; kernel PT pages never go away.
2520 */
2521 pte = PMAP_KERNEL_PTE(v);
2522 } else {
2523 #ifdef DIAGNOSTIC
2524 if (p == NULL)
2525 panic("pmap_emulate_reference: bad proc");
2526 if (p->p_vmspace == NULL)
2527 panic("pmap_emulate_reference: bad p_vmspace");
2528 #endif
2529 pmap = p->p_vmspace->vm_map.pmap;
2530 PMAP_LOCK(pmap);
2531 didlock = TRUE;
2532 pte = pmap_l3pte(pmap, v, NULL);
2533 /*
2534 * We'll unlock below where we're done with the PTE.
2535 */
2536 }
2537 if (pte == NULL || !pmap_pte_v(pte)) {
2538 if (didlock)
2539 PMAP_UNLOCK(pmap);
2540 return (0);
2541 }
2542 exec = pmap_pte_exec(pte);
2543 if (!exec && type == ALPHA_MMCSR_FOE) {
2544 if (didlock)
2545 PMAP_UNLOCK(pmap);
2546 return (1);
2547 }
2548 #ifdef DEBUG
2549 if (pmapdebug & PDB_FOLLOW) {
2550 printf("\tpte = %p, ", pte);
2551 printf("*pte = 0x%lx\n", *pte);
2552 }
2553 #endif
2554 #ifdef DEBUG /* These checks are more expensive */
2555 #ifndef MULTIPROCESSOR
2556 /*
2557 * Quoting the Alpha ARM 14.3.1.4/5/6:
2558 * ``The Translation Buffer may reload and cache the old PTE value
2559 * between the time the FOR (resp. FOW, FOE) fault invalidates the
2560 * old value from the Translation Buffer and the time software
2561 * updates the PTE in memory. Software that depends on the
2562 * processor-provided invalidate must thus be prepared to take
2563 * another FOR (resp. FOW, FOE) fault on a page after clearing the
2564 * page's PTE<FOR(resp. FOW, FOE)> bit. The second fault will
2565 * invalidate the stale PTE from the Translation Buffer, and the
2566 * processor cannot load another stale copy. Thus, in the worst case,
2567 * a multiprocessor system will take an initial FOR (resp. FOW, FOE)
2568 * fault and then an additional FOR (resp. FOW, FOE) fault on each
2569 * processor. In practice, even a single repetition is unlikely.''
2570 *
2571 * In practice, spurious faults on the other processors happen, at
2572 * least on fast 21264 or better processors.
2573 */
2574 if (type == ALPHA_MMCSR_FOW) {
2575 if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE))) {
2576 panic("pmap_emulate_reference(%d,%d): "
2577 "write but unwritable pte 0x%lx",
2578 user, type, *pte);
2579 }
2580 if (!(*pte & PG_FOW)) {
2581 panic("pmap_emulate_reference(%d,%d): "
2582 "write but not FOW pte 0x%lx",
2583 user, type, *pte);
2584 }
2585 } else {
2586 if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE))) {
2587 panic("pmap_emulate_reference(%d,%d): "
2588 "!write but unreadable pte 0x%lx",
2589 user, type, *pte);
2590 }
2591 if (!(*pte & (PG_FOR | PG_FOE))) {
2592 panic("pmap_emulate_reference(%d,%d): "
2593 "!write but not FOR|FOE pte 0x%lx",
2594 user, type, *pte);
2595 }
2596 }
2597 #endif /* MULTIPROCESSOR */
2598 /* Other diagnostics? */
2599 #endif
2600 pa = pmap_pte_pa(pte);
2601
2602 /*
2603 * We're now done with the PTE. If it was a user pmap, unlock
2604 * it now.
2605 */
2606 if (didlock)
2607 PMAP_UNLOCK(pmap);
2608
2609 #ifdef DEBUG
2610 if (pmapdebug & PDB_FOLLOW)
2611 printf("\tpa = 0x%lx\n", pa);
2612 #endif
2613
2614 pg = PHYS_TO_VM_PAGE(pa);
2615
2616 #ifdef DIAGNOSTIC
2617 if (pg == NULL) {
2618 panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): "
2619 "pa 0x%lx (pte %p 0x%08lx) not managed",
2620 p, v, user, type, pa, pte, *pte);
2621 }
2622 #endif
2623
2624 /*
2625 * Twiddle the appropriate bits to reflect the reference
2626 * and/or modification.
2627 *
2628 * The rules:
2629 * (1) always mark page as used, and
2630 * (2) if it was a write fault, mark page as modified.
2631 */
2632
2633 mtx_enter(&pg->mdpage.pvh_mtx);
2634 if (type == ALPHA_MMCSR_FOW) {
2635 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF | PG_PMAP_MOD);
2636 faultoff = PG_FOR | PG_FOW;
2637 } else {
2638 atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
2639 faultoff = PG_FOR;
2640 if (exec) {
2641 faultoff |= PG_FOE;
2642 }
2643 }
2644 pmap_changebit(pg, 0, ~faultoff, cpu_id);
2645 mtx_leave(&pg->mdpage.pvh_mtx);
2646
2647 return (0);
2648 }
2649
2650 #ifdef DEBUG
2651 /*
2652 * pmap_pv_dump:
2653 *
2654 * Dump the physical->virtual data for the specified page.
2655 */
2656 void
2657 pmap_pv_dump(paddr_t pa)
2658 {
2659 struct vm_page *pg;
2660 pv_entry_t pv;
2661
2662 pg = PHYS_TO_VM_PAGE(pa);
2663
2664 printf("pa 0x%lx (attrs = 0x%x):\n",
2665 pa, pg->pg_flags & (PG_PMAP_REF | PG_PMAP_MOD));
2666 mtx_enter(&pg->mdpage.pvh_mtx);
2667 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next)
2668 printf(" pmap %p, va 0x%lx\n",
2669 pv->pv_pmap, pv->pv_va);
2670 mtx_leave(&pg->mdpage.pvh_mtx);
2671 printf("\n");
2672 }
2673 #endif
2674
2675 /*
2676 * vtophys:
2677 *
2678 * Return the physical address corresponding to the K0SEG or
2679 * K1SEG address provided.
2680 *
2681 * Note: no locking is necessary in this function.
2682 */
2683 paddr_t
2684 vtophys(vaddr_t vaddr)
2685 {
2686 pt_entry_t *pte;
2687 paddr_t paddr = 0;
2688
2689 if (vaddr < ALPHA_K0SEG_BASE)
2690 printf("vtophys: invalid vaddr 0x%lx", vaddr);
2691 else if (vaddr <= ALPHA_K0SEG_END)
2692 paddr = ALPHA_K0SEG_TO_PHYS(vaddr);
2693 else {
2694 pte = PMAP_KERNEL_PTE(vaddr);
2695 if (pmap_pte_v(pte))
2696 paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET);
2697 }
2698
2699 #if 0
2700 printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr);
2701 #endif
2702
2703 return (paddr);
2704 }
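
/*
 * Example (sketch; "kva" is a hypothetical K0SEG or K1SEG kernel
 * virtual address).  A return value of 0 indicates that no valid
 * translation was found, since paddr is initialized to 0 above:
 *
 *	paddr_t pa = vtophys((vaddr_t)kva);
 *
 *	if (pa != 0)
 *		...pa is the physical address backing kva...
 */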
2705
2706 /******************** pv_entry management ********************/
2707
2708 /*
2709 * pmap_pv_enter:
2710 *
2711 * Add a physical->virtual entry to the pv_table.
2712 */
2713 int
2714 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
2715 boolean_t dolock)
2716 {
2717 pv_entry_t newpv;
2718
2719 /*
2720 * Allocate and fill in the new pv_entry.
2721 */
2722 newpv = pmap_pv_alloc();
2723 if (newpv == NULL)
2724 return (ENOMEM);
2725 newpv->pv_va = va;
2726 newpv->pv_pmap = pmap;
2727 newpv->pv_pte = pte;
2728
2729 if (dolock)
2730 mtx_enter(&pg->mdpage.pvh_mtx);
2731
2732 #ifdef DEBUG
2733 {
2734 pv_entry_t pv;
2735 /*
2736 * Make sure the entry doesn't already exist.
2737 */
2738 for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
2739 if (pmap == pv->pv_pmap && va == pv->pv_va) {
2740 printf("pmap = %p, va = 0x%lx\n", pmap, va);
2741 panic("pmap_pv_enter: already in pv table");
2742 }
2743 }
2744 }
2745 #endif
2746
2747 /*
2748 * ...and put it in the list.
2749 */
2750 newpv->pv_next = pg->mdpage.pvh_list;
2751 pg->mdpage.pvh_list = newpv;
2752
2753 if (dolock)
2754 mtx_leave(&pg->mdpage.pvh_mtx);
2755
2756 return (0);
2757 }
2758
2759 /*
2760 * pmap_pv_remove:
2761 *
2762 * Remove a physical->virtual entry from the pv_table.
2763 */
2764 void
2765 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, boolean_t dolock)
2766 {
2767 pv_entry_t pv, *pvp;
2768
2769 if (dolock)
2770 mtx_enter(&pg->mdpage.pvh_mtx);
2771
2772 /*
2773 * Find the entry to remove.
2774 */
2775 for (pvp = &pg->mdpage.pvh_list, pv = *pvp;
2776 pv != NULL; pvp = &pv->pv_next, pv = *pvp)
2777 if (pmap == pv->pv_pmap && va == pv->pv_va)
2778 break;
2779
2780 #ifdef DEBUG
2781 if (pv == NULL)
2782 panic("pmap_pv_remove: not in pv table");
2783 #endif
2784
2785 *pvp = pv->pv_next;
2786
2787 if (dolock)
2788 mtx_leave(&pg->mdpage.pvh_mtx);
2789
2790 pmap_pv_free(pv);
2791 }
2792
2793 /*
2794 * pmap_pv_page_alloc:
2795 *
2796 * Allocate a page for the pv_entry pool.
2797 */
2798 void *
2799 pmap_pv_page_alloc(struct pool *pp, int flags, int *slowdown)
2800 {
2801 paddr_t pg;
2802
2803 *slowdown = 0;
2804 if (pmap_physpage_alloc(PGU_PVENT, &pg))
2805 return ((void *)ALPHA_PHYS_TO_K0SEG(pg));
2806 return (NULL);
2807 }
2808
2809 /*
2810 * pmap_pv_page_free:
2811 *
2812 * Free a pv_entry pool page.
2813 */
2814 void
2815 pmap_pv_page_free(struct pool *pp, void *v)
2816 {
2817
2818 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
2819 }
2820
2821 /******************** misc. functions ********************/
2822
2823 /*
2824 * pmap_physpage_alloc:
2825 *
2826 * Allocate a single page from the VM system and return the
2827 * physical address for that page.
2828 */
2829 boolean_t
2830 pmap_physpage_alloc(int usage, paddr_t *pap)
2831 {
2832 struct vm_page *pg;
2833 paddr_t pa;
2834
2835 /*
2836 * Don't ask for a zeroed page in the L1PT case -- we will
2837 * properly initialize it in the constructor.
2838 */
2839
2840 pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
2841 UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2842 if (pg != NULL) {
2843 pa = VM_PAGE_TO_PHYS(pg);
2844
2845 #ifdef DIAGNOSTIC
2846 if (pg->wire_count != 0) {
2847 printf("pmap_physpage_alloc: page 0x%lx has "
2848 "%d references\n", pa, pg->wire_count);
2849 panic("pmap_physpage_alloc");
2850 }
2851 #endif
2852 *pap = pa;
2853 return (TRUE);
2854 }
2855 return (FALSE);
2856 }
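
/*
 * Typical use within this file (sketch, following the pattern in
 * pmap_growkernel() and pmap_ptpage_alloc()):
 *
 *	paddr_t ptaddr;
 *
 *	if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == FALSE)
 *		return (ENOMEM);	(or otherwise handle the failure)
 */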
2857
2858 /*
2859 * pmap_physpage_free:
2860 *
2861 * Free the single page table page at the specified physical address.
2862 */
2863 void
2864 pmap_physpage_free(paddr_t pa)
2865 {
2866 struct vm_page *pg;
2867
2868 if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2869 panic("pmap_physpage_free: bogus physical page address");
2870
2871 #ifdef DIAGNOSTIC
2872 if (pg->wire_count != 0)
2873 panic("pmap_physpage_free: page still has references");
2874 #endif
2875
2876 uvm_pagefree(pg);
2877 }
2878
2879 /*
2880 * pmap_physpage_addref:
2881 *
2882 * Add a reference to the specified special use page.
2883 */
2884 int
2885 pmap_physpage_addref(void *kva)
2886 {
2887 struct vm_page *pg;
2888 paddr_t pa;
2889 int rval;
2890
2891 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2892 pg = PHYS_TO_VM_PAGE(pa);
2893
2894 rval = ++pg->wire_count;
2895
2896 return (rval);
2897 }
2898
2899 /*
2900 * pmap_physpage_delref:
2901 *
2902 * Delete a reference to the specified special use page.
2903 */
2904 int
2905 pmap_physpage_delref(void *kva)
2906 {
2907 struct vm_page *pg;
2908 paddr_t pa;
2909 int rval;
2910
2911 pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2912 pg = PHYS_TO_VM_PAGE(pa);
2913
2914 #ifdef DIAGNOSTIC
2915 /*
2916 * Make sure we never have a negative reference count.
2917 */
2918 if (pg->wire_count == 0)
2919 panic("pmap_physpage_delref: reference count already zero");
2920 #endif
2921
2922 rval = --pg->wire_count;
2923
2924 return (rval);
2925 }
2926
2927 /******************** page table page management ********************/
2928
2929 /*
2930 * pmap_growkernel: [ INTERFACE ]
2931 *
2932 * Grow the kernel address space. This is a hint from the
2933 * upper layer to pre-allocate more kernel PT pages.
2934 */
2935 vaddr_t
2936 pmap_growkernel(vaddr_t maxkvaddr)
2937 {
2938 struct pmap *kpm = pmap_kernel(), *pm;
2939 paddr_t ptaddr;
2940 pt_entry_t *l1pte, *l2pte, pte;
2941 vaddr_t va;
2942 int l1idx;
2943
2944 mtx_enter(&pmap_growkernel_mtx);
2945
2946 if (maxkvaddr <= pmap_maxkvaddr)
2947 goto out; /* we are OK */
2948
2949 va = pmap_maxkvaddr;
2950
2951 while (va < maxkvaddr) {
2952 /*
2953 * If there is no valid L1 PTE (i.e. no L2 PT page),
2954 * allocate a new L2 PT page and insert it into the
2955 * L1 map.
2956 */
2957 l1pte = pmap_l1pte(kpm, va);
2958 if (pmap_pte_v(l1pte) == 0) {
2959 /*
2960 * XXX PGU_NORMAL? It's not a "traditional" PT page.
2961 */
2962 if (uvm.page_init_done == FALSE) {
2963 /*
2964 * We're growing the kernel pmap early (from
2965 * uvm_pageboot_alloc()). This case must
2966 * be handled a little differently.
2967 */
2968 ptaddr = ALPHA_K0SEG_TO_PHYS(
2969 pmap_steal_memory(PAGE_SIZE, NULL, NULL));
2970 } else if (pmap_physpage_alloc(PGU_NORMAL,
2971 &ptaddr) == FALSE)
2972 goto die;
2973 pte = (atop(ptaddr) << PG_SHIFT) |
2974 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
2975 *l1pte = pte;
2976
2977 l1idx = l1pte_index(va);
2978
2979 /* Update all the user pmaps. */
2980 mtx_enter(&pmap_all_pmaps_mtx);
2981 for (pm = TAILQ_FIRST(&pmap_all_pmaps);
2982 pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
2983 /* Skip the kernel pmap. */
2984 if (pm == pmap_kernel())
2985 continue;
2986
2987 PMAP_LOCK(pm);
2988 KDASSERT(pm->pm_lev1map != kernel_lev1map);
2989 pm->pm_lev1map[l1idx] = pte;
2990 PMAP_UNLOCK(pm);
2991 }
2992 mtx_leave(&pmap_all_pmaps_mtx);
2993 }
2994
2995 /*
2996 * Have an L2 PT page now, add the L3 PT page.
2997 */
2998 l2pte = pmap_l2pte(kpm, va, l1pte);
2999 KASSERT(pmap_pte_v(l2pte) == 0);
3000 if (uvm.page_init_done == FALSE) {
3001 /*
3002 * See above.
3003 */
3004 ptaddr = ALPHA_K0SEG_TO_PHYS(
3005 pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3006 } else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == FALSE)
3007 goto die;
3008 *l2pte = (atop(ptaddr) << PG_SHIFT) |
3009 PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3010 va += ALPHA_L2SEG_SIZE;
3011 }
3012
3013 #if 0
3014 /* Invalidate the L1 PT cache. */
3015 pool_cache_invalidate(&pmap_l1pt_cache);
3016 #endif
3017
3018 pmap_maxkvaddr = va;
3019
3020 out:
3021 mtx_leave(&pmap_growkernel_mtx);
3022
3023 return (pmap_maxkvaddr);
3024
3025 die:
3026 mtx_leave(&pmap_growkernel_mtx);
3027 panic("pmap_growkernel: out of memory");
3028 }
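
/*
 * Sketch of the caller's side of this interface (names here are
 * illustrative; the only contract is the hint semantics described in
 * the comment above): the VM system passes in the highest kernel VA
 * it is about to use and continues with whatever page-table-backed
 * maximum is returned:
 *
 *	maxkvaddr = pmap_growkernel(desired_kva);
 */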
3029
3030 /*
3031 * pmap_lev1map_create:
3032 *
3033 * Create a new level 1 page table for the specified pmap.
3034 *
3035 * Note: the growkernel lock must already be held and the pmap either
3036 * already locked or unreferenced globally.
3037 */
3038 int
3039 pmap_lev1map_create(pmap_t pmap, cpuid_t cpu_id)
3040 {
3041 pt_entry_t *l1pt;
3042
3043 KASSERT(pmap != pmap_kernel());
3044 KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3045
3046 /* Don't sleep -- we're called with locks held. */
3047 l1pt = pool_get(&pmap_l1pt_pool, PR_NOWAIT);
3048 if (l1pt == NULL)
3049 return (ENOMEM);
3050
3051 pmap_l1pt_ctor(l1pt);
3052 pmap->pm_lev1map = l1pt;
3053
3054 return (0);
3055 }
3056
3057 /*
3058 * pmap_lev1map_destroy:
3059 *
3060 * Destroy the level 1 page table for the specified pmap.
3061 *
3062 * Note: the growkernel lock must already be held and the pmap either
3063 * already locked or unreferenced globally.
3064 */
3065 void
3066 pmap_lev1map_destroy(pmap_t pmap)
3067 {
3068 pt_entry_t *l1pt = pmap->pm_lev1map;
3069
3070 KASSERT(pmap != pmap_kernel());
3071
3072 /*
3073 * Go back to referencing the global kernel_lev1map.
3074 */
3075 pmap->pm_lev1map = kernel_lev1map;
3076
3077 /*
3078 * Free the old level 1 page table page.
3079 */
3080 pool_put(&pmap_l1pt_pool, l1pt);
3081 }
3082
3083 /*
3084 * pmap_l1pt_ctor:
3085 *
3086 * Constructor for L1 PT pages.
3087 */
3088 void
3089 pmap_l1pt_ctor(pt_entry_t *l1pt)
3090 {
3091 pt_entry_t pte;
3092 int i;
3093
3094 /*
3095 * Initialize the new level 1 table by zeroing the
3096 * user portion and copying the kernel mappings into
3097 * the kernel portion.
3098 */
3099 for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3100 l1pt[i] = 0;
3101
3102 for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3103 i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3104 l1pt[i] = kernel_lev1map[i];
3105
3106 /*
3107 * Now, map the new virtual page table. NOTE: NO ASM!
3108 */
3109 pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3110 PG_V | PG_KRE | PG_KWE;
3111 l1pt[l1pte_index(VPTBASE)] = pte;
3112 }
3113
3114 /*
3115 * pmap_l1pt_alloc:
3116 *
3117 * Page allocator for L1 PT pages.
3118 *
3119 * Note: The growkernel lock is held across allocations
3120 * from this pool, so we don't need to acquire it
3121 * ourselves.
3122 */
3123 void *
3124 pmap_l1pt_alloc(struct pool *pp, int flags, int *slowdown)
3125 {
3126 paddr_t ptpa;
3127
3128 /*
3129 * Attempt to allocate a free page.
3130 */
3131 *slowdown = 0;
3132 if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == FALSE)
3133 return (NULL);
3134
3135 return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3136 }
3137
3138 /*
3139 * pmap_l1pt_free:
3140 *
3141 * Page freer for L1 PT pages.
3142 */
3143 void
3144 pmap_l1pt_free(struct pool *pp, void *v)
3145 {
3146
3147 pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3148 }
3149
3150 /*
3151 * pmap_ptpage_alloc:
3152 *
3153 * Allocate a level 2 or level 3 page table page, and
3154 * initialize the PTE that references it.
3155 *
3156 * Note: the pmap must already be locked.
3157 */
3158 int
3159 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3160 {
3161 paddr_t ptpa;
3162
3163 /*
3164 * Allocate the page table page.
3165 */
3166 if (pmap_physpage_alloc(usage, &ptpa) == FALSE)
3167 return (ENOMEM);
3168
3169 /*
3170 * Initialize the referencing PTE.
3171 */
3172 PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3173 PG_V | PG_KRE | PG_KWE | PG_WIRED |
3174 (pmap == pmap_kernel() ? PG_ASM : 0));
3175
3176 return (0);
3177 }
3178
3179 /*
3180 * pmap_ptpage_free:
3181 *
3182 * Free the level 2 or level 3 page table page referenced
3183 * by the provided PTE.
3184 *
3185 * Note: the pmap must already be locked.
3186 */
3187 void
3188 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3189 {
3190 paddr_t ptpa;
3191
3192 /*
3193 * Extract the physical address of the page from the PTE
3194 * and clear the entry.
3195 */
3196 ptpa = pmap_pte_pa(pte);
3197 PMAP_SET_PTE(pte, PG_NV);
3198
3199 #ifdef DEBUG
3200 pmap_zero_page(PHYS_TO_VM_PAGE(ptpa));
3201 #endif
3202 pmap_physpage_free(ptpa);
3203 }
3204
3205 /*
3206 * pmap_l3pt_delref:
3207 *
3208 * Delete a reference on a level 3 PT page. If the reference drops
3209 * to zero, free it.
3210 *
3211 * Note: the pmap must already be locked.
3212 */
3213 void
3214 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, cpuid_t cpu_id)
3215 {
3216 pt_entry_t *l1pte, *l2pte;
3217 PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3218
3219 l1pte = pmap_l1pte(pmap, va);
3220 l2pte = pmap_l2pte(pmap, va, l1pte);
3221
3222 #ifdef DIAGNOSTIC
3223 if (pmap == pmap_kernel())
3224 panic("pmap_l3pt_delref: kernel pmap");
3225 #endif
3226
3227 if (pmap_physpage_delref(l3pte) == 0) {
3228 /*
3229 * No more mappings; we can free the level 3 table.
3230 */
3231 #ifdef DEBUG
3232 if (pmapdebug & PDB_PTPAGE)
3233 printf("pmap_l3pt_delref: freeing level 3 table at "
3234 "0x%lx\n", pmap_pte_pa(l2pte));
3235 #endif
3236 pmap_ptpage_free(pmap, l2pte);
3237
3238 /*
3239 * We've freed a level 3 table, so we must
3240 * invalidate the TLB entry for that PT page
3241 * in the Virtual Page Table VA range, because
3242 * otherwise the PALcode will service a TLB
3243 * miss using the stale VPT TLB entry it entered
3244 * behind our back to shortcut to the VA's PTE.
3245 */
3246 PMAP_INVALIDATE_TLB(pmap,
3247 (vaddr_t)(&VPT[VPT_INDEX(va)]), FALSE,
3248 PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3249 PMAP_TLB_SHOOTDOWN(pmap,
3250 (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3251 PMAP_TLB_SHOOTNOW();
3252
3253 /*
3254 * We've freed a level 3 table, so delete the reference
3255 * on the level 2 table.
3256 */
3257 pmap_l2pt_delref(pmap, l1pte, l2pte);
3258 }
3259 }
3260
3261 /*
3262 * pmap_l2pt_delref:
3263 *
3264 * Delete a reference on a level 2 PT page. If the reference drops
3265 * to zero, free it.
3266 *
3267 * Note: the pmap must already be locked.
3268 */
3269 void
3270 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte)
3271 {
3272 KASSERT(pmap != pmap_kernel());
3273 if (pmap_physpage_delref(l2pte) == 0) {
3274 /*
3275 * No more mappings in this segment; we can free the
3276 * level 2 table.
3277 */
3278 #ifdef DEBUG
3279 if (pmapdebug & PDB_PTPAGE)
3280 printf("pmap_l2pt_delref: freeing level 2 table at "
3281 "0x%lx\n", pmap_pte_pa(l1pte));
3282 #endif
3283 pmap_ptpage_free(pmap, l1pte);
3284
3285 /*
3286 * We've freed a level 2 table, so delete the reference
3287 * on the level 1 table.
3288 */
3289 pmap_l1pt_delref(pmap, l1pte);
3290 }
3291 }
3292
3293 /*
3294 * pmap_l1pt_delref:
3295 *
3296 * Delete a reference on a level 1 PT page. If the reference drops
3297 * to zero, free it.
3298 *
3299 * Note: the pmap must already be locked.
3300 */
3301 void
3302 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte)
3303 {
3304 KASSERT(pmap != pmap_kernel());
3305 pmap_physpage_delref(l1pte);
3306 }
3307
3308 /******************** Address Space Number management ********************/
3309
3310 /*
3311 * pmap_asn_alloc:
3312 *
3313 * Allocate and assign an ASN to the specified pmap.
3314 *
3315 * Note: the pmap must already be locked. This may be called from
3316 * an interprocessor interrupt, and in that case, the sender of
3317 * the IPI has the pmap lock.
3318 */
3319 void
3320 pmap_asn_alloc(pmap_t pmap, cpuid_t cpu_id)
3321 {
3322 struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3323 struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3324
3325 #ifdef DEBUG
3326 if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3327 printf("pmap_asn_alloc(%p)\n", pmap);
3328 #endif
3329
3330 /*
3331 * If the pmap is still using the global kernel_lev1map, there
3332 * is no need to assign an ASN at this time, because only
3333 * kernel mappings exist in that map, and all kernel mappings
3334 * have PG_ASM set. If the pmap eventually gets its own
3335 * lev1map, an ASN will be allocated at that time.
3336 *
3337 * Only the kernel pmap will reference kernel_lev1map. Do the
3338 * same old fixups, but note that we no longer need the pmap
3339 * to be locked if we're in this mode, since pm_lev1map will
3340 * never change.
3341 */
3342 if (pmap->pm_lev1map == kernel_lev1map) {
3343 #ifdef DEBUG
3344 if (pmapdebug & PDB_ASN)
3345 printf("pmap_asn_alloc: still references "
3346 "kernel_lev1map\n");
3347 #endif
3348 #if defined(MULTIPROCESSOR)
3349 /*
3350 * In a multiprocessor system, it's possible to
3351 * get here without having PMAP_ASN_RESERVED in
3352 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3353 *
3354 * So, what we do here, is simply assign the reserved
3355 * ASN for kernel_lev1map users and let things
3356 * continue on. We do, however, let uniprocessor
3357 * configurations continue to make the assertion below.
3358 */
3359 pma->pma_asn = PMAP_ASN_RESERVED;
3360 #else
3361 KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
3362 #endif /* MULTIPROCESSOR */
3363 return;
3364 }
3365
3366 /*
3367 * On processors which do not implement ASNs, the swpctx PALcode
3368 * operation will automatically invalidate the TLB and I-cache,
3369 * so we don't need to do that here.
3370 */
3371 if (pmap_max_asn == 0) {
3372 /*
3373 * Refresh the pmap's generation number, to
3374 * simplify logic elsewhere.
3375 */
3376 pma->pma_asngen = cpma->pma_asngen;
3377 #ifdef DEBUG
3378 if (pmapdebug & PDB_ASN)
3379 printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3380 pma->pma_asngen);
3381 #endif
3382 return;
3383 }
3384
3385 /*
3386 * Hopefully, we can continue using the one we have...
3387 */
3388 if (pma->pma_asn != PMAP_ASN_RESERVED &&
3389 pma->pma_asngen == cpma->pma_asngen) {
3390 /*
3391 * ASN is still in the current generation; keep on using it.
3392 */
3393 #ifdef DEBUG
3394 if (pmapdebug & PDB_ASN)
3395 printf("pmap_asn_alloc: same generation, keeping %u\n",
3396 pma->pma_asn);
3397 #endif
3398 return;
3399 }
3400
3401 /*
3402 * Need to assign a new ASN. Grab the next one, incrementing
3403 * the generation number if we have to.
3404 */
3405 if (cpma->pma_asn > pmap_max_asn) {
3406 /*
3407 * Invalidate all non-PG_ASM TLB entries and the
3408 * I-cache, and bump the generation number.
3409 */
3410 ALPHA_TBIAP();
3411 alpha_pal_imb();
3412
3413 cpma->pma_asn = 1;
3414 cpma->pma_asngen++;
3415 #ifdef DIAGNOSTIC
3416 if (cpma->pma_asngen == 0) {
3417 /*
3418 * The generation number has wrapped. We could
3419 * handle this scenario by traversing all of
3420 * the pmaps, and invalidating the generation
3421 * number on those which are not currently
3422 * in use by this processor.
3423 *
3424 * However... considering that we're using
3425 * an unsigned 64-bit integer for generation
3426 * numbers, on non-ASN CPUs, we won't wrap
3427 * for approx. 585 million years, or 75 billion
3428 * years on a 128-ASN CPU (assuming 1000 switch
3429 * operations per second).
3430 *
3431 * So, we don't bother.
3432 */
3433 panic("pmap_asn_alloc: too much uptime");
3434 }
3435 #endif
3436 #ifdef DEBUG
3437 if (pmapdebug & PDB_ASN)
3438 printf("pmap_asn_alloc: generation bumped to %lu\n",
3439 cpma->pma_asngen);
3440 #endif
3441 }
3442
3443 /*
3444 * Assign the new ASN and validate the generation number.
3445 */
3446 pma->pma_asn = cpma->pma_asn++;
3447 pma->pma_asngen = cpma->pma_asngen;
3448
3449 #ifdef DEBUG
3450 if (pmapdebug & PDB_ASN)
3451 printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3452 pma->pma_asn, pmap);
3453 #endif
3454
3455 /*
3456 * Have a new ASN, so there's no need to sync the I-stream
3457 * on the way back out to userspace.
3458 */
3459 atomic_clearbits_ulong(&pmap->pm_needisync, (1UL << cpu_id));
3460 }
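
/*
 * Worked example of the generation check above (illustrative values):
 * if cpma->pma_asngen is 7 but this pmap still holds an ASN tagged
 * with pma_asngen == 6, the "same generation" test fails and a fresh
 * ASN is taken from cpma->pma_asn.  Once cpma->pma_asn exceeds
 * pmap_max_asn, all non-PG_ASM TLB entries and the I-cache are
 * flushed, the generation is bumped to 8, and the ASN counter
 * restarts at 1.
 */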
3461
3462 #if defined(MULTIPROCESSOR)
3463 /******************** TLB shootdown code ********************/
3464
3465 /*
3466 * pmap_tlb_shootdown:
3467 *
3468 * Cause the TLB entry for pmap/va to be shot down.
3469 *
3470 * NOTE: The pmap must be locked here.
3471 */
3472 void
3473 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp)
3474 {
3475 struct pmap_tlb_shootdown_q *pq;
3476 struct pmap_tlb_shootdown_job *pj;
3477 struct cpu_info *ci, *self = curcpu();
3478 u_long cpumask;
3479 CPU_INFO_ITERATOR cii;
3480 #if 0
3481 int s;
3482 #endif
3483
3484 cpumask = 0;
3485
3486 CPU_INFO_FOREACH(cii, ci) {
3487 if (ci == self)
3488 continue;
3489
3490 /*
3491 * The pmap must be locked (unless its the kernel
3492 * pmap, in which case it is okay for it to be
3493 * unlocked), which prevents it from becoming
3494 * active on any additional processors. This makes
3495 * it safe to check for activeness. If it's not
3496 * active on the processor in question, then just
3497 * mark it as needing a new ASN the next time it
3498 * does, saving the IPI. We always have to send
3499 * the IPI for the kernel pmap.
3500 *
3501 * Note if it's marked active now, and it becomes
3502 * inactive by the time the processor receives
3503 * the IPI, that's okay, because it does the right
3504 * thing with it later.
3505 */
3506 if (pmap != pmap_kernel() &&
3507 PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) {
3508 PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid);
3509 continue;
3510 }
3511
3512 cpumask |= 1UL << ci->ci_cpuid;
3513
3514 pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
3515
3516 PSJQ_LOCK(pq, s);
3517
3518 pq->pq_pte |= pte;
3519
3520 /*
3521 * If a global flush is already pending, we
3522 * don't really have to do anything else.
3523 */
3524 if (pq->pq_tbia) {
3525 PSJQ_UNLOCK(pq, s);
3526 continue;
3527 }
3528
3529 pj = pmap_tlb_shootdown_job_get(pq);
3530 if (pj == NULL) {
3531 /*
3532 * Couldn't allocate a job entry. Just
3533 * tell the processor to kill everything.
3534 */
3535 pq->pq_tbia = 1;
3536 } else {
3537 pj->pj_pmap = pmap;
3538 pj->pj_va = va;
3539 pj->pj_pte = pte;
3540 TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
3541 }
3542
3543 PSJQ_UNLOCK(pq, s);
3544 }
3545
3546 *cpumaskp |= cpumask;
3547 }
3548
3549 /*
3550 * pmap_tlb_shootnow:
3551 *
3552 * Process the TLB shootdowns that we have been accumulating
3553 * for the specified processor set.
3554 */
3555 void
3556 pmap_tlb_shootnow(u_long cpumask)
3557 {
3558
3559 alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN);
3560 }
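
/*
 * The usual sequence elsewhere in this file reaches these two
 * routines through the PMAP_TLB_SHOOTDOWN*() macros, e.g. (sketch
 * taken from pmap_remove_mapping() above):
 *
 *	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
 *	...
 *	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
 *	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
 *	PMAP_TLB_SHOOTNOW();
 */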
3561
3562 /*
3563 * pmap_do_tlb_shootdown:
3564 *
3565 * Process pending TLB shootdown operations for this processor.
3566 */
3567 void
3568 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep)
3569 {
3570 u_long cpu_id = ci->ci_cpuid;
3571 u_long cpu_mask = (1UL << cpu_id);
3572 struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
3573 struct pmap_tlb_shootdown_job *pj;
3574 #if 0
3575 int s;
3576 #endif
3577
3578 PSJQ_LOCK(pq, s);
3579
3580 if (pq->pq_tbia) {
3581 if (pq->pq_pte & PG_ASM)
3582 ALPHA_TBIA();
3583 else
3584 ALPHA_TBIAP();
3585 pq->pq_tbia = 0;
3586 pmap_tlb_shootdown_q_drain(pq);
3587 } else {
3588 while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
3589 TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
3590 PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va,
3591 pj->pj_pte & PG_ASM,
3592 pj->pj_pmap->pm_cpus & cpu_mask, cpu_id);
3593 pmap_tlb_shootdown_job_put(pq, pj);
3594 }
3595 }
3596 pq->pq_pte = 0;
3597
3598 PSJQ_UNLOCK(pq, s);
3599 }
3600
3601 /*
3602 * pmap_tlb_shootdown_q_drain:
3603 *
3604 * Drain a processor's TLB shootdown queue. We do not perform
3605 * the shootdown operations. This is merely a convenience
3606 * function.
3607 *
3608 * Note: We expect the queue to be locked.
3609 */
3610 void
3611 pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *pq)
3612 {
3613 struct pmap_tlb_shootdown_job *pj;
3614
3615 while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
3616 TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
3617 pmap_tlb_shootdown_job_put(pq, pj);
3618 }
3619 }
3620
3621 /*
3622 * pmap_tlb_shootdown_job_get:
3623 *
3624 * Get a TLB shootdown job queue entry. This places a limit on
3625 * the number of outstanding jobs a processor may have.
3626 *
3627 * Note: We expect the queue to be locked.
3628 */
3629 struct pmap_tlb_shootdown_job *
3630 pmap_tlb_shootdown_job_get(struct pmap_tlb_shootdown_q *pq)
3631 {
3632 struct pmap_tlb_shootdown_job *pj;
3633
3634 pj = TAILQ_FIRST(&pq->pq_free);
3635 if (pj != NULL)
3636 TAILQ_REMOVE(&pq->pq_free, pj, pj_list);
3637 return (pj);
3638 }
3639
3640 /*
3641 * pmap_tlb_shootdown_job_put:
3642 *
3643 * Put a TLB shootdown job queue entry onto the free list.
3644 *
3645 * Note: We expect the queue to be locked.
3646 */
3647 void
3648 pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *pq,
3649 struct pmap_tlb_shootdown_job *pj)
3650 {
3651 TAILQ_INSERT_TAIL(&pq->pq_free, pj, pj_list);
3652 }
3653 #endif /* MULTIPROCESSOR */
3654