1 /* $OpenBSD: pmap.c,v 1.92 2024/08/23 15:14:45 miod Exp $ */
2 /* $NetBSD: pmap.c,v 1.154 2000/12/07 22:18:55 thorpej Exp $ */
3 
4 /*-
5  * Copyright (c) 1998, 1999, 2000 The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
10  * NASA Ames Research Center and by Chris G. Demetriou.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 
34 /*
35  * Copyright (c) 1991, 1993
36  *	The Regents of the University of California.  All rights reserved.
37  *
38  * This code is derived from software contributed to Berkeley by
39  * the Systems Programming Group of the University of Utah Computer
40  * Science Department.
41  *
42  * Redistribution and use in source and binary forms, with or without
43  * modification, are permitted provided that the following conditions
44  * are met:
45  * 1. Redistributions of source code must retain the above copyright
46  *    notice, this list of conditions and the following disclaimer.
47  * 2. Redistributions in binary form must reproduce the above copyright
48  *    notice, this list of conditions and the following disclaimer in the
49  *    documentation and/or other materials provided with the distribution.
50  * 3. Neither the name of the University nor the names of its contributors
51  *    may be used to endorse or promote products derived from this software
52  *    without specific prior written permission.
53  *
54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64  * SUCH DAMAGE.
65  *
66  *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
67  */
68 
69 /*
70  * DEC Alpha physical map management code.
71  *
72  * History:
73  *
74  *	This pmap started life as a Motorola 68851/68030 pmap,
75  *	written by Mike Hibler at the University of Utah.
76  *
77  *	It was modified for the DEC Alpha by Chris Demetriou
78  *	at Carnegie Mellon University.
79  *
80  *	Support for non-contiguous physical memory was added by
81  *	Jason R. Thorpe of the Numerical Aerospace Simulation
82  *	Facility, NASA Ames Research Center and Chris Demetriou.
83  *
84  *	Page table management and a major cleanup were undertaken
85  *	by Jason R. Thorpe, with lots of help from Ross Harvey of
86  *	Avalon Computer Systems and from Chris Demetriou.
87  *
88  *	Support for the new UVM pmap interface was written by
89  *	Jason R. Thorpe.
90  *
91  *	Support for ASNs was written by Jason R. Thorpe, again
92  *	with help from Chris Demetriou and Ross Harvey.
93  *
94  *	The locking protocol was written by Jason R. Thorpe,
95  *	using Chuck Cranor's i386 pmap for UVM as a model.
96  *
97  *	TLB shootdown code was written by Jason R. Thorpe.
98  *
99  * Notes:
100  *
101  *	All page table access is done via K0SEG.  The one exception
102  *	to this is for kernel mappings.  Since all kernel page
103  *	tables are pre-allocated, we can use the Virtual Page Table
104  *	to access PTEs that map K1SEG addresses.
105  *
106  *	Kernel page table pages are statically allocated in
107  *	pmap_bootstrap(), and are never freed.  In the future,
108  *	support for dynamically adding additional kernel page
109  *	table pages may be added.  User page table pages are
110  *	dynamically allocated and freed.
111  *
112  * Bugs/misfeatures:
113  *
114  *	- Some things could be optimized.
115  */
116 
117 /*
118  *	Manages physical address maps.
119  *
120  *	Since the information managed by this module is
121  *	also stored by the logical address mapping module,
122  *	this module may throw away valid virtual-to-physical
123  *	mappings at almost any time.  However, invalidations
124  *	of virtual-to-physical mappings must be done as
125  *	requested.
126  *
127  *	In order to cope with hardware architectures which
128  *	make virtual-to-physical map invalidates expensive,
129  *	this module may delay invalidate or reduced protection
130  *	operations until such time as they are actually
131  *	necessary.  This module is given full information as
132  *	to which processors are currently using which maps,
133  *	and to when physical maps must be made correct.
134  */
135 
136 #include <sys/param.h>
137 #include <sys/systm.h>
138 #include <sys/proc.h>
139 #include <sys/malloc.h>
140 #include <sys/pool.h>
141 #include <sys/user.h>
142 #include <sys/buf.h>
143 #include <sys/atomic.h>
144 #ifdef SYSVSHM
145 #include <sys/shm.h>
146 #endif
147 
148 #include <uvm/uvm.h>
149 
150 #include <machine/atomic.h>
151 #include <machine/cpu.h>
152 #if defined(MULTIPROCESSOR)
153 #include <machine/rpb.h>
154 #endif
155 
156 #ifdef DEBUG
157 #define	PDB_FOLLOW	0x0001
158 #define	PDB_INIT	0x0002
159 #define	PDB_ENTER	0x0004
160 #define	PDB_REMOVE	0x0008
161 #define	PDB_CREATE	0x0010
162 #define	PDB_PTPAGE	0x0020
163 #define	PDB_ASN		0x0040
164 #define	PDB_BITS	0x0080
165 #define	PDB_COLLECT	0x0100
166 #define	PDB_PROTECT	0x0200
167 #define	PDB_BOOTSTRAP	0x1000
168 #define	PDB_PARANOIA	0x2000
169 #define	PDB_WIRING	0x4000
170 #define	PDB_PVDUMP	0x8000
171 
172 int debugmap = 0;
173 int pmapdebug = PDB_PARANOIA|PDB_FOLLOW|PDB_ENTER;
174 #endif
175 
176 /*
177  * Given a map and a machine independent protection code,
178  * convert to an alpha protection code.
179  */
180 #define pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
181 int	protection_codes[2][8];
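
/*
 * Illustrative sketch only, never compiled: how the two-row table
 * consulted by pte_prot() is typically filled, using the PROT_* and
 * PG_* constants already used in this file.  The real table is built
 * by alpha_protection_init() later in this file; only the read/write
 * enable bits are shown here, and the real routine also handles
 * PROT_EXEC.
 */
#if 0
static void
protection_codes_sketch(void)
{
	int prot;

	for (prot = 0; prot < 8; prot++) {
		protection_codes[0][prot] = 0;	/* kernel pmap row */
		protection_codes[1][prot] = 0;	/* user pmap row */
		if (prot & PROT_READ) {
			protection_codes[0][prot] |= PG_KRE;
			protection_codes[1][prot] |= PG_KRE | PG_URE;
		}
		if (prot & PROT_WRITE) {
			protection_codes[0][prot] |= PG_KWE;
			protection_codes[1][prot] |= PG_KWE | PG_UWE;
		}
	}
	/* pte_prot(pmap_kernel(), PROT_READ) would then yield PG_KRE. */
}
#endif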
182 
183 /*
184  * kernel_lev1map:
185  *
186  *	Kernel level 1 page table.  This maps all kernel level 2
187  *	page table pages, and is used as a template for all user
188  *	pmap level 1 page tables.  When a new user level 1 page
189  *	table is allocated, all kernel_lev1map PTEs for kernel
190  *	addresses are copied to the new map.
191  *
192  *	The kernel also has an initial set of kernel level 2 page
193  *	table pages.  These map the kernel level 3 page table pages.
194  *	As kernel level 3 page table pages are added, more level 2
195  *	page table pages may be added to map them.  These pages are
196  *	never freed.
197  *
198  *	Finally, the kernel also has an initial set of kernel level
199  *	3 page table pages.  These map pages in K1SEG.  More level
200  *	3 page table pages may be added at run-time if additional
201  *	K1SEG address space is required.  These pages are never freed.
202  *
203  * NOTE: When mappings are inserted into the kernel pmap, all
204  * level 2 and level 3 page table pages must already be allocated
205  * and mapped into the parent page table.
206  */
207 pt_entry_t	*kernel_lev1map;
208 
209 /*
210  * Virtual Page Table.
211  */
212 pt_entry_t	*VPT;
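
/*
 * Illustrative sketch only, never compiled: how a virtual address is
 * broken into level 1/2/3 indices for this three-level tree.  The real
 * macros are l1pte_index(), l2pte_index() and l3pte_index() from
 * <machine/pte.h>; the shift amounts below assume the usual 8KB page
 * (PGSHIFT = 13) with 1024 8-byte PTEs per page table page.
 */
#if 0
static void
lev123_index_sketch(vaddr_t va)
{
	u_long l3idx = (va >> 13) & 0x3ff;	/* selects one 8KB page  */
	u_long l2idx = (va >> 23) & 0x3ff;	/* selects one 8MB chunk */
	u_long l1idx = (va >> 33) & 0x3ff;	/* selects one 8GB chunk */

	/*
	 * Walking pm_lev1map[l1idx] -> level 2 page[l2idx] ->
	 * level 3 page[l3idx] yields the PTE for va.  The VPT declared
	 * just above maps all level 3 pages linearly, so
	 * &VPT[VPT_INDEX(va)] reaches the same PTE without touching
	 * the level 1 and level 2 tables.
	 */
}
#endif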
213 
214 struct pmap	kernel_pmap_store
215 	[(PMAP_SIZEOF(ALPHA_MAXPROCS) + sizeof(struct pmap) - 1)
216 		/ sizeof(struct pmap)];
217 
218 paddr_t    	avail_start;	/* PA of first available physical page */
219 paddr_t		avail_end;	/* PA of last available physical page */
220 vaddr_t		pmap_maxkvaddr;	/* VA of last avail page (pmap_growkernel) */
221 
222 boolean_t	pmap_initialized;	/* Has pmap_init completed? */
223 
224 u_long		pmap_pages_stolen;	/* instrumentation */
225 
226 /*
227  * This variable contains the number of CPU IDs we need to allocate
228  * space for when allocating the pmap structure.  It is used to
229  * size a per-CPU array of ASN and ASN Generation number.
230  * size the per-CPU array of ASN and ASN generation numbers.
231 u_long		pmap_ncpuids;
232 
233 #ifndef PMAP_PV_LOWAT
234 #define	PMAP_PV_LOWAT	16
235 #endif
236 int		pmap_pv_lowat = PMAP_PV_LOWAT;
237 
238 /*
239  * List of all pmaps, used to update them when e.g. additional kernel
240  * page tables are allocated.  This list is kept LRU-ordered by
241  * pmap_activate().
242  */
243 TAILQ_HEAD(, pmap) pmap_all_pmaps;
244 
245 /*
246  * The pools from which pmap structures and sub-structures are allocated.
247  */
248 struct pool pmap_pmap_pool;
249 struct pool pmap_l1pt_pool;
250 struct pool pmap_pv_pool;
251 
252 /*
253  * Address Space Numbers.
254  *
255  * On many implementations of the Alpha architecture, the TLB entries and
256  * I-cache blocks are tagged with a unique number within an implementation-
257  * specified range.  When a process context becomes active, the ASN is used
258  * to match TLB entries; if a TLB entry for a particular VA does not match
259  * the current ASN, it is ignored (one could think of the processor as
260  * having a collection of <max ASN> separate TLBs).  This allows operating
261  * system software to skip the TLB flush that would otherwise be necessary
262  * at context switch time.
263  *
264  * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
265  * causes TLB entries to match any ASN.  The PALcode also provides
266  * a TBI (Translation Buffer Invalidate) operation that flushes all
267  * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
268  * mappings, so that invalidation of all user mappings does not invalidate
269  * kernel mappings (which are consistent across all processes).
270  *
271  *	pma_asn always indicates the next ASN to use.  When
272  * pma_asn exceeds pmap_max_asn, we start a new ASN generation.
273  *
274  * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
275  * TLB entries and the I-cache are flushed, the generation number is bumped,
276  * and pma_asn is changed to indicate the first non-reserved ASN.
277  *
278  * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
279  * prevents the following scenario:
280  *
281  *	* New ASN generation starts, and process A is given ASN #0.
282  *
283  *	* A new process B (and thus new pmap) is created.  The ASN,
284  *	  for lack of a better value, is initialized to 0.
285  *
286  *	* Process B runs.  It is now using the TLB entries tagged
287  *	  by process A.  *poof*
288  *
289  * In the scenario above, in addition to the processor using incorrect
290  * TLB entries, the PALcode might use incorrect information to service a
291  * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
292  * to locate the PTE for a faulting address, and tagged TLB entries exist
293  * for the Virtual Page Table addresses in order to speed up this procedure,
294  * as well.)
295  *
296  * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
297  * new pmaps will initially run with no TLB entries for user addresses
298  * or VPT mappings that map user page tables.  Since kernel_lev1map only
299  * contains mappings for kernel addresses, and since those mappings
300  * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
301  * safe (since PG_ASM mappings match any ASN).
302  *
303  * On processors that do not support ASNs, the PALcode invalidates
304  * the TLB and I-cache automatically on swpctx.  We still go
305  * through the motions of assigning an ASN (really, just refreshing
306  * the ASN generation in this particular case) to keep the logic sane
307  * in other parts of the code.
308  */
309 u_int	pmap_max_asn;		/* max ASN supported by the system */
310 				/* next ASN and current ASN generation */
311 struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
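
/*
 * Illustrative sketch only, never compiled: the ASN allocation policy
 * described above, with ASN #0 (PMAP_ASN_RESERVED) held back for pmaps
 * that still reference kernel_lev1map.  The real logic, including the
 * no-ASN processor case, lives in pmap_asn_alloc() later in this file.
 */
#if 0
static void
asn_alloc_sketch(pmap_t pmap, cpuid_t cpu_id)
{
	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];

	if (++cpma->pma_asn > pmap_max_asn) {
		/*
		 * This generation is exhausted: flush all non-PG_ASM
		 * TLB entries and the I-cache, start a new generation,
		 * and resume numbering just past the reserved ASN.
		 */
		ALPHA_TBIAP();
		alpha_pal_imb();
		cpma->pma_asngen++;
		cpma->pma_asn = PMAP_ASN_RESERVED + 1;
	}
	pmap->pm_asni[cpu_id].pma_asn = cpma->pma_asn;
	pmap->pm_asni[cpu_id].pma_asngen = cpma->pma_asngen;
}
#endif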
312 
313 /*
314  * Locking:
315  *
316  *	* pm_mtx (per-pmap) - This lock protects all of the members
317  *	  of the pmap structure itself.
318  *
319  *	* pvh_mtx (per-page) - This lock protects the list of mappings
320  *	  of a (managed) physical page.
321  *
322  *	* pmap_all_pmaps_mtx - This lock protects the global list of
323  *	  all pmaps.  Note that a pmap's pm_mtx must never be held while this
324  *	  lock is held.
325  *
326  *	* pmap_growkernel_mtx - This lock protects pmap_growkernel()
327  *	  and the pmap_maxkvaddr variable.
328  *
329  *	  There is a lock ordering constraint for pmap_growkernel_mtx.
330  *	  pmap_growkernel() acquires the locks in the following order:
331  *
332  *		pmap_growkernel_mtx -> pmap_all_pmaps_mtx ->
333  *		    pmap->pm_mtx
334  *
335  *	Address space number management (global ASN counters and per-pmap
336  *	ASN state) is not locked; it uses arrays of values indexed
337  *	per-processor.
338  *
339  *	All internal functions which operate on a pmap are called
340  *	with the pmap already locked by the caller (which will be
341  *	an interface function).
342  */
343 struct mutex pmap_all_pmaps_mtx;
344 struct mutex pmap_growkernel_mtx;
345 
346 #define PMAP_LOCK(pmap)		mtx_enter(&pmap->pm_mtx)
347 #define PMAP_UNLOCK(pmap)	mtx_leave(&pmap->pm_mtx)
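
/*
 * Illustrative sketch only, never compiled: the lock ordering
 * documented above, as pmap_growkernel() would follow it when it has
 * to add new kernel page table pages to every pmap on the system.
 */
#if 0
static void
lock_order_sketch(void)
{
	pmap_t pm;

	mtx_enter(&pmap_growkernel_mtx);
	mtx_enter(&pmap_all_pmaps_mtx);
	TAILQ_FOREACH(pm, &pmap_all_pmaps, pm_list) {
		PMAP_LOCK(pm);
		/* ... install the new kernel level 2/3 entries ... */
		PMAP_UNLOCK(pm);
	}
	mtx_leave(&pmap_all_pmaps_mtx);
	mtx_leave(&pmap_growkernel_mtx);
}
#endif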
348 
349 #if defined(MULTIPROCESSOR)
350 /*
351  * TLB Shootdown:
352  *
353  * When a mapping is changed in a pmap, the TLB entry corresponding to
354  * the virtual address must be invalidated on all processors.  In order
355  * to accomplish this on systems with multiple processors, messages are
356  * sent from the processor which performs the mapping change to all
357  * processors on which the pmap is active.  For other processors, the
358  * ASN generation number for that processor is invalidated, so that
359  * the next time the pmap is activated on that processor, a new ASN
360  * will be allocated (which implicitly invalidates all TLB entries).
361  *
362  * Note, we can use the pool allocator to allocate job entries
363  * since pool pages are mapped with K0SEG, not with the TLB.
364  */
365 struct pmap_tlb_shootdown_job {
366 	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
367 	vaddr_t pj_va;			/* virtual address */
368 	pmap_t pj_pmap;			/* the pmap which maps the address */
369 	pt_entry_t pj_pte;		/* the PTE bits */
370 };
371 
372 /* If we have more pending jobs than this, we just nail the whole TLB. */
373 #define	PMAP_TLB_SHOOTDOWN_MAXJOBS	6
374 
375 struct pmap_tlb_shootdown_q {
376 	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
377 	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_free;
378 	int pq_pte;			/* aggregate low PTE bits */
379 	int pq_tbia;			/* pending global flush */
380 	struct mutex pq_mtx;		/* queue lock */
381 	struct pmap_tlb_shootdown_job pq_jobs[PMAP_TLB_SHOOTDOWN_MAXJOBS];
382 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS];
383 
384 #define	PSJQ_LOCK(pq, s)	mtx_enter(&(pq)->pq_mtx)
385 #define	PSJQ_UNLOCK(pq, s)	mtx_leave(&(pq)->pq_mtx)
386 
387 void	pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
388 struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get
389 	    (struct pmap_tlb_shootdown_q *);
390 void	pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
391 	    struct pmap_tlb_shootdown_job *);
392 #endif /* MULTIPROCESSOR */
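
/*
 * Illustrative sketch only, never compiled: how a mapping change on
 * one processor typically becomes shootdown work for another.  The
 * real enqueueing is done by the PMAP_TLB_SHOOTDOWN machinery and the
 * queues are drained by pmap_tlb_shootdown_q_drain(); "pq" stands for
 * the target processor's queue.
 */
#if 0
static void
tlb_shootdown_enqueue_sketch(struct pmap_tlb_shootdown_q *pq,
    pmap_t pmap, vaddr_t va, pt_entry_t pte)
{
	struct pmap_tlb_shootdown_job *pj;

	mtx_enter(&pq->pq_mtx);
	pj = TAILQ_FIRST(&pq->pq_free);
	if (pj == NULL) {
		/* No free job slots left: fall back to a full flush. */
		pq->pq_tbia = 1;
	} else {
		TAILQ_REMOVE(&pq->pq_free, pj, pj_list);
		pj->pj_pmap = pmap;
		pj->pj_va = va;
		pj->pj_pte = pte;
		TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
		pq->pq_pte |= pte & PG_ASM;	/* aggregate low PTE bits */
	}
	mtx_leave(&pq->pq_mtx);
	/* An IPI then asks the target processor to drain its queue. */
}
#endif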
393 
394 #define	PAGE_IS_MANAGED(pa)	(vm_physseg_find(atop(pa), NULL) != -1)
395 
396 /*
397  * Internal routines
398  */
399 void	alpha_protection_init(void);
400 void	pmap_do_remove(pmap_t, vaddr_t, vaddr_t, boolean_t);
401 boolean_t pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *,
402 	    boolean_t, cpuid_t);
403 void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, cpuid_t);
404 
405 /*
406  * PT page management functions.
407  */
408 int	pmap_lev1map_create(pmap_t, cpuid_t);
409 void	pmap_lev1map_destroy(pmap_t);
410 int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
411 void	pmap_ptpage_free(pmap_t, pt_entry_t *);
412 void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, cpuid_t);
413 void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *);
414 void	pmap_l1pt_delref(pmap_t, pt_entry_t *);
415 
416 void	*pmap_l1pt_alloc(struct pool *, int, int *);
417 void	pmap_l1pt_free(struct pool *, void *);
418 
419 struct pool_allocator pmap_l1pt_allocator = {
420 	pmap_l1pt_alloc, pmap_l1pt_free, 0,
421 };
422 
423 void	pmap_l1pt_ctor(pt_entry_t *);
424 
425 /*
426  * PV table management functions.
427  */
428 int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
429 	    boolean_t);
430 void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, boolean_t);
431 void	*pmap_pv_page_alloc(struct pool *, int, int *);
432 void	pmap_pv_page_free(struct pool *, void *);
433 
434 struct pool_allocator pmap_pv_page_allocator = {
435 	pmap_pv_page_alloc, pmap_pv_page_free, 0,
436 };
437 
438 #ifdef DEBUG
439 void	pmap_pv_dump(paddr_t);
440 #endif
441 
442 #define	pmap_pv_alloc()		pool_get(&pmap_pv_pool, PR_NOWAIT)
443 #define	pmap_pv_free(pv)	pool_put(&pmap_pv_pool, (pv))
444 
445 /*
446  * ASN management functions.
447  */
448 void	pmap_asn_alloc(pmap_t, cpuid_t);
449 
450 /*
451  * Misc. functions.
452  */
453 boolean_t pmap_physpage_alloc(int, paddr_t *);
454 void	pmap_physpage_free(paddr_t);
455 int	pmap_physpage_addref(void *);
456 int	pmap_physpage_delref(void *);
457 
458 /* pmap_physpage_alloc() page usage */
459 #define	PGU_NORMAL		0		/* free or normal use */
460 #define	PGU_PVENT		1		/* PV entries */
461 #define	PGU_L1PT		2		/* level 1 page table */
462 #define	PGU_L2PT		3		/* level 2 page table */
463 #define	PGU_L3PT		4		/* level 3 page table */
464 
465 /*
466  * PMAP_ISACTIVE{,_TEST}:
467  *
468  *	Check to see if a pmap is active on the current processor.
469  */
470 #define	PMAP_ISACTIVE_TEST(pm, cpu_id)					\
471 	(((pm)->pm_cpus & (1UL << (cpu_id))) != 0)
472 
473 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
474 #define	PMAP_ISACTIVE(pm, cpu_id)					\
475 ({									\
476 	/*								\
477 	 * XXX This test is not MP-safe.				\
478 	 */								\
479 	int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id);			\
480 									\
481 	if (curproc != NULL && curproc->p_vmspace != NULL &&		\
482 	    (pm) != pmap_kernel() &&					\
483 	    (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap)))	\
484 		panic("PMAP_ISACTIVE, isa: %d pm: %p curpm:%p",		\
485 		    isactive_, (pm), curproc->p_vmspace->vm_map.pmap);	\
486 	(isactive_);							\
487 })
488 #else
489 #define	PMAP_ISACTIVE(pm, cpu_id)	PMAP_ISACTIVE_TEST(pm, cpu_id)
490 #endif /* DEBUG && !MULTIPROCESSOR */
491 
492 /*
493  * PMAP_ACTIVATE_ASN_SANITY:
494  *
495  *	DEBUG sanity checks for ASNs within PMAP_ACTIVATE.
496  */
497 #ifdef DEBUG
498 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)				\
499 do {									\
500 	struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)];	\
501 	struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)];	\
502 									\
503 	if ((pmap)->pm_lev1map == kernel_lev1map) {			\
504 		/*							\
505 		 * This pmap implementation also ensures that pmaps	\
506 		 * referencing kernel_lev1map use a reserved ASN to	\
507 		 * prevent the PALcode from servicing a TLB miss with	\
508 		 * the wrong PTE.					\
509 		 */							\
510 		if (__pma->pma_asn != PMAP_ASN_RESERVED) {		\
511 			printf("kernel_lev1map with non-reserved ASN "	\
512 			    "(line %d)\n", __LINE__);			\
513 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
514 		}							\
515 	} else {							\
516 		if (__pma->pma_asngen != __cpma->pma_asngen) {		\
517 			/*						\
518 			 * ASN generation number isn't valid!		\
519 			 */						\
520 			printf("pmap asngen %lu, current %lu "		\
521 			    "(line %d)\n",				\
522 			    __pma->pma_asngen, 				\
523 			    __cpma->pma_asngen, 			\
524 			    __LINE__);					\
525 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
526 		}							\
527 		if (__pma->pma_asn == PMAP_ASN_RESERVED) {		\
528 			/*						\
529 			 * DANGER WILL ROBINSON!  We're going to	\
530 			 * pollute the VPT TLB entries!			\
531 			 */						\
532 			printf("Using reserved ASN! (line %d)\n",	\
533 			    __LINE__);					\
534 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
535 		}							\
536 	}								\
537 } while (0)
538 #else
539 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)	/* nothing */
540 #endif
541 
542 /*
543  * PMAP_ACTIVATE:
544  *
545  *	This is essentially the guts of pmap_activate(), without
546  *	ASN allocation.  This is used by pmap_activate(),
547  *	pmap_lev1map_create(), and pmap_lev1map_destroy().
548  *
549  *	This is called only when it is known that a pmap is "active"
550  *	on the current processor; the ASN must already be valid.
551  */
552 #define	PMAP_ACTIVATE(pmap, p, cpu_id)					\
553 do {									\
554 	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id);				\
555 									\
556 	(p)->p_addr->u_pcb.pcb_hw.apcb_ptbr =				\
557 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \
558 	(p)->p_addr->u_pcb.pcb_hw.apcb_asn =				\
559 	    (pmap)->pm_asni[(cpu_id)].pma_asn;				\
560 									\
561 	if ((p) == curproc) {						\
562 		/*							\
563 		 * Page table base register has changed; switch to	\
564 		 * our own context again so that it will take effect.	\
565 		 */							\
566 		(void) alpha_pal_swpctx((u_long)p->p_md.md_pcbpaddr);	\
567 	}								\
568 } while (0)
569 
570 /*
571  * PMAP_SET_NEEDISYNC:
572  *
573  *	Mark that a user pmap needs an I-stream synch on its
574  *	way back out to userspace.
575  */
576 #define	PMAP_SET_NEEDISYNC(pmap)	(pmap)->pm_needisync = ~0UL
577 
578 /*
579  * PMAP_SYNC_ISTREAM:
580  *
581  *	Synchronize the I-stream for the specified pmap.  For user
582  *	pmaps, this is deferred until a process using the pmap returns
583  *	to userspace.
584  */
585 #if defined(MULTIPROCESSOR)
586 #define	PMAP_SYNC_ISTREAM_KERNEL()					\
587 do {									\
588 	alpha_pal_imb();						\
589 	alpha_broadcast_ipi(ALPHA_IPI_IMB);				\
590 } while (0)
591 
592 #define	PMAP_SYNC_ISTREAM_USER(pmap)					\
593 do {									\
594 	alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST);		\
595 	/* for curcpu, do it before userret() */			\
596 } while (0)
597 #else
598 #define	PMAP_SYNC_ISTREAM_KERNEL()	alpha_pal_imb()
599 #define	PMAP_SYNC_ISTREAM_USER(pmap)	/* done before userret() */
600 #endif /* MULTIPROCESSOR */
601 
602 #define	PMAP_SYNC_ISTREAM(pmap)						\
603 do {									\
604 	if ((pmap) == pmap_kernel())					\
605 		PMAP_SYNC_ISTREAM_KERNEL();				\
606 	else								\
607 		PMAP_SYNC_ISTREAM_USER(pmap);				\
608 } while (0)
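
/*
 * Illustrative sketch only, never compiled: how the deferred user
 * I-stream sync requested by PMAP_SET_NEEDISYNC above is meant to be
 * consumed on the way back out to userspace; the actual check lives
 * in the machine-dependent AST/userret path.
 */
#if 0
	if (pmap->pm_needisync & (1UL << cpu_id)) {
		pmap->pm_needisync &= ~(1UL << cpu_id);
		alpha_pal_imb();	/* synchronize this CPU's I-stream */
	}
#endif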
609 
610 /*
611  * PMAP_INVALIDATE_ASN:
612  *
613  *	Invalidate the specified pmap's ASN, so as to force allocation
614  *	of a new one the next time pmap_asn_alloc() is called.
615  *
616  *	NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING
617  *	CONDITIONS ARE TRUE:
618  *
619  *		(1) The pmap references the global kernel_lev1map.
620  *
621  *		(2) The pmap is not active on the current processor.
622  */
623 #define	PMAP_INVALIDATE_ASN(pmap, cpu_id)				\
624 do {									\
625 	(pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED;		\
626 } while (0)
627 
628 /*
629  * PMAP_INVALIDATE_TLB:
630  *
631  *	Invalidate the TLB entry for the pmap/va pair.
632  */
633 #define	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id)		\
634 do {									\
635 	if ((hadasm) || (isactive)) {					\
636 		/*							\
637 		 * Simply invalidating the TLB entry and I-cache	\
638 		 * works in this case.					\
639 		 */							\
640 		ALPHA_TBIS((va));					\
641 	} else if ((pmap)->pm_asni[(cpu_id)].pma_asngen ==		\
642 		    pmap_asn_info[(cpu_id)].pma_asngen) {		\
643 		/*							\
644 		 * We can't directly invalidate the TLB entry		\
645 		 * in this case, so we have to force allocation		\
646 		 * of a new ASN the next time this pmap becomes		\
647 		 * active.						\
648 		 */							\
649 		PMAP_INVALIDATE_ASN((pmap), (cpu_id));			\
650 	}								\
651 		/*							\
652 		 * Nothing to do in this case; the next time the	\
653 		 * pmap becomes active on this processor, a new		\
654 		 * ASN will be allocated anyway.			\
655 		 */							\
656 } while (0)
657 
658 /*
659  * PMAP_KERNEL_PTE:
660  *
661  *	Get a kernel PTE.
662  *
663  *	If debugging, do a table walk.  If not debugging, just use
664  *	the Virtual Page Table, since all kernel page tables are
665  *	pre-allocated and mapped in.
666  */
667 #ifdef DEBUG
668 #define	PMAP_KERNEL_PTE(va)						\
669 ({									\
670 	pt_entry_t *l1pte_, *l2pte_;					\
671 									\
672 	l1pte_ = pmap_l1pte(pmap_kernel(), va);				\
673 	if (pmap_pte_v(l1pte_) == 0) {					\
674 		printf("kernel level 1 PTE not valid, va 0x%lx "	\
675 		    "(line %d)\n", (va), __LINE__);			\
676 		panic("PMAP_KERNEL_PTE");				\
677 	}								\
678 	l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_);			\
679 	if (pmap_pte_v(l2pte_) == 0) {					\
680 		printf("kernel level 2 PTE not valid, va 0x%lx "	\
681 		    "(line %d)\n", (va), __LINE__);			\
682 		panic("PMAP_KERNEL_PTE");				\
683 	}								\
684 	pmap_l3pte(pmap_kernel(), va, l2pte_);				\
685 })
686 #else
687 #define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
688 #endif
689 
690 /*
691  * PMAP_SET_PTE:
692  *
693  *	Set a PTE to a specified value.
694  */
695 #define	PMAP_SET_PTE(ptep, val)	*(ptep) = (val)
696 
697 /*
698  * PMAP_STAT_{INCR,DECR}:
699  *
700  *	Increment or decrement a pmap statistic.
701  */
702 #define	PMAP_STAT_INCR(s, v)	atomic_add_ulong((unsigned long *)(&(s)), (v))
703 #define	PMAP_STAT_DECR(s, v)	atomic_sub_ulong((unsigned long *)(&(s)), (v))
704 
705 /*
706  * pmap_bootstrap:
707  *
708  *	Bootstrap the system to run with virtual memory.
709  *
710  *	Note: no locking is necessary in this function.
711  */
712 void
713 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
714 {
715 	vsize_t lev2mapsize, lev3mapsize;
716 	pt_entry_t *lev2map, *lev3map;
717 	pt_entry_t pte;
718 	int i;
719 #ifdef MULTIPROCESSOR
720 	int j;
721 #endif
722 
723 #ifdef DEBUG
724 	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
725 		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
726 #endif
727 
728 	/*
729 	 * Compute the number of pages kmem_map will have.
730 	 */
731 	kmeminit_nkmempages();
732 
733 	/*
734 	 * Figure out how many PTEs are necessary to map the kernel.
735 	 */
736 	lev3mapsize = (VM_PHYS_SIZE + 16 * NCARGS + PAGER_MAP_SIZE) /
737 	    PAGE_SIZE + (maxthread * UPAGES) + nkmempages;
738 
739 #ifdef SYSVSHM
740 	lev3mapsize += shminfo.shmall;
741 #endif
742 	lev3mapsize = roundup(lev3mapsize, NPTEPG);
743 
744 	/*
745 	 * Allocate a level 1 PTE table for the kernel.
746 	 * This is always one page long.
747 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
748 	 */
749 	kernel_lev1map = (pt_entry_t *)
750 	    pmap_steal_memory(sizeof(pt_entry_t) * NPTEPG, NULL, NULL);
751 
752 	/*
753 	 * Allocate a level 2 PTE table for the kernel.
754 	 * These must map all of the level3 PTEs.
755 	 * IF THIS IS NOT A MULTIPLE OF PAGE_SIZE, ALL WILL GO TO HELL.
756 	 */
757 	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
758 	lev2map = (pt_entry_t *)
759 	    pmap_steal_memory(sizeof(pt_entry_t) * lev2mapsize, NULL, NULL);
760 
761 	/*
762 	 * Allocate a level 3 PTE table for the kernel.
763 	 * Contains lev3mapsize PTEs.
764 	 */
765 	lev3map = (pt_entry_t *)
766 	    pmap_steal_memory(sizeof(pt_entry_t) * lev3mapsize, NULL, NULL);
767 
768 	/*
769 	 * Set up level 1 page table
770 	 */
771 
772 	/* Map all of the level 2 pte pages */
773 	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
774 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
775 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
776 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
777 		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
778 		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
779 	}
780 
781 	/* Map the virtual page table */
782 	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
783 	    << PG_SHIFT;
784 	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
785 	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
786 	VPT = (pt_entry_t *)VPTBASE;
787 
788 	/*
789 	 * Set up level 2 page table.
790 	 */
791 	/* Map all of the level 3 pte pages */
792 	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
793 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
794 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
795 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
796 		lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+
797 		    (i*PAGE_SIZE*NPTEPG))] = pte;
798 	}
799 
800 	/* Initialize the pmap_growkernel_mtx. */
801 	mtx_init(&pmap_growkernel_mtx, IPL_NONE);
802 
803 	/*
804 	 * Set up level three page table (lev3map)
805 	 */
806 	/* Nothing to do; it's already zeroed */
807 
808 	/*
809 	 * Initialize `FYI' variables.  Note we're relying on
810 	 * the fact that BSEARCH sorts the vm_physmem[] array
811 	 * for us.
812 	 */
813 	avail_start = ptoa(vm_physmem[0].start);
814 	avail_end = ptoa(vm_physmem[vm_nphysseg - 1].end);
815 
816 	pmap_maxkvaddr = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
817 
818 #if 0
819 	printf("avail_start = 0x%lx\n", avail_start);
820 	printf("avail_end = 0x%lx\n", avail_end);
821 #endif
822 
823 	/*
824 	 * Initialize the pmap pools and list.
825 	 */
826 	pmap_ncpuids = ncpuids;
827 	pool_init(&pmap_pmap_pool, PMAP_SIZEOF(pmap_ncpuids), 0, IPL_NONE, 0,
828 	    "pmappl", &pool_allocator_single);
829 	pool_init(&pmap_l1pt_pool, PAGE_SIZE, 0, IPL_VM, 0,
830 	    "l1ptpl", &pmap_l1pt_allocator);
831 	pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, IPL_VM, 0,
832 	    "pvpl", &pmap_pv_page_allocator);
833 
834 	TAILQ_INIT(&pmap_all_pmaps);
835 
836 	/*
837 	 * Initialize the ASN logic.
838 	 */
839 	pmap_max_asn = maxasn;
840 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
841 		pmap_asn_info[i].pma_asn = 1;
842 		pmap_asn_info[i].pma_asngen = 0;
843 	}
844 
845 	/*
846 	 * Initialize the locks.
847 	 */
848 	mtx_init(&pmap_all_pmaps_mtx, IPL_NONE);
849 
850 	/*
851 	 * Initialize kernel pmap.  Note that all kernel mappings
852 	 * have PG_ASM set, so the ASN doesn't really matter for
853 	 * the kernel pmap.  Also, since the kernel pmap always
854 	 * references kernel_lev1map, it always has an invalid ASN
855 	 * generation.
856 	 */
857 	memset(pmap_kernel(), 0, sizeof(struct pmap));
858 	pmap_kernel()->pm_lev1map = kernel_lev1map;
859 	pmap_kernel()->pm_count = 1;
860 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
861 		pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
862 		pmap_kernel()->pm_asni[i].pma_asngen =
863 		    pmap_asn_info[i].pma_asngen;
864 	}
865 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
866 	mtx_init(&pmap_kernel()->pm_mtx, IPL_VM);
867 
868 #if defined(MULTIPROCESSOR)
869 	/*
870 	 * Initialize the TLB shootdown queues.
871 	 */
872 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
873 		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
874 		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_free);
875 		for (j = 0; j < PMAP_TLB_SHOOTDOWN_MAXJOBS; j++)
876 			TAILQ_INSERT_TAIL(&pmap_tlb_shootdown_q[i].pq_free,
877 			    &pmap_tlb_shootdown_q[i].pq_jobs[j], pj_list);
878 		mtx_init(&pmap_tlb_shootdown_q[i].pq_mtx, IPL_IPI);
879 	}
880 #endif
881 
882 	/*
883 	 * Set up proc0's PCB such that the ptbr points to the right place
884 	 * and has the kernel pmap's (really unused) ASN.
885 	 */
886 	proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr =
887 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
888 	proc0.p_addr->u_pcb.pcb_hw.apcb_asn =
889 	    pmap_kernel()->pm_asni[cpu_number()].pma_asn;
890 
891 	/*
892 	 * Mark the kernel pmap `active' on this processor.
893 	 */
894 	atomic_setbits_ulong(&pmap_kernel()->pm_cpus,
895 	    (1UL << cpu_number()));
896 }
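
/*
 * Worked example of the sizing arithmetic used in pmap_bootstrap()
 * above, assuming the usual 8KB page (PAGE_SIZE = 8192) and 8-byte
 * PTEs, so that NPTEPG = 8192 / 8 = 1024:
 *
 *	one level 3 PTE maps PAGE_SIZE		=  8KB
 *	one level 2 PTE maps NPTEPG * 8KB	=  8MB
 *	one level 1 PTE maps NPTEPG * 8MB	=  8GB
 *
 * which is why the level 1 loop above steps its virtual address by
 * PAGE_SIZE * NPTEPG * NPTEPG per level 2 page table page, and the
 * level 2 loop steps by PAGE_SIZE * NPTEPG per level 3 page table
 * page.
 */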
897 
898 /*
899  * pmap_steal_memory:		[ INTERFACE ]
900  *
901  *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
902  *	This function allows for early dynamic memory allocation until the
903  *	virtual memory system has been bootstrapped.  After that point, either
904  *	kmem_alloc or malloc should be used.  This function works by stealing
905  *	pages from the (to be) managed page pool, then implicitly mapping the
906  *	pages (by using their k0seg addresses) and zeroing them.
907  *
908  *	It may be used once the physical memory segments have been pre-loaded
909  *	into the vm_physmem[] array.  Early memory allocation MUST use this
910  *	interface!  This cannot be used after vm_page_startup(), and will
911  *	generate a panic if tried.
912  *
913  *	Note that this memory will never be freed, and in essence it is wired
914  *	down.
915  *
916  *	Note: no locking is necessary in this function.
917  */
918 vaddr_t
919 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
920 {
921 	int bank, npgs, x;
922 	vaddr_t va;
923 	paddr_t pa;
924 
925 	size = round_page(size);
926 	npgs = atop(size);
927 
928 #if 0
929 	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
930 #endif
931 
932 	for (bank = 0; bank < vm_nphysseg; bank++) {
933 		if (uvm.page_init_done == TRUE)
934 			panic("pmap_steal_memory: called _after_ bootstrap");
935 
936 #if 0
937 		printf("     bank %d: avail_start 0x%lx, start 0x%lx, "
938 		    "avail_end 0x%lx\n", bank, vm_physmem[bank].avail_start,
939 		    vm_physmem[bank].start, vm_physmem[bank].avail_end);
940 #endif
941 
942 		if (vm_physmem[bank].avail_start != vm_physmem[bank].start ||
943 		    vm_physmem[bank].avail_start >= vm_physmem[bank].avail_end)
944 			continue;
945 
946 #if 0
947 		printf("             avail_end - avail_start = 0x%lx\n",
948 		    vm_physmem[bank].avail_end - vm_physmem[bank].avail_start);
949 #endif
950 
951 		if ((vm_physmem[bank].avail_end - vm_physmem[bank].avail_start)
952 		    < npgs)
953 			continue;
954 
955 		/*
956 		 * There are enough pages here; steal them!
957 		 */
958 		pa = ptoa(vm_physmem[bank].avail_start);
959 		vm_physmem[bank].avail_start += npgs;
960 		vm_physmem[bank].start += npgs;
961 
962 		/*
963 		 * Have we used up this segment?
964 		 */
965 		if (vm_physmem[bank].avail_start == vm_physmem[bank].end) {
966 			if (vm_nphysseg == 1)
967 				panic("pmap_steal_memory: out of memory!");
968 
969 			/* Remove this segment from the list. */
970 			vm_nphysseg--;
971 			for (x = bank; x < vm_nphysseg; x++) {
972 				/* structure copy */
973 				vm_physmem[x] = vm_physmem[x + 1];
974 			}
975 		}
976 
977 		/*
978 		 * Fill these in for the caller; we don't modify them,
979 		 * but the upper layers still want to know.
980 		 */
981 		if (vstartp)
982 			*vstartp = VM_MIN_KERNEL_ADDRESS;
983 		if (vendp)
984 			*vendp = VM_MAX_KERNEL_ADDRESS;
985 
986 		va = ALPHA_PHYS_TO_K0SEG(pa);
987 		memset((caddr_t)va, 0, size);
988 		pmap_pages_stolen += npgs;
989 		return (va);
990 	}
991 
992 	/*
993 	 * If we got here, there was no memory left.
994 	 */
995 	panic("pmap_steal_memory: no memory to steal");
996 }
997 
998 /*
999  * pmap_init:			[ INTERFACE ]
1000  *
1001  *	Initialize the pmap module.  Called by uvm_init(), to initialize any
1002  *	structures that the pmap system needs to map virtual memory.
1003  *
1004  *	Note: no locking is necessary in this function.
1005  */
1006 void
1007 pmap_init(void)
1008 {
1009 
1010 #ifdef DEBUG
1011         if (pmapdebug & PDB_FOLLOW)
1012                 printf("pmap_init()\n");
1013 #endif
1014 
1015 	/* initialize protection array */
1016 	alpha_protection_init();
1017 
1018 	/*
1019 	 * Set a low water mark on the pv_entry pool, so that we are
1020 	 * more likely to have these around even in extreme memory
1021 	 * starvation.
1022 	 */
1023 	pool_setlowat(&pmap_pv_pool, pmap_pv_lowat);
1024 
1025 	/*
1026 	 * Now it is safe to enable pv entry recording.
1027 	 */
1028 	pmap_initialized = TRUE;
1029 
1030 #if 0
1031 	for (bank = 0; bank < vm_nphysseg; bank++) {
1032 		printf("bank %d\n", bank);
1033 		printf("\tstart = 0x%x\n", ptoa(vm_physmem[bank].start));
1034 		printf("\tend = 0x%x\n", ptoa(vm_physmem[bank].end));
1035 		printf("\tavail_start = 0x%x\n",
1036 		    ptoa(vm_physmem[bank].avail_start));
1037 		printf("\tavail_end = 0x%x\n",
1038 		    ptoa(vm_physmem[bank].avail_end));
1039 	}
1040 #endif
1041 }
1042 
1043 /*
1044  * pmap_create:			[ INTERFACE ]
1045  *
1046  *	Create and return a physical map.
1047  */
1048 pmap_t
1049 pmap_create(void)
1050 {
1051 	pmap_t pmap;
1052 	int i;
1053 
1054 #ifdef DEBUG
1055 	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
1056 		printf("pmap_create()\n");
1057 #endif
1058 
1059 	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK|PR_ZERO);
1060 
1061 	pmap->pm_count = 1;
1062 	for (i = 0; i < pmap_ncpuids; i++) {
1063 		pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
1064 		/* XXX Locking? */
1065 		pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
1066 	}
1067 	mtx_init(&pmap->pm_mtx, IPL_VM);
1068 
1069 	for (;;) {
1070 		mtx_enter(&pmap_growkernel_mtx);
1071 		i = pmap_lev1map_create(pmap, cpu_number());
1072 		mtx_leave(&pmap_growkernel_mtx);
1073 		if (i == 0)
1074 			break;
1075 		uvm_wait(__func__);
1076 	}
1077 
1078 	mtx_enter(&pmap_all_pmaps_mtx);
1079 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
1080 	mtx_leave(&pmap_all_pmaps_mtx);
1081 
1082 	return (pmap);
1083 }
1084 
1085 /*
1086  * pmap_destroy:		[ INTERFACE ]
1087  *
1088  *	Drop the reference count on the specified pmap, releasing
1089  *	all resources if the reference count drops to zero.
1090  */
1091 void
1092 pmap_destroy(pmap_t pmap)
1093 {
1094 	int refs;
1095 
1096 #ifdef DEBUG
1097 	if (pmapdebug & PDB_FOLLOW)
1098 		printf("pmap_destroy(%p)\n", pmap);
1099 #endif
1100 
1101 	refs = atomic_dec_int_nv(&pmap->pm_count);
1102 	if (refs > 0)
1103 		return;
1104 
1105 	/*
1106 	 * Remove it from the global list of all pmaps.
1107 	 */
1108 	mtx_enter(&pmap_all_pmaps_mtx);
1109 	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
1110 	mtx_leave(&pmap_all_pmaps_mtx);
1111 
1112 	mtx_enter(&pmap_growkernel_mtx);
1113 	pmap_lev1map_destroy(pmap);
1114 	mtx_leave(&pmap_growkernel_mtx);
1115 
1116 	pool_put(&pmap_pmap_pool, pmap);
1117 }
1118 
1119 /*
1120  * pmap_reference:		[ INTERFACE ]
1121  *
1122  *	Add a reference to the specified pmap.
1123  */
1124 void
1125 pmap_reference(pmap_t pmap)
1126 {
1127 
1128 #ifdef DEBUG
1129 	if (pmapdebug & PDB_FOLLOW)
1130 		printf("pmap_reference(%p)\n", pmap);
1131 #endif
1132 
1133 	atomic_inc_int(&pmap->pm_count);
1134 }
1135 
1136 /*
1137  * pmap_remove:			[ INTERFACE ]
1138  *
1139  *	Remove the given range of addresses from the specified map.
1140  *
1141  *	It is assumed that the start and end are properly
1142  *	rounded to the page size.
1143  */
1144 void
1145 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1146 {
1147 
1148 #ifdef DEBUG
1149 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1150 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1151 #endif
1152 
1153 	pmap_do_remove(pmap, sva, eva, TRUE);
1154 }
1155 
1156 /*
1157  * pmap_do_remove:
1158  *
1159  *	This actually removes the range of addresses from the
1160  *	specified map.  It is used by pmap_collect() (does not
1161  *	want to remove wired mappings) and pmap_remove() (does
1162  *	want to remove wired mappings).
1163  */
1164 void
1165 pmap_do_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, boolean_t dowired)
1166 {
1167 	pt_entry_t *l1pte, *l2pte, *l3pte;
1168 	pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte;
1169 	vaddr_t l1eva, l2eva, vptva;
1170 	boolean_t needisync = FALSE;
1171 	cpuid_t cpu_id = cpu_number();
1172 
1173 #ifdef DEBUG
1174 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1175 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1176 #endif
1177 
1178 	/*
1179 	 * If this is the kernel pmap, we can use a faster method
1180 	 * for accessing the PTEs (since the PT pages are always
1181 	 * resident).
1182 	 *
1183 	 * Note that this routine should NEVER be called from an
1184 	 * interrupt context; pmap_kremove() is used for that.
1185 	 */
1186 	if (pmap == pmap_kernel()) {
1187 		PMAP_LOCK(pmap);
1188 
1189 		KASSERT(dowired == TRUE);
1190 
1191 		while (sva < eva) {
1192 			l3pte = PMAP_KERNEL_PTE(sva);
1193 			if (pmap_pte_v(l3pte)) {
1194 #ifdef DIAGNOSTIC
1195 				if (PAGE_IS_MANAGED(pmap_pte_pa(l3pte)) &&
1196 				    pmap_pte_pv(l3pte) == 0)
1197 					panic("pmap_remove: managed page "
1198 					    "without PG_PVLIST for 0x%lx",
1199 					    sva);
1200 #endif
1201 				needisync |= pmap_remove_mapping(pmap, sva,
1202 				    l3pte, TRUE, cpu_id);
1203 			}
1204 			sva += PAGE_SIZE;
1205 		}
1206 
1207 		PMAP_UNLOCK(pmap);
1208 
1209 		if (needisync)
1210 			PMAP_SYNC_ISTREAM_KERNEL();
1211 		return;
1212 	}
1213 
1214 #ifdef DIAGNOSTIC
1215 	if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS)
1216 		panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel "
1217 		    "address range", sva, eva);
1218 #endif
1219 
1220 	PMAP_LOCK(pmap);
1221 
1222 	/*
1223 	 * If we're already referencing the kernel_lev1map, there
1224 	 * is no work for us to do.
1225 	 */
1226 	if (pmap->pm_lev1map == kernel_lev1map)
1227 		goto out;
1228 
1229 	saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
1230 
1231 	/*
1232 	 * Add a reference to the L1 table so it won't get
1233 	 * removed from under us.
1234 	 */
1235 	pmap_physpage_addref(saved_l1pte);
1236 
1237 	for (; sva < eva; sva = l1eva, l1pte++) {
1238 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1239 		if (pmap_pte_v(l1pte)) {
1240 			saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte);
1241 
1242 			/*
1243 			 * Add a reference to the L2 table so it won't
1244 			 * get removed from under us.
1245 			 */
1246 			pmap_physpage_addref(saved_l2pte);
1247 
1248 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1249 				l2eva =
1250 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1251 				if (pmap_pte_v(l2pte)) {
1252 					saved_l3pte = l3pte =
1253 					    pmap_l3pte(pmap, sva, l2pte);
1254 
1255 					/*
1256 					 * Add a reference to the L3 table so
1257 					 * it won't get removed from under us.
1258 					 */
1259 					pmap_physpage_addref(saved_l3pte);
1260 
1261 					/*
1262 					 * Remember this sva; if the L3 table
1263 					 * gets removed, we need to invalidate
1264 					 * the VPT TLB entry for it.
1265 					 */
1266 					vptva = sva;
1267 
1268 					for (; sva < l2eva && sva < eva;
1269 					     sva += PAGE_SIZE, l3pte++) {
1270 						if (pmap_pte_v(l3pte) &&
1271 						    (dowired == TRUE ||
1272 						     pmap_pte_w(l3pte) == 0)) {
1273 							needisync |=
1274 							    pmap_remove_mapping(
1275 								pmap, sva,
1276 								l3pte, TRUE,
1277 								cpu_id);
1278 						}
1279 					}
1280 
1281 					/*
1282 					 * Remove the reference to the L3
1283 					 * table that we added above.  This
1284 					 * may free the L3 table.
1285 					 */
1286 					pmap_l3pt_delref(pmap, vptva,
1287 					    saved_l3pte, cpu_id);
1288 				}
1289 			}
1290 
1291 			/*
1292 			 * Remove the reference to the L2 table that we
1293 			 * added above.  This may free the L2 table.
1294 			 */
1295 			pmap_l2pt_delref(pmap, l1pte, saved_l2pte);
1296 		}
1297 	}
1298 
1299 	/*
1300 	 * Remove the reference to the L1 table that we added above.
1301 	 * This may free the L1 table.
1302 	 */
1303 	pmap_l1pt_delref(pmap, saved_l1pte);
1304 
1305 	if (needisync)
1306 		PMAP_SYNC_ISTREAM_USER(pmap);
1307 
1308  out:
1309 	PMAP_UNLOCK(pmap);
1310 }
1311 
1312 /*
1313  * pmap_page_protect:		[ INTERFACE ]
1314  *
1315  *	Lower the permission for all mappings to a given page to
1316  *	the permissions specified.
1317  */
1318 void
1319 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1320 {
1321 	pmap_t pmap;
1322 	pv_entry_t pv;
1323 	boolean_t needkisync = FALSE;
1324 	cpuid_t cpu_id = cpu_number();
1325 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1326 
1327 #ifdef DEBUG
1328 	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1329 	    (prot == PROT_NONE && (pmapdebug & PDB_REMOVE)))
1330 		printf("pmap_page_protect(%p, %x)\n", pg, prot);
1331 #endif
1332 
1333 	switch (prot) {
1334 	case PROT_READ | PROT_WRITE | PROT_EXEC:
1335 	case PROT_READ | PROT_WRITE:
1336 		return;
1337 
1338 	/* copy_on_write */
1339 	case PROT_READ | PROT_EXEC:
1340 	case PROT_READ:
1341 		mtx_enter(&pg->mdpage.pvh_mtx);
1342 		for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
1343 			if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
1344 				*pv->pv_pte &= ~(PG_KWE | PG_UWE);
1345 				PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va,
1346 				    pmap_pte_asm(pv->pv_pte),
1347 				    PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id);
1348 				PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va,
1349 				    pmap_pte_asm(pv->pv_pte));
1350 			}
1351 		}
1352 		mtx_leave(&pg->mdpage.pvh_mtx);
1353 		PMAP_TLB_SHOOTNOW();
1354 		return;
1355 
1356 	/* remove_all */
1357 	default:
1358 		break;
1359 	}
1360 
1361 	mtx_enter(&pg->mdpage.pvh_mtx);
1362 	while ((pv = pg->mdpage.pvh_list) != NULL) {
1363 		pmap_reference(pv->pv_pmap);
1364 		pmap = pv->pv_pmap;
1365 		mtx_leave(&pg->mdpage.pvh_mtx);
1366 
1367 		PMAP_LOCK(pmap);
1368 
1369 		/*
1370 		 * We dropped the pvlist lock before grabbing the pmap
1371 		 * lock to avoid lock ordering problems.  This means
1372 		 * we have to check the pvlist again since somebody
1373 		 * else might have modified it.  All we care about is
1374 		 * that the pvlist entry matches the pmap we just
1375 		 * locked.  If it doesn't, unlock the pmap and try
1376 		 * again.
1377 		 */
1378 		mtx_enter(&pg->mdpage.pvh_mtx);
1379 		if ((pv = pg->mdpage.pvh_list) == NULL ||
1380 		    pv->pv_pmap != pmap) {
1381 			mtx_leave(&pg->mdpage.pvh_mtx);
1382 			PMAP_UNLOCK(pmap);
1383 			pmap_destroy(pmap);
1384 			mtx_enter(&pg->mdpage.pvh_mtx);
1385 			continue;
1386 		}
1387 
1388 #ifdef DEBUG
1389 		if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
1390 		    pmap_pte_pa(pv->pv_pte) != VM_PAGE_TO_PHYS(pg))
1391 			panic("pmap_page_protect: bad mapping");
1392 #endif
1393 		if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte,
1394 		    FALSE, cpu_id) == TRUE) {
1395 			if (pmap == pmap_kernel())
1396 				needkisync |= TRUE;
1397 			else
1398 				PMAP_SYNC_ISTREAM_USER(pmap);
1399 		}
1400 		mtx_leave(&pg->mdpage.pvh_mtx);
1401 		PMAP_UNLOCK(pmap);
1402 		pmap_destroy(pmap);
1403 		mtx_enter(&pg->mdpage.pvh_mtx);
1404 	}
1405 	mtx_leave(&pg->mdpage.pvh_mtx);
1406 
1407 	if (needkisync)
1408 		PMAP_SYNC_ISTREAM_KERNEL();
1409 }
1410 
1411 /*
1412  * pmap_protect:		[ INTERFACE ]
1413  *
1414  *	Set the physical protection on the specified range of this map
1415  *	as requested.
1416  */
1417 void
1418 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1419 {
1420 	pt_entry_t *l1pte, *l2pte, *l3pte, bits;
1421 	boolean_t isactive;
1422 	boolean_t hadasm;
1423 	vaddr_t l1eva, l2eva;
1424 	cpuid_t cpu_id = cpu_number();
1425 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1426 
1427 #ifdef DEBUG
1428 	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1429 		printf("pmap_protect(%p, %lx, %lx, %x)\n",
1430 		    pmap, sva, eva, prot);
1431 #endif
1432 
1433 	if ((prot & PROT_READ) == PROT_NONE) {
1434 		pmap_remove(pmap, sva, eva);
1435 		return;
1436 	}
1437 
1438 	PMAP_LOCK(pmap);
1439 
1440 	bits = pte_prot(pmap, prot);
1441 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1442 
1443 	l1pte = pmap_l1pte(pmap, sva);
1444 	for (; sva < eva; sva = l1eva, l1pte++) {
1445 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1446 		if (!pmap_pte_v(l1pte))
1447 			continue;
1448 
1449 		l2pte = pmap_l2pte(pmap, sva, l1pte);
1450 		for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1451 			l2eva = alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1452 			if (!pmap_pte_v(l2pte))
1453 				continue;
1454 
1455 			l3pte = pmap_l3pte(pmap, sva, l2pte);
1456 			for (; sva < l2eva && sva < eva;
1457 			     sva += PAGE_SIZE, l3pte++) {
1458 				if (!pmap_pte_v(l3pte))
1459 					continue;
1460 
1461 				if (pmap_pte_prot_chg(l3pte, bits)) {
1462 					hadasm = (pmap_pte_asm(l3pte) != 0);
1463 					pmap_pte_set_prot(l3pte, bits);
1464 					PMAP_INVALIDATE_TLB(pmap, sva, hadasm,
1465 					   isactive, cpu_id);
1466 					PMAP_TLB_SHOOTDOWN(pmap, sva,
1467 					   hadasm ? PG_ASM : 0);
1468 				}
1469 			}
1470 		}
1471 	}
1472 
1473 	PMAP_TLB_SHOOTNOW();
1474 
1475 	if (prot & PROT_EXEC)
1476 		PMAP_SYNC_ISTREAM(pmap);
1477 
1478 	PMAP_UNLOCK(pmap);
1479 }
1480 
1481 /*
1482  * pmap_enter:			[ INTERFACE ]
1483  *
1484  *	Insert the given physical page (p) at
1485  *	the specified virtual address (v) in the
1486  *	target physical map with the protection requested.
1487  *
1488  *	If specified, the page will be wired down, meaning
1489  *	that the related pte can not be reclaimed.
1490  *
1491  *	Note:  This is the only routine which MAY NOT lazy-evaluate
1492  *	or lose information.  That is, this routine must actually
1493  *	insert this page into the given map NOW.
1494  */
1495 int
1496 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1497 {
1498 	struct vm_page *pg;
1499 	pt_entry_t *pte, npte, opte;
1500 	paddr_t opa;
1501 	boolean_t tflush = TRUE;
1502 	boolean_t hadasm = FALSE;	/* XXX gcc -Wuninitialized */
1503 	boolean_t needisync = FALSE;
1504 	boolean_t setisync = FALSE;
1505 	boolean_t isactive;
1506 	boolean_t wired;
1507 	cpuid_t cpu_id = cpu_number();
1508 	int error = 0;
1509 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1510 
1511 #ifdef DEBUG
1512 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1513 		printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
1514 		       pmap, va, pa, prot, flags);
1515 #endif
1516 	pg = PHYS_TO_VM_PAGE(pa);
1517 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1518 	wired = (flags & PMAP_WIRED) != 0;
1519 
1520 	/*
1521 	 * Determine what we need to do about the I-stream.  If
1522 	 * PROT_EXEC is set, we mark a user pmap as needing
1523 	 * an I-sync on the way back out to userspace.  We always
1524 	 * need an immediate I-sync for the kernel pmap.
1525 	 */
1526 	if (prot & PROT_EXEC) {
1527 		if (pmap == pmap_kernel())
1528 			needisync = TRUE;
1529 		else {
1530 			setisync = TRUE;
1531 			needisync = (pmap->pm_cpus != 0);
1532 		}
1533 	}
1534 
1535 	PMAP_LOCK(pmap);
1536 
1537 	if (pmap == pmap_kernel()) {
1538 #ifdef DIAGNOSTIC
1539 		/*
1540 		 * Sanity check the virtual address.
1541 		 */
1542 		if (va < VM_MIN_KERNEL_ADDRESS)
1543 			panic("pmap_enter: kernel pmap, invalid va 0x%lx", va);
1544 #endif
1545 		pte = PMAP_KERNEL_PTE(va);
1546 	} else {
1547 		pt_entry_t *l1pte, *l2pte;
1548 
1549 #ifdef DIAGNOSTIC
1550 		/*
1551 		 * Sanity check the virtual address.
1552 		 */
1553 		if (va >= VM_MAXUSER_ADDRESS)
1554 			panic("pmap_enter: user pmap, invalid va 0x%lx", va);
1555 #endif
1556 
1557 		KASSERT(pmap->pm_lev1map != kernel_lev1map);
1558 
1559 		/*
1560 		 * Check to see if the level 1 PTE is valid, and
1561 		 * allocate a new level 2 page table page if it's not.
1562 		 * A reference will be added to the level 2 table when
1563 		 * the level 3 table is created.
1564 		 */
1565 		l1pte = pmap_l1pte(pmap, va);
1566 		if (pmap_pte_v(l1pte) == 0) {
1567 			pmap_physpage_addref(l1pte);
1568 			error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
1569 			if (error) {
1570 				pmap_l1pt_delref(pmap, l1pte);
1571 				if (flags & PMAP_CANFAIL)
1572 					goto out;
1573 				panic("pmap_enter: unable to create L2 PT "
1574 				    "page");
1575 			}
1576 #ifdef DEBUG
1577 			if (pmapdebug & PDB_PTPAGE)
1578 				printf("pmap_enter: new level 2 table at "
1579 				    "0x%lx\n", pmap_pte_pa(l1pte));
1580 #endif
1581 		}
1582 
1583 		/*
1584 		 * Check to see if the level 2 PTE is valid, and
1585 		 * allocate a new level 3 page table page if it's not.
1586 		 * A reference will be added to the level 3 table when
1587 		 * the mapping is validated.
1588 		 */
1589 		l2pte = pmap_l2pte(pmap, va, l1pte);
1590 		if (pmap_pte_v(l2pte) == 0) {
1591 			pmap_physpage_addref(l2pte);
1592 			error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
1593 			if (error) {
1594 				pmap_l2pt_delref(pmap, l1pte, l2pte);
1595 				if (flags & PMAP_CANFAIL)
1596 					goto out;
1597 				panic("pmap_enter: unable to create L3 PT "
1598 				    "page");
1599 			}
1600 #ifdef DEBUG
1601 			if (pmapdebug & PDB_PTPAGE)
1602 				printf("pmap_enter: new level 3 table at "
1603 				    "0x%lx\n", pmap_pte_pa(l2pte));
1604 #endif
1605 		}
1606 
1607 		/*
1608 		 * Get the PTE that will map the page.
1609 		 */
1610 		pte = pmap_l3pte(pmap, va, l2pte);
1611 	}
1612 
1613 	/* Remember all of the old PTE; used for TBI check later. */
1614 	opte = *pte;
1615 
1616 	/*
1617 	 * Check to see if the old mapping is valid.  If not, validate the
1618 	 * new one immediately.
1619 	 */
1620 	if (pmap_pte_v(pte) == 0) {
1621 		/*
1622 		 * No need to invalidate the TLB in this case; an invalid
1623 		 * mapping won't be in the TLB, and a previously valid
1624 		 * mapping would have been flushed when it was invalidated.
1625 		 */
1626 		tflush = FALSE;
1627 
1628 		/*
1629 		 * No need to synchronize the I-stream, either, for basically
1630 		 * the same reason.
1631 		 */
1632 		setisync = needisync = FALSE;
1633 
1634 		if (pmap != pmap_kernel()) {
1635 			/*
1636 			 * New mappings gain a reference on the level 3
1637 			 * table.
1638 			 */
1639 			pmap_physpage_addref(pte);
1640 		}
1641 		goto validate_enterpv;
1642 	}
1643 
1644 	opa = pmap_pte_pa(pte);
1645 	hadasm = (pmap_pte_asm(pte) != 0);
1646 
1647 	if (opa == pa) {
1648 		/*
1649 		 * Mapping has not changed; must be a protection or
1650 		 * wiring change.
1651 		 */
1652 		if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
1653 #ifdef DEBUG
1654 			if (pmapdebug & PDB_ENTER)
1655 				printf("pmap_enter: wiring change -> %d\n",
1656 				    wired);
1657 #endif
1658 			/*
1659 			 * Adjust the wiring count.
1660 			 */
1661 			if (wired)
1662 				PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1663 			else
1664 				PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1665 		}
1666 
1667 		/*
1668 		 * Set the PTE.
1669 		 */
1670 		goto validate;
1671 	}
1672 
1673 	/*
1674 	 * The mapping has changed.  We need to invalidate the
1675 	 * old mapping before creating the new one.
1676 	 */
1677 #ifdef DEBUG
1678 	if (pmapdebug & PDB_ENTER)
1679 		printf("pmap_enter: removing old mapping 0x%lx\n", va);
1680 #endif
1681 	if (pmap != pmap_kernel()) {
1682 		/*
1683 		 * Gain an extra reference on the level 3 table.
1684 		 * pmap_remove_mapping() will delete a reference,
1685 		 * and we don't want the table to be erroneously
1686 		 * freed.
1687 		 */
1688 		pmap_physpage_addref(pte);
1689 	}
1690 	needisync |= pmap_remove_mapping(pmap, va, pte, TRUE, cpu_id);
1691 
1692  validate_enterpv:
1693 	/*
1694 	 * Enter the mapping into the pv_table if appropriate.
1695 	 */
1696 	if (pg != NULL) {
1697 		error = pmap_pv_enter(pmap, pg, va, pte, TRUE);
1698 		if (error) {
1699 			pmap_l3pt_delref(pmap, va, pte, cpu_id);
1700 			if (flags & PMAP_CANFAIL)
1701 				goto out;
1702 			panic("pmap_enter: unable to enter mapping in PV "
1703 			    "table");
1704 		}
1705 	}
1706 
1707 	/*
1708 	 * Increment counters.
1709 	 */
1710 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1711 	if (wired)
1712 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1713 
1714  validate:
1715 	/*
1716 	 * Build the new PTE.
1717 	 */
1718 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
1719 	if (pg != NULL) {
1720 		int attrs;
1721 
1722 #ifdef DIAGNOSTIC
1723 		if ((flags & PROT_MASK) & ~prot)
1724 			panic("pmap_enter: access type exceeds prot");
1725 #endif
1726 		if (flags & PROT_WRITE)
1727 			atomic_setbits_int(&pg->pg_flags,
1728 			    PG_PMAP_REF | PG_PMAP_MOD);
1729 		else if (flags & PROT_MASK)
1730 			atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
1731 
1732 		/*
1733 		 * Set up referenced/modified emulation for new mapping.
1734 		 */
1735 		attrs = pg->pg_flags;
1736 		if ((attrs & PG_PMAP_REF) == 0)
1737 			npte |= PG_FOR | PG_FOW | PG_FOE;
1738 		else if ((attrs & PG_PMAP_MOD) == 0)
1739 			npte |= PG_FOW;
1740 
1741 		/*
1742 		 * Mapping was entered on PV list.
1743 		 */
1744 		npte |= PG_PVLIST;
1745 	}
1746 	if (wired)
1747 		npte |= PG_WIRED;
1748 #ifdef DEBUG
1749 	if (pmapdebug & PDB_ENTER)
1750 		printf("pmap_enter: new pte = 0x%lx\n", npte);
1751 #endif
1752 
1753 	/*
1754 	 * If the PALcode portion of the new PTE is the same as the
1755 	 * old PTE, no TBI is necessary.
1756 	 */
1757 	if (PG_PALCODE(opte) == PG_PALCODE(npte))
1758 		tflush = FALSE;
1759 
1760 	/*
1761 	 * Set the new PTE.
1762 	 */
1763 	PMAP_SET_PTE(pte, npte);
1764 
1765 	/*
1766 	 * Invalidate the TLB entry for this VA and any appropriate
1767 	 * caches.
1768 	 */
1769 	if (tflush) {
1770 		PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
1771 		PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
1772 		PMAP_TLB_SHOOTNOW();
1773 	}
1774 	if (setisync)
1775 		PMAP_SET_NEEDISYNC(pmap);
1776 	if (needisync)
1777 		PMAP_SYNC_ISTREAM(pmap);
1778 
1779 out:
1780 	PMAP_UNLOCK(pmap);
1781 
1782 	return error;
1783 }
1784 
1785 /*
1786  * pmap_kenter_pa:		[ INTERFACE ]
1787  *
1788  *	Enter a va -> pa mapping into the kernel pmap without any
1789  *	physical->virtual tracking.
1790  *
1791  *	Note: no locking is necessary in this function.
1792  */
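/*
 * For illustration only (hypothetical caller): a typical use is to wire
 * a single page of KVA to a known physical page,
 *
 *	pmap_kenter_pa(va, pa, PROT_READ | PROT_WRITE);
 *
 * and to tear the mapping down later with pmap_kremove(va, PAGE_SIZE).
 */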
1793 void
1794 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1795 {
1796 	pt_entry_t *pte, npte;
1797 	cpuid_t cpu_id = cpu_number();
1798 	boolean_t needisync = FALSE;
1799 	pmap_t pmap = pmap_kernel();
1800 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1801 
1802 #ifdef DEBUG
1803 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1804 		printf("pmap_kenter_pa(%lx, %lx, %x)\n",
1805 		    va, pa, prot);
1806 #endif
1807 
1808 #ifdef DIAGNOSTIC
1809 	/*
1810 	 * Sanity check the virtual address.
1811 	 */
1812 	if (va < VM_MIN_KERNEL_ADDRESS)
1813 		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1814 #endif
1815 
1816 	pte = PMAP_KERNEL_PTE(va);
1817 
1818 	if (pmap_pte_v(pte) == 0)
1819 		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1820 	if (pmap_pte_w(pte) == 0)
1821 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1822 
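	/*
	 * If the new mapping is executable, or the mapping being
	 * replaced was executable, the I-stream must be synchronized.
	 */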
1823 	if ((prot & PROT_EXEC) != 0 || pmap_pte_exec(pte))
1824 		needisync = TRUE;
1825 
1826 	/*
1827 	 * Build the new PTE.
1828 	 */
1829 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1830 	    PG_V | PG_WIRED;
1831 
1832 	/*
1833 	 * Set the new PTE.
1834 	 */
1835 	PMAP_SET_PTE(pte, npte);
1836 #if defined(MULTIPROCESSOR)
1837 	alpha_mb();		/* XXX alpha_wmb()? */
1838 #endif
1839 
1840 	/*
1841 	 * Invalidate the TLB entry for this VA and any appropriate
1842 	 * caches.
1843 	 */
1844 	PMAP_INVALIDATE_TLB(pmap, va, TRUE, TRUE, cpu_id);
1845 	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1846 	PMAP_TLB_SHOOTNOW();
1847 
1848 	if (needisync)
1849 		PMAP_SYNC_ISTREAM_KERNEL();
1850 }
1851 
1852 /*
1853  * pmap_kremove:		[ INTERFACE ]
1854  *
1855  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
1856  *	for size bytes (assumed to be page rounded).
1857  */
1858 void
1859 pmap_kremove(vaddr_t va, vsize_t size)
1860 {
1861 	pt_entry_t *pte;
1862 	boolean_t needisync = FALSE;
1863 	cpuid_t cpu_id = cpu_number();
1864 	pmap_t pmap = pmap_kernel();
1865 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1866 
1867 #ifdef DEBUG
1868 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1869 		printf("pmap_kremove(%lx, %lx)\n",
1870 		    va, size);
1871 #endif
1872 
1873 #ifdef DIAGNOSTIC
1874 	if (va < VM_MIN_KERNEL_ADDRESS)
1875 		panic("pmap_kremove: user address");
1876 #endif
1877 
1878 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
1879 		pte = PMAP_KERNEL_PTE(va);
1880 		if (pmap_pte_v(pte)) {
1881 #ifdef DIAGNOSTIC
1882 			if (pmap_pte_pv(pte))
1883 				panic("pmap_kremove: PG_PVLIST mapping for "
1884 				    "0x%lx", va);
1885 #endif
1886 			if (pmap_pte_exec(pte))
1887 				needisync = TRUE;
1888 
1889 			/* Zap the mapping. */
1890 			PMAP_SET_PTE(pte, PG_NV);
1891 #if defined(MULTIPROCESSOR)
1892 			alpha_mb();		/* XXX alpha_wmb()? */
1893 #endif
1894 			PMAP_INVALIDATE_TLB(pmap, va, TRUE, TRUE, cpu_id);
1895 			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
1896 
1897 			/* Update stats. */
1898 			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
1899 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1900 		}
1901 	}
1902 
1903 	PMAP_TLB_SHOOTNOW();
1904 
1905 	if (needisync)
1906 		PMAP_SYNC_ISTREAM_KERNEL();
1907 }
1908 
1909 /*
1910  * pmap_unwire:			[ INTERFACE ]
1911  *
1912  *	Clear the wired attribute for a map/virtual-address pair.
1913  *
1914  *	The mapping must already exist in the pmap.
1915  */
1916 void
1917 pmap_unwire(pmap_t pmap, vaddr_t va)
1918 {
1919 	pt_entry_t *pte;
1920 
1921 #ifdef DEBUG
1922 	if (pmapdebug & PDB_FOLLOW)
1923 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
1924 #endif
1925 
1926 	PMAP_LOCK(pmap);
1927 
1928 	pte = pmap_l3pte(pmap, va, NULL);
1929 #ifdef DIAGNOSTIC
1930 	if (pte == NULL || pmap_pte_v(pte) == 0)
1931 		panic("pmap_unwire");
1932 #endif
1933 
1934 	/*
1935 	 * If wiring actually changed (always?) clear the wire bit and
1936 	 * update the wire count.  Note that wiring is not a hardware
1937 	 * characteristic so there is no need to invalidate the TLB.
1938 	 */
1939 	if (pmap_pte_w_chg(pte, 0)) {
1940 		pmap_pte_set_w(pte, FALSE);
1941 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1942 	}
1943 #ifdef DIAGNOSTIC
1944 	else {
1945 		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
1946 		    "didn't change!\n", pmap, va);
1947 	}
1948 #endif
1949 
1950 	PMAP_UNLOCK(pmap);
1951 }
1952 
1953 /*
1954  * pmap_extract:		[ INTERFACE ]
1955  *
1956  *	Extract the physical address associated with the given
1957  *	pmap/virtual address pair.
1958  */
1959 boolean_t
1960 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
1961 {
1962 	pt_entry_t *l1pte, *l2pte, *l3pte;
1963 	boolean_t rv = FALSE;
1964 	paddr_t pa;
1965 
1966 #ifdef DEBUG
1967 	if (pmapdebug & PDB_FOLLOW)
1968 		printf("pmap_extract(%p, %lx) -> ", pmap, va);
1969 #endif
1970 
1971 	if (pmap == pmap_kernel()) {
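		/*
		 * K0SEG addresses are direct-mapped, so the translation
		 * is simple arithmetic.  Addresses above K0SEG are
		 * looked up in the kernel page table; addresses below
		 * K0SEG simply fail the lookup.
		 */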
1972 		if (va < ALPHA_K0SEG_BASE) {
1973 			/* nothing */
1974 		} else if (va <= ALPHA_K0SEG_END) {
1975 			pa = ALPHA_K0SEG_TO_PHYS(va);
1976 			*pap = pa;
1977 			rv = TRUE;
1978 		} else {
1979 			l3pte = PMAP_KERNEL_PTE(va);
1980 			if (pmap_pte_v(l3pte)) {
1981 				pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
1982 				*pap = pa;
1983 				rv = TRUE;
1984 			}
1985 		}
1986 		goto out_nolock;
1987 	}
1988 
1989 	PMAP_LOCK(pmap);
1990 
1991 	l1pte = pmap_l1pte(pmap, va);
1992 	if (pmap_pte_v(l1pte) == 0)
1993 		goto out;
1994 
1995 	l2pte = pmap_l2pte(pmap, va, l1pte);
1996 	if (pmap_pte_v(l2pte) == 0)
1997 		goto out;
1998 
1999 	l3pte = pmap_l3pte(pmap, va, l2pte);
2000 	if (pmap_pte_v(l3pte) == 0)
2001 		goto out;
2002 
2003 	pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
2004 	*pap = pa;
2005 	rv = TRUE;
2006  out:
2007 	PMAP_UNLOCK(pmap);
2008  out_nolock:
2009 #ifdef DEBUG
2010 	if (pmapdebug & PDB_FOLLOW) {
2011 		if (rv)
2012 			printf("0x%lx\n", pa);
2013 		else
2014 			printf("failed\n");
2015 	}
2016 #endif
2017 	return (rv);
2018 }
2019 
2020 /*
2021  * pmap_collect:		[ INTERFACE ]
2022  *
2023  *	Garbage collects the physical map system for pages which are no
2024  *	longer used.  Success need not be guaranteed -- that is, there
2025  *	may well be pages which are not referenced, but others may be
2026  *	collected.
2027  *
2028  *	Called by the pageout daemon when pages are scarce.
2029  */
2030 void
2031 pmap_collect(pmap_t pmap)
2032 {
2033 
2034 #ifdef DEBUG
2035 	if (pmapdebug & PDB_FOLLOW)
2036 		printf("pmap_collect(%p)\n", pmap);
2037 #endif
2038 
2039 	/*
2040 	 * If called for the kernel pmap, just return.  We
2041 	 * handle this case in the event that we ever want
2042 	 * to have swappable kernel threads.
2043 	 */
2044 	if (pmap == pmap_kernel())
2045 		return;
2046 
2047 	/*
2048 	 * This process is about to be swapped out; free all of
2049 	 * the PT pages by removing the physical mappings for its
2050 	 * entire address space.  Note: pmap_do_remove() performs
2051 	 * all necessary locking.
2052 	 */
2053 	pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, FALSE);
2054 }
2055 
2056 /*
2057  * pmap_activate:		[ INTERFACE ]
2058  *
2059  *	Activate the pmap used by the specified process.  This includes
2060  *	reloading the MMU context if it is the current process, and marking
2061  *	the pmap in use by the processor.
2062  *
2063  *	Note: We may use only spin locks here, since we are called
2064  *	by a critical section in cpu_switch()!
2065  */
2066 void
2067 pmap_activate(struct proc *p)
2068 {
2069 	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
2070 	cpuid_t cpu_id = cpu_number();
2071 
2072 #ifdef DEBUG
2073 	if (pmapdebug & PDB_FOLLOW)
2074 		printf("pmap_activate(%p)\n", p);
2075 #endif
2076 
2077 	/* Mark the pmap in use by this processor. */
2078 	atomic_setbits_ulong(&pmap->pm_cpus, (1UL << cpu_id));
2079 
2080 	/* Allocate an ASN. */
2081 	pmap_asn_alloc(pmap, cpu_id);
2082 
2083 	PMAP_ACTIVATE(pmap, p, cpu_id);
2084 }
2085 
2086 /*
2087  * pmap_deactivate:		[ INTERFACE ]
2088  *
2089  *	Mark that the pmap used by the specified process is no longer
2090  *	in use by the processor.
2091  *
2092  *	The comment above pmap_activate() wrt. locking applies here,
2093  *	as well.  Note that we use only a single `atomic' operation,
2094  *	so no locking is necessary.
2095  */
2096 void
2097 pmap_deactivate(struct proc *p)
2098 {
2099 	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
2100 
2101 #ifdef DEBUG
2102 	if (pmapdebug & PDB_FOLLOW)
2103 		printf("pmap_deactivate(%p)\n", p);
2104 #endif
2105 
2106 	/*
2107 	 * Mark the pmap no longer in use by this processor.
2108 	 */
2109 	atomic_clearbits_ulong(&pmap->pm_cpus, (1UL << cpu_number()));
2110 }
2111 
2112 /*
2113  * pmap_zero_page:		[ INTERFACE ]
2114  *
2115  *	Zero the specified (machine independent) page by mapping the page
2116  *	into virtual memory and clearing its contents, one machine dependent
2117  *	page at a time.
2118  *
2119  *	Note: no locking is necessary in this function.
2120  */
2121 void
2122 pmap_zero_page(struct vm_page *pg)
2123 {
2124 	paddr_t phys = VM_PAGE_TO_PHYS(pg);
2125 	u_long *p0, *p1, *pend;
2126 
2127 #ifdef DEBUG
2128 	if (pmapdebug & PDB_FOLLOW)
2129 		printf("pmap_zero_page(%lx)\n", phys);
2130 #endif
2131 
2132 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2133 	p1 = NULL;
2134 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2135 
2136 	/*
2137 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2138 	 * Do only 8 back-to-back stores, and alternate registers.
2139 	 */
2140 	do {
2141 		__asm volatile(
2142 		"# BEGIN loop body\n"
2143 		"	addq	%2, (8 * 8), %1		\n"
2144 		"	stq	$31, (0 * 8)(%0)	\n"
2145 		"	stq	$31, (1 * 8)(%0)	\n"
2146 		"	stq	$31, (2 * 8)(%0)	\n"
2147 		"	stq	$31, (3 * 8)(%0)	\n"
2148 		"	stq	$31, (4 * 8)(%0)	\n"
2149 		"	stq	$31, (5 * 8)(%0)	\n"
2150 		"	stq	$31, (6 * 8)(%0)	\n"
2151 		"	stq	$31, (7 * 8)(%0)	\n"
2152 		"					\n"
2153 		"	addq	%3, (8 * 8), %0		\n"
2154 		"	stq	$31, (0 * 8)(%1)	\n"
2155 		"	stq	$31, (1 * 8)(%1)	\n"
2156 		"	stq	$31, (2 * 8)(%1)	\n"
2157 		"	stq	$31, (3 * 8)(%1)	\n"
2158 		"	stq	$31, (4 * 8)(%1)	\n"
2159 		"	stq	$31, (5 * 8)(%1)	\n"
2160 		"	stq	$31, (6 * 8)(%1)	\n"
2161 		"	stq	$31, (7 * 8)(%1)	\n"
2162 		"	# END loop body"
2163 		: "=r" (p0), "=r" (p1)
2164 		: "0" (p0), "1" (p1)
2165 		: "memory");
2166 	} while (p0 < pend);
2167 }
2168 
2169 /*
2170  * pmap_copy_page:		[ INTERFACE ]
2171  *
2172  *	Copy the specified (machine independent) page by mapping the page
2173  *	into virtual memory and using memcpy to copy the page, one machine
2174  *	dependent page at a time.
2175  *
2176  *	Note: no locking is necessary in this function.
2177  */
2178 void
2179 pmap_copy_page(struct vm_page *srcpg, struct vm_page *dstpg)
2180 {
2181 	paddr_t src = VM_PAGE_TO_PHYS(srcpg);
2182 	paddr_t dst = VM_PAGE_TO_PHYS(dstpg);
2183 	caddr_t s, d;
2184 
2185 #ifdef DEBUG
2186 	if (pmapdebug & PDB_FOLLOW)
2187 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2188 #endif
2189 	s = (caddr_t)ALPHA_PHYS_TO_K0SEG(src);
2190 	d = (caddr_t)ALPHA_PHYS_TO_K0SEG(dst);
2191 	memcpy(d, s, PAGE_SIZE);
2192 }
2193 
2194 /*
2195  * pmap_clear_modify:		[ INTERFACE ]
2196  *
2197  *	Clear the modify bits on the specified physical page.
2198  */
2199 boolean_t
2200 pmap_clear_modify(struct vm_page *pg)
2201 {
2202 	boolean_t rv = FALSE;
2203 	cpuid_t cpu_id = cpu_number();
2204 
2205 #ifdef DEBUG
2206 	if (pmapdebug & PDB_FOLLOW)
2207 		printf("pmap_clear_modify(%p)\n", pg);
2208 #endif
2209 
2210 	mtx_enter(&pg->mdpage.pvh_mtx);
2211 	if (pg->pg_flags & PG_PMAP_MOD) {
2212 		rv = TRUE;
2213 		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
2214 		atomic_clearbits_int(&pg->pg_flags, PG_PMAP_MOD);
2215 	}
2216 	mtx_leave(&pg->mdpage.pvh_mtx);
2217 
2218 	return (rv);
2219 }
2220 
2221 /*
2222  * pmap_clear_reference:	[ INTERFACE ]
2223  *
2224  *	Clear the reference bit on the specified physical page.
2225  */
2226 boolean_t
2227 pmap_clear_reference(struct vm_page *pg)
2228 {
2229 	boolean_t rv = FALSE;
2230 	cpuid_t cpu_id = cpu_number();
2231 
2232 #ifdef DEBUG
2233 	if (pmapdebug & PDB_FOLLOW)
2234 		printf("pmap_clear_reference(%p)\n", pg);
2235 #endif
2236 
2237 	mtx_enter(&pg->mdpage.pvh_mtx);
2238 	if (pg->pg_flags & PG_PMAP_REF) {
2239 		rv = TRUE;
2240 		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
2241 		atomic_clearbits_int(&pg->pg_flags, PG_PMAP_REF);
2242 	}
2243 	mtx_leave(&pg->mdpage.pvh_mtx);
2244 
2245 	return (rv);
2246 }
2247 
2248 /*
2249  * pmap_is_referenced:		[ INTERFACE ]
2250  *
2251  *	Return whether or not the specified physical page is referenced
2252  *	by any physical maps.
2253  */
2254 boolean_t
2255 pmap_is_referenced(struct vm_page *pg)
2256 {
2257 	boolean_t rv;
2258 
2259 	rv = ((pg->pg_flags & PG_PMAP_REF) != 0);
2260 #ifdef DEBUG
2261 	if (pmapdebug & PDB_FOLLOW) {
2262 		printf("pmap_is_referenced(%p) -> %c\n", pg, "FT"[rv]);
2263 	}
2264 #endif
2265 	return (rv);
2266 }
2267 
2268 /*
2269  * pmap_is_modified:		[ INTERFACE ]
2270  *
2271  *	Return whether or not the specified physical page is modified
2272  *	by any physical maps.
2273  */
2274 boolean_t
2275 pmap_is_modified(struct vm_page *pg)
2276 {
2277 	boolean_t rv;
2278 
2279 	rv = ((pg->pg_flags & PG_PMAP_MOD) != 0);
2280 #ifdef DEBUG
2281 	if (pmapdebug & PDB_FOLLOW) {
2282 		printf("pmap_is_modified(%p) -> %c\n", pg, "FT"[rv]);
2283 	}
2284 #endif
2285 	return (rv);
2286 }
2287 
2288 /*
2289  * Miscellaneous support routines follow
2290  */
2291 
2292 /*
2293  * alpha_protection_init:
2294  *
2295  *	Initialize Alpha protection code array.
2296  *
2297  *	Note: no locking is necessary in this function.
2298  */
2299 void
2300 alpha_protection_init(void)
2301 {
2302 	int prot, *kp, *up;
2303 
2304 	kp = protection_codes[0];
2305 	up = protection_codes[1];
2306 
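	/*
	 * Index 0 holds the PTE protection bits used for kernel
	 * mappings (always PG_ASM), index 1 those used for user
	 * mappings; each array is indexed by the PROT_* combination.
	 */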
2307 	for (prot = 0; prot < 8; prot++) {
2308 		kp[prot] = PG_ASM;
2309 		up[prot] = 0;
2310 
2311 		if (prot & PROT_READ) {
2312 			kp[prot] |= PG_KRE;
2313 			up[prot] |= PG_KRE | PG_URE;
2314 		}
2315 		if (prot & PROT_WRITE) {
2316 			kp[prot] |= PG_KWE;
2317 			up[prot] |= PG_KWE | PG_UWE;
2318 		}
2319 		if (prot & PROT_EXEC) {
2320 			kp[prot] |= PG_EXEC | PG_KRE;
2321 			up[prot] |= PG_EXEC | PG_KRE | PG_URE;
2322 		} else {
2323 			kp[prot] |= PG_FOE;
2324 			up[prot] |= PG_FOE;
2325 		}
2326 	}
2327 }
2328 
2329 /*
2330  * pmap_remove_mapping:
2331  *
2332  *	Invalidate a single page denoted by pmap/va.
2333  *
2334  *	If (pte != NULL), it is the already computed PTE for the page.
2335  *
2336  *	Note: locking in this function is complicated by the fact
2337  *	that we can be called when the PV list is already locked.
2338  *	(pmap_page_protect()).  In this case, the caller must be
2339  *	careful to get the next PV entry while we remove this entry
2340  *	from beneath it.  We assume that the pmap itself is already
2341  *	locked; dolock applies only to the PV list.
2342  *
2343  *	Returns TRUE or FALSE, indicating if an I-stream sync needs
2344  *	to be initiated (for this CPU or for other CPUs).
2345  */
2346 boolean_t
2347 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2348     boolean_t dolock, cpuid_t cpu_id)
2349 {
2350 	paddr_t pa;
2351 	struct vm_page *pg;
2352 	boolean_t onpv;
2353 	boolean_t hadasm;
2354 	boolean_t isactive;
2355 	boolean_t needisync = FALSE;
2356 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2357 
2358 #ifdef DEBUG
2359 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2360 		printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2361 		       pmap, va, pte, dolock, cpu_id);
2362 #endif
2363 
2364 	/*
2365 	 * PTE not provided, compute it from pmap and va.
2366 	 */
2367 	if (pte == PT_ENTRY_NULL) {
2368 		pte = pmap_l3pte(pmap, va, NULL);
2369 		if (pmap_pte_v(pte) == 0)
2370 			return (FALSE);
2371 	}
2372 
2373 	pa = pmap_pte_pa(pte);
2374 	onpv = (pmap_pte_pv(pte) != 0);
2375 	if (onpv) {
2376 		/*
2377 		 * Remove it from the PV table such that nobody will
2378 		 * attempt to modify the PTE behind our back.
2379 		 */
2380 		pg = PHYS_TO_VM_PAGE(pa);
2381 		KASSERT(pg != NULL);
2382 		pmap_pv_remove(pmap, pg, va, dolock);
2383 	}
2384 
2385 	hadasm = (pmap_pte_asm(pte) != 0);
2386 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
2387 
2388 	/*
2389 	 * Determine what we need to do about the I-stream.  If
2390 	 * PG_EXEC was set, we mark a user pmap as needing an
2391 	 * I-sync on the way out to userspace.  We always need
2392 	 * an immediate I-sync for the kernel pmap.
2393 	 */
2394 	if (pmap_pte_exec(pte)) {
2395 		if (pmap == pmap_kernel())
2396 			needisync = TRUE;
2397 		else {
2398 			PMAP_SET_NEEDISYNC(pmap);
2399 			needisync = (pmap->pm_cpus != 0);
2400 		}
2401 	}
2402 
2403 	/*
2404 	 * Update statistics
2405 	 */
2406 	if (pmap_pte_w(pte))
2407 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2408 	PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2409 
2410 	/*
2411 	 * Invalidate the PTE after saving the reference modify info.
2412 	 */
2413 #ifdef DEBUG
2414 	if (pmapdebug & PDB_REMOVE)
2415 		printf("remove: invalidating pte at %p\n", pte);
2416 #endif
2417 	PMAP_SET_PTE(pte, PG_NV);
2418 
2419 	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2420 	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2421 	PMAP_TLB_SHOOTNOW();
2422 
2423 	/*
2424 	 * If we're removing a user mapping, check to see if we
2425 	 * can free page table pages.
2426 	 */
2427 	if (pmap != pmap_kernel()) {
2428 		/*
2429 		 * Delete the reference on the level 3 table.  It will
2430 		 * delete references on the level 2 and 1 tables as
2431 		 * appropriate.
2432 		 */
2433 		pmap_l3pt_delref(pmap, va, pte, cpu_id);
2434 	}
2435 
2436 	return (needisync);
2437 }
2438 
2439 /*
2440  * pmap_changebit:
2441  *
2442  *	Set or clear the specified PTE bits for all mappings on the
2443  *	specified page.
2444  *
2445  *	Note: we assume that the pvlist is already locked.  There is no
2446  *	need to lock the pmap itself as a mapping cannot be removed while
2447  *	we are holding the pvlist lock.
2448  */
2449 void
2450 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, cpuid_t cpu_id)
2451 {
2452 	pv_entry_t pv;
2453 	pt_entry_t *pte, npte;
2454 	vaddr_t va;
2455 	boolean_t hadasm, isactive;
2456 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2457 
2458 #ifdef DEBUG
2459 	if (pmapdebug & PDB_BITS)
2460 		printf("pmap_changebit(0x%lx, 0x%lx, 0x%lx)\n",
2461 		    VM_PAGE_TO_PHYS(pg), set, mask);
2462 #endif
2463 
2464 	MUTEX_ASSERT_LOCKED(&pg->mdpage.pvh_mtx);
2465 
2466 	/*
2467 	 * Loop over all current mappings setting/clearing as appropriate.
2468 	 */
2469 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
2470 		va = pv->pv_va;
2471 
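		/*
		 * `set' bits are OR'd into the PTE and the result is
		 * then AND'ed with `mask', so callers clear bits by
		 * passing their complement in `mask' (for example,
		 * pmap_emulate_reference() passes `~faultoff' to clear
		 * the fault bits).
		 */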
2472 		pte = pv->pv_pte;
2473 		npte = (*pte | set) & mask;
2474 		if (*pte != npte) {
2475 			hadasm = (pmap_pte_asm(pte) != 0);
2476 			isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id);
2477 			PMAP_SET_PTE(pte, npte);
2478 			PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive,
2479 			    cpu_id);
2480 			PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va,
2481 			    hadasm ? PG_ASM : 0);
2482 		}
2483 	}
2484 
2485 	PMAP_TLB_SHOOTNOW();
2486 }
2487 
2488 /*
2489  * pmap_emulate_reference:
2490  *
2491  *	Emulate reference and/or modified bit hits.
2492  *	Return non-zero if this was an execute fault on a non-exec mapping,
2493  *	otherwise return 0.
2494  */
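/*
 * The `type' argument is the MMCSR fault code reported by PALcode
 * (ALPHA_MMCSR_FOR, ALPHA_MMCSR_FOW or ALPHA_MMCSR_FOE for fault-on-read,
 * -write and -execute respectively).
 */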
2495 int
2496 pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int type)
2497 {
2498 	struct pmap *pmap;
2499 	pt_entry_t faultoff, *pte;
2500 	struct vm_page *pg;
2501 	paddr_t pa;
2502 	boolean_t didlock = FALSE;
2503 	boolean_t exec = FALSE;
2504 	cpuid_t cpu_id = cpu_number();
2505 
2506 #ifdef DEBUG
2507 	if (pmapdebug & PDB_FOLLOW)
2508 		printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
2509 		    p, v, user, type);
2510 #endif
2511 
2512 	/*
2513 	 * Convert process and virtual address to physical address.
2514 	 */
2515 	if (v >= VM_MIN_KERNEL_ADDRESS) {
2516 		if (user)
2517 			panic("pmap_emulate_reference: user ref to kernel");
2518 		/*
2519 		 * No need to lock here; kernel PT pages never go away.
2520 		 */
2521 		pte = PMAP_KERNEL_PTE(v);
2522 	} else {
2523 #ifdef DIAGNOSTIC
2524 		if (p == NULL)
2525 			panic("pmap_emulate_reference: bad proc");
2526 		if (p->p_vmspace == NULL)
2527 			panic("pmap_emulate_reference: bad p_vmspace");
2528 #endif
2529 		pmap = p->p_vmspace->vm_map.pmap;
2530 		PMAP_LOCK(pmap);
2531 		didlock = TRUE;
2532 		pte = pmap_l3pte(pmap, v, NULL);
2533 		/*
2534 		 * We'll unlock below where we're done with the PTE.
2535 		 */
2536 	}
2537 	if (pte == NULL || !pmap_pte_v(pte)) {
2538 		if (didlock)
2539 			PMAP_UNLOCK(pmap);
2540 		return (0);
2541 	}
2542 	exec = pmap_pte_exec(pte);
2543 	if (!exec && type == ALPHA_MMCSR_FOE) {
2544 		if (didlock)
2545 			PMAP_UNLOCK(pmap);
2546 		return (1);
2547 	}
2548 #ifdef DEBUG
2549 	if (pmapdebug & PDB_FOLLOW) {
2550 		printf("\tpte = %p, ", pte);
2551 		printf("*pte = 0x%lx\n", *pte);
2552 	}
2553 #endif
2554 #ifdef DEBUG				/* These checks are more expensive */
2555 #ifndef MULTIPROCESSOR
2556 	/*
2557 	 * Quoting the Alpha ARM 14.3.1.4/5/6:
2558 	 * ``The Translation Buffer may reload and cache the old PTE value
2559 	 *   between the time the FOR (resp. FOW, FOE) fault invalidates the
2560 	 *   old value from the Translation Buffer and the time software
2561 	 *   updates the PTE in memory.  Software that depends on the
2562 	 *   processor-provided invalidate must thus be prepared to take
2563 	 *   another FOR (resp. FOW, FOE) fault on a page after clearing the
2564 	 *   page's PTE<FOR(resp. FOW, FOE)> bit. The second fault will
2565 	 *   invalidate the stale PTE from the Translation Buffer, and the
2566 	 *   processor cannot load another stale copy. Thus, in the worst case,
2567 	 *   a multiprocessor system will take an initial FOR (resp. FOW, FOE)
2568 	 *   fault and then an additional FOR (resp. FOW, FOE) fault on each
2569 	 *   processor. In practice, even a single repetition is unlikely.''
2570 	 *
2571 	 * In practice, spurious faults on the other processors happen, at
2572 	 * least on fast 21264 or better processors.
2573 	 */
2574 	if (type == ALPHA_MMCSR_FOW) {
2575 		if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE))) {
2576 			panic("pmap_emulate_reference(%d,%d): "
2577 			    "write but unwritable pte 0x%lx",
2578 			    user, type, *pte);
2579 		}
2580 		if (!(*pte & PG_FOW)) {
2581 			panic("pmap_emulate_reference(%d,%d): "
2582 			    "write but not FOW pte 0x%lx",
2583 			    user, type, *pte);
2584 		}
2585 	} else {
2586 		if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE))) {
2587 			panic("pmap_emulate_reference(%d,%d): "
2588 			    "!write but unreadable pte 0x%lx",
2589 			    user, type, *pte);
2590 		}
2591 		if (!(*pte & (PG_FOR | PG_FOE))) {
2592 			panic("pmap_emulate_reference(%d,%d): "
2593 			    "!write but not FOR|FOE pte 0x%lx",
2594 			    user, type, *pte);
2595 		}
2596 	}
2597 #endif /* MULTIPROCESSOR */
2598 	/* Other diagnostics? */
2599 #endif
2600 	pa = pmap_pte_pa(pte);
2601 
2602 	/*
2603 	 * We're now done with the PTE.  If it was a user pmap, unlock
2604 	 * it now.
2605 	 */
2606 	if (didlock)
2607 		PMAP_UNLOCK(pmap);
2608 
2609 #ifdef DEBUG
2610 	if (pmapdebug & PDB_FOLLOW)
2611 		printf("\tpa = 0x%lx\n", pa);
2612 #endif
2613 
2614 	pg = PHYS_TO_VM_PAGE(pa);
2615 
2616 #ifdef DIAGNOSTIC
2617 	if (pg == NULL) {
2618 		panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): "
2619 		    "pa 0x%lx (pte %p 0x%08lx) not managed",
2620 		    p, v, user, type, pa, pte, *pte);
2621 	}
2622 #endif
2623 
2624 	/*
2625 	 * Twiddle the appropriate bits to reflect the reference
2626 	 * and/or modification.
2627 	 *
2628 	 * The rules:
2629 	 * 	(1) always mark page as used, and
2630 	 *	(2) if it was a write fault, mark page as modified.
2631 	 */
2632 
2633 	mtx_enter(&pg->mdpage.pvh_mtx);
2634 	if (type == ALPHA_MMCSR_FOW) {
2635 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF | PG_PMAP_MOD);
2636 		faultoff = PG_FOR | PG_FOW;
2637 	} else {
2638 		atomic_setbits_int(&pg->pg_flags, PG_PMAP_REF);
2639 		faultoff = PG_FOR;
2640 		if (exec) {
2641 			faultoff |= PG_FOE;
2642 		}
2643 	}
2644 	pmap_changebit(pg, 0, ~faultoff, cpu_id);
2645 	mtx_leave(&pg->mdpage.pvh_mtx);
2646 
2647 	return (0);
2648 }
2649 
2650 #ifdef DEBUG
2651 /*
2652  * pmap_pv_dump:
2653  *
2654  *	Dump the physical->virtual data for the specified page.
2655  */
2656 void
2657 pmap_pv_dump(paddr_t pa)
2658 {
2659 	struct vm_page *pg;
2660 	pv_entry_t pv;
2661 
2662 	pg = PHYS_TO_VM_PAGE(pa);
2663 
2664 	printf("pa 0x%lx (attrs = 0x%x):\n",
2665 	    pa, pg->pg_flags & (PG_PMAP_REF | PG_PMAP_MOD));
2666 	mtx_enter(&pg->mdpage.pvh_mtx);
2667 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next)
2668 		printf("     pmap %p, va 0x%lx\n",
2669 		    pv->pv_pmap, pv->pv_va);
2670 	mtx_leave(&pg->mdpage.pvh_mtx);
2671 	printf("\n");
2672 }
2673 #endif
2674 
2675 /*
2676  * vtophys:
2677  *
2678  *	Return the physical address corresponding to the K0SEG or
2679  *	K1SEG address provided.
2680  *
2681  *	Note: no locking is necessary in this function.
2682  */
2683 paddr_t
2684 vtophys(vaddr_t vaddr)
2685 {
2686 	pt_entry_t *pte;
2687 	paddr_t paddr = 0;
2688 
2689 	if (vaddr < ALPHA_K0SEG_BASE)
2690 		printf("vtophys: invalid vaddr 0x%lx", vaddr);
2691 	else if (vaddr <= ALPHA_K0SEG_END)
2692 		paddr = ALPHA_K0SEG_TO_PHYS(vaddr);
2693 	else {
2694 		pte = PMAP_KERNEL_PTE(vaddr);
2695 		if (pmap_pte_v(pte))
2696 			paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET);
2697 	}
2698 
2699 #if 0
2700 	printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr);
2701 #endif
2702 
2703 	return (paddr);
2704 }
2705 
2706 /******************** pv_entry management ********************/
2707 
2708 /*
2709  * pmap_pv_enter:
2710  *
2711  *	Add a physical->virtual entry to the pv_table.
2712  */
2713 int
2714 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
2715     boolean_t dolock)
2716 {
2717 	pv_entry_t newpv;
2718 
2719 	/*
2720 	 * Allocate and fill in the new pv_entry.
2721 	 */
2722 	newpv = pmap_pv_alloc();
2723 	if (newpv == NULL)
2724 		return (ENOMEM);
2725 	newpv->pv_va = va;
2726 	newpv->pv_pmap = pmap;
2727 	newpv->pv_pte = pte;
2728 
2729 	if (dolock)
2730 		mtx_enter(&pg->mdpage.pvh_mtx);
2731 
2732 #ifdef DEBUG
2733     {
2734 	pv_entry_t pv;
2735 	/*
2736 	 * Make sure the entry doesn't already exist.
2737 	 */
2738 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
2739 		if (pmap == pv->pv_pmap && va == pv->pv_va) {
2740 			printf("pmap = %p, va = 0x%lx\n", pmap, va);
2741 			panic("pmap_pv_enter: already in pv table");
2742 		}
2743 	}
2744     }
2745 #endif
2746 
2747 	/*
2748 	 * ...and put it in the list.
2749 	 */
2750 	newpv->pv_next = pg->mdpage.pvh_list;
2751 	pg->mdpage.pvh_list = newpv;
2752 
2753 	if (dolock)
2754 		mtx_leave(&pg->mdpage.pvh_mtx);
2755 
2756 	return (0);
2757 }
2758 
2759 /*
2760  * pmap_pv_remove:
2761  *
2762  *	Remove a physical->virtual entry from the pv_table.
2763  */
2764 void
2765 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, boolean_t dolock)
2766 {
2767 	pv_entry_t pv, *pvp;
2768 
2769 	if (dolock)
2770 		mtx_enter(&pg->mdpage.pvh_mtx);
2771 
2772 	/*
2773 	 * Find the entry to remove.
2774 	 */
2775 	for (pvp = &pg->mdpage.pvh_list, pv = *pvp;
2776 	    pv != NULL; pvp = &pv->pv_next, pv = *pvp)
2777 		if (pmap == pv->pv_pmap && va == pv->pv_va)
2778 			break;
2779 
2780 #ifdef DEBUG
2781 	if (pv == NULL)
2782 		panic("pmap_pv_remove: not in pv table");
2783 #endif
2784 
2785 	*pvp = pv->pv_next;
2786 
2787 	if (dolock)
2788 		mtx_leave(&pg->mdpage.pvh_mtx);
2789 
2790 	pmap_pv_free(pv);
2791 }
2792 
2793 /*
2794  * pmap_pv_page_alloc:
2795  *
2796  *	Allocate a page for the pv_entry pool.
2797  */
2798 void *
2799 pmap_pv_page_alloc(struct pool *pp, int flags, int *slowdown)
2800 {
2801 	paddr_t pg;
2802 
2803 	*slowdown = 0;
2804 	if (pmap_physpage_alloc(PGU_PVENT, &pg))
2805 		return ((void *)ALPHA_PHYS_TO_K0SEG(pg));
2806 	return (NULL);
2807 }
2808 
2809 /*
2810  * pmap_pv_page_free:
2811  *
2812  *	Free a pv_entry pool page.
2813  */
2814 void
2815 pmap_pv_page_free(struct pool *pp, void *v)
2816 {
2817 
2818 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
2819 }
2820 
2821 /******************** misc. functions ********************/
2822 
2823 /*
2824  * pmap_physpage_alloc:
2825  *
2826  *	Allocate a single page from the VM system and return the
2827  *	physical address for that page.
2828  */
2829 boolean_t
2830 pmap_physpage_alloc(int usage, paddr_t *pap)
2831 {
2832 	struct vm_page *pg;
2833 	paddr_t pa;
2834 
2835 	/*
2836 	 * Don't ask for a zeroed page in the L1PT case -- we will
2837 	 * properly initialize it in the constructor.
2838 	 */
2839 
2840 	pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
2841 	    UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
2842 	if (pg != NULL) {
2843 		pa = VM_PAGE_TO_PHYS(pg);
2844 
2845 #ifdef DIAGNOSTIC
2846 		if (pg->wire_count != 0) {
2847 			printf("pmap_physpage_alloc: page 0x%lx has "
2848 			    "%d references\n", pa, pg->wire_count);
2849 			panic("pmap_physpage_alloc");
2850 		}
2851 #endif
2852 		*pap = pa;
2853 		return (TRUE);
2854 	}
2855 	return (FALSE);
2856 }
2857 
2858 /*
2859  * pmap_physpage_free:
2860  *
2861  *	Free the single page table page at the specified physical address.
2862  */
2863 void
2864 pmap_physpage_free(paddr_t pa)
2865 {
2866 	struct vm_page *pg;
2867 
2868 	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
2869 		panic("pmap_physpage_free: bogus physical page address");
2870 
2871 #ifdef DIAGNOSTIC
2872 	if (pg->wire_count != 0)
2873 		panic("pmap_physpage_free: page still has references");
2874 #endif
2875 
2876 	uvm_pagefree(pg);
2877 }
2878 
2879 /*
2880  * pmap_physpage_addref:
2881  *
2882  *	Add a reference to the specified special use page.
2883  */
2884 int
2885 pmap_physpage_addref(void *kva)
2886 {
2887 	struct vm_page *pg;
2888 	paddr_t pa;
2889 	int rval;
2890 
2891 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2892 	pg = PHYS_TO_VM_PAGE(pa);
2893 
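	/*
	 * The vm_page wire_count field doubles as the reference count
	 * for these special-use (page table) pages.
	 */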
2894 	rval = ++pg->wire_count;
2895 
2896 	return (rval);
2897 }
2898 
2899 /*
2900  * pmap_physpage_delref:
2901  *
2902  *	Delete a reference to the specified special use page.
2903  */
2904 int
2905 pmap_physpage_delref(void *kva)
2906 {
2907 	struct vm_page *pg;
2908 	paddr_t pa;
2909 	int rval;
2910 
2911 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
2912 	pg = PHYS_TO_VM_PAGE(pa);
2913 
2914 #ifdef DIAGNOSTIC
2915 	/*
2916 	 * Make sure we never have a negative reference count.
2917 	 */
2918 	if (pg->wire_count == 0)
2919 		panic("pmap_physpage_delref: reference count already zero");
2920 #endif
2921 
2922 	rval = --pg->wire_count;
2923 
2924 	return (rval);
2925 }
2926 
2927 /******************** page table page management ********************/
2928 
2929 /*
2930  * pmap_growkernel:		[ INTERFACE ]
2931  *
2932  *	Grow the kernel address space.  This is a hint from the
2933  *	upper layer to pre-allocate more kernel PT pages.
2934  */
2935 vaddr_t
2936 pmap_growkernel(vaddr_t maxkvaddr)
2937 {
2938 	struct pmap *kpm = pmap_kernel(), *pm;
2939 	paddr_t ptaddr;
2940 	pt_entry_t *l1pte, *l2pte, pte;
2941 	vaddr_t va;
2942 	int l1idx;
2943 
2944 	mtx_enter(&pmap_growkernel_mtx);
2945 
2946 	if (maxkvaddr <= pmap_maxkvaddr)
2947 		goto out;		/* we are OK */
2948 
2949 	va = pmap_maxkvaddr;
2950 
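	/*
	 * Each loop iteration adds one L3 PT page (allocating an L2 PT
	 * page first if needed), growing the kernel map by
	 * ALPHA_L2SEG_SIZE bytes of KVA.
	 */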
2951 	while (va < maxkvaddr) {
2952 		/*
2953 		 * If there is no valid L1 PTE (i.e. no L2 PT page),
2954 		 * allocate a new L2 PT page and insert it into the
2955 		 * L1 map.
2956 		 */
2957 		l1pte = pmap_l1pte(kpm, va);
2958 		if (pmap_pte_v(l1pte) == 0) {
2959 			/*
2960 			 * XXX PGU_NORMAL?  It's not a "traditional" PT page.
2961 			 */
2962 			if (uvm.page_init_done == FALSE) {
2963 				/*
2964 				 * We're growing the kernel pmap early (from
2965 				 * uvm_pageboot_alloc()).  This case must
2966 				 * be handled a little differently.
2967 				 */
2968 				ptaddr = ALPHA_K0SEG_TO_PHYS(
2969 				    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
2970 			} else if (pmap_physpage_alloc(PGU_NORMAL,
2971 				   &ptaddr) == FALSE)
2972 				goto die;
2973 			pte = (atop(ptaddr) << PG_SHIFT) |
2974 			    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
2975 			*l1pte = pte;
2976 
2977 			l1idx = l1pte_index(va);
2978 
2979 			/* Update all the user pmaps. */
2980 			mtx_enter(&pmap_all_pmaps_mtx);
2981 			for (pm = TAILQ_FIRST(&pmap_all_pmaps);
2982 			     pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
2983 				/* Skip the kernel pmap. */
2984 				if (pm == pmap_kernel())
2985 					continue;
2986 
2987 				PMAP_LOCK(pm);
2988 				KDASSERT(pm->pm_lev1map != kernel_lev1map);
2989 				pm->pm_lev1map[l1idx] = pte;
2990 				PMAP_UNLOCK(pm);
2991 			}
2992 			mtx_leave(&pmap_all_pmaps_mtx);
2993 		}
2994 
2995 		/*
2996 		 * Have an L2 PT page now, add the L3 PT page.
2997 		 */
2998 		l2pte = pmap_l2pte(kpm, va, l1pte);
2999 		KASSERT(pmap_pte_v(l2pte) == 0);
3000 		if (uvm.page_init_done == FALSE) {
3001 			/*
3002 			 * See above.
3003 			 */
3004 			ptaddr = ALPHA_K0SEG_TO_PHYS(
3005 			    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3006 		} else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == FALSE)
3007 			goto die;
3008 		*l2pte = (atop(ptaddr) << PG_SHIFT) |
3009 		    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3010 		va += ALPHA_L2SEG_SIZE;
3011 	}
3012 
3013 #if 0
3014 	/* Invalidate the L1 PT cache. */
3015 	pool_cache_invalidate(&pmap_l1pt_cache);
3016 #endif
3017 
3018 	pmap_maxkvaddr = va;
3019 
3020  out:
3021 	mtx_leave(&pmap_growkernel_mtx);
3022 
3023 	return (pmap_maxkvaddr);
3024 
3025  die:
3026 	mtx_leave(&pmap_growkernel_mtx);
3027 	panic("pmap_growkernel: out of memory");
3028 }
3029 
3030 /*
3031  * pmap_lev1map_create:
3032  *
3033  *	Create a new level 1 page table for the specified pmap.
3034  *
3035  *	Note: growkernel must already be held and the pmap either
3036  *	already locked or unreferenced globally.
3037  */
3038 int
3039 pmap_lev1map_create(pmap_t pmap, cpuid_t cpu_id)
3040 {
3041 	pt_entry_t *l1pt;
3042 
3043 	KASSERT(pmap != pmap_kernel());
3044 	KASSERT(pmap->pm_asni[cpu_id].pma_asn == PMAP_ASN_RESERVED);
3045 
3046 	/* Don't sleep -- we're called with locks held. */
3047 	l1pt = pool_get(&pmap_l1pt_pool, PR_NOWAIT);
3048 	if (l1pt == NULL)
3049 		return (ENOMEM);
3050 
3051 	pmap_l1pt_ctor(l1pt);
3052 	pmap->pm_lev1map = l1pt;
3053 
3054 	return (0);
3055 }
3056 
3057 /*
3058  * pmap_lev1map_destroy:
3059  *
3060  *	Destroy the level 1 page table for the specified pmap.
3061  *
3062  *	Note: growkernel must already be held and the pmap either
3063  *	already locked or unreferenced globally.
3064  */
3065 void
3066 pmap_lev1map_destroy(pmap_t pmap)
3067 {
3068 	pt_entry_t *l1pt = pmap->pm_lev1map;
3069 
3070 	KASSERT(pmap != pmap_kernel());
3071 
3072 	/*
3073 	 * Go back to referencing the global kernel_lev1map.
3074 	 */
3075 	pmap->pm_lev1map = kernel_lev1map;
3076 
3077 	/*
3078 	 * Free the old level 1 page table page.
3079 	 */
3080 	pool_put(&pmap_l1pt_pool, l1pt);
3081 }
3082 
3083 /*
3084  * pmap_l1pt_ctor:
3085  *
3086  *	Constructor for L1 PT pages.
3087  */
3088 void
3089 pmap_l1pt_ctor(pt_entry_t *l1pt)
3090 {
3091 	pt_entry_t pte;
3092 	int i;
3093 
3094 	/*
3095 	 * Initialize the new level 1 table by zeroing the
3096 	 * user portion and copying the kernel mappings into
3097 	 * the kernel portion.
3098 	 */
3099 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3100 		l1pt[i] = 0;
3101 
3102 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3103 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3104 		l1pt[i] = kernel_lev1map[i];
3105 
3106 	/*
3107 	 * Now, map the new virtual page table.  NOTE: NO ASM!
3108 	 */
3109 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3110 	    PG_V | PG_KRE | PG_KWE;
3111 	l1pt[l1pte_index(VPTBASE)] = pte;
3112 }
3113 
3114 /*
3115  * pmap_l1pt_alloc:
3116  *
3117  *	Page allocator for L1 PT pages.
3118  *
3119  *	Note: The growkernel lock is held across allocations
3120  *	from this pool, so we don't need to acquire it
3121  *	ourselves.
3122  */
3123 void *
3124 pmap_l1pt_alloc(struct pool *pp, int flags, int *slowdown)
3125 {
3126 	paddr_t ptpa;
3127 
3128 	/*
3129 	 * Attempt to allocate a free page.
3130 	 */
3131 	*slowdown = 0;
3132 	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == FALSE)
3133 		return (NULL);
3134 
3135 	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3136 }
3137 
3138 /*
3139  * pmap_l1pt_free:
3140  *
3141  *	Page freer for L1 PT pages.
3142  */
3143 void
3144 pmap_l1pt_free(struct pool *pp, void *v)
3145 {
3146 
3147 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3148 }
3149 
3150 /*
3151  * pmap_ptpage_alloc:
3152  *
3153  *	Allocate a level 2 or level 3 page table page, and
3154  *	initialize the PTE that references it.
3155  *
3156  *	Note: the pmap must already be locked.
3157  */
3158 int
3159 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3160 {
3161 	paddr_t ptpa;
3162 
3163 	/*
3164 	 * Allocate the page table page.
3165 	 */
3166 	if (pmap_physpage_alloc(usage, &ptpa) == FALSE)
3167 		return (ENOMEM);
3168 
3169 	/*
3170 	 * Initialize the referencing PTE.
3171 	 */
3172 	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3173 	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
3174 	    (pmap == pmap_kernel() ? PG_ASM : 0));
3175 
3176 	return (0);
3177 }
3178 
3179 /*
3180  * pmap_ptpage_free:
3181  *
3182  *	Free the level 2 or level 3 page table page referenced
3183  *	by the provided PTE.
3184  *
3185  *	Note: the pmap must already be locked.
3186  */
3187 void
3188 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3189 {
3190 	paddr_t ptpa;
3191 
3192 	/*
3193 	 * Extract the physical address of the page from the PTE
3194 	 * and clear the entry.
3195 	 */
3196 	ptpa = pmap_pte_pa(pte);
3197 	PMAP_SET_PTE(pte, PG_NV);
3198 
3199 #ifdef DEBUG
3200 	pmap_zero_page(PHYS_TO_VM_PAGE(ptpa));
3201 #endif
3202 	pmap_physpage_free(ptpa);
3203 }
3204 
3205 /*
3206  * pmap_l3pt_delref:
3207  *
3208  *	Delete a reference on a level 3 PT page.  If the reference drops
3209  *	to zero, free it.
3210  *
3211  *	Note: the pmap must already be locked.
3212  */
3213 void
3214 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, cpuid_t cpu_id)
3215 {
3216 	pt_entry_t *l1pte, *l2pte;
3217 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3218 
3219 	l1pte = pmap_l1pte(pmap, va);
3220 	l2pte = pmap_l2pte(pmap, va, l1pte);
3221 
3222 #ifdef DIAGNOSTIC
3223 	if (pmap == pmap_kernel())
3224 		panic("pmap_l3pt_delref: kernel pmap");
3225 #endif
3226 
3227 	if (pmap_physpage_delref(l3pte) == 0) {
3228 		/*
3229 		 * No more mappings; we can free the level 3 table.
3230 		 */
3231 #ifdef DEBUG
3232 		if (pmapdebug & PDB_PTPAGE)
3233 			printf("pmap_l3pt_delref: freeing level 3 table at "
3234 			    "0x%lx\n", pmap_pte_pa(l2pte));
3235 #endif
3236 		pmap_ptpage_free(pmap, l2pte);
3237 
3238 		/*
3239 		 * We've freed a level 3 table, so we must
3240 		 * invalidate the TLB entry for that PT page
3241 		 * in the Virtual Page Table VA range, because
3242 		 * otherwise the PALcode will service a TLB
3243 		 * miss using the stale VPT TLB entry it entered
3244 		 * behind our back to shortcut to the VA's PTE.
3245 		 */
3246 		PMAP_INVALIDATE_TLB(pmap,
3247 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), FALSE,
3248 		    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3249 		PMAP_TLB_SHOOTDOWN(pmap,
3250 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3251 		PMAP_TLB_SHOOTNOW();
3252 
3253 		/*
3254 		 * We've freed a level 3 table, so delete the reference
3255 		 * on the level 2 table.
3256 		 */
3257 		pmap_l2pt_delref(pmap, l1pte, l2pte);
3258 	}
3259 }
3260 
3261 /*
3262  * pmap_l2pt_delref:
3263  *
3264  *	Delete a reference on a level 2 PT page.  If the reference drops
3265  *	to zero, free it.
3266  *
3267  *	Note: the pmap must already be locked.
3268  */
3269 void
3270 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte)
3271 {
3272 	KASSERT(pmap != pmap_kernel());
3273 	if (pmap_physpage_delref(l2pte) == 0) {
3274 		/*
3275 		 * No more mappings in this segment; we can free the
3276 		 * level 2 table.
3277 		 */
3278 #ifdef DEBUG
3279 		if (pmapdebug & PDB_PTPAGE)
3280 			printf("pmap_l2pt_delref: freeing level 2 table at "
3281 			    "0x%lx\n", pmap_pte_pa(l1pte));
3282 #endif
3283 		pmap_ptpage_free(pmap, l1pte);
3284 
3285 		/*
3286 		 * We've freed a level 2 table, so delete the reference
3287 		 * on the level 1 table.
3288 		 */
3289 		pmap_l1pt_delref(pmap, l1pte);
3290 	}
3291 }
3292 
3293 /*
3294  * pmap_l1pt_delref:
3295  *
3296  *	Delete a reference on a level 1 PT page.  If the reference drops
3297  *	to zero, free it.
3298  *
3299  *	Note: the pmap must already be locked.
3300  */
3301 void
3302 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte)
3303 {
3304 	KASSERT(pmap != pmap_kernel());
3305 	pmap_physpage_delref(l1pte);
3306 }
3307 
3308 /******************** Address Space Number management ********************/
3309 
3310 /*
3311  * pmap_asn_alloc:
3312  *
3313  *	Allocate and assign an ASN to the specified pmap.
3314  *
3315  *	Note: the pmap must already be locked.  This may be called from
3316  *	an interprocessor interrupt, and in that case, the sender of
3317  *	the IPI has the pmap lock.
3318  */
3319 void
3320 pmap_asn_alloc(pmap_t pmap, cpuid_t cpu_id)
3321 {
3322 	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3323 	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3324 
3325 #ifdef DEBUG
3326 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3327 		printf("pmap_asn_alloc(%p)\n", pmap);
3328 #endif
3329 
3330 	/*
3331 	 * If the pmap is still using the global kernel_lev1map, there
3332 	 * is no need to assign an ASN at this time, because only
3333 	 * kernel mappings exist in that map, and all kernel mappings
3334 	 * have PG_ASM set.  If the pmap eventually gets its own
3335 	 * lev1map, an ASN will be allocated at that time.
3336 	 *
3337 	 * Only the kernel pmap will reference kernel_lev1map.  Do the
3338 	 * same old fixups, but note that we no longer need the pmap
3339 	 * to be locked if we're in this mode, since pm_lev1map will
3340 	 * never change.
3341 	 */
3342 	if (pmap->pm_lev1map == kernel_lev1map) {
3343 #ifdef DEBUG
3344 		if (pmapdebug & PDB_ASN)
3345 			printf("pmap_asn_alloc: still references "
3346 			    "kernel_lev1map\n");
3347 #endif
3348 #if defined(MULTIPROCESSOR)
3349 		/*
3350 		 * In a multiprocessor system, it's possible to
3351 		 * get here without having PMAP_ASN_RESERVED in
3352 		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3353 		 *
3354 		 * So, what we do here, is simply assign the reserved
3355 		 * ASN for kernel_lev1map users and let things
3356 		 * continue on.  We do, however, let uniprocessor
3357 		 * configurations continue to make the assertion.
3358 		 */
3359 		pma->pma_asn = PMAP_ASN_RESERVED;
3360 #else
3361 		KASSERT(pma->pma_asn == PMAP_ASN_RESERVED);
3362 #endif /* MULTIPROCESSOR */
3363 		return;
3364 	}
3365 
3366 	/*
3367 	 * On processors which do not implement ASNs, the swpctx PALcode
3368 	 * operation will automatically invalidate the TLB and I-cache,
3369 	 * so we don't need to do that here.
3370 	 */
3371 	if (pmap_max_asn == 0) {
3372 		/*
3373 		 * Refresh the pmap's generation number, to
3374 		 * simplify logic elsewhere.
3375 		 */
3376 		pma->pma_asngen = cpma->pma_asngen;
3377 #ifdef DEBUG
3378 		if (pmapdebug & PDB_ASN)
3379 			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3380 			    pma->pma_asngen);
3381 #endif
3382 		return;
3383 	}
3384 
3385 	/*
3386 	 * Hopefully, we can continue using the one we have...
3387 	 */
3388 	if (pma->pma_asn != PMAP_ASN_RESERVED &&
3389 	    pma->pma_asngen == cpma->pma_asngen) {
3390 		/*
3391 		 * ASN is still in the current generation; keep on using it.
3392 		 */
3393 #ifdef DEBUG
3394 		if (pmapdebug & PDB_ASN)
3395 			printf("pmap_asn_alloc: same generation, keeping %u\n",
3396 			    pma->pma_asn);
3397 #endif
3398 		return;
3399 	}
3400 
3401 	/*
3402 	 * Need to assign a new ASN.  Grab the next one, incrementing
3403 	 * the generation number if we have to.
3404 	 */
3405 	if (cpma->pma_asn > pmap_max_asn) {
3406 		/*
3407 		 * Invalidate all non-PG_ASM TLB entries and the
3408 		 * I-cache, and bump the generation number.
3409 		 */
3410 		ALPHA_TBIAP();
3411 		alpha_pal_imb();
3412 
3413 		cpma->pma_asn = 1;
3414 		cpma->pma_asngen++;
3415 #ifdef DIAGNOSTIC
3416 		if (cpma->pma_asngen == 0) {
3417 			/*
3418 			 * The generation number has wrapped.  We could
3419 			 * handle this scenario by traversing all of
3420 			 * the pmaps, and invalidating the generation
3421 			 * number on those which are not currently
3422 			 * in use by this processor.
3423 			 *
3424 			 * However... considering that we're using
3425 			 * an unsigned 64-bit integer for generation
3426 			 * numbers, on non-ASN CPUs, we won't wrap
3427 			 * for approx. 585 million years, or 75 billion
3428 			 * years on a 128-ASN CPU (assuming 1000 switch
3429 			 * operations per second).
3430 			 *
3431 			 * So, we don't bother.
3432 			 */
3433 			panic("pmap_asn_alloc: too much uptime");
3434 		}
3435 #endif
3436 #ifdef DEBUG
3437 		if (pmapdebug & PDB_ASN)
3438 			printf("pmap_asn_alloc: generation bumped to %lu\n",
3439 			    cpma->pma_asngen);
3440 #endif
3441 	}
3442 
3443 	/*
3444 	 * Assign the new ASN and validate the generation number.
3445 	 */
3446 	pma->pma_asn = cpma->pma_asn++;
3447 	pma->pma_asngen = cpma->pma_asngen;
3448 
3449 #ifdef DEBUG
3450 	if (pmapdebug & PDB_ASN)
3451 		printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3452 		    pma->pma_asn, pmap);
3453 #endif
3454 
3455 	/*
3456 	 * Have a new ASN, so there's no need to sync the I-stream
3457 	 * on the way back out to userspace.
3458 	 */
3459 	atomic_clearbits_ulong(&pmap->pm_needisync, (1UL << cpu_id));
3460 }
3461 
3462 #if defined(MULTIPROCESSOR)
3463 /******************** TLB shootdown code ********************/
3464 
3465 /*
3466  * pmap_tlb_shootdown:
3467  *
3468  *	Cause the TLB entry for pmap/va to be shot down.
3469  *
3470  *	NOTE: The pmap must be locked here.
3471  */
3472 void
3473 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp)
3474 {
3475 	struct pmap_tlb_shootdown_q *pq;
3476 	struct pmap_tlb_shootdown_job *pj;
3477 	struct cpu_info *ci, *self = curcpu();
3478 	u_long cpumask;
3479 	CPU_INFO_ITERATOR cii;
3480 #if 0
3481 	int s;
3482 #endif
3483 
3484 	cpumask = 0;
3485 
3486 	CPU_INFO_FOREACH(cii, ci) {
3487 		if (ci == self)
3488 			continue;
3489 
3490 		/*
3491 		 * The pmap must be locked (unless it's the kernel
3492 		 * pmap, in which case it is okay for it to be
3493 		 * unlocked), which prevents it from becoming
3494 		 * active on any additional processors.  This makes
3495 		 * it safe to check for activeness.  If it's not
3496 		 * active on the processor in question, then just
3497 		 * mark it as needing a new ASN the next time it
3498 		 * does, saving the IPI.  We always have to send
3499 		 * the IPI for the kernel pmap.
3500 		 *
3501 		 * Note if it's marked active now, and it becomes
3502 		 * inactive by the time the processor receives
3503 		 * the IPI, that's okay, because it does the right
3504 		 * thing with it later.
3505 		 */
3506 		if (pmap != pmap_kernel() &&
3507 		    PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) {
3508 			PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid);
3509 			continue;
3510 		}
3511 
3512 		cpumask |= 1UL << ci->ci_cpuid;
3513 
3514 		pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
3515 
3516 		PSJQ_LOCK(pq, s);
3517 
3518 		pq->pq_pte |= pte;
3519 
3520 		/*
3521 		 * If a global flush is already pending, we
3522 		 * don't really have to do anything else.
3523 		 */
3524 		if (pq->pq_tbia) {
3525 			PSJQ_UNLOCK(pq, s);
3526 			continue;
3527 		}
3528 
3529 		pj = pmap_tlb_shootdown_job_get(pq);
3530 		if (pj == NULL) {
3531 			/*
3532 			 * Couldn't allocate a job entry.  Just
3533 			 * tell the processor to kill everything.
3534 			 */
3535 			pq->pq_tbia = 1;
3536 		} else {
3537 			pj->pj_pmap = pmap;
3538 			pj->pj_va = va;
3539 			pj->pj_pte = pte;
3540 			TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
3541 		}
3542 
3543 		PSJQ_UNLOCK(pq, s);
3544 	}
3545 
3546 	*cpumaskp |= cpumask;
3547 }
3548 
3549 /*
3550  * pmap_tlb_shootnow:
3551  *
3552  *	Process the TLB shootdowns that we have been accumulating
3553  *	for the specified processor set.
3554  */
3555 void
3556 pmap_tlb_shootnow(u_long cpumask)
3557 {
3558 
3559 	alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN);
3560 }
3561 
3562 /*
3563  * pmap_do_tlb_shootdown:
3564  *
3565  *	Process pending TLB shootdown operations for this processor.
3566  */
3567 void
3568 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep)
3569 {
3570 	u_long cpu_id = ci->ci_cpuid;
3571 	u_long cpu_mask = (1UL << cpu_id);
3572 	struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
3573 	struct pmap_tlb_shootdown_job *pj;
3574 #if 0
3575 	int s;
3576 #endif
3577 
3578 	PSJQ_LOCK(pq, s);
3579 
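	/*
	 * A full TBIA is needed only if one of the accumulated entries
	 * had PG_ASM set; otherwise flushing the non-ASM entries with
	 * TBIAP is sufficient.
	 */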
3580 	if (pq->pq_tbia) {
3581 		if (pq->pq_pte & PG_ASM)
3582 			ALPHA_TBIA();
3583 		else
3584 			ALPHA_TBIAP();
3585 		pq->pq_tbia = 0;
3586 		pmap_tlb_shootdown_q_drain(pq);
3587 	} else {
3588 		while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
3589 			TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
3590 			PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va,
3591 			    pj->pj_pte & PG_ASM,
3592 			    pj->pj_pmap->pm_cpus & cpu_mask, cpu_id);
3593 			pmap_tlb_shootdown_job_put(pq, pj);
3594 		}
3595 	}
3596 	pq->pq_pte = 0;
3597 
3598 	PSJQ_UNLOCK(pq, s);
3599 }
3600 
3601 /*
3602  * pmap_tlb_shootdown_q_drain:
3603  *
3604  *	Drain a processor's TLB shootdown queue.  We do not perform
3605  *	the shootdown operations.  This is merely a convenience
3606  *	function.
3607  *
3608  *	Note: We expect the queue to be locked.
3609  */
3610 void
3611 pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *pq)
3612 {
3613 	struct pmap_tlb_shootdown_job *pj;
3614 
3615 	while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
3616 		TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
3617 		pmap_tlb_shootdown_job_put(pq, pj);
3618 	}
3619 }
3620 
3621 /*
3622  * pmap_tlb_shootdown_job_get:
3623  *
3624  *	Get a TLB shootdown job queue entry.  This places a limit on
3625  *	the number of outstanding jobs a processor may have.
3626  *
3627  *	Note: We expect the queue to be locked.
3628  */
3629 struct pmap_tlb_shootdown_job *
3630 pmap_tlb_shootdown_job_get(struct pmap_tlb_shootdown_q *pq)
3631 {
3632 	struct pmap_tlb_shootdown_job *pj;
3633 
3634 	pj = TAILQ_FIRST(&pq->pq_free);
3635 	if (pj != NULL)
3636 		TAILQ_REMOVE(&pq->pq_free, pj, pj_list);
3637 	return (pj);
3638 }
3639 
3640 /*
3641  * pmap_tlb_shootdown_job_put:
3642  *
3643  *	Put a TLB shootdown job queue entry onto the free list.
3644  *
3645  *	Note: We expect the queue to be locked.
3646  */
3647 void
3648 pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *pq,
3649     struct pmap_tlb_shootdown_job *pj)
3650 {
3651 	TAILQ_INSERT_TAIL(&pq->pq_free, pj, pj_list);
3652 }
3653 #endif /* MULTIPROCESSOR */
3654