xref: /netbsd/sys/arch/alpha/alpha/pmap.c (revision c4a72b64)
1 /* $NetBSD: pmap.c,v 1.194 2002/10/14 05:11:21 chs Exp $ */
2 
3 /*-
4  * Copyright (c) 1998, 1999, 2000, 2001 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9  * NASA Ames Research Center and by Chris G. Demetriou.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  * 3. All advertising materials mentioning features or use of this software
20  *    must display the following acknowledgement:
21  *	This product includes software developed by the NetBSD
22  *	Foundation, Inc. and its contributors.
23  * 4. Neither the name of The NetBSD Foundation nor the names of its
24  *    contributors may be used to endorse or promote products derived
25  *    from this software without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
28  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
29  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
30  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
31  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37  * POSSIBILITY OF SUCH DAMAGE.
38  */
39 
40 /*
41  * Copyright (c) 1991, 1993
42  *	The Regents of the University of California.  All rights reserved.
43  *
44  * This code is derived from software contributed to Berkeley by
45  * the Systems Programming Group of the University of Utah Computer
46  * Science Department.
47  *
48  * Redistribution and use in source and binary forms, with or without
49  * modification, are permitted provided that the following conditions
50  * are met:
51  * 1. Redistributions of source code must retain the above copyright
52  *    notice, this list of conditions and the following disclaimer.
53  * 2. Redistributions in binary form must reproduce the above copyright
54  *    notice, this list of conditions and the following disclaimer in the
55  *    documentation and/or other materials provided with the distribution.
56  * 3. All advertising materials mentioning features or use of this software
57  *    must display the following acknowledgement:
58  *	This product includes software developed by the University of
59  *	California, Berkeley and its contributors.
60  * 4. Neither the name of the University nor the names of its contributors
61  *    may be used to endorse or promote products derived from this software
62  *    without specific prior written permission.
63  *
64  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
65  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
66  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
67  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
68  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
69  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
70  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
71  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
72  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
73  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
74  * SUCH DAMAGE.
75  *
76  *	@(#)pmap.c	8.6 (Berkeley) 5/27/94
77  */
78 
79 /*
80  * DEC Alpha physical map management code.
81  *
82  * History:
83  *
84  *	This pmap started life as a Motorola 68851/68030 pmap,
85  *	written by Mike Hibler at the University of Utah.
86  *
87  *	It was modified for the DEC Alpha by Chris Demetriou
88  *	at Carnegie Mellon University.
89  *
90  *	Support for non-contiguous physical memory was added by
91  *	Jason R. Thorpe of the Numerical Aerospace Simulation
92  *	Facility, NASA Ames Research Center and Chris Demetriou.
93  *
94  *	Page table management and a major cleanup were undertaken
95  *	by Jason R. Thorpe, with lots of help from Ross Harvey of
96  *	Avalon Computer Systems and from Chris Demetriou.
97  *
98  *	Support for the new UVM pmap interface was written by
99  *	Jason R. Thorpe.
100  *
101  *	Support for ASNs was written by Jason R. Thorpe, again
102  *	with help from Chris Demetriou and Ross Harvey.
103  *
104  *	The locking protocol was written by Jason R. Thorpe,
105  *	using Chuck Cranor's i386 pmap for UVM as a model.
106  *
107  *	TLB shootdown code was written by Jason R. Thorpe.
108  *
109  * Notes:
110  *
111  *	All page table access is done via K0SEG.  The one exception
112  *	to this is for kernel mappings.  Since all kernel page
113  *	tables are pre-allocated, we can use the Virtual Page Table
114  *	to access PTEs that map K1SEG addresses.
115  *
116  *	Kernel page table pages are statically allocated in
117  *	pmap_bootstrap(), and are never freed.  In the future,
118  *	support for dynamically adding additional kernel page
119  *	table pages may be added.  User page table pages are
120  *	dynamically allocated and freed.
121  *
122  *	This pmap implementation only supports NBPG == PAGE_SIZE.
123  *	In practice, this is not a problem since PAGE_SIZE is
124  *	initialized to the hardware page size in alpha_init().
125  *
126  * Bugs/misfeatures:
127  *
128  *	- Some things could be optimized.
129  */
130 
131 /*
132  *	Manages physical address maps.
133  *
134  *	Since the information managed by this module is
135  *	also stored by the logical address mapping module,
136  *	this module may throw away valid virtual-to-physical
137  *	mappings at almost any time.  However, invalidations
138  *	of virtual-to-physical mappings must be done as
139  *	requested.
140  *
141  *	In order to cope with hardware architectures which
142  *	make virtual-to-physical map invalidates expensive,
143  *	this module may delay invalidation or protection-reduction
144  *	operations until such time as they are actually
145  *	necessary.  This module is given full information as
146  *	to which processors are currently using which maps,
147  *	and as to when physical maps must be made correct.
148  */
149 
150 #include "opt_lockdebug.h"
151 #include "opt_sysv.h"
152 #include "opt_multiprocessor.h"
153 
154 #include <sys/cdefs.h>			/* RCS ID & Copyright macro defns */
155 
156 __KERNEL_RCSID(0, "$NetBSD: pmap.c,v 1.194 2002/10/14 05:11:21 chs Exp $");
157 
158 #include <sys/param.h>
159 #include <sys/systm.h>
160 #include <sys/proc.h>
161 #include <sys/malloc.h>
162 #include <sys/pool.h>
163 #include <sys/user.h>
164 #include <sys/buf.h>
165 #ifdef SYSVSHM
166 #include <sys/shm.h>
167 #endif
168 
169 #include <uvm/uvm.h>
170 
171 #include <machine/atomic.h>
172 #include <machine/cpu.h>
173 #if defined(_PMAP_MAY_USE_PROM_CONSOLE) || defined(MULTIPROCESSOR)
174 #include <machine/rpb.h>
175 #endif
176 
177 #ifdef DEBUG
178 #define	PDB_FOLLOW	0x0001
179 #define	PDB_INIT	0x0002
180 #define	PDB_ENTER	0x0004
181 #define	PDB_REMOVE	0x0008
182 #define	PDB_CREATE	0x0010
183 #define	PDB_PTPAGE	0x0020
184 #define	PDB_ASN		0x0040
185 #define	PDB_BITS	0x0080
186 #define	PDB_COLLECT	0x0100
187 #define	PDB_PROTECT	0x0200
188 #define	PDB_BOOTSTRAP	0x1000
189 #define	PDB_PARANOIA	0x2000
190 #define	PDB_WIRING	0x4000
191 #define	PDB_PVDUMP	0x8000
192 
193 int debugmap = 0;
194 int pmapdebug = PDB_PARANOIA;
195 #endif
196 
197 /*
198  * Given a map and a machine independent protection code,
199  * convert to an alpha protection code.
200  */
201 #define pte_prot(m, p)	(protection_codes[m == pmap_kernel() ? 0 : 1][p])
202 int	protection_codes[2][8];
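/*
 * As the pte_prot() macro above implies, protection_codes[0][] holds
 * the kernel PTE protection bits and protection_codes[1][] the user
 * bits, indexed by the VM_PROT_READ|WRITE|EXECUTE combination.  The
 * table itself is filled in by alpha_protection_init(), called from
 * pmap_init() below.
 */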
203 
204 /*
205  * kernel_lev1map:
206  *
207  *	Kernel level 1 page table.  This maps all kernel level 2
208  *	page table pages, and is used as a template for all user
209  *	pmap level 1 page tables.  When a new user level 1 page
210  *	table is allocated, all kernel_lev1map PTEs for kernel
211  *	addresses are copied to the new map.
212  *
213  *	The kernel also has an initial set of kernel level 2 page
214  *	table pages.  These map the kernel level 3 page table pages.
215  *	As kernel level 3 page table pages are added, more level 2
216  *	page table pages may be added to map them.  These pages are
217  *	never freed.
218  *
219  *	Finally, the kernel also has an initial set of kernel level
220  *	3 page table pages.  These map pages in K1SEG.  More level
221  *	3 page table pages may be added at run-time if additional
222  *	K1SEG address space is required.  These pages are never freed.
223  *
224  * NOTE: When mappings are inserted into the kernel pmap, all
225  * level 2 and level 3 page table pages must already be allocated
226  * and mapped into the parent page table.
227  */
228 pt_entry_t	*kernel_lev1map;
229 
230 /*
231  * Virtual Page Table.
232  */
233 pt_entry_t	*VPT;
234 
235 u_long		kernel_pmap_store[PMAP_SIZEOF(ALPHA_MAXPROCS) / sizeof(u_long)];
236 
237 paddr_t    	avail_start;	/* PA of first available physical page */
238 paddr_t		avail_end;	/* PA of last available physical page */
239 static vaddr_t	virtual_end;	/* VA of last avail page (end of kernel AS) */
240 
241 boolean_t	pmap_initialized;	/* Has pmap_init completed? */
242 
243 u_long		pmap_pages_stolen;	/* instrumentation */
244 
245 /*
246  * This variable contains the number of CPU IDs we need to allocate
247  * space for when allocating the pmap structure.  It is used to
248  * size the per-CPU array of ASN and ASN generation information.
249  */
250 u_long		pmap_ncpuids;
251 
252 #ifndef PMAP_PV_LOWAT
253 #define	PMAP_PV_LOWAT	16
254 #endif
255 int		pmap_pv_lowat = PMAP_PV_LOWAT;
256 
257 /*
258  * List of all pmaps, used to update them when e.g. additional kernel
259  * page tables are allocated.  This list is kept LRU-ordered by
260  * pmap_activate().
261  */
262 TAILQ_HEAD(, pmap) pmap_all_pmaps;
263 
264 /*
265  * The pools from which pmap structures and sub-structures are allocated.
266  */
267 struct pool pmap_pmap_pool;
268 struct pool pmap_l1pt_pool;
269 struct pool_cache pmap_l1pt_cache;
270 struct pool pmap_pv_pool;
271 
272 /*
273  * Address Space Numbers.
274  *
275  * On many implementations of the Alpha architecture, the TLB entries and
276  * I-cache blocks are tagged with a unique number within an implementation-
277  * specified range.  When a process context becomes active, the ASN is used
278  * to match TLB entries; if a TLB entry for a particular VA does not match
279  * the current ASN, it is ignored (one could think of the processor as
280  * having a collection of <max ASN> separate TLBs).  This allows operating
281  * system software to skip the TLB flush that would otherwise be necessary
282  * at context switch time.
283  *
284  * Alpha PTEs have a bit in them (PG_ASM - Address Space Match) that
285  * causes TLB entries to match any ASN.  The PALcode also provides
286  * a TBI (Translation Buffer Invalidate) operation that flushes all
287  * TLB entries that _do not_ have PG_ASM.  We use this bit for kernel
288  * mappings, so that invalidation of all user mappings does not invalidate
289  * kernel mappings (which are consistent across all processes).
290  *
291  * pmap_next_asn always indicates the next ASN to use.  When
292  * pmap_next_asn exceeds pmap_max_asn, we start a new ASN generation.
293  *
294  * When a new ASN generation is created, the per-process (i.e. non-PG_ASM)
295  * TLB entries and the I-cache are flushed, the generation number is bumped,
296  * and pmap_next_asn is changed to indicate the first non-reserved ASN.
297  *
298  * We reserve ASN #0 for pmaps that use the global kernel_lev1map.  This
299  * prevents the following scenario:
300  *
301  *	* New ASN generation starts, and process A is given ASN #0.
302  *
303  *	* A new process B (and thus new pmap) is created.  The ASN,
304  *	  for lack of a better value, is initialized to 0.
305  *
306  *	* Process B runs.  It is now using the TLB entries tagged
307  *	  by process A.  *poof*
308  *
309  * In the scenario above, in addition to the processor using incorrect
310  * TLB entries, the PALcode might use incorrect information to service a
311  * TLB miss.  (The PALcode uses the recursively mapped Virtual Page Table
312  * to locate the PTE for a faulting address, and tagged TLB entries exist
313  * for the Virtual Page Table addresses in order to speed up this procedure,
314  * as well.)
315  *
316  * By reserving an ASN for kernel_lev1map users, we are guaranteeing that
317  * new pmaps will initially run with no TLB entries for user addresses
318  * or VPT mappings that map user page tables.  Since kernel_lev1map only
319  * contains mappings for kernel addresses, and since those mappings
320  * are always made with PG_ASM, sharing an ASN for kernel_lev1map users is
321  * safe (since PG_ASM mappings match any ASN).
322  *
323  * On processors that do not support ASNs, the PALcode invalidates
324  * the TLB and I-cache automatically on swpctx.  We still go
325  * through the motions of assigning an ASN (really, just refreshing
326  * the ASN generation in this particular case) to keep the logic sane
327  * in other parts of the code.
328  */
329 u_int	pmap_max_asn;		/* max ASN supported by the system */
330 				/* next ASN and current ASN generation */
331 struct pmap_asn_info pmap_asn_info[ALPHA_MAXPROCS];
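/*
 * A simplified sketch of how these counters are consumed; the real
 * logic lives in pmap_asn_alloc(), later in this file:
 *
 *	if (++pmap_asn_info[cpu].pma_asn > pmap_max_asn) {
 *		// Out of ASNs: flush all non-PG_ASM TLB entries and
 *		// the I-cache, start a new generation, and begin
 *		// handing out ASNs again above the reserved ASN.
 *		pmap_asn_info[cpu].pma_asngen++;
 *		pmap_asn_info[cpu].pma_asn = PMAP_ASN_RESERVED + 1;
 *	}
 *	pmap->pm_asni[cpu].pma_asn = pmap_asn_info[cpu].pma_asn;
 *	pmap->pm_asni[cpu].pma_asngen = pmap_asn_info[cpu].pma_asngen;
 */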
332 
333 /*
334  * Locking:
335  *
336  *	This pmap module uses two types of locks: read/write spin
337  *	locks and `simple' (spin) locks.  They are used as follows:
338  *
339  *	READ/WRITE SPIN LOCKS
340  *	---------------------
341  *
342  *	* pmap_main_lock - This lock is used to prevent deadlock and/or
343  *	  provide mutex access to the pmap module.  Most operations lock
344  *	  the pmap first, then PV lists as needed.  However, some operations,
345  *	  such as pmap_page_protect(), lock the PV lists before locking
346  *	  the pmaps.  To prevent deadlock, we require a mutex lock on the
347  *	  pmap module if locking in the PV->pmap direction.  This is
348  *	  implemented by acquiring a (shared) read lock on pmap_main_lock
349  *	  if locking pmap->PV and an (exclusive) write lock if locking in
350  *	  the PV->pmap direction.  Since only one thread can hold a write
351  *	  lock at a time, this provides the mutex.
352  *
353  *	SIMPLE LOCKS
354  *	------------
355  *
356  *	* pm_slock (per-pmap) - This lock protects all of the members
357  *	  of the pmap structure itself.  This lock will be asserted
358  *	  in pmap_activate() and pmap_deactivate() from a critical
359  *	  section of cpu_switch(), and must never sleep.  Note that
360  *	  in the case of the kernel pmap, interrupts which cause
361  *	  memory allocation *must* be blocked while this lock is
362  *	  asserted.
363  *
364  *	* pvh_slock (per-vm_page) - This lock protects the PV list
365  *	  for a specified managed page.
366  *
367  *	* pmap_all_pmaps_slock - This lock protects the global list of
368  *	  all pmaps.  Note that a pm_slock must never be held while this
369  *	  lock is held.
370  *
371  *	* pmap_growkernel_slock - This lock protects pmap_growkernel()
372  *	  and the virtual_end variable.
373  *
374  *	  There is a lock ordering constraint for pmap_growkernel_slock.
375  *	  pmap_growkernel() acquires the locks in the following order:
376  *
377  *		pmap_growkernel_slock -> pmap_all_pmaps_slock ->
378  *		    pmap->pm_slock
379  *
380  *	  But pmap_lev1map_create() is called with pmap->pm_slock held,
381  *	  and also needs to acquire the pmap_growkernel_slock.  So,
382  *	  we require that the caller of pmap_lev1map_create() (currently,
383  *	  the only caller is pmap_enter()) acquire pmap_growkernel_slock
384  *	  before acquiring pmap->pm_slock.
385  *
386  *	Address space number management (global ASN counters and per-pmap
387  *	ASN state) is not locked; it uses arrays of values indexed
388  *	per-processor.
389  *
390  *	All internal functions which operate on a pmap are called
391  *	with the pmap already locked by the caller (which will be
392  *	an interface function).
393  */
394 struct lock pmap_main_lock;
395 struct simplelock pmap_all_pmaps_slock;
396 struct simplelock pmap_growkernel_slock;
397 
398 #if defined(MULTIPROCESSOR) || defined(LOCKDEBUG)
399 #define	PMAP_MAP_TO_HEAD_LOCK() \
400 	spinlockmgr(&pmap_main_lock, LK_SHARED, NULL)
401 #define	PMAP_MAP_TO_HEAD_UNLOCK() \
402 	spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL)
403 #define	PMAP_HEAD_TO_MAP_LOCK() \
404 	spinlockmgr(&pmap_main_lock, LK_EXCLUSIVE, NULL)
405 #define	PMAP_HEAD_TO_MAP_UNLOCK() \
406 	spinlockmgr(&pmap_main_lock, LK_RELEASE, NULL)
407 #else
408 #define	PMAP_MAP_TO_HEAD_LOCK()		/* nothing */
409 #define	PMAP_MAP_TO_HEAD_UNLOCK()	/* nothing */
410 #define	PMAP_HEAD_TO_MAP_LOCK()		/* nothing */
411 #define	PMAP_HEAD_TO_MAP_UNLOCK()	/* nothing */
412 #endif /* MULTIPROCESSOR || LOCKDEBUG */
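/*
 * The discipline described above looks like this in practice (see the
 * functions later in this file).  An operation that starts from a
 * pmap, e.g. pmap_remove():
 *
 *	PMAP_MAP_TO_HEAD_LOCK();		// shared
 *	PMAP_LOCK(pmap);
 *	...
 *	PMAP_UNLOCK(pmap);
 *	PMAP_MAP_TO_HEAD_UNLOCK();
 *
 * An operation that starts from a page, e.g. pmap_page_protect():
 *
 *	PMAP_HEAD_TO_MAP_LOCK();		// exclusive
 *	simple_lock(&pg->mdpage.pvh_slock);
 *	PMAP_LOCK(pv->pv_pmap);
 *	...
 */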
413 
414 #if defined(MULTIPROCESSOR)
415 /*
416  * TLB Shootdown:
417  *
418  * When a mapping is changed in a pmap, the TLB entry corresponding to
419  * the virtual address must be invalidated on all processors.  In order
420  * to accomplish this on systems with multiple processors, messages are
421  * sent from the processor which performs the mapping change to all
422  * processors on which the pmap is active.  For other processors, the
423  * ASN generation number for that processor is invalidated, so that
424  * the next time the pmap is activated on that processor, a new ASN
425  * will be allocated (which implicitly invalidates all TLB entries).
426  *
427  * Note, we can use the pool allocator to allocate job entries
428  * since pool pages are mapped with K0SEG, not with the TLB.
429  */
430 struct pmap_tlb_shootdown_job {
431 	TAILQ_ENTRY(pmap_tlb_shootdown_job) pj_list;
432 	vaddr_t pj_va;			/* virtual address */
433 	pmap_t pj_pmap;			/* the pmap which maps the address */
434 	pt_entry_t pj_pte;		/* the PTE bits */
435 };
436 
437 struct pmap_tlb_shootdown_q {
438 	TAILQ_HEAD(, pmap_tlb_shootdown_job) pq_head;
439 	int pq_pte;			/* aggregate PTE bits */
440 	int pq_count;			/* number of pending requests */
441 	int pq_tbia;			/* pending global flush */
442 	struct simplelock pq_slock;	/* spin lock on queue */
443 } pmap_tlb_shootdown_q[ALPHA_MAXPROCS];
444 
445 #define	PSJQ_LOCK(pq, s)						\
446 do {									\
447 	s = splvm();							\
448 	simple_lock(&(pq)->pq_slock);					\
449 } while (0)
450 
451 #define	PSJQ_UNLOCK(pq, s)						\
452 do {									\
453 	simple_unlock(&(pq)->pq_slock);					\
454 	splx(s);							\
455 } while (0)
456 
457 /* If we have more pending jobs than this, we just nail the whole TLB. */
458 #define	PMAP_TLB_SHOOTDOWN_MAXJOBS	6
459 
460 struct pool pmap_tlb_shootdown_job_pool;
461 
462 void	pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *);
463 struct pmap_tlb_shootdown_job *pmap_tlb_shootdown_job_get
464 	    (struct pmap_tlb_shootdown_q *);
465 void	pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *,
466 	    struct pmap_tlb_shootdown_job *);
467 #endif /* MULTIPROCESSOR */
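/*
 * The machinery above is used roughly as follows (the implementations
 * appear later in this file): the CPU changing a mapping allocates a
 * pmap_tlb_shootdown_job from pmap_tlb_shootdown_job_pool, appends it
 * to the target CPU's pmap_tlb_shootdown_q under PSJQ_LOCK(), and
 * sends an IPI; the target drains its queue, invalidating each VA.
 * Once a queue holds more than PMAP_TLB_SHOOTDOWN_MAXJOBS entries it
 * is collapsed into a single global invalidate (pq_tbia), as the
 * comment above says.
 */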
468 
469 #define	PAGE_IS_MANAGED(pa)	(vm_physseg_find(atop(pa), NULL) != -1)
470 
471 /*
472  * Internal routines
473  */
474 void	alpha_protection_init(void);
475 void	pmap_do_remove(pmap_t, vaddr_t, vaddr_t, boolean_t);
476 boolean_t pmap_remove_mapping(pmap_t, vaddr_t, pt_entry_t *,
477 	    boolean_t, long);
478 void	pmap_changebit(struct vm_page *, pt_entry_t, pt_entry_t, long);
479 
480 /*
481  * PT page management functions.
482  */
483 int	pmap_lev1map_create(pmap_t, long);
484 void	pmap_lev1map_destroy(pmap_t, long);
485 int	pmap_ptpage_alloc(pmap_t, pt_entry_t *, int);
486 void	pmap_ptpage_free(pmap_t, pt_entry_t *);
487 void	pmap_l3pt_delref(pmap_t, vaddr_t, pt_entry_t *, long);
488 void	pmap_l2pt_delref(pmap_t, pt_entry_t *, pt_entry_t *, long);
489 void	pmap_l1pt_delref(pmap_t, pt_entry_t *, long);
490 
491 void	*pmap_l1pt_alloc(struct pool *, int);
492 void	pmap_l1pt_free(struct pool *, void *);
493 
494 struct pool_allocator pmap_l1pt_allocator = {
495 	pmap_l1pt_alloc, pmap_l1pt_free, 0,
496 };
497 
498 int	pmap_l1pt_ctor(void *, void *, int);
499 
500 /*
501  * PV table management functions.
502  */
503 int	pmap_pv_enter(pmap_t, struct vm_page *, vaddr_t, pt_entry_t *,
504 	    boolean_t);
505 void	pmap_pv_remove(pmap_t, struct vm_page *, vaddr_t, boolean_t);
506 void	*pmap_pv_page_alloc(struct pool *, int);
507 void	pmap_pv_page_free(struct pool *, void *);
508 
509 struct pool_allocator pmap_pv_page_allocator = {
510 	pmap_pv_page_alloc, pmap_pv_page_free, 0,
511 };
512 
513 #ifdef DEBUG
514 void	pmap_pv_dump(paddr_t);
515 #endif
516 
517 #define	pmap_pv_alloc()		pool_get(&pmap_pv_pool, PR_NOWAIT)
518 #define	pmap_pv_free(pv)	pool_put(&pmap_pv_pool, (pv))
519 
520 /*
521  * ASN management functions.
522  */
523 void	pmap_asn_alloc(pmap_t, long);
524 
525 /*
526  * Misc. functions.
527  */
528 boolean_t pmap_physpage_alloc(int, paddr_t *);
529 void	pmap_physpage_free(paddr_t);
530 int	pmap_physpage_addref(void *);
531 int	pmap_physpage_delref(void *);
532 
533 /*
534  * PMAP_ISACTIVE{,_TEST}:
535  *
536  *	Check to see if a pmap is active on the current processor.
537  */
538 #define	PMAP_ISACTIVE_TEST(pm, cpu_id)					\
539 	(((pm)->pm_cpus & (1UL << (cpu_id))) != 0)
540 
541 #if defined(DEBUG) && !defined(MULTIPROCESSOR)
542 #define	PMAP_ISACTIVE(pm, cpu_id)					\
543 ({									\
544 	/*								\
545 	 * XXX This test is not MP-safe.				\
546 	 */								\
547 	int isactive_ = PMAP_ISACTIVE_TEST(pm, cpu_id);			\
548 									\
549 	if (curproc != NULL && curproc->p_vmspace != NULL &&		\
550 	   (isactive_ ^ ((pm) == curproc->p_vmspace->vm_map.pmap)))	\
551 		panic("PMAP_ISACTIVE");					\
552 	(isactive_);							\
553 })
554 #else
555 #define	PMAP_ISACTIVE(pm, cpu_id)	PMAP_ISACTIVE_TEST(pm, cpu_id)
556 #endif /* DEBUG && !MULTIPROCESSOR */
557 
558 /*
559  * PMAP_ACTIVATE_ASN_SANITY:
560  *
561  *	DEBUG sanity checks for ASNs within PMAP_ACTIVATE.
562  */
563 #ifdef DEBUG
564 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)				\
565 do {									\
566 	struct pmap_asn_info *__pma = &(pmap)->pm_asni[(cpu_id)];	\
567 	struct pmap_asn_info *__cpma = &pmap_asn_info[(cpu_id)];	\
568 									\
569 	if ((pmap)->pm_lev1map == kernel_lev1map) {			\
570 		/*							\
571 		 * This pmap implementation also ensures that pmaps	\
572 		 * referencing kernel_lev1map use a reserved ASN	\
573 		 * to prevent the PALcode from servicing a TLB		\
574 		 * miss with the wrong PTE.				\
575 		 */							\
576 		if (__pma->pma_asn != PMAP_ASN_RESERVED) {		\
577 			printf("kernel_lev1map with non-reserved ASN "	\
578 			    "(line %d)\n", __LINE__);			\
579 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
580 		}							\
581 	} else {							\
582 		if (__pma->pma_asngen != __cpma->pma_asngen) {		\
583 			/*						\
584 			 * ASN generation number isn't valid!		\
585 			 */						\
586 			printf("pmap asngen %lu, current %lu "		\
587 			    "(line %d)\n",				\
588 			    __pma->pma_asngen,				\
589 			    __cpma->pma_asngen,				\
590 			    __LINE__);					\
591 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
592 		}							\
593 		if (__pma->pma_asn == PMAP_ASN_RESERVED) {		\
594 			/*						\
595 			 * DANGER WILL ROBINSON!  We're going to	\
596 			 * pollute the VPT TLB entries!			\
597 			 */						\
598 			printf("Using reserved ASN! (line %d)\n",	\
599 			    __LINE__);					\
600 			panic("PMAP_ACTIVATE_ASN_SANITY");		\
601 		}							\
602 	}								\
603 } while (0)
604 #else
605 #define	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id)	/* nothing */
606 #endif
607 
608 /*
609  * PMAP_ACTIVATE:
610  *
611  *	This is essentially the guts of pmap_activate(), without
612  *	ASN allocation.  This is used by pmap_activate(),
613  *	pmap_lev1map_create(), and pmap_lev1map_destroy().
614  *
615  *	This is called only when it is known that a pmap is "active"
616  *	on the current processor; the ASN must already be valid.
617  */
618 #define	PMAP_ACTIVATE(pmap, p, cpu_id)					\
619 do {									\
620 	PMAP_ACTIVATE_ASN_SANITY(pmap, cpu_id);				\
621 									\
622 	(p)->p_addr->u_pcb.pcb_hw.apcb_ptbr =				\
623 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)(pmap)->pm_lev1map) >> PGSHIFT; \
624 	(p)->p_addr->u_pcb.pcb_hw.apcb_asn = 				\
625 	    (pmap)->pm_asni[(cpu_id)].pma_asn;				\
626 									\
627 	if ((p) == curproc) {						\
628 		/*							\
629 		 * Page table base register has changed; switch to	\
630 		 * our own context again so that it will take effect.	\
631 		 */							\
632 		(void) alpha_pal_swpctx((u_long)p->p_md.md_pcbpaddr);	\
633 	}								\
634 } while (0)
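/*
 * Note that swpctx reloads both the PTBR and the ASN from the HWPCB,
 * so re-entering our own context, as above, is sufficient to make a
 * new lev1map or ASN take effect for the running process.
 */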
635 
636 #if defined(MULTIPROCESSOR)
637 /*
638  * PMAP_LEV1MAP_SHOOTDOWN:
639  *
640  *	"Shoot down" the level 1 map on other CPUs.
641  */
642 #define	PMAP_LEV1MAP_SHOOTDOWN(pmap, cpu_id)				\
643 do {									\
644 	u_long __cpumask = (pmap)->pm_cpus & ~(1UL << (cpu_id));	\
645 									\
646 	if (__cpumask != 0) {						\
647 		alpha_multicast_ipi(__cpumask,				\
648 		    ALPHA_IPI_PMAP_REACTIVATE);				\
649 		/* XXXSMP BARRIER OPERATION */				\
650 	}								\
651 } while (/*CONSTCOND*/0)
652 #else
653 #define	PMAP_LEV1MAP_SHOOTDOWN(pmap, cpu_id)	/* nothing */
654 #endif /* MULTIPROCESSOR */
655 
656 /*
657  * PMAP_SET_NEEDISYNC:
658  *
659  *	Mark that a user pmap needs an I-stream synch on its
660  *	way back out to userspace.
661  */
662 #define	PMAP_SET_NEEDISYNC(pmap)	(pmap)->pm_needisync = ~0UL
663 
664 /*
665  * PMAP_SYNC_ISTREAM:
666  *
667  *	Synchronize the I-stream for the specified pmap.  For user
668  *	pmaps, this is deferred until a process using the pmap returns
669  *	to userspace.
670  */
671 #if defined(MULTIPROCESSOR)
672 #define	PMAP_SYNC_ISTREAM_KERNEL()					\
673 do {									\
674 	alpha_pal_imb();						\
675 	alpha_broadcast_ipi(ALPHA_IPI_IMB);				\
676 } while (0)
677 
678 #define	PMAP_SYNC_ISTREAM_USER(pmap)					\
679 do {									\
680 	alpha_multicast_ipi((pmap)->pm_cpus, ALPHA_IPI_AST);		\
681 	/* for curcpu, will happen in userret() */			\
682 } while (0)
683 #else
684 #define	PMAP_SYNC_ISTREAM_KERNEL()	alpha_pal_imb()
685 #define	PMAP_SYNC_ISTREAM_USER(pmap)	/* will happen in userret() */
686 #endif /* MULTIPROCESSOR */
687 
688 #define	PMAP_SYNC_ISTREAM(pmap)						\
689 do {									\
690 	if ((pmap) == pmap_kernel())					\
691 		PMAP_SYNC_ISTREAM_KERNEL();				\
692 	else								\
693 		PMAP_SYNC_ISTREAM_USER(pmap);				\
694 } while (0)
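/*
 * For user pmaps the sync is thus deferred: PMAP_SET_NEEDISYNC() above
 * flags the pmap, and the IMB is issued on the way back out to user
 * mode (in userret(), as noted above); on MULTIPROCESSOR kernels the
 * ALPHA_IPI_AST prods the other CPUs running this pmap to take that
 * same path.
 */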
695 
696 /*
697  * PMAP_INVALIDATE_ASN:
698  *
699  *	Invalidate the specified pmap's ASN, so as to force allocation
700  *	of a new one the next time pmap_asn_alloc() is called.
701  *
702  *	NOTE: THIS MUST ONLY BE CALLED IF AT LEAST ONE OF THE FOLLOWING
703  *	CONDITIONS IS TRUE:
704  *
705  *		(1) The pmap references the global kernel_lev1map.
706  *
707  *		(2) The pmap is not active on the current processor.
708  */
709 #define	PMAP_INVALIDATE_ASN(pmap, cpu_id)				\
710 do {									\
711 	(pmap)->pm_asni[(cpu_id)].pma_asn = PMAP_ASN_RESERVED;		\
712 } while (0)
713 
714 /*
715  * PMAP_INVALIDATE_TLB:
716  *
717  *	Invalidate the TLB entry for the pmap/va pair.
718  */
719 #define	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id)		\
720 do {									\
721 	if ((hadasm) || (isactive)) {					\
722 		/*							\
723 		 * Simply invalidating the TLB entry and I-cache	\
724 		 * works in this case.					\
725 		 */							\
726 		ALPHA_TBIS((va));					\
727 	} else if ((pmap)->pm_asni[(cpu_id)].pma_asngen ==		\
728 		   pmap_asn_info[(cpu_id)].pma_asngen) {		\
729 		/*							\
730 		 * We can't directly invalidate the TLB entry		\
731 		 * in this case, so we have to force allocation		\
732 		 * of a new ASN the next time this pmap becomes		\
733 		 * active.						\
734 		 */							\
735 		PMAP_INVALIDATE_ASN((pmap), (cpu_id));			\
736 	}								\
737 		/*							\
738 		 * Nothing to do in this case; the next time the	\
739 		 * pmap becomes active on this processor, a new		\
740 		 * ASN will be allocated anyway.			\
741 		 */							\
742 } while (0)
743 
744 /*
745  * PMAP_KERNEL_PTE:
746  *
747  *	Get a kernel PTE.
748  *
749  *	If debugging, do a table walk.  If not debugging, just use
750  *	the Virtual Page Table, since all kernel page tables are
751  *	pre-allocated and mapped in.
752  */
753 #ifdef DEBUG
754 #define	PMAP_KERNEL_PTE(va)						\
755 ({									\
756 	pt_entry_t *l1pte_, *l2pte_;					\
757 									\
758 	l1pte_ = pmap_l1pte(pmap_kernel(), va);				\
759 	if (pmap_pte_v(l1pte_) == 0) {					\
760 		printf("kernel level 1 PTE not valid, va 0x%lx "	\
761 		    "(line %d)\n", (va), __LINE__);			\
762 		panic("PMAP_KERNEL_PTE");				\
763 	}								\
764 	l2pte_ = pmap_l2pte(pmap_kernel(), va, l1pte_);			\
765 	if (pmap_pte_v(l2pte_) == 0) {					\
766 		printf("kernel level 2 PTE not valid, va 0x%lx "	\
767 		    "(line %d)\n", (va), __LINE__);			\
768 		panic("PMAP_KERNEL_PTE");				\
769 	}								\
770 	pmap_l3pte(pmap_kernel(), va, l2pte_);				\
771 })
772 #else
773 #define	PMAP_KERNEL_PTE(va)	(&VPT[VPT_INDEX((va))])
774 #endif
775 
776 /*
777  * PMAP_SET_PTE:
778  *
779  *	Set a PTE to a specified value.
780  */
781 #define	PMAP_SET_PTE(ptep, val)	*(ptep) = (val)
782 
783 /*
784  * PMAP_STAT_{INCR,DECR}:
785  *
786  *	Increment or decrement a pmap statistic.
787  */
788 #define	PMAP_STAT_INCR(s, v)	atomic_add_ulong((unsigned long *)(&(s)), (v))
789 #define	PMAP_STAT_DECR(s, v)	atomic_sub_ulong((unsigned long *)(&(s)), (v))
790 
791 /*
792  * pmap_bootstrap:
793  *
794  *	Bootstrap the system to run with virtual memory.
795  *
796  *	Note: no locking is necessary in this function.
797  */
798 void
799 pmap_bootstrap(paddr_t ptaddr, u_int maxasn, u_long ncpuids)
800 {
801 	vsize_t lev2mapsize, lev3mapsize;
802 	pt_entry_t *lev2map, *lev3map;
803 	pt_entry_t pte;
804 	int i;
805 
806 #ifdef DEBUG
807 	if (pmapdebug & (PDB_FOLLOW|PDB_BOOTSTRAP))
808 		printf("pmap_bootstrap(0x%lx, %u)\n", ptaddr, maxasn);
809 #endif
810 
811 	/*
812 	 * Compute the number of pages kmem_map will have.
813 	 */
814 	kmeminit_nkmempages();
815 
816 	/*
817 	 * Figure out how many initial PTE's are necessary to map the
818 	 * kernel.  We also reserve space for kmem_alloc_pageable()
819 	 * for vm_fork().
820 	 */
821 	lev3mapsize = (VM_PHYS_SIZE + (ubc_nwins << ubc_winshift) +
822 		nbuf * MAXBSIZE + 16 * NCARGS + PAGER_MAP_SIZE) / NBPG +
823 		(maxproc * UPAGES) + nkmempages;
824 
825 #ifdef SYSVSHM
826 	lev3mapsize += shminfo.shmall;
827 #endif
828 	lev3mapsize = roundup(lev3mapsize, NPTEPG);
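	/*
	 * Rounding up to NPTEPG allocates whole level 3 page table pages.
	 * Illustrative numbers: with 8K pages and 8-byte PTEs, NPTEPG is
	 * 1024, so each level 3 page table page maps 8MB of KVA.
	 */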
829 
830 	/*
831 	 * Initialize `FYI' variables.  Note we're relying on
832 	 * the fact that BSEARCH sorts the vm_physmem[] array
833 	 * for us.
834 	 */
835 	avail_start = ptoa(vm_physmem[0].start);
836 	avail_end = ptoa(vm_physmem[vm_nphysseg - 1].end);
837 	virtual_end = VM_MIN_KERNEL_ADDRESS + lev3mapsize * PAGE_SIZE;
838 
839 #if 0
840 	printf("avail_start = 0x%lx\n", avail_start);
841 	printf("avail_end = 0x%lx\n", avail_end);
842 	printf("virtual_end = 0x%lx\n", virtual_end);
843 #endif
844 
845 	/*
846 	 * Allocate a level 1 PTE table for the kernel.
847 	 * This is always one page long.
848 	 * IF THIS IS NOT A MULTIPLE OF NBPG, ALL WILL GO TO HELL.
849 	 */
850 	kernel_lev1map = (pt_entry_t *)
851 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * NPTEPG);
852 
853 	/*
854 	 * Allocate a level 2 PTE table for the kernel.
855 	 * These must map all of the level3 PTEs.
856 	 * IF THIS IS NOT A MULTIPLE OF NBPG, ALL WILL GO TO HELL.
857 	 */
858 	lev2mapsize = roundup(howmany(lev3mapsize, NPTEPG), NPTEPG);
859 	lev2map = (pt_entry_t *)
860 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev2mapsize);
861 
862 	/*
863 	 * Allocate a level 3 PTE table for the kernel.
864 	 * Contains lev3mapsize PTEs.
865 	 */
866 	lev3map = (pt_entry_t *)
867 	    uvm_pageboot_alloc(sizeof(pt_entry_t) * lev3mapsize);
868 
869 	/*
870 	 * Set up level 1 page table
871 	 */
872 
873 	/* Map all of the level 2 pte pages */
874 	for (i = 0; i < howmany(lev2mapsize, NPTEPG); i++) {
875 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev2map) +
876 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
877 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
878 		kernel_lev1map[l1pte_index(VM_MIN_KERNEL_ADDRESS +
879 		    (i*PAGE_SIZE*NPTEPG*NPTEPG))] = pte;
880 	}
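	/*
	 * Each level 1 entry covers PAGE_SIZE * NPTEPG * NPTEPG bytes of
	 * address space (8GB with the illustrative 8K-page numbers above),
	 * hence that stride in the index calculation.
	 */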
881 
882 	/* Map the virtual page table */
883 	pte = (ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT)
884 	    << PG_SHIFT;
885 	pte |= PG_V | PG_KRE | PG_KWE; /* NOTE NO ASM */
886 	kernel_lev1map[l1pte_index(VPTBASE)] = pte;
887 	VPT = (pt_entry_t *)VPTBASE;
888 
889 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
890     {
891 	extern pt_entry_t prom_pte;			/* XXX */
892 	extern int prom_mapped;				/* XXX */
893 
894 	if (pmap_uses_prom_console()) {
895 		/*
896 		 * XXX Save old PTE so we can remap the PROM, if
897 		 * XXX necessary.
898 		 */
899 		prom_pte = *(pt_entry_t *)ptaddr & ~PG_ASM;
900 	}
901 	prom_mapped = 0;
902 
903 	/*
904 	 * Actually, this code lies.  The prom is still mapped, and will
905 	 * remain so until the context switch after alpha_init() returns.
906 	 */
907     }
908 #endif
909 
910 	/*
911 	 * Set up level 2 page table.
912 	 */
913 	/* Map all of the level 3 pte pages */
914 	for (i = 0; i < howmany(lev3mapsize, NPTEPG); i++) {
915 		pte = (ALPHA_K0SEG_TO_PHYS(((vaddr_t)lev3map) +
916 		    (i*PAGE_SIZE)) >> PGSHIFT) << PG_SHIFT;
917 		pte |= PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
918 		lev2map[l2pte_index(VM_MIN_KERNEL_ADDRESS+
919 		    (i*PAGE_SIZE*NPTEPG))] = pte;
920 	}
921 
922 	/* Initialize the pmap_growkernel_slock. */
923 	simple_lock_init(&pmap_growkernel_slock);
924 
925 	/*
926 	 * Set up level three page table (lev3map)
927 	 */
928 	/* Nothing to do; it's already zero'd */
929 
930 	/*
931 	 * Initialize the pmap pools and list.
932 	 */
933 	pmap_ncpuids = ncpuids;
934 	pool_init(&pmap_pmap_pool,
935 	    PMAP_SIZEOF(pmap_ncpuids), 0, 0, 0, "pmappl",
936 	    &pool_allocator_nointr);
937 	pool_init(&pmap_l1pt_pool, PAGE_SIZE, 0, 0, 0, "l1ptpl",
938 	    &pmap_l1pt_allocator);
939 	pool_cache_init(&pmap_l1pt_cache, &pmap_l1pt_pool, pmap_l1pt_ctor,
940 	    NULL, NULL);
941 	pool_init(&pmap_pv_pool, sizeof(struct pv_entry), 0, 0, 0, "pvpl",
942 	    &pmap_pv_page_allocator);
943 
944 	TAILQ_INIT(&pmap_all_pmaps);
945 
946 	/*
947 	 * Initialize the ASN logic.
948 	 */
949 	pmap_max_asn = maxasn;
950 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
951 		pmap_asn_info[i].pma_asn = 1;
952 		pmap_asn_info[i].pma_asngen = 0;
953 	}
954 
955 	/*
956 	 * Initialize the locks.
957 	 */
958 	spinlockinit(&pmap_main_lock, "pmaplk", 0);
959 	simple_lock_init(&pmap_all_pmaps_slock);
960 
961 	/*
962 	 * Initialize kernel pmap.  Note that all kernel mappings
963 	 * have PG_ASM set, so the ASN doesn't really matter for
964 	 * the kernel pmap.  Also, since the kernel pmap always
965 	 * references kernel_lev1map, it always has an invalid ASN
966 	 * generation.
967 	 */
968 	memset(pmap_kernel(), 0, sizeof(struct pmap));
969 	pmap_kernel()->pm_lev1map = kernel_lev1map;
970 	pmap_kernel()->pm_count = 1;
971 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
972 		pmap_kernel()->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
973 		pmap_kernel()->pm_asni[i].pma_asngen =
974 		    pmap_asn_info[i].pma_asngen;
975 	}
976 	simple_lock_init(&pmap_kernel()->pm_slock);
977 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap_kernel(), pm_list);
978 
979 #if defined(MULTIPROCESSOR)
980 	/*
981 	 * Initialize the TLB shootdown queues.
982 	 */
983 	pool_init(&pmap_tlb_shootdown_job_pool,
984 	    sizeof(struct pmap_tlb_shootdown_job), 0, 0, 0, "pmaptlbpl", NULL);
985 	for (i = 0; i < ALPHA_MAXPROCS; i++) {
986 		TAILQ_INIT(&pmap_tlb_shootdown_q[i].pq_head);
987 		simple_lock_init(&pmap_tlb_shootdown_q[i].pq_slock);
988 	}
989 #endif
990 
991 	/*
992 	 * Set up proc0's PCB such that the ptbr points to the right place
993 	 * and has the kernel pmap's (really unused) ASN.
994 	 */
995 	proc0.p_addr->u_pcb.pcb_hw.apcb_ptbr =
996 	    ALPHA_K0SEG_TO_PHYS((vaddr_t)kernel_lev1map) >> PGSHIFT;
997 	proc0.p_addr->u_pcb.pcb_hw.apcb_asn =
998 	    pmap_kernel()->pm_asni[cpu_number()].pma_asn;
999 
1000 	/*
1001 	 * Mark the kernel pmap `active' on this processor.
1002 	 */
1003 	atomic_setbits_ulong(&pmap_kernel()->pm_cpus,
1004 	    (1UL << cpu_number()));
1005 }
1006 
1007 #ifdef _PMAP_MAY_USE_PROM_CONSOLE
1008 int
1009 pmap_uses_prom_console(void)
1010 {
1011 
1012 	return (cputype == ST_DEC_21000);
1013 }
1014 #endif /* _PMAP_MAY_USE_PROM_CONSOLE */
1015 
1016 /*
1017  * pmap_virtual_space:		[ INTERFACE ]
1018  *
1019  *	Define the initial bounds of the kernel virtual address space.
1020  */
1021 void
1022 pmap_virtual_space(vaddr_t *vstartp, vaddr_t *vendp)
1023 {
1024 
1025 	*vstartp = VM_MIN_KERNEL_ADDRESS;	/* kernel is in K0SEG */
1026 	*vendp = VM_MAX_KERNEL_ADDRESS;		/* we use pmap_growkernel */
1027 }
1028 
1029 /*
1030  * pmap_steal_memory:		[ INTERFACE ]
1031  *
1032  *	Bootstrap memory allocator (alternative to vm_bootstrap_steal_memory()).
1033  *	This function allows for early dynamic memory allocation until the
1034  *	virtual memory system has been bootstrapped.  After that point, either
1035  *	kmem_alloc or malloc should be used.  This function works by stealing
1036  *	pages from the (to be) managed page pool, then implicitly mapping the
1037  *	pages (by using their k0seg addresses) and zeroing them.
1038  *
1039  *	It may be used once the physical memory segments have been pre-loaded
1040  *	into the vm_physmem[] array.  Early memory allocation MUST use this
1041  *	interface!  This cannot be used after vm_page_startup(), and will
1042  *	generate a panic if tried.
1043  *
1044  *	Note that this memory will never be freed, and in essence it is wired
1045  *	down.
1046  *
1047  *	We must adjust *vstartp and/or *vendp iff we use address space
1048  *	from the kernel virtual address range defined by pmap_virtual_space().
1049  *
1050  *	Note: no locking is necessary in this function.
1051  */
1052 vaddr_t
1053 pmap_steal_memory(vsize_t size, vaddr_t *vstartp, vaddr_t *vendp)
1054 {
1055 	int bank, npgs, x;
1056 	vaddr_t va;
1057 	paddr_t pa;
1058 
1059 	size = round_page(size);
1060 	npgs = atop(size);
1061 
1062 #if 0
1063 	printf("PSM: size 0x%lx (npgs 0x%x)\n", size, npgs);
1064 #endif
1065 
1066 	for (bank = 0; bank < vm_nphysseg; bank++) {
1067 		if (uvm.page_init_done == TRUE)
1068 			panic("pmap_steal_memory: called _after_ bootstrap");
1069 
1070 #if 0
1071 		printf("     bank %d: avail_start 0x%lx, start 0x%lx, "
1072 		    "avail_end 0x%lx\n", bank, vm_physmem[bank].avail_start,
1073 		    vm_physmem[bank].start, vm_physmem[bank].avail_end);
1074 #endif
1075 
1076 		if (vm_physmem[bank].avail_start != vm_physmem[bank].start ||
1077 		    vm_physmem[bank].avail_start >= vm_physmem[bank].avail_end)
1078 			continue;
1079 
1080 #if 0
1081 		printf("             avail_end - avail_start = 0x%lx\n",
1082 		    vm_physmem[bank].avail_end - vm_physmem[bank].avail_start);
1083 #endif
1084 
1085 		if ((vm_physmem[bank].avail_end - vm_physmem[bank].avail_start)
1086 		    < npgs)
1087 			continue;
1088 
1089 		/*
1090 		 * There are enough pages here; steal them!
1091 		 */
1092 		pa = ptoa(vm_physmem[bank].avail_start);
1093 		vm_physmem[bank].avail_start += npgs;
1094 		vm_physmem[bank].start += npgs;
1095 
1096 		/*
1097 		 * Have we used up this segment?
1098 		 */
1099 		if (vm_physmem[bank].avail_start == vm_physmem[bank].end) {
1100 			if (vm_nphysseg == 1)
1101 				panic("pmap_steal_memory: out of memory!");
1102 
1103 			/* Remove this segment from the list. */
1104 			vm_nphysseg--;
1105 			for (x = bank; x < vm_nphysseg; x++) {
1106 				/* structure copy */
1107 				vm_physmem[x] = vm_physmem[x + 1];
1108 			}
1109 		}
1110 
1111 		va = ALPHA_PHYS_TO_K0SEG(pa);
1112 		memset((caddr_t)va, 0, size);
1113 		pmap_pages_stolen += npgs;
1114 		return (va);
1115 	}
1116 
1117 	/*
1118 	 * If we got here, there was no memory left.
1119 	 */
1120 	panic("pmap_steal_memory: no memory to steal");
1121 }
1122 
1123 /*
1124  * pmap_init:			[ INTERFACE ]
1125  *
1126  *	Initialize the pmap module.  Called by vm_init(), to initialize any
1127  *	Initialize the pmap module.  Called by vm_init() to initialize any
1128  *
1129  *	Note: no locking is necessary in this function.
1130  */
1131 void
1132 pmap_init(void)
1133 {
1134 
1135 #ifdef DEBUG
1136         if (pmapdebug & PDB_FOLLOW)
1137                 printf("pmap_init()\n");
1138 #endif
1139 
1140 	/* initialize protection array */
1141 	alpha_protection_init();
1142 
1143 	/*
1144 	 * Set a low water mark on the pv_entry pool, so that we are
1145 	 * more likely to have these around even in extreme memory
1146 	 * starvation.
1147 	 */
1148 	pool_setlowat(&pmap_pv_pool, pmap_pv_lowat);
1149 
1150 	/*
1151 	 * Now it is safe to enable pv entry recording.
1152 	 */
1153 	pmap_initialized = TRUE;
1154 
1155 #if 0
1156 	for (bank = 0; bank < vm_nphysseg; bank++) {
1157 		printf("bank %d\n", bank);
1158 		printf("\tstart = 0x%x\n", ptoa(vm_physmem[bank].start));
1159 		printf("\tend = 0x%x\n", ptoa(vm_physmem[bank].end));
1160 		printf("\tavail_start = 0x%x\n",
1161 		    ptoa(vm_physmem[bank].avail_start));
1162 		printf("\tavail_end = 0x%x\n",
1163 		    ptoa(vm_physmem[bank].avail_end));
1164 	}
1165 #endif
1166 }
1167 
1168 /*
1169  * pmap_create:			[ INTERFACE ]
1170  *
1171  *	Create and return a physical map.
1172  *
1173  *	Note: no locking is necessary in this function.
1174  */
1175 pmap_t
1176 pmap_create(void)
1177 {
1178 	pmap_t pmap;
1179 	int i;
1180 
1181 #ifdef DEBUG
1182 	if (pmapdebug & (PDB_FOLLOW|PDB_CREATE))
1183 		printf("pmap_create()\n");
1184 #endif
1185 
1186 	pmap = pool_get(&pmap_pmap_pool, PR_WAITOK);
1187 	memset(pmap, 0, sizeof(*pmap));
1188 
1189 	/*
1190 	 * Defer allocation of a new level 1 page table until
1191 	 * the first new mapping is entered; just take a reference
1192 	 * to the global kernel_lev1map.
1193 	 */
1194 	pmap->pm_lev1map = kernel_lev1map;
1195 
1196 	pmap->pm_count = 1;
1197 	for (i = 0; i < pmap_ncpuids; i++) {
1198 		pmap->pm_asni[i].pma_asn = PMAP_ASN_RESERVED;
1199 		/* XXX Locking? */
1200 		pmap->pm_asni[i].pma_asngen = pmap_asn_info[i].pma_asngen;
1201 	}
1202 	simple_lock_init(&pmap->pm_slock);
1203 
1204 	simple_lock(&pmap_all_pmaps_slock);
1205 	TAILQ_INSERT_TAIL(&pmap_all_pmaps, pmap, pm_list);
1206 	simple_unlock(&pmap_all_pmaps_slock);
1207 
1208 	return (pmap);
1209 }
1210 
1211 /*
1212  * pmap_destroy:		[ INTERFACE ]
1213  *
1214  *	Drop the reference count on the specified pmap, releasing
1215  *	all resources if the reference count drops to zero.
1216  */
1217 void
1218 pmap_destroy(pmap_t pmap)
1219 {
1220 	int refs;
1221 
1222 #ifdef DEBUG
1223 	if (pmapdebug & PDB_FOLLOW)
1224 		printf("pmap_destroy(%p)\n", pmap);
1225 #endif
1226 
1227 	PMAP_LOCK(pmap);
1228 	refs = --pmap->pm_count;
1229 	PMAP_UNLOCK(pmap);
1230 
1231 	if (refs > 0)
1232 		return;
1233 
1234 	/*
1235 	 * Remove it from the global list of all pmaps.
1236 	 */
1237 	simple_lock(&pmap_all_pmaps_slock);
1238 	TAILQ_REMOVE(&pmap_all_pmaps, pmap, pm_list);
1239 	simple_unlock(&pmap_all_pmaps_slock);
1240 
1241 #ifdef DIAGNOSTIC
1242 	/*
1243 	 * Since the pmap is supposed to contain no valid
1244 	 * mappings at this point, this should never happen.
1245 	 */
1246 	if (pmap->pm_lev1map != kernel_lev1map)
1247 		panic("pmap_destroy: pmap still contains valid mappings");
1248 #endif
1249 
1250 	pool_put(&pmap_pmap_pool, pmap);
1251 }
1252 
1253 /*
1254  * pmap_reference:		[ INTERFACE ]
1255  *
1256  *	Add a reference to the specified pmap.
1257  */
1258 void
1259 pmap_reference(pmap_t pmap)
1260 {
1261 
1262 #ifdef DEBUG
1263 	if (pmapdebug & PDB_FOLLOW)
1264 		printf("pmap_reference(%p)\n", pmap);
1265 #endif
1266 
1267 	PMAP_LOCK(pmap);
1268 	pmap->pm_count++;
1269 	PMAP_UNLOCK(pmap);
1270 }
1271 
1272 /*
1273  * pmap_remove:			[ INTERFACE ]
1274  *
1275  *	Remove the given range of addresses from the specified map.
1276  *
1277  *	It is assumed that the start and end are properly
1278  *	rounded to the page size.
1279  */
1280 void
1281 pmap_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva)
1282 {
1283 
1284 #ifdef DEBUG
1285 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1286 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1287 #endif
1288 
1289 	pmap_do_remove(pmap, sva, eva, TRUE);
1290 }
1291 
1292 /*
1293  * pmap_do_remove:
1294  *
1295  *	This actually removes the range of addresses from the
1296  *	specified map.  It is used by pmap_collect() (does not
1297  *	want to remove wired mappings) and pmap_remove() (does
1298  *	want to remove wired mappings).
1299  */
1300 void
1301 pmap_do_remove(pmap_t pmap, vaddr_t sva, vaddr_t eva, boolean_t dowired)
1302 {
1303 	pt_entry_t *l1pte, *l2pte, *l3pte;
1304 	pt_entry_t *saved_l1pte, *saved_l2pte, *saved_l3pte;
1305 	vaddr_t l1eva, l2eva, vptva;
1306 	boolean_t needisync = FALSE;
1307 	long cpu_id = cpu_number();
1308 
1309 #ifdef DEBUG
1310 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
1311 		printf("pmap_remove(%p, %lx, %lx)\n", pmap, sva, eva);
1312 #endif
1313 
1314 	/*
1315 	 * If this is the kernel pmap, we can use a faster method
1316 	 * for accessing the PTEs (since the PT pages are always
1317 	 * resident).
1318 	 *
1319 	 * Note that this routine should NEVER be called from an
1320 	 * interrupt context; pmap_kremove() is used for that.
1321 	 */
1322 	if (pmap == pmap_kernel()) {
1323 		PMAP_MAP_TO_HEAD_LOCK();
1324 		PMAP_LOCK(pmap);
1325 
1326 		KASSERT(dowired == TRUE);
1327 
1328 		while (sva < eva) {
1329 			l3pte = PMAP_KERNEL_PTE(sva);
1330 			if (pmap_pte_v(l3pte)) {
1331 #ifdef DIAGNOSTIC
1332 				if (PAGE_IS_MANAGED(pmap_pte_pa(l3pte)) &&
1333 				    pmap_pte_pv(l3pte) == 0)
1334 					panic("pmap_remove: managed page "
1335 					    "without PG_PVLIST for 0x%lx",
1336 					    sva);
1337 #endif
1338 				needisync |= pmap_remove_mapping(pmap, sva,
1339 				    l3pte, TRUE, cpu_id);
1340 			}
1341 			sva += PAGE_SIZE;
1342 		}
1343 
1344 		PMAP_UNLOCK(pmap);
1345 		PMAP_MAP_TO_HEAD_UNLOCK();
1346 
1347 		if (needisync)
1348 			PMAP_SYNC_ISTREAM_KERNEL();
1349 		return;
1350 	}
1351 
1352 #ifdef DIAGNOSTIC
1353 	if (sva > VM_MAXUSER_ADDRESS || eva > VM_MAXUSER_ADDRESS)
1354 		panic("pmap_remove: (0x%lx - 0x%lx) user pmap, kernel "
1355 		    "address range", sva, eva);
1356 #endif
1357 
1358 	PMAP_MAP_TO_HEAD_LOCK();
1359 	PMAP_LOCK(pmap);
1360 
1361 	/*
1362 	 * If we're already referencing the kernel_lev1map, there
1363 	 * is no work for us to do.
1364 	 */
1365 	if (pmap->pm_lev1map == kernel_lev1map)
1366 		goto out;
1367 
1368 	saved_l1pte = l1pte = pmap_l1pte(pmap, sva);
1369 
1370 	/*
1371 	 * Add a reference to the L1 table so it won't get
1372 	 * removed from under us.
1373 	 */
1374 	pmap_physpage_addref(saved_l1pte);
1375 
1376 	for (; sva < eva; sva = l1eva, l1pte++) {
1377 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1378 		if (pmap_pte_v(l1pte)) {
1379 			saved_l2pte = l2pte = pmap_l2pte(pmap, sva, l1pte);
1380 
1381 			/*
1382 			 * Add a reference to the L2 table so it won't
1383 			 * get removed from under us.
1384 			 */
1385 			pmap_physpage_addref(saved_l2pte);
1386 
1387 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1388 				l2eva =
1389 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1390 				if (pmap_pte_v(l2pte)) {
1391 					saved_l3pte = l3pte =
1392 					    pmap_l3pte(pmap, sva, l2pte);
1393 
1394 					/*
1395 					 * Add a reference to the L3 table so
1396 					 * it won't get removed from under us.
1397 					 */
1398 					pmap_physpage_addref(saved_l3pte);
1399 
1400 					/*
1401 					 * Remember this sva; if the L3 table
1402 					 * gets removed, we need to invalidate
1403 					 * the VPT TLB entry for it.
1404 					 */
1405 					vptva = sva;
1406 
1407 					for (; sva < l2eva && sva < eva;
1408 					     sva += PAGE_SIZE, l3pte++) {
1409 						if (pmap_pte_v(l3pte) &&
1410 						    (dowired == TRUE ||
1411 						     pmap_pte_w(l3pte) == 0)) {
1412 							needisync |=
1413 							    pmap_remove_mapping(
1414 								pmap, sva,
1415 								l3pte, TRUE,
1416 								cpu_id);
1417 						}
1418 					}
1419 
1420 					/*
1421 					 * Remove the reference to the L3
1422 					 * table that we added above.  This
1423 					 * may free the L3 table.
1424 					 */
1425 					pmap_l3pt_delref(pmap, vptva,
1426 					    saved_l3pte, cpu_id);
1427 				}
1428 			}
1429 
1430 			/*
1431 			 * Remove the reference to the L2 table that we
1432 			 * added above.  This may free the L2 table.
1433 			 */
1434 			pmap_l2pt_delref(pmap, l1pte, saved_l2pte, cpu_id);
1435 		}
1436 	}
1437 
1438 	/*
1439 	 * Remove the reference to the L1 table that we added above.
1440 	 * This may free the L1 table.
1441 	 */
1442 	pmap_l1pt_delref(pmap, saved_l1pte, cpu_id);
1443 
1444 	if (needisync)
1445 		PMAP_SYNC_ISTREAM_USER(pmap);
1446 
1447  out:
1448 	PMAP_UNLOCK(pmap);
1449 	PMAP_MAP_TO_HEAD_UNLOCK();
1450 }
1451 
1452 /*
1453  * pmap_page_protect:		[ INTERFACE ]
1454  *
1455  *	Lower the permission for all mappings to a given page to
1456  *	the permissions specified.
1457  */
1458 void
1459 pmap_page_protect(struct vm_page *pg, vm_prot_t prot)
1460 {
1461 	pmap_t pmap;
1462 	pv_entry_t pv, nextpv;
1463 	boolean_t needkisync = FALSE;
1464 	long cpu_id = cpu_number();
1465 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1466 #ifdef DEBUG
1467 	paddr_t pa = VM_PAGE_TO_PHYS(pg);
1468 
1469 
1470 	if ((pmapdebug & (PDB_FOLLOW|PDB_PROTECT)) ||
1471 	    (prot == VM_PROT_NONE && (pmapdebug & PDB_REMOVE)))
1472 		printf("pmap_page_protect(%p, %x)\n", pg, prot);
1473 #endif
1474 
1475 	switch (prot) {
1476 	case VM_PROT_READ|VM_PROT_WRITE|VM_PROT_EXECUTE:
1477 	case VM_PROT_READ|VM_PROT_WRITE:
1478 		return;
1479 
1480 	/* copy_on_write */
1481 	case VM_PROT_READ|VM_PROT_EXECUTE:
1482 	case VM_PROT_READ:
1483 		PMAP_HEAD_TO_MAP_LOCK();
1484 		simple_lock(&pg->mdpage.pvh_slock);
1485 		for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
1486 			PMAP_LOCK(pv->pv_pmap);
1487 			if (*pv->pv_pte & (PG_KWE | PG_UWE)) {
1488 				*pv->pv_pte &= ~(PG_KWE | PG_UWE);
1489 				PMAP_INVALIDATE_TLB(pv->pv_pmap, pv->pv_va,
1490 				    pmap_pte_asm(pv->pv_pte),
1491 				    PMAP_ISACTIVE(pv->pv_pmap, cpu_id), cpu_id);
1492 				PMAP_TLB_SHOOTDOWN(pv->pv_pmap, pv->pv_va,
1493 				    pmap_pte_asm(pv->pv_pte));
1494 			}
1495 			PMAP_UNLOCK(pv->pv_pmap);
1496 		}
1497 		simple_unlock(&pg->mdpage.pvh_slock);
1498 		PMAP_HEAD_TO_MAP_UNLOCK();
1499 		PMAP_TLB_SHOOTNOW();
1500 		return;
1501 
1502 	/* remove_all */
1503 	default:
1504 		break;
1505 	}
1506 
1507 	PMAP_HEAD_TO_MAP_LOCK();
1508 	simple_lock(&pg->mdpage.pvh_slock);
1509 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = nextpv) {
1510 		nextpv = pv->pv_next;
1511 		pmap = pv->pv_pmap;
1512 
1513 		PMAP_LOCK(pmap);
1514 #ifdef DEBUG
1515 		if (pmap_pte_v(pmap_l2pte(pv->pv_pmap, pv->pv_va, NULL)) == 0 ||
1516 		    pmap_pte_pa(pv->pv_pte) != pa)
1517 			panic("pmap_page_protect: bad mapping");
1518 #endif
1519 		if (pmap_remove_mapping(pmap, pv->pv_va, pv->pv_pte,
1520 		    FALSE, cpu_id) == TRUE) {
1521 			if (pmap == pmap_kernel())
1522 				needkisync |= TRUE;
1523 			else
1524 				PMAP_SYNC_ISTREAM_USER(pmap);
1525 		}
1526 		PMAP_UNLOCK(pmap);
1527 	}
1528 
1529 	if (needkisync)
1530 		PMAP_SYNC_ISTREAM_KERNEL();
1531 
1532 	simple_unlock(&pg->mdpage.pvh_slock);
1533 	PMAP_HEAD_TO_MAP_UNLOCK();
1534 }
1535 
1536 /*
1537  * pmap_protect:		[ INTERFACE ]
1538  *
1539  *	Set the physical protection on the specified range of this map
1540  *	as requested.
1541  */
1542 void
1543 pmap_protect(pmap_t pmap, vaddr_t sva, vaddr_t eva, vm_prot_t prot)
1544 {
1545 	pt_entry_t *l1pte, *l2pte, *l3pte, bits;
1546 	boolean_t isactive;
1547 	boolean_t hadasm;
1548 	vaddr_t l1eva, l2eva;
1549 	long cpu_id = cpu_number();
1550 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1551 
1552 #ifdef DEBUG
1553 	if (pmapdebug & (PDB_FOLLOW|PDB_PROTECT))
1554 		printf("pmap_protect(%p, %lx, %lx, %x)\n",
1555 		    pmap, sva, eva, prot);
1556 #endif
1557 
1558 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1559 		pmap_remove(pmap, sva, eva);
1560 		return;
1561 	}
1562 
1563 	if (prot & VM_PROT_WRITE)
1564 		return;
1565 
1566 	PMAP_LOCK(pmap);
1567 
1568 	bits = pte_prot(pmap, prot);
1569 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1570 
1571 	l1pte = pmap_l1pte(pmap, sva);
1572 	for (; sva < eva; sva = l1eva, l1pte++) {
1573 		l1eva = alpha_trunc_l1seg(sva) + ALPHA_L1SEG_SIZE;
1574 		if (pmap_pte_v(l1pte)) {
1575 			l2pte = pmap_l2pte(pmap, sva, l1pte);
1576 			for (; sva < l1eva && sva < eva; sva = l2eva, l2pte++) {
1577 				l2eva =
1578 				    alpha_trunc_l2seg(sva) + ALPHA_L2SEG_SIZE;
1579 				if (pmap_pte_v(l2pte)) {
1580 					l3pte = pmap_l3pte(pmap, sva, l2pte);
1581 					for (; sva < l2eva && sva < eva;
1582 					     sva += PAGE_SIZE, l3pte++) {
1583 						if (pmap_pte_v(l3pte) &&
1584 						    pmap_pte_prot_chg(l3pte,
1585 						    bits)) {
1586 							hadasm =
1587 							   (pmap_pte_asm(l3pte)
1588 							    != 0);
1589 							pmap_pte_set_prot(l3pte,
1590 							   bits);
1591 							PMAP_INVALIDATE_TLB(
1592 							   pmap, sva, hadasm,
1593 							   isactive, cpu_id);
1594 							PMAP_TLB_SHOOTDOWN(
1595 							   pmap, sva,
1596 							   hadasm ? PG_ASM : 0);
1597 						}
1598 					}
1599 				}
1600 			}
1601 		}
1602 	}
1603 
1604 	PMAP_TLB_SHOOTNOW();
1605 
1606 	if (prot & VM_PROT_EXECUTE)
1607 		PMAP_SYNC_ISTREAM(pmap);
1608 
1609 	PMAP_UNLOCK(pmap);
1610 }
1611 
1612 /*
1613  * pmap_enter:			[ INTERFACE ]
1614  *
1615  *	Insert the given physical page (pa) at
1616  *	the specified virtual address (va) in the
1617  *	target physical map with the protection requested.
1618  *
1619  *	If specified, the page will be wired down, meaning
1620  *	that the related pte can not be reclaimed.
1621  *
1622  *	Note:  This is the only routine which MAY NOT lazy-evaluate
1623  *	or lose information.  That is, this routine must actually
1624  *	insert this page into the given map NOW.
1625  */
1626 int
1627 pmap_enter(pmap_t pmap, vaddr_t va, paddr_t pa, vm_prot_t prot, int flags)
1628 {
1629 	struct vm_page *pg;			/* if != NULL, managed page */
1630 	pt_entry_t *pte, npte, opte;
1631 	paddr_t opa;
1632 	boolean_t tflush = TRUE;
1633 	boolean_t hadasm = FALSE;	/* XXX gcc -Wuninitialized */
1634 	boolean_t needisync = FALSE;
1635 	boolean_t setisync = FALSE;
1636 	boolean_t isactive;
1637 	boolean_t wired;
1638 	long cpu_id = cpu_number();
1639 	int error = 0;
1640 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1641 
1642 #ifdef DEBUG
1643 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1644 		printf("pmap_enter(%p, %lx, %lx, %x, %x)\n",
1645 		       pmap, va, pa, prot, flags);
1646 #endif
1647 	pg = PHYS_TO_VM_PAGE(pa);
1648 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
1649 	wired = (flags & PMAP_WIRED) != 0;
1650 
1651 	/*
1652 	 * Determine what we need to do about the I-stream.  If
1653 	 * VM_PROT_EXECUTE is set, we mark a user pmap as needing
1654 	 * an I-sync on the way back out to userspace.  We always
1655 	 * need an immediate I-sync for the kernel pmap.
1656 	 */
1657 	if (prot & VM_PROT_EXECUTE) {
1658 		if (pmap == pmap_kernel())
1659 			needisync = TRUE;
1660 		else {
1661 			setisync = TRUE;
1662 			needisync = (pmap->pm_cpus != 0);
1663 		}
1664 	}
1665 
1666 	PMAP_MAP_TO_HEAD_LOCK();
1667 	PMAP_LOCK(pmap);
1668 
1669 	if (pmap == pmap_kernel()) {
1670 #ifdef DIAGNOSTIC
1671 		/*
1672 		 * Sanity check the virtual address.
1673 		 */
1674 		if (va < VM_MIN_KERNEL_ADDRESS)
1675 			panic("pmap_enter: kernel pmap, invalid va 0x%lx", va);
1676 #endif
1677 		pte = PMAP_KERNEL_PTE(va);
1678 	} else {
1679 		pt_entry_t *l1pte, *l2pte;
1680 
1681 #ifdef DIAGNOSTIC
1682 		/*
1683 		 * Sanity check the virtual address.
1684 		 */
1685 		if (va >= VM_MAXUSER_ADDRESS)
1686 			panic("pmap_enter: user pmap, invalid va 0x%lx", va);
1687 #endif
1688 
1689 		/*
1690 		 * If we're still referencing the global kernel_lev1map,
1691 		 * create a new level 1 page table.  A reference will be
1692 		 * added to the level 1 table when the level 2 table is
1693 		 * created.
1694 		 */
1695 		if (pmap->pm_lev1map == kernel_lev1map) {
1696 			/*
1697 			 * XXX Yuck.
1698 			 * We have to unlock the pmap, lock the
1699 			 * pmap_growkernel_slock, and re-lock the
1700 			 * pmap here, in order to avoid a deadlock
1701 			 * with pmap_growkernel().
1702 			 *
1703 			 * Because we unlock, we have a window for
1704 			 * someone else to add a mapping, thus creating
1705 			 * a level 1 map; pmap_lev1map_create() checks
1706 			 * for this condition.
1707 			 */
1708 			PMAP_UNLOCK(pmap);
1709 			simple_lock(&pmap_growkernel_slock);
1710 			PMAP_LOCK(pmap);
1711 			error = pmap_lev1map_create(pmap, cpu_id);
1712 			simple_unlock(&pmap_growkernel_slock);
1713 			if (error) {
1714 				if (flags & PMAP_CANFAIL)
1715 					goto out;
1716 				panic("pmap_enter: unable to create lev1map");
1717 			}
1718 		}
1719 
1720 		/*
1721 		 * Check to see if the level 1 PTE is valid, and
1722 		 * allocate a new level 2 page table page if it's not.
1723 		 * A reference will be added to the level 2 table when
1724 		 * the level 3 table is created.
1725 		 */
1726 		l1pte = pmap_l1pte(pmap, va);
1727 		if (pmap_pte_v(l1pte) == 0) {
1728 			pmap_physpage_addref(l1pte);
1729 			error = pmap_ptpage_alloc(pmap, l1pte, PGU_L2PT);
1730 			if (error) {
1731 				pmap_l1pt_delref(pmap, l1pte, cpu_id);
1732 				if (flags & PMAP_CANFAIL)
1733 					goto out;
1734 				panic("pmap_enter: unable to create L2 PT "
1735 				    "page");
1736 			}
1737 #ifdef DEBUG
1738 			if (pmapdebug & PDB_PTPAGE)
1739 				printf("pmap_enter: new level 2 table at "
1740 				    "0x%lx\n", pmap_pte_pa(l1pte));
1741 #endif
1742 		}
1743 
1744 		/*
1745 		 * Check to see if the level 2 PTE is valid, and
1746 		 * allocate a new level 3 page table page if it's not.
1747 		 * A reference will be added to the level 3 table when
1748 		 * the mapping is validated.
1749 		 */
1750 		l2pte = pmap_l2pte(pmap, va, l1pte);
1751 		if (pmap_pte_v(l2pte) == 0) {
1752 			pmap_physpage_addref(l2pte);
1753 			error = pmap_ptpage_alloc(pmap, l2pte, PGU_L3PT);
1754 			if (error) {
1755 				pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
1756 				if (flags & PMAP_CANFAIL)
1757 					goto out;
1758 				panic("pmap_enter: unable to create L3 PT "
1759 				    "page");
1760 			}
1761 #ifdef DEBUG
1762 			if (pmapdebug & PDB_PTPAGE)
1763 				printf("pmap_enter: new level 3 table at "
1764 				    "0x%lx\n", pmap_pte_pa(l2pte));
1765 #endif
1766 		}
1767 
1768 		/*
1769 		 * Get the PTE that will map the page.
1770 		 */
1771 		pte = pmap_l3pte(pmap, va, l2pte);
1772 	}
1773 
1774 	/* Remember the entire old PTE; used for the TBI check later. */
1775 	opte = *pte;
1776 
1777 	/*
1778 	 * Check to see if the old mapping is valid.  If not, validate the
1779 	 * new one immediately.
1780 	 */
1781 	if (pmap_pte_v(pte) == 0) {
1782 		/*
1783 		 * No need to invalidate the TLB in this case; an invalid
1784 		 * mapping won't be in the TLB, and a previously valid
1785 		 * mapping would have been flushed when it was invalidated.
1786 		 */
1787 		tflush = FALSE;
1788 
1789 		/*
1790 		 * No need to synchronize the I-stream, either, for basically
1791 		 * the same reason.
1792 		 */
1793 		setisync = needisync = FALSE;
1794 
1795 		if (pmap != pmap_kernel()) {
1796 			/*
1797 			 * New mappings gain a reference on the level 3
1798 			 * table.
1799 			 */
1800 			pmap_physpage_addref(pte);
1801 		}
1802 		goto validate_enterpv;
1803 	}
1804 
1805 	opa = pmap_pte_pa(pte);
1806 	hadasm = (pmap_pte_asm(pte) != 0);
1807 
1808 	if (opa == pa) {
1809 		/*
1810 		 * Mapping has not changed; must be a protection or
1811 		 * wiring change.
1812 		 */
1813 		if (pmap_pte_w_chg(pte, wired ? PG_WIRED : 0)) {
1814 #ifdef DEBUG
1815 			if (pmapdebug & PDB_ENTER)
1816 				printf("pmap_enter: wiring change -> %d\n",
1817 				    wired);
1818 #endif
1819 			/*
1820 			 * Adjust the wiring count.
1821 			 */
1822 			if (wired)
1823 				PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1824 			else
1825 				PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1826 		}
1827 
1828 		/*
1829 		 * Set the PTE.
1830 		 */
1831 		goto validate;
1832 	}
1833 
1834 	/*
1835 	 * The mapping has changed.  We need to invalidate the
1836 	 * old mapping before creating the new one.
1837 	 */
1838 #ifdef DEBUG
1839 	if (pmapdebug & PDB_ENTER)
1840 		printf("pmap_enter: removing old mapping 0x%lx\n", va);
1841 #endif
1842 	if (pmap != pmap_kernel()) {
1843 		/*
1844 		 * Gain an extra reference on the level 3 table.
1845 		 * pmap_remove_mapping() will delete a reference,
1846 		 * and we don't want the table to be erroneously
1847 		 * freed.
1848 		 */
1849 		pmap_physpage_addref(pte);
1850 	}
1851 	needisync |= pmap_remove_mapping(pmap, va, pte, TRUE, cpu_id);
1852 
1853  validate_enterpv:
1854 	/*
1855 	 * Enter the mapping into the pv_table if appropriate.
1856 	 */
1857 	if (pg != NULL) {
1858 		error = pmap_pv_enter(pmap, pg, va, pte, TRUE);
1859 		if (error) {
1860 			pmap_l3pt_delref(pmap, va, pte, cpu_id);
1861 			if (flags & PMAP_CANFAIL)
1862 				goto out;
1863 			panic("pmap_enter: unable to enter mapping in PV "
1864 			    "table");
1865 		}
1866 	}
1867 
1868 	/*
1869 	 * Increment counters.
1870 	 */
1871 	PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1872 	if (wired)
1873 		PMAP_STAT_INCR(pmap->pm_stats.wired_count, 1);
1874 
1875  validate:
1876 	/*
1877 	 * Build the new PTE.
1878 	 */
1879 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap, prot) | PG_V;
1880 	if (pg != NULL) {
1881 		int attrs;
1882 
1883 #ifdef DIAGNOSTIC
1884 		if ((flags & VM_PROT_ALL) & ~prot)
1885 			panic("pmap_enter: access type exceeds prot");
1886 #endif
1887 		simple_lock(&pg->mdpage.pvh_slock);
1888 		if (flags & VM_PROT_WRITE)
1889 			pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
1890 		else if (flags & VM_PROT_ALL)
1891 			pg->mdpage.pvh_attrs |= PGA_REFERENCED;
1892 		attrs = pg->mdpage.pvh_attrs;
1893 		simple_unlock(&pg->mdpage.pvh_slock);
1894 
1895 		/*
1896 		 * Set up referenced/modified emulation for new mapping.
1897 		 */
1898 		if ((attrs & PGA_REFERENCED) == 0)
1899 			npte |= PG_FOR | PG_FOW | PG_FOE;
1900 		else if ((attrs & PGA_MODIFIED) == 0)
1901 			npte |= PG_FOW;
1902 
1903 		/*
1904 		 * Mapping was entered on PV list.
1905 		 */
1906 		npte |= PG_PVLIST;
1907 	}
1908 	if (wired)
1909 		npte |= PG_WIRED;
1910 #ifdef DEBUG
1911 	if (pmapdebug & PDB_ENTER)
1912 		printf("pmap_enter: new pte = 0x%lx\n", npte);
1913 #endif
1914 
1915 	/*
1916 	 * If the PALcode portion of the new PTE is the same as the
1917 	 * old PTE, no TBI is necessary.
1918 	 */
1919 	if (PG_PALCODE(opte) == PG_PALCODE(npte))
1920 		tflush = FALSE;
1921 
1922 	/*
1923 	 * Set the new PTE.
1924 	 */
1925 	PMAP_SET_PTE(pte, npte);
1926 
1927 	/*
1928 	 * Invalidate the TLB entry for this VA and any appropriate
1929 	 * caches.
1930 	 */
1931 	if (tflush) {
1932 		PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
1933 		PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
1934 		PMAP_TLB_SHOOTNOW();
1935 	}
1936 	if (setisync)
1937 		PMAP_SET_NEEDISYNC(pmap);
1938 	if (needisync)
1939 		PMAP_SYNC_ISTREAM(pmap);
1940 
1941 out:
1942 	PMAP_UNLOCK(pmap);
1943 	PMAP_MAP_TO_HEAD_UNLOCK();
1944 
1945 	return error;
1946 }
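
/*
 * Illustrative sketch of a typical pmap_enter() call and its eventual
 * teardown.  The names below are hypothetical and error handling is
 * reduced to the PMAP_CANFAIL case; the access-type bits in "flags"
 * are assumed to be a subset of "prot", as checked above.
 *
 *	error = pmap_enter(pmap, va, pa, VM_PROT_READ | VM_PROT_WRITE,
 *	    VM_PROT_READ | PMAP_WIRED | PMAP_CANFAIL);
 *	if (error)
 *		... back off and retry, or fail the fault ...
 *	...
 *	pmap_remove(pmap, va, va + PAGE_SIZE);
 */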
1947 
1948 /*
1949  * pmap_kenter_pa:		[ INTERFACE ]
1950  *
1951  *	Enter a va -> pa mapping into the kernel pmap without any
1952  *	physical->virtual tracking.
1953  *
1954  *	Note: no locking is necessary in this function.
1955  */
1956 void
1957 pmap_kenter_pa(vaddr_t va, paddr_t pa, vm_prot_t prot)
1958 {
1959 	pt_entry_t *pte, npte;
1960 	long cpu_id = cpu_number();
1961 	boolean_t needisync = FALSE;
1962 	pmap_t pmap = pmap_kernel();
1963 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
1964 
1965 #ifdef DEBUG
1966 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
1967 		printf("pmap_kenter_pa(%lx, %lx, %x)\n",
1968 		    va, pa, prot);
1969 #endif
1970 
1971 #ifdef DIAGNOSTIC
1972 	/*
1973 	 * Sanity check the virtual address.
1974 	 */
1975 	if (va < VM_MIN_KERNEL_ADDRESS)
1976 		panic("pmap_kenter_pa: kernel pmap, invalid va 0x%lx", va);
1977 #endif
1978 
1979 	pte = PMAP_KERNEL_PTE(va);
1980 
1981 	if (pmap_pte_v(pte) == 0)
1982 		PMAP_STAT_INCR(pmap->pm_stats.resident_count, 1);
1983 	if (pmap_pte_w(pte) == 0)
1984 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
1985 
1986 	if ((prot & VM_PROT_EXECUTE) != 0 || pmap_pte_exec(pte))
1987 		needisync = TRUE;
1988 
1989 	/*
1990 	 * Build the new PTE.
1991 	 */
1992 	npte = ((pa >> PGSHIFT) << PG_SHIFT) | pte_prot(pmap_kernel(), prot) |
1993 	    PG_V | PG_WIRED;
1994 
1995 	/*
1996 	 * Set the new PTE.
1997 	 */
1998 	PMAP_SET_PTE(pte, npte);
1999 #if defined(MULTIPROCESSOR)
2000 	alpha_mb();		/* XXX alpha_wmb()? */
2001 #endif
2002 
2003 	/*
2004 	 * Invalidate the TLB entry for this VA and any appropriate
2005 	 * caches.
2006 	 */
2007 	PMAP_INVALIDATE_TLB(pmap, va, TRUE, TRUE, cpu_id);
2008 	PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
2009 	PMAP_TLB_SHOOTNOW();
2010 
2011 	if (needisync)
2012 		PMAP_SYNC_ISTREAM_KERNEL();
2013 }
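
/*
 * Illustrative sketch: pmap_kenter_pa() is normally paired with
 * pmap_kremove() to create a short-lived, unmanaged kernel mapping.
 * The names kva and pa are hypothetical; the kernel virtual address
 * is assumed to have been reserved elsewhere.
 *
 *	pmap_kenter_pa(kva, pa, VM_PROT_READ | VM_PROT_WRITE);
 *	... access the page through kva ...
 *	pmap_kremove(kva, PAGE_SIZE);
 */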
2014 
2015 /*
2016  * pmap_kremove:		[ INTERFACE ]
2017  *
2018  *	Remove a mapping entered with pmap_kenter_pa() starting at va,
2019  *	for size bytes (assumed to be page rounded).
2020  */
2021 void
2022 pmap_kremove(vaddr_t va, vsize_t size)
2023 {
2024 	pt_entry_t *pte;
2025 	boolean_t needisync = FALSE;
2026 	long cpu_id = cpu_number();
2027 	pmap_t pmap = pmap_kernel();
2028 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2029 
2030 #ifdef DEBUG
2031 	if (pmapdebug & (PDB_FOLLOW|PDB_ENTER))
2032 		printf("pmap_kremove(%lx, %lx)\n",
2033 		    va, size);
2034 #endif
2035 
2036 #ifdef DIAGNOSTIC
2037 	if (va < VM_MIN_KERNEL_ADDRESS)
2038 		panic("pmap_kremove: user address");
2039 #endif
2040 
2041 	for (; size != 0; size -= PAGE_SIZE, va += PAGE_SIZE) {
2042 		pte = PMAP_KERNEL_PTE(va);
2043 		if (pmap_pte_v(pte)) {
2044 #ifdef DIAGNOSTIC
2045 			if (pmap_pte_pv(pte))
2046 				panic("pmap_kremove: PG_PVLIST mapping for "
2047 				    "0x%lx", va);
2048 #endif
2049 			if (pmap_pte_exec(pte))
2050 				needisync = TRUE;
2051 
2052 			/* Zap the mapping. */
2053 			PMAP_SET_PTE(pte, PG_NV);
2054 #if defined(MULTIPROCESSOR)
2055 			alpha_mb();		/* XXX alpha_wmb()? */
2056 #endif
2057 			PMAP_INVALIDATE_TLB(pmap, va, TRUE, TRUE, cpu_id);
2058 			PMAP_TLB_SHOOTDOWN(pmap, va, PG_ASM);
2059 
2060 			/* Update stats. */
2061 			PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2062 			PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2063 		}
2064 	}
2065 
2066 	PMAP_TLB_SHOOTNOW();
2067 
2068 	if (needisync)
2069 		PMAP_SYNC_ISTREAM_KERNEL();
2070 }
2071 
2072 /*
2073  * pmap_unwire:			[ INTERFACE ]
2074  *
2075  *	Clear the wired attribute for a map/virtual-address pair.
2076  *
2077  *	The mapping must already exist in the pmap.
2078  */
2079 void
2080 pmap_unwire(pmap_t pmap, vaddr_t va)
2081 {
2082 	pt_entry_t *pte;
2083 
2084 #ifdef DEBUG
2085 	if (pmapdebug & PDB_FOLLOW)
2086 		printf("pmap_unwire(%p, %lx)\n", pmap, va);
2087 #endif
2088 
2089 	PMAP_LOCK(pmap);
2090 
2091 	pte = pmap_l3pte(pmap, va, NULL);
2092 #ifdef DIAGNOSTIC
2093 	if (pte == NULL || pmap_pte_v(pte) == 0)
2094 		panic("pmap_unwire");
2095 #endif
2096 
2097 	/*
2098 	 * If wiring actually changed (always?) clear the wire bit and
2099 	 * update the wire count.  Note that wiring is not a hardware
2100 	 * characteristic so there is no need to invalidate the TLB.
2101 	 */
2102 	if (pmap_pte_w_chg(pte, 0)) {
2103 		pmap_pte_set_w(pte, FALSE);
2104 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2105 	}
2106 #ifdef DIAGNOSTIC
2107 	else {
2108 		printf("pmap_unwire: wiring for pmap %p va 0x%lx "
2109 		    "didn't change!\n", pmap, va);
2110 	}
2111 #endif
2112 
2113 	PMAP_UNLOCK(pmap);
2114 }
2115 
2116 /*
2117  * pmap_extract:		[ INTERFACE ]
2118  *
2119  *	Extract the physical address associated with the given
2120  *	pmap/virtual address pair.
2121  */
2122 boolean_t
2123 pmap_extract(pmap_t pmap, vaddr_t va, paddr_t *pap)
2124 {
2125 	pt_entry_t *l1pte, *l2pte, *l3pte;
2126 	paddr_t pa;
2127 	boolean_t rv = FALSE;
2128 
2129 #ifdef DEBUG
2130 	if (pmapdebug & PDB_FOLLOW)
2131 		printf("pmap_extract(%p, %lx) -> ", pmap, va);
2132 #endif
2133 	PMAP_LOCK(pmap);
2134 
2135 	l1pte = pmap_l1pte(pmap, va);
2136 	if (pmap_pte_v(l1pte) == 0)
2137 		goto out;
2138 
2139 	l2pte = pmap_l2pte(pmap, va, l1pte);
2140 	if (pmap_pte_v(l2pte) == 0)
2141 		goto out;
2142 
2143 	l3pte = pmap_l3pte(pmap, va, l2pte);
2144 	if (pmap_pte_v(l3pte) == 0)
2145 		goto out;
2146 
2147 	pa = pmap_pte_pa(l3pte) | (va & PGOFSET);
2148 	if (pap != NULL)
2149 		*pap = pa;
2150 	rv = TRUE;
2151 
2152  out:
2153 	PMAP_UNLOCK(pmap);
2154 #ifdef DEBUG
2155 	if (pmapdebug & PDB_FOLLOW) {
2156 		if (rv)
2157 			printf("0x%lx\n", pa);
2158 		else
2159 			printf("failed\n");
2160 	}
2161 #endif
2162 	return (rv);
2163 }
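
/*
 * Illustrative sketch of a hypothetical pmap_extract() caller; the
 * return value must be checked before the physical address is used,
 * since the VA may simply not be mapped.
 *
 *	paddr_t pa;
 *
 *	if (pmap_extract(pmap_kernel(), va, &pa) == FALSE)
 *		panic("va 0x%lx is not mapped", va);
 *	... pa now holds the physical address backing va ...
 */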
2164 
2165 /*
2166  * pmap_copy:			[ INTERFACE ]
2167  *
2168  *	Copy the mapping range specified by src_addr/len
2169  *	from the source map to the range dst_addr/len
2170  *	in the destination map.
2171  *
2172  *	This routine is only advisory and need not do anything.
2173  */
2174 /* call deleted in <machine/pmap.h> */
2175 
2176 /*
2177  * pmap_update:			[ INTERFACE ]
2178  *
2179  *	Require that all active physical maps contain no
2180  *	incorrect entries NOW, by processing any deferred
2181  *	pmap operations.
2182  */
2183 /* call deleted in <machine/pmap.h> */
2184 
2185 /*
2186  * pmap_collect:		[ INTERFACE ]
2187  *
2188  *	Garbage collects the physical map system for pages which are no
2189  *	longer used.  Success need not be guaranteed -- that is, some
2190  *	pages which are no longer referenced may be left uncollected
2191  *	while others are collected.
2192  *
2193  *	Called by the pageout daemon when pages are scarce.
2194  */
2195 void
2196 pmap_collect(pmap_t pmap)
2197 {
2198 
2199 #ifdef DEBUG
2200 	if (pmapdebug & PDB_FOLLOW)
2201 		printf("pmap_collect(%p)\n", pmap);
2202 #endif
2203 
2204 	/*
2205 	 * If called for the kernel pmap, just return.  We
2206 	 * handle this case in the event that we ever want
2207 	 * to have swappable kernel threads.
2208 	 */
2209 	if (pmap == pmap_kernel())
2210 		return;
2211 
2212 	/*
2213 	 * This process is about to be swapped out; free all of
2214 	 * the PT pages by removing the physical mappings for its
2215 	 * entire address space.  Note: pmap_remove() performs
2216 	 * all necessary locking.
2217 	 */
2218 	pmap_do_remove(pmap, VM_MIN_ADDRESS, VM_MAX_ADDRESS, FALSE);
2219 }
2220 
2221 /*
2222  * pmap_activate:		[ INTERFACE ]
2223  *
2224  *	Activate the pmap used by the specified process.  This includes
2225  *	reloading the MMU context if it is the current process, and marking
2226  *	the pmap in use by the processor.
2227  *
2228  *	Note: We may use only spin locks here, since we are called
2229  *	by a critical section in cpu_switch()!
2230  */
2231 void
2232 pmap_activate(struct proc *p)
2233 {
2234 	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
2235 	long cpu_id = cpu_number();
2236 
2237 #ifdef DEBUG
2238 	if (pmapdebug & PDB_FOLLOW)
2239 		printf("pmap_activate(%p)\n", p);
2240 #endif
2241 
2242 	PMAP_LOCK(pmap);
2243 
2244 	/*
2245 	 * Mark the pmap in use by this processor.
2246 	 */
2247 	atomic_setbits_ulong(&pmap->pm_cpus, (1UL << cpu_id));
2248 
2249 	/*
2250 	 * Allocate an ASN.
2251 	 */
2252 	pmap_asn_alloc(pmap, cpu_id);
2253 
2254 	PMAP_ACTIVATE(pmap, p, cpu_id);
2255 
2256 	PMAP_UNLOCK(pmap);
2257 }
2258 
2259 /*
2260  * pmap_deactivate:		[ INTERFACE ]
2261  *
2262  *	Mark that the pmap used by the specified process is no longer
2263  *	in use by the processor.
2264  *
2265  *	The comment above pmap_activate() wrt. locking applies here,
2266  *	as well.  Note that we use only a single `atomic' operation,
2267  *	so no locking is necessary.
2268  */
2269 void
2270 pmap_deactivate(struct proc *p)
2271 {
2272 	struct pmap *pmap = p->p_vmspace->vm_map.pmap;
2273 
2274 #ifdef DEBUG
2275 	if (pmapdebug & PDB_FOLLOW)
2276 		printf("pmap_deactivate(%p)\n", p);
2277 #endif
2278 
2279 	/*
2280 	 * Mark the pmap no longer in use by this processor.
2281 	 */
2282 	atomic_clearbits_ulong(&pmap->pm_cpus, (1UL << cpu_number()));
2283 }
2284 
2285 #if defined(MULTIPROCESSOR)
2286 /*
2287  * pmap_do_reactivate:
2288  *
2289  *	Reactivate an address space when the level 1 map changes.
2290  *	We are invoked by an interprocessor interrupt.
2291  */
2292 void
2293 pmap_do_reactivate(struct cpu_info *ci, struct trapframe *framep)
2294 {
2295 	struct pmap *pmap;
2296 
2297 	if (ci->ci_curproc == NULL)
2298 		return;
2299 
2300 	pmap = ci->ci_curproc->p_vmspace->vm_map.pmap;
2301 
2302 	pmap_asn_alloc(pmap, ci->ci_cpuid);
2303 	if (PMAP_ISACTIVE(pmap, ci->ci_cpuid))
2304 		PMAP_ACTIVATE(pmap, ci->ci_curproc, ci->ci_cpuid);
2305 }
2306 #endif /* MULTIPROCESSOR */
2307 
2308 /*
2309  * pmap_zero_page:		[ INTERFACE ]
2310  *
2311  *	Zero the specified (machine independent) page by mapping the page
2312  *	into virtual memory and clearing its contents, one machine dependent
2313  *	page at a time.
2314  *
2315  *	Note: no locking is necessary in this function.
2316  */
2317 void
2318 pmap_zero_page(paddr_t phys)
2319 {
2320 	u_long *p0, *p1, *pend;
2321 
2322 #ifdef DEBUG
2323 	if (pmapdebug & PDB_FOLLOW)
2324 		printf("pmap_zero_page(%lx)\n", phys);
2325 #endif
2326 
2327 	p0 = (u_long *)ALPHA_PHYS_TO_K0SEG(phys);
2328 	pend = (u_long *)((u_long)p0 + PAGE_SIZE);
2329 
2330 	/*
2331 	 * Unroll the loop a bit, doing 16 quadwords per iteration.
2332 	 * Do only 8 back-to-back stores, and alternate registers.
2333 	 */
2334 	do {
2335 		__asm __volatile(
2336 		"# BEGIN loop body\n"
2337 		"	addq	%2, (8 * 8), %1		\n"
2338 		"	stq	$31, (0 * 8)(%0)	\n"
2339 		"	stq	$31, (1 * 8)(%0)	\n"
2340 		"	stq	$31, (2 * 8)(%0)	\n"
2341 		"	stq	$31, (3 * 8)(%0)	\n"
2342 		"	stq	$31, (4 * 8)(%0)	\n"
2343 		"	stq	$31, (5 * 8)(%0)	\n"
2344 		"	stq	$31, (6 * 8)(%0)	\n"
2345 		"	stq	$31, (7 * 8)(%0)	\n"
2346 		"					\n"
2347 		"	addq	%3, (8 * 8), %0		\n"
2348 		"	stq	$31, (0 * 8)(%1)	\n"
2349 		"	stq	$31, (1 * 8)(%1)	\n"
2350 		"	stq	$31, (2 * 8)(%1)	\n"
2351 		"	stq	$31, (3 * 8)(%1)	\n"
2352 		"	stq	$31, (4 * 8)(%1)	\n"
2353 		"	stq	$31, (5 * 8)(%1)	\n"
2354 		"	stq	$31, (6 * 8)(%1)	\n"
2355 		"	stq	$31, (7 * 8)(%1)	\n"
2356 		"	# END loop body"
2357 		: "=r" (p0), "=r" (p1)
2358 		: "0" (p0), "1" (p1)
2359 		: "memory");
2360 	} while (p0 < pend);
2361 }
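
/*
 * For exposition, the unrolled assembly above is roughly equivalent to
 * the following C (with i a hypothetical loop index); the assembly
 * exists to control the exact store scheduling and the alternation
 * between the two pointer registers.
 *
 *	do {
 *		p1 = p0 + 8;
 *		for (i = 0; i < 8; i++)
 *			p0[i] = 0;
 *		p0 = p1 + 8;
 *		for (i = 0; i < 8; i++)
 *			p1[i] = 0;
 *	} while (p0 < pend);
 */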
2362 
2363 /*
2364  * pmap_copy_page:		[ INTERFACE ]
2365  *
2366  *	Copy the specified (machine independent) page by mapping the page
2367  *	into virtual memory and using memcpy to copy the page, one machine
2368  *	dependent page at a time.
2369  *
2370  *	Note: no locking is necessary in this function.
2371  */
2372 void
2373 pmap_copy_page(paddr_t src, paddr_t dst)
2374 {
2375 	caddr_t s, d;
2376 
2377 #ifdef DEBUG
2378 	if (pmapdebug & PDB_FOLLOW)
2379 		printf("pmap_copy_page(%lx, %lx)\n", src, dst);
2380 #endif
2381         s = (caddr_t)ALPHA_PHYS_TO_K0SEG(src);
2382         d = (caddr_t)ALPHA_PHYS_TO_K0SEG(dst);
2383 	memcpy(d, s, PAGE_SIZE);
2384 }
2385 
2386 /*
2387  * pmap_pageidlezero:		[ INTERFACE ]
2388  *
2389  *	Page zero'er for the idle loop.  Returns TRUE if the
2390  *	page was zero'd, FALSE if we aborted for some reason.
2391  */
2392 boolean_t
2393 pmap_pageidlezero(paddr_t pa)
2394 {
2395 	u_long *ptr;
2396 	int i, cnt = PAGE_SIZE / sizeof(u_long);
2397 
2398 	for (i = 0, ptr = (u_long *) ALPHA_PHYS_TO_K0SEG(pa); i < cnt; i++) {
2399 		if (sched_whichqs != 0) {
2400 			/*
2401 			 * A process has become ready.  Abort now,
2402 			 * so we don't keep it waiting while we
2403 			 * finish zeroing the page.
2404 			 */
2405 			return (FALSE);
2406 		}
2407 		*ptr++ = 0;
2408 	}
2409 
2410 	return (TRUE);
2411 }
2412 
2413 /*
2414  * pmap_clear_modify:		[ INTERFACE ]
2415  *
2416  *	Clear the modify bits on the specified physical page.
2417  */
2418 boolean_t
2419 pmap_clear_modify(struct vm_page *pg)
2420 {
2421 	boolean_t rv = FALSE;
2422 	long cpu_id = cpu_number();
2423 
2424 #ifdef DEBUG
2425 	if (pmapdebug & PDB_FOLLOW)
2426 		printf("pmap_clear_modify(%p)\n", pg);
2427 #endif
2428 
2429 	PMAP_HEAD_TO_MAP_LOCK();
2430 	simple_lock(&pg->mdpage.pvh_slock);
2431 
2432 	if (pg->mdpage.pvh_attrs & PGA_MODIFIED) {
2433 		rv = TRUE;
2434 		pmap_changebit(pg, PG_FOW, ~0, cpu_id);
2435 		pg->mdpage.pvh_attrs &= ~PGA_MODIFIED;
2436 	}
2437 
2438 	simple_unlock(&pg->mdpage.pvh_slock);
2439 	PMAP_HEAD_TO_MAP_UNLOCK();
2440 
2441 	return (rv);
2442 }
2443 
2444 /*
2445  * pmap_clear_reference:	[ INTERFACE ]
2446  *
2447  *	Clear the reference bit on the specified physical page.
2448  */
2449 boolean_t
2450 pmap_clear_reference(struct vm_page *pg)
2451 {
2452 	boolean_t rv = FALSE;
2453 	long cpu_id = cpu_number();
2454 
2455 #ifdef DEBUG
2456 	if (pmapdebug & PDB_FOLLOW)
2457 		printf("pmap_clear_reference(%p)\n", pg);
2458 #endif
2459 
2460 	PMAP_HEAD_TO_MAP_LOCK();
2461 	simple_lock(&pg->mdpage.pvh_slock);
2462 
2463 	if (pg->mdpage.pvh_attrs & PGA_REFERENCED) {
2464 		rv = TRUE;
2465 		pmap_changebit(pg, PG_FOR | PG_FOW | PG_FOE, ~0, cpu_id);
2466 		pg->mdpage.pvh_attrs &= ~PGA_REFERENCED;
2467 	}
2468 
2469 	simple_unlock(&pg->mdpage.pvh_slock);
2470 	PMAP_HEAD_TO_MAP_UNLOCK();
2471 
2472 	return (rv);
2473 }
2474 
2475 /*
2476  * pmap_is_referenced:		[ INTERFACE ]
2477  *
2478  *	Return whether or not the specified physical page is referenced
2479  *	by any physical maps.
2480  */
2481 /* See <machine/pmap.h> */
2482 
2483 /*
2484  * pmap_is_modified:		[ INTERFACE ]
2485  *
2486  *	Return whether or not the specified physical page is modified
2487  *	by any physical maps.
2488  */
2489 /* See <machine/pmap.h> */
2490 
2491 /*
2492  * pmap_phys_address:		[ INTERFACE ]
2493  *
2494  *	Return the physical address corresponding to the specified
2495  *	cookie.  Used by the device pager to decode a device driver's
2496  *	mmap entry point return value.
2497  *
2498  *	Note: no locking is necessary in this function.
2499  */
2500 paddr_t
2501 pmap_phys_address(int ppn)
2502 {
2503 
2504 	return (alpha_ptob(ppn));
2505 }
2506 
2507 /*
2508  * Miscellaneous support routines follow
2509  */
2510 
2511 /*
2512  * alpha_protection_init:
2513  *
2514  *	Initialize Alpha protection code array.
2515  *
2516  *	Note: no locking is necessary in this function.
2517  */
2518 void
2519 alpha_protection_init(void)
2520 {
2521 	int prot, *kp, *up;
2522 
2523 	kp = protection_codes[0];
2524 	up = protection_codes[1];
2525 
2526 	for (prot = 0; prot < 8; prot++) {
2527 		kp[prot] = 0; up[prot] = 0;
2528 		switch (prot) {
2529 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_NONE:
2530 			kp[prot] |= PG_ASM;
2531 			up[prot] |= 0;
2532 			break;
2533 
2534 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_EXECUTE:
2535 		case VM_PROT_NONE | VM_PROT_NONE | VM_PROT_EXECUTE:
2536 			kp[prot] |= PG_EXEC;		/* software */
2537 			up[prot] |= PG_EXEC;		/* software */
2538 			/* FALLTHROUGH */
2539 
2540 		case VM_PROT_READ | VM_PROT_NONE | VM_PROT_NONE:
2541 			kp[prot] |= PG_ASM | PG_KRE;
2542 			up[prot] |= PG_URE | PG_KRE;
2543 			break;
2544 
2545 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_NONE:
2546 			kp[prot] |= PG_ASM | PG_KWE;
2547 			up[prot] |= PG_UWE | PG_KWE;
2548 			break;
2549 
2550 		case VM_PROT_NONE | VM_PROT_WRITE | VM_PROT_EXECUTE:
2551 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_EXECUTE:
2552 			kp[prot] |= PG_EXEC;		/* software */
2553 			up[prot] |= PG_EXEC;		/* software */
2554 			/* FALLTHROUGH */
2555 
2556 		case VM_PROT_READ | VM_PROT_WRITE | VM_PROT_NONE:
2557 			kp[prot] |= PG_ASM | PG_KWE | PG_KRE;
2558 			up[prot] |= PG_UWE | PG_URE | PG_KWE | PG_KRE;
2559 			break;
2560 		}
2561 	}
2562 }
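
/*
 * Example of the resulting table: a user mapping entered with
 * VM_PROT_READ | VM_PROT_WRITE picks up PG_UWE | PG_URE | PG_KWE |
 * PG_KRE from protection_codes[1], while the same request against the
 * kernel pmap picks up PG_ASM | PG_KWE | PG_KRE from
 * protection_codes[0]; adding VM_PROT_EXECUTE additionally sets the
 * software-only PG_EXEC bit in either case.
 */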
2563 
2564 /*
2565  * pmap_remove_mapping:
2566  *
2567  *	Invalidate a single page denoted by pmap/va.
2568  *
2569  *	If (pte != NULL), it is the already computed PTE for the page.
2570  *
2571  *	Note: locking in this function is complicated by the fact
2572  *	that we can be called when the PV list is already locked
2573  *	(by pmap_page_protect()).  In this case, the caller must be
2574  *	careful to get the next PV entry while we remove this entry
2575  *	from beneath it.  We assume that the pmap itself is already
2576  *	locked; dolock applies only to the PV list.
2577  *
2578  *	Returns TRUE or FALSE, indicating if an I-stream sync needs
2579  *	to be initiated (for this CPU or for other CPUs).
2580  */
2581 boolean_t
2582 pmap_remove_mapping(pmap_t pmap, vaddr_t va, pt_entry_t *pte,
2583     boolean_t dolock, long cpu_id)
2584 {
2585 	paddr_t pa;
2586 	struct vm_page *pg;		/* if != NULL, page is managed */
2587 	boolean_t onpv;
2588 	boolean_t hadasm;
2589 	boolean_t isactive;
2590 	boolean_t needisync = FALSE;
2591 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2592 
2593 #ifdef DEBUG
2594 	if (pmapdebug & (PDB_FOLLOW|PDB_REMOVE|PDB_PROTECT))
2595 		printf("pmap_remove_mapping(%p, %lx, %p, %d, %ld)\n",
2596 		       pmap, va, pte, dolock, cpu_id);
2597 #endif
2598 
2599 	/*
2600 	 * PTE not provided, compute it from pmap and va.
2601 	 */
2602 	if (pte == NULL) {
2603 		pte = pmap_l3pte(pmap, va, NULL);
2604 		if (pmap_pte_v(pte) == 0)
2605 			return (FALSE);
2606 	}
2607 
2608 	pa = pmap_pte_pa(pte);
2609 	onpv = (pmap_pte_pv(pte) != 0);
2610 	hadasm = (pmap_pte_asm(pte) != 0);
2611 	isactive = PMAP_ISACTIVE(pmap, cpu_id);
2612 
2613 	/*
2614 	 * Determine what we need to do about the I-stream.  If
2615 	 * PG_EXEC was set, we mark a user pmap as needing an
2616 	 * I-sync on the way out to userspace.  We always need
2617 	 * an immediate I-sync for the kernel pmap.
2618 	 */
2619 	if (pmap_pte_exec(pte)) {
2620 		if (pmap == pmap_kernel())
2621 			needisync = TRUE;
2622 		else {
2623 			PMAP_SET_NEEDISYNC(pmap);
2624 			needisync = (pmap->pm_cpus != 0);
2625 		}
2626 	}
2627 
2628 	/*
2629 	 * Update statistics
2630 	 */
2631 	if (pmap_pte_w(pte))
2632 		PMAP_STAT_DECR(pmap->pm_stats.wired_count, 1);
2633 	PMAP_STAT_DECR(pmap->pm_stats.resident_count, 1);
2634 
2635 	/*
2636 	 * Invalidate the PTE after saving the reference/modify info.
2637 	 */
2638 #ifdef DEBUG
2639 	if (pmapdebug & PDB_REMOVE)
2640 		printf("remove: invalidating pte at %p\n", pte);
2641 #endif
2642 	PMAP_SET_PTE(pte, PG_NV);
2643 
2644 	PMAP_INVALIDATE_TLB(pmap, va, hadasm, isactive, cpu_id);
2645 	PMAP_TLB_SHOOTDOWN(pmap, va, hadasm ? PG_ASM : 0);
2646 	PMAP_TLB_SHOOTNOW();
2647 
2648 	/*
2649 	 * If we're removing a user mapping, check to see if we
2650 	 * can free page table pages.
2651 	 */
2652 	if (pmap != pmap_kernel()) {
2653 		/*
2654 		 * Delete the reference on the level 3 table.  It will
2655 		 * delete references on the level 2 and 1 tables as
2656 		 * appropriate.
2657 		 */
2658 		pmap_l3pt_delref(pmap, va, pte, cpu_id);
2659 	}
2660 
2661 	/*
2662 	 * If the mapping wasn't entered on the PV list, we're all done.
2663 	 */
2664 	if (onpv == FALSE)
2665 		return (needisync);
2666 
2667 	/*
2668 	 * Remove it from the PV table.
2669 	 */
2670 	pg = PHYS_TO_VM_PAGE(pa);
2671 	KASSERT(pg != NULL);
2672 	pmap_pv_remove(pmap, pg, va, dolock);
2673 
2674 	return (needisync);
2675 }
2676 
2677 /*
2678  * pmap_changebit:
2679  *
2680  *	Set or clear the specified PTE bits for all mappings on the
2681  *	specified page.
2682  *
2683  *	Note: we assume that the pv_head is already locked, and that
2684  *	the caller has acquired a PV->pmap mutex so that we can lock
2685  *	the pmaps as we encounter them.
2686  */
2687 void
2688 pmap_changebit(struct vm_page *pg, u_long set, u_long mask, long cpu_id)
2689 {
2690 	pv_entry_t pv;
2691 	pt_entry_t *pte, npte;
2692 	vaddr_t va;
2693 	boolean_t hadasm, isactive;
2694 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
2695 
2696 #ifdef DEBUG
2697 	if (pmapdebug & PDB_BITS)
2698 		printf("pmap_changebit(0x%p, 0x%lx, 0x%lx)\n",
2699 		    pg, set, mask);
2700 #endif
2701 
2702 	/*
2703 	 * Loop over all current mappings, setting/clearing as appropriate.
2704 	 */
2705 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
2706 		va = pv->pv_va;
2707 
2708 		PMAP_LOCK(pv->pv_pmap);
2709 
2710 		pte = pv->pv_pte;
2711 		npte = (*pte | set) & mask;
2712 		if (*pte != npte) {
2713 			hadasm = (pmap_pte_asm(pte) != 0);
2714 			isactive = PMAP_ISACTIVE(pv->pv_pmap, cpu_id);
2715 			PMAP_SET_PTE(pte, npte);
2716 			PMAP_INVALIDATE_TLB(pv->pv_pmap, va, hadasm, isactive,
2717 			    cpu_id);
2718 			PMAP_TLB_SHOOTDOWN(pv->pv_pmap, va,
2719 			    hadasm ? PG_ASM : 0);
2720 		}
2721 		PMAP_UNLOCK(pv->pv_pmap);
2722 	}
2723 
2724 	PMAP_TLB_SHOOTNOW();
2725 }
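
/*
 * Usage note: because each PTE is updated as (*pte | set) & mask,
 * callers pass the bits to turn on in "set" and the complement of the
 * bits to turn off in "mask".  For example, pmap_clear_modify() re-arms
 * write-fault emulation with pmap_changebit(pg, PG_FOW, ~0, cpu_id),
 * and pmap_emulate_reference() clears fault bits with
 * pmap_changebit(pg, 0, ~faultoff, cpu_id).
 */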
2726 
2727 /*
2728  * pmap_emulate_reference:
2729  *
2730  *	Emulate reference and/or modified bit hits.
2731  */
2732 void
2733 pmap_emulate_reference(struct proc *p, vaddr_t v, int user, int write)
2734 {
2735 	pt_entry_t faultoff, *pte;
2736 	struct vm_page *pg;
2737 	paddr_t pa;
2738 	boolean_t didlock = FALSE;
2739 	long cpu_id = cpu_number();
2740 
2741 #ifdef DEBUG
2742 	if (pmapdebug & PDB_FOLLOW)
2743 		printf("pmap_emulate_reference: %p, 0x%lx, %d, %d\n",
2744 		    p, v, user, write);
2745 #endif
2746 
2747 	/*
2748 	 * Convert process and virtual address to physical address.
2749 	 */
2750 	if (v >= VM_MIN_KERNEL_ADDRESS) {
2751 		if (user)
2752 			panic("pmap_emulate_reference: user ref to kernel");
2753 		/*
2754 		 * No need to lock here; kernel PT pages never go away.
2755 		 */
2756 		pte = PMAP_KERNEL_PTE(v);
2757 	} else {
2758 #ifdef DIAGNOSTIC
2759 		if (p == NULL)
2760 			panic("pmap_emulate_reference: bad proc");
2761 		if (p->p_vmspace == NULL)
2762 			panic("pmap_emulate_reference: bad p_vmspace");
2763 #endif
2764 		PMAP_LOCK(p->p_vmspace->vm_map.pmap);
2765 		didlock = TRUE;
2766 		pte = pmap_l3pte(p->p_vmspace->vm_map.pmap, v, NULL);
2767 		/*
2768 		 * We'll unlock below where we're done with the PTE.
2769 		 */
2770 	}
2771 #ifdef DEBUG
2772 	if (pmapdebug & PDB_FOLLOW) {
2773 		printf("\tpte = %p, ", pte);
2774 		printf("*pte = 0x%lx\n", *pte);
2775 	}
2776 #endif
2777 #ifdef DEBUG				/* These checks are more expensive */
2778 	if (!pmap_pte_v(pte))
2779 		panic("pmap_emulate_reference: invalid pte");
2780 #if 0
2781 	/*
2782 	 * Can't do these, because cpu_fork and cpu_swapin call
2783 	 * pmap_emulate_reference(), and the bits aren't guaranteed,
2784 	 * for them...
2785 	 */
2786 	if (write) {
2787 		if (!(*pte & (user ? PG_UWE : PG_UWE | PG_KWE)))
2788 			panic("pmap_emulate_reference: write but unwritable");
2789 		if (!(*pte & PG_FOW))
2790 			panic("pmap_emulate_reference: write but not FOW");
2791 	} else {
2792 		if (!(*pte & (user ? PG_URE : PG_URE | PG_KRE)))
2793 			panic("pmap_emulate_reference: !write but unreadable");
2794 		if (!(*pte & (PG_FOR | PG_FOE)))
2795 			panic("pmap_emulate_reference: !write but not FOR|FOE");
2796 	}
2797 #endif
2798 	/* Other diagnostics? */
2799 #endif
2800 	pa = pmap_pte_pa(pte);
2801 
2802 	/*
2803 	 * We're now done with the PTE.  If it was a user pmap, unlock
2804 	 * it now.
2805 	 */
2806 	if (didlock)
2807 		PMAP_UNLOCK(p->p_vmspace->vm_map.pmap);
2808 
2809 #ifdef DEBUG
2810 	if (pmapdebug & PDB_FOLLOW)
2811 		printf("\tpa = 0x%lx\n", pa);
2812 #endif
2813 #ifdef DIAGNOSTIC
2814 	if (!PAGE_IS_MANAGED(pa))
2815 		panic("pmap_emulate_reference(%p, 0x%lx, %d, %d): pa 0x%lx not managed", p, v, user, write, pa);
2816 #endif
2817 
2818 	/*
2819 	 * Twiddle the appropriate bits to reflect the reference
2820 	 * and/or modification..
2821 	 * and/or modification.
2822 	 * The rules:
2823 	 * 	(1) always mark page as used, and
2824 	 *	(2) if it was a write fault, mark page as modified.
2825 	 */
2826 	pg = PHYS_TO_VM_PAGE(pa);
2827 
2828 	PMAP_HEAD_TO_MAP_LOCK();
2829 	simple_lock(&pg->mdpage.pvh_slock);
2830 
2831 	if (write) {
2832 		pg->mdpage.pvh_attrs |= (PGA_REFERENCED|PGA_MODIFIED);
2833 		faultoff = PG_FOR | PG_FOW | PG_FOE;
2834 	} else {
2835 		pg->mdpage.pvh_attrs |= PGA_REFERENCED;
2836 		faultoff = PG_FOR | PG_FOE;
2837 	}
2838 	pmap_changebit(pg, 0, ~faultoff, cpu_id);
2839 
2840 	simple_unlock(&pg->mdpage.pvh_slock);
2841 	PMAP_HEAD_TO_MAP_UNLOCK();
2842 }
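
/*
 * Summary of the referenced/modified emulation that the fault bits
 * implement (see also the PTE construction in pmap_enter()):
 *
 *	page state			fault bits left set in the PTE
 *	not referenced			PG_FOR | PG_FOW | PG_FOE
 *	referenced, not modified	PG_FOW
 *	referenced and modified		none
 *
 * A fault on one of these bits lands here; the page is marked
 * PGA_REFERENCED (and PGA_MODIFIED for a write), and the bits that are
 * no longer needed are cleared via pmap_changebit().
 */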
2843 
2844 #ifdef DEBUG
2845 /*
2846  * pmap_pv_dump:
2847  *
2848  *	Dump the physical->virtual data for the specified page.
2849  */
2850 void
2851 pmap_pv_dump(paddr_t pa)
2852 {
2853 	struct vm_page *pg;
2854 	pv_entry_t pv;
2855 
2856 	pg = PHYS_TO_VM_PAGE(pa);
2857 
2858 	simple_lock(&pg->mdpage.pvh_slock);
2859 
2860 	printf("pa 0x%lx (attrs = 0x%x):\n", pa, pg->mdpage.pvh_attrs);
2861 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next)
2862 		printf("     pmap %p, va 0x%lx\n",
2863 		    pv->pv_pmap, pv->pv_va);
2864 	printf("\n");
2865 
2866 	simple_unlock(&pg->mdpage.pvh_slock);
2867 }
2868 #endif
2869 
2870 /*
2871  * vtophys:
2872  *
2873  *	Return the physical address corresponding to the K0SEG or
2874  *	K1SEG address provided.
2875  *
2876  *	Note: no locking is necessary in this function.
2877  */
2878 paddr_t
2879 vtophys(vaddr_t vaddr)
2880 {
2881 	pt_entry_t *pte;
2882 	paddr_t paddr = 0;
2883 
2884 	if (vaddr < ALPHA_K0SEG_BASE)
2885 		printf("vtophys: invalid vaddr 0x%lx\n", vaddr);
2886 	else if (vaddr <= ALPHA_K0SEG_END)
2887 		paddr = ALPHA_K0SEG_TO_PHYS(vaddr);
2888 	else {
2889 		pte = PMAP_KERNEL_PTE(vaddr);
2890 		if (pmap_pte_v(pte))
2891 			paddr = pmap_pte_pa(pte) | (vaddr & PGOFSET);
2892 	}
2893 
2894 #if 0
2895 	printf("vtophys(0x%lx) -> 0x%lx\n", vaddr, paddr);
2896 #endif
2897 
2898 	return (paddr);
2899 }
2900 
2901 /******************** pv_entry management ********************/
2902 
2903 /*
2904  * pmap_pv_enter:
2905  *
2906  *	Add a physical->virtual entry to the pv_table.
2907  */
2908 int
2909 pmap_pv_enter(pmap_t pmap, struct vm_page *pg, vaddr_t va, pt_entry_t *pte,
2910     boolean_t dolock)
2911 {
2912 	pv_entry_t newpv;
2913 
2914 	/*
2915 	 * Allocate and fill in the new pv_entry.
2916 	 */
2917 	newpv = pmap_pv_alloc();
2918 	if (newpv == NULL)
2919 		return ENOMEM;
2920 	newpv->pv_va = va;
2921 	newpv->pv_pmap = pmap;
2922 	newpv->pv_pte = pte;
2923 
2924 	if (dolock)
2925 		simple_lock(&pg->mdpage.pvh_slock);
2926 
2927 #ifdef DEBUG
2928     {
2929 	pv_entry_t pv;
2930 	/*
2931 	 * Make sure the entry doesn't already exist.
2932 	 */
2933 	for (pv = pg->mdpage.pvh_list; pv != NULL; pv = pv->pv_next) {
2934 		if (pmap == pv->pv_pmap && va == pv->pv_va) {
2935 			printf("pmap = %p, va = 0x%lx\n", pmap, va);
2936 			panic("pmap_pv_enter: already in pv table");
2937 		}
2938 	}
2939     }
2940 #endif
2941 
2942 	/*
2943 	 * ...and put it in the list.
2944 	 */
2945 	newpv->pv_next = pg->mdpage.pvh_list;
2946 	pg->mdpage.pvh_list = newpv;
2947 
2948 	if (dolock)
2949 		simple_unlock(&pg->mdpage.pvh_slock);
2950 
2951 	return 0;
2952 }
2953 
2954 /*
2955  * pmap_pv_remove:
2956  *
2957  *	Remove a physical->virtual entry from the pv_table.
2958  */
2959 void
2960 pmap_pv_remove(pmap_t pmap, struct vm_page *pg, vaddr_t va, boolean_t dolock)
2961 {
2962 	pv_entry_t pv, *pvp;
2963 
2964 	if (dolock)
2965 		simple_lock(&pg->mdpage.pvh_slock);
2966 
2967 	/*
2968 	 * Find the entry to remove.
2969 	 */
2970 	for (pvp = &pg->mdpage.pvh_list, pv = *pvp;
2971 	     pv != NULL; pvp = &pv->pv_next, pv = *pvp)
2972 		if (pmap == pv->pv_pmap && va == pv->pv_va)
2973 			break;
2974 
2975 #ifdef DEBUG
2976 	if (pv == NULL)
2977 		panic("pmap_pv_remove: not in pv table");
2978 #endif
2979 
2980 	*pvp = pv->pv_next;
2981 
2982 	if (dolock)
2983 		simple_unlock(&pg->mdpage.pvh_slock);
2984 
2985 	pmap_pv_free(pv);
2986 }
2987 
2988 /*
2989  * pmap_pv_page_alloc:
2990  *
2991  *	Allocate a page for the pv_entry pool.
2992  */
2993 void *
2994 pmap_pv_page_alloc(struct pool *pp, int flags)
2995 {
2996 	paddr_t pg;
2997 
2998 	if (pmap_physpage_alloc(PGU_PVENT, &pg))
2999 		return ((void *)ALPHA_PHYS_TO_K0SEG(pg));
3000 	return (NULL);
3001 }
3002 
3003 /*
3004  * pmap_pv_page_free:
3005  *
3006  *	Free a pv_entry pool page.
3007  */
3008 void
3009 pmap_pv_page_free(struct pool *pp, void *v)
3010 {
3011 
3012 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t)v));
3013 }
3014 
3015 /******************** misc. functions ********************/
3016 
3017 /*
3018  * pmap_physpage_alloc:
3019  *
3020  *	Allocate a single page from the VM system and return the
3021  *	physical address for that page.
3022  */
3023 boolean_t
3024 pmap_physpage_alloc(int usage, paddr_t *pap)
3025 {
3026 	struct vm_page *pg;
3027 	paddr_t pa;
3028 
3029 	/*
3030 	 * Don't ask for a zero'd page in the L1PT case -- we will
3031 	 * properly initialize it in the constructor.
3032 	 */
3033 
3034 	pg = uvm_pagealloc(NULL, 0, NULL, usage == PGU_L1PT ?
3035 	    UVM_PGA_USERESERVE : UVM_PGA_USERESERVE|UVM_PGA_ZERO);
3036 	if (pg != NULL) {
3037 		pa = VM_PAGE_TO_PHYS(pg);
3038 
3039 		simple_lock(&pg->mdpage.pvh_slock);
3040 #ifdef DIAGNOSTIC
3041 		if (pg->wire_count != 0) {
3042 			printf("pmap_physpage_alloc: page 0x%lx has "
3043 			    "%d references\n", pa, pg->wire_count);
3044 			panic("pmap_physpage_alloc");
3045 		}
3046 #endif
3047 		simple_unlock(&pg->mdpage.pvh_slock);
3048 		*pap = pa;
3049 		return (TRUE);
3050 	}
3051 	return (FALSE);
3052 }
3053 
3054 /*
3055  * pmap_physpage_free:
3056  *
3057  *	Free the single page table page at the specified physical address.
3058  */
3059 void
3060 pmap_physpage_free(paddr_t pa)
3061 {
3062 	struct vm_page *pg;
3063 
3064 	if ((pg = PHYS_TO_VM_PAGE(pa)) == NULL)
3065 		panic("pmap_physpage_free: bogus physical page address");
3066 
3067 	simple_lock(&pg->mdpage.pvh_slock);
3068 #ifdef DIAGNOSTIC
3069 	if (pg->wire_count != 0)
3070 		panic("pmap_physpage_free: page still has references");
3071 #endif
3072 	simple_unlock(&pg->mdpage.pvh_slock);
3073 
3074 	uvm_pagefree(pg);
3075 }
3076 
3077 /*
3078  * pmap_physpage_addref:
3079  *
3080  *	Add a reference to the specified special use page.
3081  */
3082 int
3083 pmap_physpage_addref(void *kva)
3084 {
3085 	struct vm_page *pg;
3086 	paddr_t pa;
3087 	int rval;
3088 
3089 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
3090 	pg = PHYS_TO_VM_PAGE(pa);
3091 
3092 	simple_lock(&pg->mdpage.pvh_slock);
3093 	rval = ++pg->wire_count;
3094 	simple_unlock(&pg->mdpage.pvh_slock);
3095 
3096 	return (rval);
3097 }
3098 
3099 /*
3100  * pmap_physpage_delref:
3101  *
3102  *	Delete a reference to the specified special use page.
3103  */
3104 int
3105 pmap_physpage_delref(void *kva)
3106 {
3107 	struct vm_page *pg;
3108 	paddr_t pa;
3109 	int rval;
3110 
3111 	pa = ALPHA_K0SEG_TO_PHYS(trunc_page((vaddr_t)kva));
3112 	pg = PHYS_TO_VM_PAGE(pa);
3113 
3114 	simple_lock(&pg->mdpage.pvh_slock);
3115 
3116 #ifdef DIAGNOSTIC
3117 	/*
3118 	 * Make sure we never have a negative reference count.
3119 	 */
3120 	if (pg->wire_count == 0)
3121 		panic("pmap_physpage_delref: reference count already zero");
3122 #endif
3123 
3124 	rval = --pg->wire_count;
3125 
3126 	simple_unlock(&pg->mdpage.pvh_slock);
3127 
3128 	return (rval);
3129 }
3130 
3131 /******************** page table page management ********************/
3132 
3133 /*
3134  * pmap_growkernel:		[ INTERFACE ]
3135  *
3136  *	Grow the kernel address space.  This is a hint from the
3137  *	upper layer to pre-allocate more kernel PT pages.
3138  */
3139 vaddr_t
3140 pmap_growkernel(vaddr_t maxkvaddr)
3141 {
3142 	struct pmap *kpm = pmap_kernel(), *pm;
3143 	paddr_t ptaddr;
3144 	pt_entry_t *l1pte, *l2pte, pte;
3145 	vaddr_t va;
3146 	int l1idx;
3147 
3148 	if (maxkvaddr <= virtual_end)
3149 		goto out;		/* we are OK */
3150 
3151 	simple_lock(&pmap_growkernel_slock);
3152 
3153 	va = virtual_end;
3154 
3155 	while (va < maxkvaddr) {
3156 		/*
3157 		 * If there is no valid L1 PTE (i.e. no L2 PT page),
3158 		 * allocate a new L2 PT page and insert it into the
3159 		 * L1 map.
3160 		 */
3161 		l1pte = pmap_l1pte(kpm, va);
3162 		if (pmap_pte_v(l1pte) == 0) {
3163 			/*
3164 			 * XXX PGU_NORMAL?  It's not a "traditional" PT page.
3165 			 */
3166 			if (uvm.page_init_done == FALSE) {
3167 				/*
3168 				 * We're growing the kernel pmap early (from
3169 				 * uvm_pageboot_alloc()).  This case must
3170 				 * be handled a little differently.
3171 				 */
3172 				ptaddr = ALPHA_K0SEG_TO_PHYS(
3173 				    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3174 			} else if (pmap_physpage_alloc(PGU_NORMAL,
3175 				   &ptaddr) == FALSE)
3176 				goto die;
3177 			pte = (atop(ptaddr) << PG_SHIFT) |
3178 			    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3179 			*l1pte = pte;
3180 
3181 			l1idx = l1pte_index(va);
3182 
3183 			/* Update all the user pmaps. */
3184 			simple_lock(&pmap_all_pmaps_slock);
3185 			for (pm = TAILQ_FIRST(&pmap_all_pmaps);
3186 			     pm != NULL; pm = TAILQ_NEXT(pm, pm_list)) {
3187 				/* Skip the kernel pmap. */
3188 				if (pm == pmap_kernel())
3189 					continue;
3190 
3191 				PMAP_LOCK(pm);
3192 				if (pm->pm_lev1map == kernel_lev1map) {
3193 					PMAP_UNLOCK(pm);
3194 					continue;
3195 				}
3196 				pm->pm_lev1map[l1idx] = pte;
3197 				PMAP_UNLOCK(pm);
3198 			}
3199 			simple_unlock(&pmap_all_pmaps_slock);
3200 		}
3201 
3202 		/*
3203 		 * Have an L2 PT page now, add the L3 PT page.
3204 		 */
3205 		l2pte = pmap_l2pte(kpm, va, l1pte);
3206 		KASSERT(pmap_pte_v(l2pte) == 0);
3207 		if (uvm.page_init_done == FALSE) {
3208 			/*
3209 			 * See above.
3210 			 */
3211 			ptaddr = ALPHA_K0SEG_TO_PHYS(
3212 			    pmap_steal_memory(PAGE_SIZE, NULL, NULL));
3213 		} else if (pmap_physpage_alloc(PGU_NORMAL, &ptaddr) == FALSE)
3214 			goto die;
3215 		*l2pte = (atop(ptaddr) << PG_SHIFT) |
3216 		    PG_V | PG_ASM | PG_KRE | PG_KWE | PG_WIRED;
3217 		va += ALPHA_L2SEG_SIZE;
3218 	}
3219 
3220 	/* Invalidate the L1 PT cache. */
3221 	pool_cache_invalidate(&pmap_l1pt_cache);
3222 
3223 	virtual_end = va;
3224 
3225 	simple_unlock(&pmap_growkernel_slock);
3226 
3227  out:
3228 	return (virtual_end);
3229 
3230  die:
3231 	panic("pmap_growkernel: out of memory");
3232 }
3233 
3234 /*
3235  * pmap_lev1map_create:
3236  *
3237  *	Create a new level 1 page table for the specified pmap.
3238  *
3239  *	Note: growkernel and the pmap must already be locked.
3240  */
3241 int
3242 pmap_lev1map_create(pmap_t pmap, long cpu_id)
3243 {
3244 	pt_entry_t *l1pt;
3245 
3246 #ifdef DIAGNOSTIC
3247 	if (pmap == pmap_kernel())
3248 		panic("pmap_lev1map_create: got kernel pmap");
3249 #endif
3250 
3251 	if (pmap->pm_lev1map != kernel_lev1map) {
3252 		/*
3253 		 * We have to briefly unlock the pmap in pmap_enter()
3254 		 * to deal with a lock ordering constraint, so it's
3255 		 * entirely possible for this to happen.
3256 		 */
3257 		return (0);
3258 	}
3259 
3260 #ifdef DIAGNOSTIC
3261 	if (pmap->pm_asni[cpu_id].pma_asn != PMAP_ASN_RESERVED)
3262 		panic("pmap_lev1map_create: pmap uses non-reserved ASN");
3263 #endif
3264 
3265 	l1pt = pool_cache_get(&pmap_l1pt_cache, PR_NOWAIT);
3266 	if (l1pt == NULL)
3267 		return (ENOMEM);
3268 
3269 	pmap->pm_lev1map = l1pt;
3270 
3271 	/*
3272 	 * The page table base has changed; if the pmap was active,
3273 	 * reactivate it.
3274 	 */
3275 	if (PMAP_ISACTIVE(pmap, cpu_id)) {
3276 		pmap_asn_alloc(pmap, cpu_id);
3277 		PMAP_ACTIVATE(pmap, curproc, cpu_id);
3278 	}
3279 	PMAP_LEV1MAP_SHOOTDOWN(pmap, cpu_id);
3280 	return (0);
3281 }
3282 
3283 /*
3284  * pmap_lev1map_destroy:
3285  *
3286  *	Destroy the level 1 page table for the specified pmap.
3287  *
3288  *	Note: the pmap must already be locked.
3289  */
3290 void
3291 pmap_lev1map_destroy(pmap_t pmap, long cpu_id)
3292 {
3293 	pt_entry_t *l1pt = pmap->pm_lev1map;
3294 
3295 #ifdef DIAGNOSTIC
3296 	if (pmap == pmap_kernel())
3297 		panic("pmap_lev1map_destroy: got kernel pmap");
3298 #endif
3299 
3300 	/*
3301 	 * Go back to referencing the global kernel_lev1map.
3302 	 */
3303 	pmap->pm_lev1map = kernel_lev1map;
3304 
3305 	/*
3306 	 * The page table base has changed; if the pmap was active,
3307 	 * reactivate it.  Note that allocation of a new ASN is
3308 	 * not necessary here:
3309 	 *
3310 	 *	(1) We've gotten here because we've deleted all
3311 	 *	    user mappings in the pmap, invalidating the
3312 	 *	    TLB entries for them as we go.
3313 	 *
3314 	 *	(2) kernel_lev1map contains only kernel mappings, which
3315 	 *	    were identical in the user pmap, and all of
3316 	 *	    those mappings have PG_ASM, so the ASN doesn't
3317 	 *	    matter.
3318 	 *
3319 	 * We do, however, ensure that the pmap is using the
3320 	 * reserved ASN, so that no two pmaps ever have
3321 	 * clashing TLB entries.
3322 	 */
3323 	PMAP_INVALIDATE_ASN(pmap, cpu_id);
3324 	if (PMAP_ISACTIVE(pmap, cpu_id))
3325 		PMAP_ACTIVATE(pmap, curproc, cpu_id);
3326 	PMAP_LEV1MAP_SHOOTDOWN(pmap, cpu_id);
3327 
3328 	/*
3329 	 * Free the old level 1 page table page.
3330 	 */
3331 	pool_cache_put(&pmap_l1pt_cache, l1pt);
3332 }
3333 
3334 /*
3335  * pmap_l1pt_ctor:
3336  *
3337  *	Pool cache constructor for L1 PT pages.
3338  */
3339 int
3340 pmap_l1pt_ctor(void *arg, void *object, int flags)
3341 {
3342 	pt_entry_t *l1pt = object, pte;
3343 	int i;
3344 
3345 	/*
3346 	 * Initialize the new level 1 table by zeroing the
3347 	 * user portion and copying the kernel mappings into
3348 	 * the kernel portion.
3349 	 */
3350 	for (i = 0; i < l1pte_index(VM_MIN_KERNEL_ADDRESS); i++)
3351 		l1pt[i] = 0;
3352 
3353 	for (i = l1pte_index(VM_MIN_KERNEL_ADDRESS);
3354 	     i <= l1pte_index(VM_MAX_KERNEL_ADDRESS); i++)
3355 		l1pt[i] = kernel_lev1map[i];
3356 
3357 	/*
3358 	 * Now, map the new virtual page table.  NOTE: NO ASM!
3359 	 */
3360 	pte = ((ALPHA_K0SEG_TO_PHYS((vaddr_t) l1pt) >> PGSHIFT) << PG_SHIFT) |
3361 	    PG_V | PG_KRE | PG_KWE;
3362 	l1pt[l1pte_index(VPTBASE)] = pte;
3363 
3364 	return (0);
3365 }
3366 
3367 /*
3368  * pmap_l1pt_alloc:
3369  *
3370  *	Page allocator for L1 PT pages.
3371  */
3372 void *
3373 pmap_l1pt_alloc(struct pool *pp, int flags)
3374 {
3375 	paddr_t ptpa;
3376 
3377 	/*
3378 	 * Attempt to allocate a free page.
3379 	 */
3380 	if (pmap_physpage_alloc(PGU_L1PT, &ptpa) == FALSE)
3381 		return (NULL);
3382 
3383 	return ((void *) ALPHA_PHYS_TO_K0SEG(ptpa));
3384 }
3385 
3386 /*
3387  * pmap_l1pt_free:
3388  *
3389  *	Page freer for L1 PT pages.
3390  */
3391 void
3392 pmap_l1pt_free(struct pool *pp, void *v)
3393 {
3394 
3395 	pmap_physpage_free(ALPHA_K0SEG_TO_PHYS((vaddr_t) v));
3396 }
3397 
3398 /*
3399  * pmap_ptpage_alloc:
3400  *
3401  *	Allocate a level 2 or level 3 page table page, and
3402  *	initialize the PTE that references it.
3403  *
3404  *	Note: the pmap must already be locked.
3405  */
3406 int
3407 pmap_ptpage_alloc(pmap_t pmap, pt_entry_t *pte, int usage)
3408 {
3409 	paddr_t ptpa;
3410 
3411 	/*
3412 	 * Allocate the page table page.
3413 	 */
3414 	if (pmap_physpage_alloc(usage, &ptpa) == FALSE)
3415 		return (ENOMEM);
3416 
3417 	/*
3418 	 * Initialize the referencing PTE.
3419 	 */
3420 	PMAP_SET_PTE(pte, ((ptpa >> PGSHIFT) << PG_SHIFT) |
3421 	    PG_V | PG_KRE | PG_KWE | PG_WIRED |
3422 	    (pmap == pmap_kernel() ? PG_ASM : 0));
3423 
3424 	return (0);
3425 }
3426 
3427 /*
3428  * pmap_ptpage_free:
3429  *
3430  *	Free the level 2 or level 3 page table page referenced
3431  *	by the provided PTE.
3432  *
3433  *	Note: the pmap must already be locked.
3434  */
3435 void
3436 pmap_ptpage_free(pmap_t pmap, pt_entry_t *pte)
3437 {
3438 	paddr_t ptpa;
3439 
3440 	/*
3441 	 * Extract the physical address of the page from the PTE
3442 	 * and clear the entry.
3443 	 */
3444 	ptpa = pmap_pte_pa(pte);
3445 	PMAP_SET_PTE(pte, PG_NV);
3446 
3447 #ifdef DEBUG
3448 	pmap_zero_page(ptpa);
3449 #endif
3450 	pmap_physpage_free(ptpa);
3451 }
3452 
3453 /*
3454  * pmap_l3pt_delref:
3455  *
3456  *	Delete a reference on a level 3 PT page.  If the reference drops
3457  *	to zero, free it.
3458  *
3459  *	Note: the pmap must already be locked.
3460  */
3461 void
3462 pmap_l3pt_delref(pmap_t pmap, vaddr_t va, pt_entry_t *l3pte, long cpu_id)
3463 {
3464 	pt_entry_t *l1pte, *l2pte;
3465 	PMAP_TLB_SHOOTDOWN_CPUSET_DECL
3466 
3467 	l1pte = pmap_l1pte(pmap, va);
3468 	l2pte = pmap_l2pte(pmap, va, l1pte);
3469 
3470 #ifdef DIAGNOSTIC
3471 	if (pmap == pmap_kernel())
3472 		panic("pmap_l3pt_delref: kernel pmap");
3473 #endif
3474 
3475 	if (pmap_physpage_delref(l3pte) == 0) {
3476 		/*
3477 		 * No more mappings; we can free the level 3 table.
3478 		 */
3479 #ifdef DEBUG
3480 		if (pmapdebug & PDB_PTPAGE)
3481 			printf("pmap_l3pt_delref: freeing level 3 table at "
3482 			    "0x%lx\n", pmap_pte_pa(l2pte));
3483 #endif
3484 		pmap_ptpage_free(pmap, l2pte);
3485 
3486 		/*
3487 		 * We've freed a level 3 table, so we must
3488 		 * invalidate the TLB entry for that PT page
3489 		 * in the Virtual Page Table VA range, because
3490 		 * otherwise the PALcode will service a TLB
3491 		 * miss using the stale VPT TLB entry it entered
3492 		 * behind our back to shortcut to the VA's PTE.
3493 		 */
3494 		PMAP_INVALIDATE_TLB(pmap,
3495 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), FALSE,
3496 		    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);
3497 		PMAP_TLB_SHOOTDOWN(pmap,
3498 		    (vaddr_t)(&VPT[VPT_INDEX(va)]), 0);
3499 		PMAP_TLB_SHOOTNOW();
3500 
3501 		/*
3502 		 * We've freed a level 3 table, so delete the reference
3503 		 * on the level 2 table.
3504 		 */
3505 		pmap_l2pt_delref(pmap, l1pte, l2pte, cpu_id);
3506 	}
3507 }
3508 
3509 /*
3510  * pmap_l2pt_delref:
3511  *
3512  *	Delete a reference on a level 2 PT page.  If the reference drops
3513  *	to zero, free it.
3514  *
3515  *	Note: the pmap must already be locked.
3516  */
3517 void
3518 pmap_l2pt_delref(pmap_t pmap, pt_entry_t *l1pte, pt_entry_t *l2pte,
3519     long cpu_id)
3520 {
3521 
3522 #ifdef DIAGNOSTIC
3523 	if (pmap == pmap_kernel())
3524 		panic("pmap_l2pt_delref: kernel pmap");
3525 #endif
3526 
3527 	if (pmap_physpage_delref(l2pte) == 0) {
3528 		/*
3529 		 * No more mappings in this segment; we can free the
3530 		 * level 2 table.
3531 		 */
3532 #ifdef DEBUG
3533 		if (pmapdebug & PDB_PTPAGE)
3534 			printf("pmap_l2pt_delref: freeing level 2 table at "
3535 			    "0x%lx\n", pmap_pte_pa(l1pte));
3536 #endif
3537 		pmap_ptpage_free(pmap, l1pte);
3538 
3539 		/*
3540 		 * We've freed a level 2 table, so delete the reference
3541 		 * on the level 1 table.
3542 		 */
3543 		pmap_l1pt_delref(pmap, l1pte, cpu_id);
3544 	}
3545 }
3546 
3547 /*
3548  * pmap_l1pt_delref:
3549  *
3550  *	Delete a reference on a level 1 PT page.  If the reference drops
3551  *	to zero, free it.
3552  *
3553  *	Note: the pmap must already be locked.
3554  */
3555 void
3556 pmap_l1pt_delref(pmap_t pmap, pt_entry_t *l1pte, long cpu_id)
3557 {
3558 
3559 #ifdef DIAGNOSTIC
3560 	if (pmap == pmap_kernel())
3561 		panic("pmap_l1pt_delref: kernel pmap");
3562 #endif
3563 
3564 	if (pmap_physpage_delref(l1pte) == 0) {
3565 		/*
3566 		 * No more level 2 tables left, go back to the global
3567 		 * kernel_lev1map.
3568 		 */
3569 		pmap_lev1map_destroy(pmap, cpu_id);
3570 	}
3571 }
3572 
3573 /******************** Address Space Number management ********************/
3574 
3575 /*
3576  * pmap_asn_alloc:
3577  *
3578  *	Allocate and assign an ASN to the specified pmap.
3579  *
3580  *	Note: the pmap must already be locked.  This may be called from
3581  *	an interprocessor interrupt, and in that case, the sender of
3582  *	the IPI has the pmap lock.
3583  */
3584 void
3585 pmap_asn_alloc(pmap_t pmap, long cpu_id)
3586 {
3587 	struct pmap_asn_info *pma = &pmap->pm_asni[cpu_id];
3588 	struct pmap_asn_info *cpma = &pmap_asn_info[cpu_id];
3589 
3590 #ifdef DEBUG
3591 	if (pmapdebug & (PDB_FOLLOW|PDB_ASN))
3592 		printf("pmap_asn_alloc(%p)\n", pmap);
3593 #endif
3594 
3595 	/*
3596 	 * If the pmap is still using the global kernel_lev1map, there
3597 	 * is no need to assign an ASN at this time, because only
3598 	 * kernel mappings exist in that map, and all kernel mappings
3599 	 * have PG_ASM set.  If the pmap eventually gets its own
3600 	 * lev1map, an ASN will be allocated at that time.
3601 	 */
3602 	if (pmap->pm_lev1map == kernel_lev1map) {
3603 #ifdef DEBUG
3604 		if (pmapdebug & PDB_ASN)
3605 			printf("pmap_asn_alloc: still references "
3606 			    "kernel_lev1map\n");
3607 #endif
3608 #if defined(MULTIPROCESSOR)
3609 		/*
3610 		 * In a multiprocessor system, it's possible to
3611 		 * get here without having PMAP_ASN_RESERVED in
3612 		 * pmap->pm_asni[cpu_id].pma_asn; see pmap_lev1map_destroy().
3613 		 *
3614 		 * So, what we do here is simply assign the reserved
3615 		 * ASN for kernel_lev1map users and let things
3616 		 * continue on.  We do, however, let uniprocessor
3617 		 * configurations continue to make that assertion.
3618 		 */
3619 		pma->pma_asn = PMAP_ASN_RESERVED;
3620 #else
3621 #ifdef DIAGNOSTIC
3622 		if (pma->pma_asn != PMAP_ASN_RESERVED)
3623 			panic("pmap_asn_alloc: kernel_lev1map without "
3624 			    "PMAP_ASN_RESERVED");
3625 #endif
3626 #endif /* MULTIPROCESSOR */
3627 		return;
3628 	}
3629 
3630 	/*
3631 	 * On processors which do not implement ASNs, the swpctx PALcode
3632 	 * operation will automatically invalidate the TLB and I-cache,
3633 	 * so we don't need to do that here.
3634 	 */
3635 	if (pmap_max_asn == 0) {
3636 		/*
3637 		 * Refresh the pmap's generation number, to
3638 		 * simplify logic elsewhere.
3639 		 */
3640 		pma->pma_asngen = cpma->pma_asngen;
3641 #ifdef DEBUG
3642 		if (pmapdebug & PDB_ASN)
3643 			printf("pmap_asn_alloc: no ASNs, using asngen %lu\n",
3644 			    pma->pma_asngen);
3645 #endif
3646 		return;
3647 	}
3648 
3649 	/*
3650 	 * Hopefully, we can continue using the one we have...
3651 	 */
3652 	if (pma->pma_asn != PMAP_ASN_RESERVED &&
3653 	    pma->pma_asngen == cpma->pma_asngen) {
3654 		/*
3655 		 * ASN is still in the current generation; keep on using it.
3656 		 */
3657 #ifdef DEBUG
3658 		if (pmapdebug & PDB_ASN)
3659 			printf("pmap_asn_alloc: same generation, keeping %u\n",
3660 			    pma->pma_asn);
3661 #endif
3662 		return;
3663 	}
3664 
3665 	/*
3666 	 * Need to assign a new ASN.  Grab the next one, incrementing
3667 	 * the generation number if we have to.
3668 	 */
3669 	if (cpma->pma_asn > pmap_max_asn) {
3670 		/*
3671 		 * Invalidate all non-PG_ASM TLB entries and the
3672 		 * I-cache, and bump the generation number.
3673 		 */
3674 		ALPHA_TBIAP();
3675 		alpha_pal_imb();
3676 
3677 		cpma->pma_asn = 1;
3678 		cpma->pma_asngen++;
3679 #ifdef DIAGNOSTIC
3680 		if (cpma->pma_asngen == 0) {
3681 			/*
3682 			 * The generation number has wrapped.  We could
3683 			 * handle this scenario by traversing all of
3684 			 * the pmaps, and invalidating the generation
3685 			 * number on those which are not currently
3686 			 * in use by this processor.
3687 			 *
3688 			 * However... considering that we're using
3689 			 * an unsigned 64-bit integer for generation
3690 			 * numbers, on non-ASN CPUs, we won't wrap
3691 			 * for approx. 585 million years, or 75 billion
3692 			 * years on a 128-ASN CPU (assuming 1000 switch
3693 			 * operations per second).
3694 			 *
3695 			 * So, we don't bother.
3696 			 */
3697 			panic("pmap_asn_alloc: too much uptime");
3698 		}
3699 #endif
3700 #ifdef DEBUG
3701 		if (pmapdebug & PDB_ASN)
3702 			printf("pmap_asn_alloc: generation bumped to %lu\n",
3703 			    cpma->pma_asngen);
3704 #endif
3705 	}
3706 
3707 	/*
3708 	 * Assign the new ASN and validate the generation number.
3709 	 */
3710 	pma->pma_asn = cpma->pma_asn++;
3711 	pma->pma_asngen = cpma->pma_asngen;
3712 
3713 #ifdef DEBUG
3714 	if (pmapdebug & PDB_ASN)
3715 		printf("pmap_asn_alloc: assigning %u to pmap %p\n",
3716 		    pma->pma_asn, pmap);
3717 #endif
3718 
3719 	/*
3720 	 * Have a new ASN, so there's no need to sync the I-stream
3721 	 * on the way back out to userspace.
3722 	 */
3723 	atomic_clearbits_ulong(&pmap->pm_needisync, (1UL << cpu_id));
3724 }
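
/*
 * Worked example of the generation scheme above: suppose, for
 * illustration, that pmap_max_asn is 63 and this CPU's cpma->pma_asn
 * has advanced to 64.  The next allocation flushes all non-PG_ASM TLB
 * entries and the I-cache, bumps cpma->pma_asngen, and restarts ASN
 * assignment at 1 (the reserved ASN stays dedicated to pmaps that are
 * still using kernel_lev1map).  Every other pmap's pma_asngen now
 * mismatches the CPU's generation, so each one is handed a fresh ASN
 * the next time it passes through here.
 */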
3725 
3726 #if defined(MULTIPROCESSOR)
3727 /******************** TLB shootdown code ********************/
3728 
3729 /*
3730  * pmap_tlb_shootdown:
3731  *
3732  *	Cause the TLB entry for pmap/va to be shot down.
3733  *
3734  *	NOTE: The pmap must be locked here.
3735  */
3736 void
3737 pmap_tlb_shootdown(pmap_t pmap, vaddr_t va, pt_entry_t pte, u_long *cpumaskp)
3738 {
3739 	struct pmap_tlb_shootdown_q *pq;
3740 	struct pmap_tlb_shootdown_job *pj;
3741 	struct cpu_info *ci, *self = curcpu();
3742 	u_long cpumask;
3743 	CPU_INFO_ITERATOR cii;
3744 	int s;
3745 
3746 	LOCK_ASSERT((pmap == pmap_kernel()) ||
3747 	    simple_lock_held(&pmap->pm_slock));
3748 
3749 	cpumask = 0;
3750 
3751 	for (CPU_INFO_FOREACH(cii, ci)) {
3752 		if (ci == self)
3753 			continue;
3754 
3755 		/*
3756 		 * The pmap must be locked (unless it's the kernel
3757 		 * pmap, in which case it is okay for it to be
3758 		 * unlocked), which prevents it from becoming
3759 		 * active on any additional processors.  This makes
3760 		 * it safe to check for activeness.  If it's not
3761 		 * active on the processor in question, then just
3762 		 * mark it as needing a new ASN the next time it
3763 		 * does, saving the IPI.  We always have to send
3764 		 * the IPI for the kernel pmap.
3765 		 *
3766 		 * Note that if it's marked active now, and it becomes
3767 		 * inactive by the time the processor receives
3768 		 * the IPI, that's okay, because it does the right
3769 		 * thing with it later.
3770 		 */
3771 		if (pmap != pmap_kernel() &&
3772 		    PMAP_ISACTIVE(pmap, ci->ci_cpuid) == 0) {
3773 			PMAP_INVALIDATE_ASN(pmap, ci->ci_cpuid);
3774 			continue;
3775 		}
3776 
3777 		pq = &pmap_tlb_shootdown_q[ci->ci_cpuid];
3778 
3779 		PSJQ_LOCK(pq, s);
3780 
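		/*
		 * Accumulate the PTE bits queued for this processor; the
		 * remote CPU later checks them for PG_ASM to decide how
		 * wide a global flush has to be.
		 */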
3781 		pq->pq_pte |= pte;
3782 
3783 		/*
3784 		 * If a global flush is already pending, we
3785 		 * don't really have to do anything else.
3786 		 */
3787 		if (pq->pq_tbia) {
3788 			PSJQ_UNLOCK(pq, s);
3789 			continue;
3790 		}
3791 
3792 		pj = pmap_tlb_shootdown_job_get(pq);
3793 		if (pj == NULL) {
3794 			/*
3795 			 * Couldn't allocate a job entry.  Just
3796 			 * tell the processor to kill everything.
3797 			 */
3798 			pq->pq_tbia = 1;
3799 		} else {
3800 			pj->pj_pmap = pmap;
3801 			pj->pj_va = va;
3802 			pj->pj_pte = pte;
3803 			TAILQ_INSERT_TAIL(&pq->pq_head, pj, pj_list);
3804 		}
3805 
3806 		cpumask |= 1UL << ci->ci_cpuid;
3807 
3808 		PSJQ_UNLOCK(pq, s);
3809 	}
3810 
3811 	*cpumaskp |= cpumask;
3812 }
3813 
3814 /*
3815  * pmap_tlb_shootnow:
3816  *
3817  *	Process the TLB shootdowns that we have been accumulating
3818  *	for the specified processor set.
3819  */
3820 void
3821 pmap_tlb_shootnow(u_long cpumask)
3822 {
3823 
3824 	alpha_multicast_ipi(cpumask, ALPHA_IPI_SHOOTDOWN);
3825 }
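
/*
 * Illustrative calling sequence (a sketch only, not lifted from the
 * real call sites, which may differ and may go through wrapper
 * macros).  Given a pmap, a virtual address va, a pointer pte to its
 * PTE, and the local cpu_id:
 *
 *	u_long cpumask = 0;
 *	pt_entry_t opte;
 *
 *	simple_lock(&pmap->pm_slock);
 *	opte = *pte;
 *	*pte = 0;					(remove the mapping)
 *	PMAP_INVALIDATE_TLB(pmap, va, opte & PG_ASM,
 *	    PMAP_ISACTIVE(pmap, cpu_id), cpu_id);	(local TLB)
 *	pmap_tlb_shootdown(pmap, va, opte, &cpumask);	(queue remote work)
 *	simple_unlock(&pmap->pm_slock);
 *
 *	pmap_tlb_shootnow(cpumask);			(notify the other CPUs)
 */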
3826 
3827 /*
3828  * pmap_do_tlb_shootdown:
3829  *
3830  *	Process pending TLB shootdown operations for this processor.
3831  */
3832 void
3833 pmap_do_tlb_shootdown(struct cpu_info *ci, struct trapframe *framep)
3834 {
3835 	u_long cpu_id = ci->ci_cpuid;
3836 	u_long cpu_mask = (1UL << cpu_id);
3837 	struct pmap_tlb_shootdown_q *pq = &pmap_tlb_shootdown_q[cpu_id];
3838 	struct pmap_tlb_shootdown_job *pj;
3839 	int s;
3840 
3841 	PSJQ_LOCK(pq, s);
3842 
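	/*
	 * A pending global flush supersedes the queued per-VA jobs.  If
	 * any accumulated PTE had PG_ASM set, address-space-match entries
	 * may be stale too, so flush everything; otherwise invalidating
	 * the non-ASM entries is enough.
	 */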
3843 	if (pq->pq_tbia) {
3844 		if (pq->pq_pte & PG_ASM)
3845 			ALPHA_TBIA();
3846 		else
3847 			ALPHA_TBIAP();
3848 		pq->pq_tbia = 0;
3849 		pmap_tlb_shootdown_q_drain(pq);
3850 	} else {
3851 		while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
3852 			TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
3853 			PMAP_INVALIDATE_TLB(pj->pj_pmap, pj->pj_va,
3854 			    pj->pj_pte & PG_ASM,
3855 			    pj->pj_pmap->pm_cpus & cpu_mask, cpu_id);
3856 			pmap_tlb_shootdown_job_put(pq, pj);
3857 		}
3858 		pq->pq_pte = 0;
3859 	}
3860 
3861 	PSJQ_UNLOCK(pq, s);
3862 }
3863 
3864 /*
3865  * pmap_tlb_shootdown_q_drain:
3866  *
3867  *	Drain a processor's TLB shootdown queue.  We do not perform
3868  *	the shootdown operations.  This is merely a convenience
3869  *	function.
3870  *
3871  *	Note: We expect the queue to be locked.
3872  */
3873 void
3874 pmap_tlb_shootdown_q_drain(struct pmap_tlb_shootdown_q *pq)
3875 {
3876 	struct pmap_tlb_shootdown_job *pj;
3877 
3878 	while ((pj = TAILQ_FIRST(&pq->pq_head)) != NULL) {
3879 		TAILQ_REMOVE(&pq->pq_head, pj, pj_list);
3880 		pmap_tlb_shootdown_job_put(pq, pj);
3881 	}
3882 	pq->pq_pte = 0;
3883 }
3884 
3885 /*
3886  * pmap_tlb_shootdown_job_get:
3887  *
3888  *	Get a TLB shootdown job queue entry.  This places a limit on
3889  *	the number of outstanding jobs a processor may have.
3890  *
3891  *	Note: We expect the queue to be locked.
3892  */
3893 struct pmap_tlb_shootdown_job *
3894 pmap_tlb_shootdown_job_get(struct pmap_tlb_shootdown_q *pq)
3895 {
3896 	struct pmap_tlb_shootdown_job *pj;
3897 
3898 	if (pq->pq_count >= PMAP_TLB_SHOOTDOWN_MAXJOBS)
3899 		return (NULL);
3900 	pj = pool_get(&pmap_tlb_shootdown_job_pool, PR_NOWAIT);
3901 	if (pj != NULL)
3902 		pq->pq_count++;
3903 	return (pj);
3904 }
3905 
3906 /*
3907  * pmap_tlb_shootdown_job_put:
3908  *
3909  *	Put a TLB shootdown job queue entry onto the free list.
3910  *
3911  *	Note: We expect the queue to be locked.
3912  */
3913 void
3914 pmap_tlb_shootdown_job_put(struct pmap_tlb_shootdown_q *pq,
3915     struct pmap_tlb_shootdown_job *pj)
3916 {
3917 
3918 #ifdef DIAGNOSTIC
3919 	if (pq->pq_count == 0)
3920 		panic("pmap_tlb_shootdown_job_put: queue length inconsistency");
3921 #endif
3922 	pool_put(&pmap_tlb_shootdown_job_pool, pj);
3923 	pq->pq_count--;
3924 }
3925 #endif /* MULTIPROCESSOR */
3926