xref: /freebsd/sys/arm64/arm64/pmap.c (revision 5b9c547c)
1 /*-
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 1994 John S. Dyson
5  * All rights reserved.
6  * Copyright (c) 1994 David Greenman
7  * All rights reserved.
8  * Copyright (c) 2003 Peter Wemm
9  * All rights reserved.
10  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
11  * All rights reserved.
12  * Copyright (c) 2014 Andrew Turner
13  * All rights reserved.
14  * Copyright (c) 2014 The FreeBSD Foundation
15  * All rights reserved.
16  *
17  * This code is derived from software contributed to Berkeley by
18  * the Systems Programming Group of the University of Utah Computer
19  * Science Department and William Jolitz of UUNET Technologies Inc.
20  *
21  * This software was developed by Andrew Turner under sponsorship from
22  * the FreeBSD Foundation.
23  *
24  * Redistribution and use in source and binary forms, with or without
25  * modification, are permitted provided that the following conditions
26  * are met:
27  * 1. Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  * 2. Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in the
31  *    documentation and/or other materials provided with the distribution.
32  * 3. All advertising materials mentioning features or use of this software
33  *    must display the following acknowledgement:
34  *	This product includes software developed by the University of
35  *	California, Berkeley and its contributors.
36  * 4. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
53  */
54 /*-
55  * Copyright (c) 2003 Networks Associates Technology, Inc.
56  * All rights reserved.
57  *
58  * This software was developed for the FreeBSD Project by Jake Burkholder,
59  * Safeport Network Services, and Network Associates Laboratories, the
60  * Security Research Division of Network Associates, Inc. under
61  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
62  * CHATS research program.
63  *
64  * Redistribution and use in source and binary forms, with or without
65  * modification, are permitted provided that the following conditions
66  * are met:
67  * 1. Redistributions of source code must retain the above copyright
68  *    notice, this list of conditions and the following disclaimer.
69  * 2. Redistributions in binary form must reproduce the above copyright
70  *    notice, this list of conditions and the following disclaimer in the
71  *    documentation and/or other materials provided with the distribution.
72  *
73  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
74  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
77  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83  * SUCH DAMAGE.
84  */
85 
86 #define	AMD64_NPT_AWARE
87 
88 #include <sys/cdefs.h>
89 __FBSDID("$FreeBSD$");
90 
91 /*
92  *	Manages physical address maps.
93  *
94  *	Since the information managed by this module is
95  *	also stored by the logical address mapping module,
96  *	this module may throw away valid virtual-to-physical
97  *	mappings at almost any time.  However, invalidations
98  *	of virtual-to-physical mappings must be done as
99  *	requested.
100  *
101  *	In order to cope with hardware architectures which
102  *	make virtual-to-physical map invalidates expensive,
103  *	this module may delay invalidate or reduced protection
104  *	operations until such time as they are actually
105  *	necessary.  This module is given full information as
106  *	to which processors are currently using which maps,
107  *	and to when physical maps must be made correct.
108  */
109 
110 #include <sys/param.h>
111 #include <sys/bus.h>
112 #include <sys/systm.h>
113 #include <sys/kernel.h>
114 #include <sys/ktr.h>
115 #include <sys/lock.h>
116 #include <sys/malloc.h>
117 #include <sys/mman.h>
118 #include <sys/msgbuf.h>
119 #include <sys/mutex.h>
120 #include <sys/proc.h>
121 #include <sys/rwlock.h>
122 #include <sys/sx.h>
123 #include <sys/vmem.h>
124 #include <sys/vmmeter.h>
125 #include <sys/sched.h>
126 #include <sys/sysctl.h>
127 #include <sys/_unrhdr.h>
128 #include <sys/smp.h>
129 
130 #include <vm/vm.h>
131 #include <vm/vm_param.h>
132 #include <vm/vm_kern.h>
133 #include <vm/vm_page.h>
134 #include <vm/vm_map.h>
135 #include <vm/vm_object.h>
136 #include <vm/vm_extern.h>
137 #include <vm/vm_pageout.h>
138 #include <vm/vm_pager.h>
139 #include <vm/vm_radix.h>
140 #include <vm/vm_reserv.h>
141 #include <vm/uma.h>
142 
143 #include <machine/machdep.h>
144 #include <machine/md_var.h>
145 #include <machine/pcb.h>
146 
147 #define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
148 #define	NUPDE			(NPDEPG * NPDEPG)
149 #define	NUSERPGTBLS		(NUPDE + NPDEPG)
150 
151 #if !defined(DIAGNOSTIC)
152 #ifdef __GNUC_GNU_INLINE__
153 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
154 #else
155 #define PMAP_INLINE	extern inline
156 #endif
157 #else
158 #define PMAP_INLINE
159 #endif
160 
161 /*
162  * These are configured by the mair_el1 register. This is set up in locore.S
163  */
164 #define	DEVICE_MEMORY	0
165 #define	UNCACHED_MEMORY	1
166 #define	CACHED_MEMORY	2
167 
168 
169 #ifdef PV_STATS
170 #define PV_STAT(x)	do { x ; } while (0)
171 #else
172 #define PV_STAT(x)	do { } while (0)
173 #endif
174 
175 #define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
176 
177 #define	NPV_LIST_LOCKS	MAXCPU
178 
179 #define	PHYS_TO_PV_LIST_LOCK(pa)	\
180 			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
181 
182 #define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
183 	struct rwlock **_lockp = (lockp);		\
184 	struct rwlock *_new_lock;			\
185 							\
186 	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
187 	if (_new_lock != *_lockp) {			\
188 		if (*_lockp != NULL)			\
189 			rw_wunlock(*_lockp);		\
190 		*_lockp = _new_lock;			\
191 		rw_wlock(*_lockp);			\
192 	}						\
193 } while (0)
194 
195 #define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
196 			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
197 
198 #define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
199 	struct rwlock **_lockp = (lockp);		\
200 							\
201 	if (*_lockp != NULL) {				\
202 		rw_wunlock(*_lockp);			\
203 		*_lockp = NULL;				\
204 	}						\
205 } while (0)
206 
207 #define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
208 			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
209 
210 struct pmap kernel_pmap_store;
211 
212 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
213 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
214 vm_offset_t kernel_vm_end = 0;
215 
216 struct msgbuf *msgbufp = NULL;
217 
218 static struct rwlock_padalign pvh_global_lock;
219 
220 /*
221  * Data for the pv entry allocation mechanism
222  */
223 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
224 static struct mtx pv_chunks_mutex;
225 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
226 
227 static void	free_pv_chunk(struct pv_chunk *pc);
228 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
229 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
230 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
231 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
232 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
233 		    vm_offset_t va);
234 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
235     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
236 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
237     pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
238 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
239     vm_page_t m, struct rwlock **lockp);
240 
241 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
242 		struct rwlock **lockp);
243 
244 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
245     struct spglist *free);
246 static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
247 
248 /********************/
249 /* Inline functions */
250 /********************/
251 
252 static __inline void
253 pagecopy(void *s, void *d)
254 {
255 
256 	memcpy(d, s, PAGE_SIZE);
257 }
258 
259 static __inline void
260 pagezero(void *p)
261 {
262 
263 	bzero(p, PAGE_SIZE);
264 }
265 
266 #define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
267 #define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
268 #define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)
269 
270 static __inline pd_entry_t *
271 pmap_l1(pmap_t pmap, vm_offset_t va)
272 {
273 
274 	return (&pmap->pm_l1[pmap_l1_index(va)]);
275 }
276 
277 static __inline pd_entry_t *
278 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
279 {
280 	pd_entry_t *l2;
281 
282 	l2 = (pd_entry_t *)PHYS_TO_DMAP(*l1 & ~ATTR_MASK);
283 	return (&l2[pmap_l2_index(va)]);
284 }
285 
286 static __inline pd_entry_t *
287 pmap_l2(pmap_t pmap, vm_offset_t va)
288 {
289 	pd_entry_t *l1;
290 
291 	l1 = pmap_l1(pmap, va);
292 	if ((*l1 & ATTR_DESCR_MASK) != L1_TABLE)
293 		return (NULL);
294 
295 	return (pmap_l1_to_l2(l1, va));
296 }
297 
298 static __inline pt_entry_t *
299 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
300 {
301 	pt_entry_t *l3;
302 
303 	l3 = (pd_entry_t *)PHYS_TO_DMAP(*l2 & ~ATTR_MASK);
304 	return (&l3[pmap_l3_index(va)]);
305 }
306 
307 static __inline pt_entry_t *
308 pmap_l3(pmap_t pmap, vm_offset_t va)
309 {
310 	pd_entry_t *l2;
311 
312 	l2 = pmap_l2(pmap, va);
313 	if (l2 == NULL || (*l2 & ATTR_DESCR_MASK) != L2_TABLE)
314 		return (NULL);
315 
316 	return (pmap_l2_to_l3(l2, va));
317 }
318 
319 /*
320  * These load the old table data and store the new value.
321  * They need to be atomic as the System MMU may write to the table at
322  * the same time as the CPU.
323  */
324 #define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
325 #define	pmap_set(table, mask) atomic_set_64(table, mask)
326 #define	pmap_load_clear(table) atomic_swap_64(table, 0)
327 #define	pmap_load(table) (*table)
328 
329 static __inline int
330 pmap_is_current(pmap_t pmap)
331 {
332 
333 	return ((pmap == pmap_kernel()) ||
334 	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
335 }
336 
337 static __inline int
338 pmap_l3_valid(pt_entry_t l3)
339 {
340 
341 	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
342 }
343 
344 static __inline int
345 pmap_l3_valid_cacheable(pt_entry_t l3)
346 {
347 
348 	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
349 	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
350 }
351 
352 #define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
353 
354 /*
355  * Checks if the page is dirty. We currently lack proper tracking of this on
356  * arm64 so for now assume is a page mapped as rw was accessed it is.
357  */
358 static inline int
359 pmap_page_dirty(pt_entry_t pte)
360 {
361 
362 	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
363 	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
364 }
365 
366 static __inline void
367 pmap_resident_count_inc(pmap_t pmap, int count)
368 {
369 
370 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
371 	pmap->pm_stats.resident_count += count;
372 }
373 
374 static __inline void
375 pmap_resident_count_dec(pmap_t pmap, int count)
376 {
377 
378 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
379 	KASSERT(pmap->pm_stats.resident_count >= count,
380 	    ("pmap %p resident count underflow %ld %d", pmap,
381 	    pmap->pm_stats.resident_count, count));
382 	pmap->pm_stats.resident_count -= count;
383 }
384 
385 static pt_entry_t *
386 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
387     u_int *l2_slot)
388 {
389 	pt_entry_t *l2;
390 	pd_entry_t *l1;
391 
392 	l1 = (pd_entry_t *)l1pt;
393 	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
394 
395 	/* Check locore has used a table L1 map */
396 	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
397 	   ("Invalid bootstrap L1 table"));
398 	/* Find the address of the L2 table */
399 	l2 = (pt_entry_t *)init_pt_va;
400 	*l2_slot = pmap_l2_index(va);
401 
402 	return (l2);
403 }
404 
405 static vm_paddr_t
406 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
407 {
408 	u_int l1_slot, l2_slot;
409 	pt_entry_t *l2;
410 
411 	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
412 
413 	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
414 }
415 
416 static void
417 pmap_bootstrap_dmap(vm_offset_t l1pt)
418 {
419 	vm_offset_t va;
420 	vm_paddr_t pa;
421 	pd_entry_t *l1;
422 	u_int l1_slot;
423 
424 	va = DMAP_MIN_ADDRESS;
425 	l1 = (pd_entry_t *)l1pt;
426 	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
427 
428 	for (pa = 0; va < DMAP_MAX_ADDRESS;
429 	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
430 		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
431 
432 		/*
433 		 * TODO: Turn the cache on here when we have cache
434 		 * flushing code.
435 		 */
436 		pmap_load_store(&l1[l1_slot],
437 		    (pa & ~L1_OFFSET) | ATTR_AF | L1_BLOCK |
438 		    ATTR_IDX(CACHED_MEMORY));
439 	}
440 
441 	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
442 	cpu_tlb_flushID();
443 }
444 
445 static vm_offset_t
446 pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
447 {
448 	vm_offset_t l2pt;
449 	vm_paddr_t pa;
450 	pd_entry_t *l1;
451 	u_int l1_slot;
452 
453 	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));
454 
455 	l1 = (pd_entry_t *)l1pt;
456 	l1_slot = pmap_l1_index(va);
457 	l2pt = l2_start;
458 
459 	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
460 		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
461 
462 		pa = pmap_early_vtophys(l1pt, l2pt);
463 		pmap_load_store(&l1[l1_slot],
464 		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
465 		l2pt += PAGE_SIZE;
466 	}
467 
468 	/* Clean the L2 page table */
469 	memset((void *)l2_start, 0, l2pt - l2_start);
470 	cpu_dcache_wb_range(l2_start, l2pt - l2_start);
471 
472 	/* Flush the l1 table to ram */
473 	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
474 
475 	return l2pt;
476 }
477 
478 static vm_offset_t
479 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
480 {
481 	vm_offset_t l2pt, l3pt;
482 	vm_paddr_t pa;
483 	pd_entry_t *l2;
484 	u_int l2_slot;
485 
486 	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
487 
488 	l2 = pmap_l2(kernel_pmap, va);
489 	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
490 	l2pt = (vm_offset_t)l2;
491 	l2_slot = pmap_l2_index(va);
492 	l3pt = l3_start;
493 
494 	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
495 		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
496 
497 		pa = pmap_early_vtophys(l1pt, l3pt);
498 		pmap_load_store(&l2[l2_slot],
499 		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
500 		l3pt += PAGE_SIZE;
501 	}
502 
503 	/* Clean the L2 page table */
504 	memset((void *)l3_start, 0, l3pt - l3_start);
505 	cpu_dcache_wb_range(l3_start, l3pt - l3_start);
506 
507 	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
508 
509 	return l3pt;
510 }
511 
512 /*
513  *	Bootstrap the system enough to run with virtual memory.
514  */
515 void
516 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
517 {
518 	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
519 	uint64_t kern_delta;
520 	pt_entry_t *l2;
521 	vm_offset_t va, freemempos;
522 	vm_offset_t dpcpu, msgbufpv;
523 	vm_paddr_t pa;
524 
525 	kern_delta = KERNBASE - kernstart;
526 	physmem = 0;
527 
528 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
529 	printf("%lx\n", l1pt);
530 	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
531 
532 	/* Set this early so we can use the pagetable walking functions */
533 	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
534 	PMAP_LOCK_INIT(kernel_pmap);
535 
536  	/*
537 	 * Initialize the global pv list lock.
538 	 */
539 	rw_init(&pvh_global_lock, "pmap pv global");
540 
541 	/* Create a direct map region early so we can use it for pa -> va */
542 	pmap_bootstrap_dmap(l1pt);
543 
544 	va = KERNBASE;
545 	pa = KERNBASE - kern_delta;
546 
547 	/*
548 	 * Start to initialise phys_avail by copying from physmap
549 	 * up to the physical address KERNBASE points at.
550 	 */
551 	map_slot = avail_slot = 0;
552 	for (; map_slot < (physmap_idx * 2); map_slot += 2) {
553 		if (physmap[map_slot] == physmap[map_slot + 1])
554 			continue;
555 
556 		if (physmap[map_slot] <= pa &&
557 		    physmap[map_slot + 1] > pa)
558 			break;
559 
560 		phys_avail[avail_slot] = physmap[map_slot];
561 		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
562 		physmem += (phys_avail[avail_slot + 1] -
563 		    phys_avail[avail_slot]) >> PAGE_SHIFT;
564 		avail_slot += 2;
565 	}
566 
567 	/* Add the memory before the kernel */
568 	if (physmap[avail_slot] < pa) {
569 		phys_avail[avail_slot] = physmap[map_slot];
570 		phys_avail[avail_slot + 1] = pa;
571 		physmem += (phys_avail[avail_slot + 1] -
572 		    phys_avail[avail_slot]) >> PAGE_SHIFT;
573 		avail_slot += 2;
574 	}
575 	used_map_slot = map_slot;
576 
577 	/*
578 	 * Read the page table to find out what is already mapped.
579 	 * This assumes we have mapped a block of memory from KERNBASE
580 	 * using a single L1 entry.
581 	 */
582 	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
583 
584 	/* Sanity check the index, KERNBASE should be the first VA */
585 	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
586 
587 	/* Find how many pages we have mapped */
588 	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
589 		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
590 			break;
591 
592 		/* Check locore used L2 blocks */
593 		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
594 		    ("Invalid bootstrap L2 table"));
595 		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
596 		    ("Incorrect PA in L2 table"));
597 
598 		va += L2_SIZE;
599 		pa += L2_SIZE;
600 	}
601 
602 	va = roundup2(va, L1_SIZE);
603 
604 	freemempos = KERNBASE + kernlen;
605 	freemempos = roundup2(freemempos, PAGE_SIZE);
606 	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
607 	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
608 	/* And the l3 tables for the early devmap */
609 	freemempos = pmap_bootstrap_l3(l1pt,
610 	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
611 
612 	cpu_tlb_flushID();
613 
614 #define alloc_pages(var, np)						\
615 	(var) = freemempos;						\
616 	freemempos += (np * PAGE_SIZE);					\
617 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
618 
619 	/* Allocate dynamic per-cpu area. */
620 	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
621 	dpcpu_init((void *)dpcpu, 0);
622 
623 	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
624 	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
625 	msgbufp = (void *)msgbufpv;
626 
627 	virtual_avail = roundup2(freemempos, L1_SIZE);
628 	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
629 	kernel_vm_end = virtual_avail;
630 
631 	pa = pmap_early_vtophys(l1pt, freemempos);
632 
633 	/* Finish initialising physmap */
634 	map_slot = used_map_slot;
635 	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
636 	    map_slot < (physmap_idx * 2); map_slot += 2) {
637 		if (physmap[map_slot] == physmap[map_slot + 1])
638 			continue;
639 
640 		/* Have we used the current range? */
641 		if (physmap[map_slot + 1] <= pa)
642 			continue;
643 
644 		/* Do we need to split the entry? */
645 		if (physmap[map_slot] < pa) {
646 			phys_avail[avail_slot] = pa;
647 			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
648 		} else {
649 			phys_avail[avail_slot] = physmap[map_slot];
650 			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
651 		}
652 		physmem += (phys_avail[avail_slot + 1] -
653 		    phys_avail[avail_slot]) >> PAGE_SHIFT;
654 
655 		avail_slot += 2;
656 	}
657 	phys_avail[avail_slot] = 0;
658 	phys_avail[avail_slot + 1] = 0;
659 
660 	/*
661 	 * Maxmem isn't the "maximum memory", it's one larger than the
662 	 * highest page of the physical address space.  It should be
663 	 * called something like "Maxphyspage".
664 	 */
665 	Maxmem = atop(phys_avail[avail_slot - 1]);
666 
667 	cpu_tlb_flushID();
668 }
669 
670 /*
671  *	Initialize a vm_page's machine-dependent fields.
672  */
673 void
674 pmap_page_init(vm_page_t m)
675 {
676 
677 	TAILQ_INIT(&m->md.pv_list);
678 	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
679 }
680 
681 /*
682  *	Initialize the pmap module.
683  *	Called by vm_init, to initialize any structures that the pmap
684  *	system needs to map virtual memory.
685  */
686 void
687 pmap_init(void)
688 {
689 	int i;
690 
691 	/*
692 	 * Initialize the pv chunk list mutex.
693 	 */
694 	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
695 
696 	/*
697 	 * Initialize the pool of pv list locks.
698 	 */
699 	for (i = 0; i < NPV_LIST_LOCKS; i++)
700 		rw_init(&pv_list_locks[i], "pmap pv list");
701 }
702 
703 /*
704  * Normal, non-SMP, invalidation functions.
705  * We inline these within pmap.c for speed.
706  */
707 PMAP_INLINE void
708 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
709 {
710 
711 	sched_pin();
712 	__asm __volatile(
713 	    "dsb  sy		\n"
714 	    "tlbi vaae1is, %0	\n"
715 	    "dsb  sy		\n"
716 	    "isb		\n"
717 	    : : "r"(va >> PAGE_SHIFT));
718 	sched_unpin();
719 }
720 
721 PMAP_INLINE void
722 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
723 {
724 	vm_offset_t addr;
725 
726 	sched_pin();
727 	sva >>= PAGE_SHIFT;
728 	eva >>= PAGE_SHIFT;
729 	__asm __volatile("dsb	sy");
730 	for (addr = sva; addr < eva; addr++) {
731 		__asm __volatile(
732 		    "tlbi vaae1is, %0" : : "r"(addr));
733 	}
734 	__asm __volatile(
735 	    "dsb  sy	\n"
736 	    "isb	\n");
737 	sched_unpin();
738 }
739 
740 PMAP_INLINE void
741 pmap_invalidate_all(pmap_t pmap)
742 {
743 
744 	sched_pin();
745 	__asm __volatile(
746 	    "dsb  sy		\n"
747 	    "tlbi vmalle1is	\n"
748 	    "dsb  sy		\n"
749 	    "isb		\n");
750 	sched_unpin();
751 }
752 
753 /*
754  *	Routine:	pmap_extract
755  *	Function:
756  *		Extract the physical page address associated
757  *		with the given map/virtual_address pair.
758  */
759 vm_paddr_t
760 pmap_extract(pmap_t pmap, vm_offset_t va)
761 {
762 	pd_entry_t *l2p, l2;
763 	pt_entry_t *l3p, l3;
764 	vm_paddr_t pa;
765 
766 	pa = 0;
767 	PMAP_LOCK(pmap);
768 	/*
769 	 * Start with the l2 tabel. We are unable to allocate
770 	 * pages in the l1 table.
771 	 */
772 	l2p = pmap_l2(pmap, va);
773 	if (l2p != NULL) {
774 		l2 = *l2p;
775 		if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
776 			l3p = pmap_l2_to_l3(l2p, va);
777 			if (l3p != NULL) {
778 				l3 = *l3p;
779 
780 				if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
781 					pa = (l3 & ~ATTR_MASK) |
782 					    (va & L3_OFFSET);
783 			}
784 		} else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
785 			pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
786 	}
787 	PMAP_UNLOCK(pmap);
788 	return (pa);
789 }
790 
791 /*
792  *	Routine:	pmap_extract_and_hold
793  *	Function:
794  *		Atomically extract and hold the physical page
795  *		with the given pmap and virtual address pair
796  *		if that mapping permits the given protection.
797  */
798 vm_page_t
799 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
800 {
801 	pt_entry_t *l3p, l3;
802 	vm_paddr_t pa;
803 	vm_page_t m;
804 
805 	pa = 0;
806 	m = NULL;
807 	PMAP_LOCK(pmap);
808 retry:
809 	l3p = pmap_l3(pmap, va);
810 	if (l3p != NULL && (l3 = *l3p) != 0) {
811 		if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
812 		    ((prot & VM_PROT_WRITE) == 0)) {
813 			if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa))
814 				goto retry;
815 			m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK);
816 			vm_page_hold(m);
817 		}
818 	}
819 	PA_UNLOCK_COND(pa);
820 	PMAP_UNLOCK(pmap);
821 	return (m);
822 }
823 
824 vm_paddr_t
825 pmap_kextract(vm_offset_t va)
826 {
827 	pd_entry_t *l2;
828 	pt_entry_t *l3;
829 	vm_paddr_t pa;
830 
831 	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
832 		pa = DMAP_TO_PHYS(va);
833 	} else {
834 		l2 = pmap_l2(kernel_pmap, va);
835 		if (l2 == NULL)
836 			panic("pmap_kextract: No l2");
837 		if ((*l2 & ATTR_DESCR_MASK) == L2_BLOCK)
838 			return ((*l2 & ~ATTR_MASK) | (va & L2_OFFSET));
839 
840 		l3 = pmap_l2_to_l3(l2, va);
841 		if (l3 == NULL)
842 			panic("pmap_kextract: No l3...");
843 		pa = (*l3 & ~ATTR_MASK) | (va & PAGE_MASK);
844 	}
845 	return (pa);
846 }
847 
848 /***************************************************
849  * Low level mapping routines.....
850  ***************************************************/
851 
852 void
853 pmap_kenter_device(vm_offset_t va, vm_size_t size, vm_paddr_t pa)
854 {
855 	pt_entry_t *l3;
856 
857 	KASSERT((pa & L3_OFFSET) == 0,
858 	   ("pmap_kenter_device: Invalid physical address"));
859 	KASSERT((va & L3_OFFSET) == 0,
860 	   ("pmap_kenter_device: Invalid virtual address"));
861 	KASSERT((size & PAGE_MASK) == 0,
862 	    ("pmap_kenter_device: Mapping is not page-sized"));
863 
864 	while (size != 0) {
865 		l3 = pmap_l3(kernel_pmap, va);
866 		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
867 		pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_AF | L3_PAGE |
868 		    ATTR_IDX(DEVICE_MEMORY));
869 		PTE_SYNC(l3);
870 
871 		va += PAGE_SIZE;
872 		pa += PAGE_SIZE;
873 		size -= PAGE_SIZE;
874 	}
875 }
876 
877 /*
878  * Remove a page from the kernel pagetables.
879  * Note: not SMP coherent.
880  */
881 PMAP_INLINE void
882 pmap_kremove(vm_offset_t va)
883 {
884 	pt_entry_t *l3;
885 
886 	l3 = pmap_l3(kernel_pmap, va);
887 	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
888 
889 	if (pmap_l3_valid_cacheable(pmap_load(l3)))
890 		cpu_dcache_wb_range(va, L3_SIZE);
891 	pmap_load_clear(l3);
892 	PTE_SYNC(l3);
893 }
894 
895 void
896 pmap_kremove_device(vm_offset_t va, vm_size_t size)
897 {
898 	pt_entry_t *l3;
899 
900 	KASSERT((va & L3_OFFSET) == 0,
901 	   ("pmap_kremove_device: Invalid virtual address"));
902 	KASSERT((size & PAGE_MASK) == 0,
903 	    ("pmap_kremove_device: Mapping is not page-sized"));
904 
905 	while (size != 0) {
906 		l3 = pmap_l3(kernel_pmap, va);
907 		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
908 		pmap_load_clear(l3);
909 		PTE_SYNC(l3);
910 
911 		va += PAGE_SIZE;
912 		size -= PAGE_SIZE;
913 	}
914 }
915 
916 /*
917  *	Used to map a range of physical addresses into kernel
918  *	virtual address space.
919  *
920  *	The value passed in '*virt' is a suggested virtual address for
921  *	the mapping. Architectures which can support a direct-mapped
922  *	physical to virtual region can return the appropriate address
923  *	within that region, leaving '*virt' unchanged. Other
924  *	architectures should map the pages starting at '*virt' and
925  *	update '*virt' with the first usable address after the mapped
926  *	region.
927  */
928 vm_offset_t
929 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
930 {
931 	return PHYS_TO_DMAP(start);
932 }
933 
934 
935 /*
936  * Add a list of wired pages to the kva
937  * this routine is only used for temporary
938  * kernel mappings that do not need to have
939  * page modification or references recorded.
940  * Note that old mappings are simply written
941  * over.  The page *must* be wired.
942  * Note: SMP coherent.  Uses a ranged shootdown IPI.
943  */
944 void
945 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
946 {
947 	pt_entry_t *l3, pa;
948 	vm_offset_t va;
949 	vm_page_t m;
950 	int i;
951 
952 	va = sva;
953 	for (i = 0; i < count; i++) {
954 		m = ma[i];
955 		pa = VM_PAGE_TO_PHYS(m) | ATTR_AF |
956 		    ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RW) | L3_PAGE;
957 		l3 = pmap_l3(kernel_pmap, va);
958 		pmap_load_store(l3, pa);
959 		PTE_SYNC(l3);
960 
961 		va += L3_SIZE;
962 	}
963 }
964 
965 /*
966  * This routine tears out page mappings from the
967  * kernel -- it is meant only for temporary mappings.
968  * Note: SMP coherent.  Uses a ranged shootdown IPI.
969  */
970 void
971 pmap_qremove(vm_offset_t sva, int count)
972 {
973 	vm_offset_t va;
974 
975 	va = sva;
976 	while (count-- > 0) {
977 		KASSERT(va >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", va));
978 		pmap_kremove(va);
979 		va += PAGE_SIZE;
980 	}
981 	pmap_invalidate_range(kernel_pmap, sva, va);
982 }
983 
984 /***************************************************
985  * Page table page management routines.....
986  ***************************************************/
987 static __inline void
988 pmap_free_zero_pages(struct spglist *free)
989 {
990 	vm_page_t m;
991 
992 	while ((m = SLIST_FIRST(free)) != NULL) {
993 		SLIST_REMOVE_HEAD(free, plinks.s.ss);
994 		/* Preserve the page's PG_ZERO setting. */
995 		vm_page_free_toq(m);
996 	}
997 }
998 
999 /*
1000  * Schedule the specified unused page table page to be freed.  Specifically,
1001  * add the page to the specified list of pages that will be released to the
1002  * physical memory manager after the TLB has been updated.
1003  */
1004 static __inline void
1005 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
1006     boolean_t set_PG_ZERO)
1007 {
1008 
1009 	if (set_PG_ZERO)
1010 		m->flags |= PG_ZERO;
1011 	else
1012 		m->flags &= ~PG_ZERO;
1013 	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
1014 }
1015 
1016 /*
1017  * Decrements a page table page's wire count, which is used to record the
1018  * number of valid page table entries within the page.  If the wire count
1019  * drops to zero, then the page table page is unmapped.  Returns TRUE if the
1020  * page table page was unmapped and FALSE otherwise.
1021  */
1022 static inline boolean_t
1023 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1024 {
1025 
1026 	--m->wire_count;
1027 	if (m->wire_count == 0) {
1028 		_pmap_unwire_l3(pmap, va, m, free);
1029 		return (TRUE);
1030 	} else
1031 		return (FALSE);
1032 }
1033 
1034 static void
1035 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1036 {
1037 
1038 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1039 	/*
1040 	 * unmap the page table page
1041 	 */
1042 	if (m->pindex >= NUPDE) {
1043 		/* PD page */
1044 		pd_entry_t *l1;
1045 		l1 = pmap_l1(pmap, va);
1046 		pmap_load_clear(l1);
1047 		PTE_SYNC(l1);
1048 	} else {
1049 		/* PTE page */
1050 		pd_entry_t *l2;
1051 		l2 = pmap_l2(pmap, va);
1052 		pmap_load_clear(l2);
1053 		PTE_SYNC(l2);
1054 	}
1055 	pmap_resident_count_dec(pmap, 1);
1056 	if (m->pindex < NUPDE) {
1057 		/* We just released a PT, unhold the matching PD */
1058 		vm_page_t pdpg;
1059 
1060 		pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK);
1061 		pmap_unwire_l3(pmap, va, pdpg, free);
1062 	}
1063 
1064 	/*
1065 	 * This is a release store so that the ordinary store unmapping
1066 	 * the page table page is globally performed before TLB shoot-
1067 	 * down is begun.
1068 	 */
1069 	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
1070 
1071 	/*
1072 	 * Put page on a list so that it is released after
1073 	 * *ALL* TLB shootdown is done
1074 	 */
1075 	pmap_add_delayed_free_list(m, free, TRUE);
1076 }
1077 
1078 /*
1079  * After removing an l3 entry, this routine is used to
1080  * conditionally free the page, and manage the hold/wire counts.
1081  */
1082 static int
1083 pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
1084     struct spglist *free)
1085 {
1086 	vm_page_t mpte;
1087 
1088 	if (va >= VM_MAXUSER_ADDRESS)
1089 		return (0);
1090 	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
1091 	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
1092 	return (pmap_unwire_l3(pmap, va, mpte, free));
1093 }
1094 
1095 void
1096 pmap_pinit0(pmap_t pmap)
1097 {
1098 
1099 	PMAP_LOCK_INIT(pmap);
1100 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1101 	pmap->pm_l1 = kernel_pmap->pm_l1;
1102 }
1103 
1104 int
1105 pmap_pinit(pmap_t pmap)
1106 {
1107 	vm_paddr_t l1phys;
1108 	vm_page_t l1pt;
1109 
1110 	/*
1111 	 * allocate the l1 page
1112 	 */
1113 	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
1114 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
1115 		VM_WAIT;
1116 
1117 	l1phys = VM_PAGE_TO_PHYS(l1pt);
1118 	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
1119 
1120 	if ((l1pt->flags & PG_ZERO) == 0)
1121 		pagezero(pmap->pm_l1);
1122 
1123 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1124 
1125 	return (1);
1126 }
1127 
1128 /*
1129  * This routine is called if the desired page table page does not exist.
1130  *
1131  * If page table page allocation fails, this routine may sleep before
1132  * returning NULL.  It sleeps only if a lock pointer was given.
1133  *
1134  * Note: If a page allocation fails at page table level two or three,
1135  * one or two pages may be held during the wait, only to be released
1136  * afterwards.  This conservative approach is easily argued to avoid
1137  * race conditions.
1138  */
1139 static vm_page_t
1140 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
1141 {
1142 	vm_page_t m, /*pdppg, */pdpg;
1143 
1144 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1145 
1146 	/*
1147 	 * Allocate a page table page.
1148 	 */
1149 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
1150 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
1151 		if (lockp != NULL) {
1152 			RELEASE_PV_LIST_LOCK(lockp);
1153 			PMAP_UNLOCK(pmap);
1154 			rw_runlock(&pvh_global_lock);
1155 			VM_WAIT;
1156 			rw_rlock(&pvh_global_lock);
1157 			PMAP_LOCK(pmap);
1158 		}
1159 
1160 		/*
1161 		 * Indicate the need to retry.  While waiting, the page table
1162 		 * page may have been allocated.
1163 		 */
1164 		return (NULL);
1165 	}
1166 	if ((m->flags & PG_ZERO) == 0)
1167 		pmap_zero_page(m);
1168 
1169 	/*
1170 	 * Map the pagetable page into the process address space, if
1171 	 * it isn't already there.
1172 	 */
1173 
1174 	if (ptepindex >= NUPDE) {
1175 		pd_entry_t *l1;
1176 		vm_pindex_t l1index;
1177 
1178 		l1index = ptepindex - NUPDE;
1179 		l1 = &pmap->pm_l1[l1index];
1180 		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
1181 		PTE_SYNC(l1);
1182 
1183 	} else {
1184 		vm_pindex_t l1index;
1185 		pd_entry_t *l1, *l2;
1186 
1187 		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
1188 		l1 = &pmap->pm_l1[l1index];
1189 		if (*l1 == 0) {
1190 			/* recurse for allocating page dir */
1191 			if (_pmap_alloc_l3(pmap, NUPDE + l1index,
1192 			    lockp) == NULL) {
1193 				--m->wire_count;
1194 				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1195 				vm_page_free_zero(m);
1196 				return (NULL);
1197 			}
1198 		} else {
1199 			pdpg = PHYS_TO_VM_PAGE(*l1 & ~ATTR_MASK);
1200 			pdpg->wire_count++;
1201 		}
1202 
1203 		l2 = (pd_entry_t *)PHYS_TO_DMAP(*l1 & ~ATTR_MASK);
1204 		l2 = &l2[ptepindex & Ln_ADDR_MASK];
1205 		pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | ATTR_AF |
1206 		    ATTR_IDX(CACHED_MEMORY) | L2_TABLE);
1207 		PTE_SYNC(l2);
1208 	}
1209 
1210 	pmap_resident_count_inc(pmap, 1);
1211 
1212 	return (m);
1213 }
1214 
1215 static vm_page_t
1216 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
1217 {
1218 	vm_pindex_t ptepindex;
1219 	pd_entry_t *l2;
1220 	vm_page_t m;
1221 
1222 	/*
1223 	 * Calculate pagetable page index
1224 	 */
1225 	ptepindex = pmap_l2_pindex(va);
1226 retry:
1227 	/*
1228 	 * Get the page directory entry
1229 	 */
1230 	l2 = pmap_l2(pmap, va);
1231 
1232 	/*
1233 	 * If the page table page is mapped, we just increment the
1234 	 * hold count, and activate it.
1235 	 */
1236 	if (l2 != NULL && *l2 != 0) {
1237 		m = PHYS_TO_VM_PAGE(*l2 & ~ATTR_MASK);
1238 		m->wire_count++;
1239 	} else {
1240 		/*
1241 		 * Here if the pte page isn't mapped, or if it has been
1242 		 * deallocated.
1243 		 */
1244 		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
1245 		if (m == NULL && lockp != NULL)
1246 			goto retry;
1247 	}
1248 	/*
1249 	 * XXXARM64: I'm not sure why we need this but it fixes a crash
1250 	 * when running things from a shell script.
1251 	 */
1252 	pmap_invalidate_all(pmap);
1253 	return (m);
1254 }
1255 
1256 
1257 /***************************************************
1258  * Pmap allocation/deallocation routines.
1259  ***************************************************/
1260 
1261 /*
1262  * Release any resources held by the given physical map.
1263  * Called when a pmap initialized by pmap_pinit is being released.
1264  * Should only be called if the map contains no valid mappings.
1265  */
1266 void
1267 pmap_release(pmap_t pmap)
1268 {
1269 	vm_page_t m;
1270 
1271 	KASSERT(pmap->pm_stats.resident_count == 0,
1272 	    ("pmap_release: pmap resident count %ld != 0",
1273 	    pmap->pm_stats.resident_count));
1274 
1275 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
1276 
1277 	m->wire_count--;
1278 	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1279 	vm_page_free_zero(m);
1280 }
1281 
1282 #if 0
1283 static int
1284 kvm_size(SYSCTL_HANDLER_ARGS)
1285 {
1286 	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
1287 
1288 	return sysctl_handle_long(oidp, &ksize, 0, req);
1289 }
1290 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
1291     0, 0, kvm_size, "LU", "Size of KVM");
1292 
1293 static int
1294 kvm_free(SYSCTL_HANDLER_ARGS)
1295 {
1296 	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
1297 
1298 	return sysctl_handle_long(oidp, &kfree, 0, req);
1299 }
1300 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
1301     0, 0, kvm_free, "LU", "Amount of KVM free");
1302 #endif /* 0 */
1303 
1304 /*
1305  * grow the number of kernel page table entries, if needed
1306  */
1307 void
1308 pmap_growkernel(vm_offset_t addr)
1309 {
1310 	vm_paddr_t paddr;
1311 	vm_page_t nkpg;
1312 	pd_entry_t *l1, *l2;
1313 
1314 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1315 
1316 	addr = roundup2(addr, L2_SIZE);
1317 	if (addr - 1 >= kernel_map->max_offset)
1318 		addr = kernel_map->max_offset;
1319 	while (kernel_vm_end < addr) {
1320 		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
1321 		if (*l1 == 0) {
1322 			/* We need a new PDP entry */
1323 			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
1324 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
1325 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1326 			if (nkpg == NULL)
1327 				panic("pmap_growkernel: no memory to grow kernel");
1328 			if ((nkpg->flags & PG_ZERO) == 0)
1329 				pmap_zero_page(nkpg);
1330 			paddr = VM_PAGE_TO_PHYS(nkpg);
1331 			pmap_load_store(l1, paddr | L1_TABLE);
1332 			PTE_SYNC(l1);
1333 			continue; /* try again */
1334 		}
1335 		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
1336 		if ((*l2 & ATTR_AF) != 0) {
1337 			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1338 			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1339 				kernel_vm_end = kernel_map->max_offset;
1340 				break;
1341 			}
1342 			continue;
1343 		}
1344 
1345 		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
1346 		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
1347 		    VM_ALLOC_ZERO);
1348 		if (nkpg == NULL)
1349 			panic("pmap_growkernel: no memory to grow kernel");
1350 		if ((nkpg->flags & PG_ZERO) == 0)
1351 			pmap_zero_page(nkpg);
1352 		paddr = VM_PAGE_TO_PHYS(nkpg);
1353 		pmap_load_store(l2, paddr | L2_TABLE);
1354 		PTE_SYNC(l2);
1355 
1356 		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1357 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1358 			kernel_vm_end = kernel_map->max_offset;
1359 			break;
1360 		}
1361 	}
1362 }
1363 
1364 
1365 /***************************************************
1366  * page management routines.
1367  ***************************************************/
1368 
1369 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1370 CTASSERT(_NPCM == 3);
1371 CTASSERT(_NPCPV == 168);
1372 
1373 static __inline struct pv_chunk *
1374 pv_to_chunk(pv_entry_t pv)
1375 {
1376 
1377 	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1378 }
1379 
1380 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1381 
1382 #define	PC_FREE0	0xfffffffffffffffful
1383 #define	PC_FREE1	0xfffffffffffffffful
1384 #define	PC_FREE2	0x000000fffffffffful
1385 
1386 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
1387 
1388 #if 0
1389 #ifdef PV_STATS
1390 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1391 
1392 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1393 	"Current number of pv entry chunks");
1394 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1395 	"Current number of pv entry chunks allocated");
1396 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1397 	"Current number of pv entry chunks frees");
1398 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1399 	"Number of times tried to get a chunk page but failed.");
1400 
1401 static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
1402 static int pv_entry_spare;
1403 
1404 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1405 	"Current number of pv entry frees");
1406 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1407 	"Current number of pv entry allocs");
1408 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1409 	"Current number of pv entries");
1410 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1411 	"Current number of spare pv entries");
1412 #endif
1413 #endif /* 0 */
1414 
1415 /*
1416  * We are in a serious low memory condition.  Resort to
1417  * drastic measures to free some pages so we can allocate
1418  * another pv entry chunk.
1419  *
1420  * Returns NULL if PV entries were reclaimed from the specified pmap.
1421  *
1422  * We do not, however, unmap 2mpages because subsequent accesses will
1423  * allocate per-page pv entries until repromotion occurs, thereby
1424  * exacerbating the shortage of free pv entries.
1425  */
1426 static vm_page_t
1427 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
1428 {
1429 
1430 	panic("reclaim_pv_chunk");
1431 }
1432 
1433 /*
1434  * free the pv_entry back to the free list
1435  */
1436 static void
1437 free_pv_entry(pmap_t pmap, pv_entry_t pv)
1438 {
1439 	struct pv_chunk *pc;
1440 	int idx, field, bit;
1441 
1442 	rw_assert(&pvh_global_lock, RA_LOCKED);
1443 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1444 	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
1445 	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
1446 	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
1447 	pc = pv_to_chunk(pv);
1448 	idx = pv - &pc->pc_pventry[0];
1449 	field = idx / 64;
1450 	bit = idx % 64;
1451 	pc->pc_map[field] |= 1ul << bit;
1452 	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
1453 	    pc->pc_map[2] != PC_FREE2) {
1454 		/* 98% of the time, pc is already at the head of the list. */
1455 		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
1456 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1457 			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1458 		}
1459 		return;
1460 	}
1461 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1462 	free_pv_chunk(pc);
1463 }
1464 
1465 static void
1466 free_pv_chunk(struct pv_chunk *pc)
1467 {
1468 	vm_page_t m;
1469 
1470 	mtx_lock(&pv_chunks_mutex);
1471  	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1472 	mtx_unlock(&pv_chunks_mutex);
1473 	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
1474 	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
1475 	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
1476 	/* entire chunk is free, return it */
1477 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
1478 #if 0 /* TODO: For minidump */
1479 	dump_drop_page(m->phys_addr);
1480 #endif
1481 	vm_page_unwire(m, PQ_INACTIVE);
1482 	vm_page_free(m);
1483 }
1484 
1485 /*
1486  * Returns a new PV entry, allocating a new PV chunk from the system when
1487  * needed.  If this PV chunk allocation fails and a PV list lock pointer was
1488  * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
1489  * returned.
1490  *
1491  * The given PV list lock may be released.
1492  */
1493 static pv_entry_t
1494 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
1495 {
1496 	int bit, field;
1497 	pv_entry_t pv;
1498 	struct pv_chunk *pc;
1499 	vm_page_t m;
1500 
1501 	rw_assert(&pvh_global_lock, RA_LOCKED);
1502 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1503 	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
1504 retry:
1505 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1506 	if (pc != NULL) {
1507 		for (field = 0; field < _NPCM; field++) {
1508 			if (pc->pc_map[field]) {
1509 				bit = ffsl(pc->pc_map[field]) - 1;
1510 				break;
1511 			}
1512 		}
1513 		if (field < _NPCM) {
1514 			pv = &pc->pc_pventry[field * 64 + bit];
1515 			pc->pc_map[field] &= ~(1ul << bit);
1516 			/* If this was the last item, move it to tail */
1517 			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
1518 			    pc->pc_map[2] == 0) {
1519 				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1520 				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
1521 				    pc_list);
1522 			}
1523 			PV_STAT(atomic_add_long(&pv_entry_count, 1));
1524 			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
1525 			return (pv);
1526 		}
1527 	}
1528 	/* No free items, allocate another chunk */
1529 	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
1530 	    VM_ALLOC_WIRED);
1531 	if (m == NULL) {
1532 		if (lockp == NULL) {
1533 			PV_STAT(pc_chunk_tryfail++);
1534 			return (NULL);
1535 		}
1536 		m = reclaim_pv_chunk(pmap, lockp);
1537 		if (m == NULL)
1538 			goto retry;
1539 	}
1540 	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
1541 	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
1542 #if 0 /* TODO: This is for minidump */
1543 	dump_add_page(m->phys_addr);
1544 #endif
1545 	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
1546 	pc->pc_pmap = pmap;
1547 	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
1548 	pc->pc_map[1] = PC_FREE1;
1549 	pc->pc_map[2] = PC_FREE2;
1550 	mtx_lock(&pv_chunks_mutex);
1551 	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1552 	mtx_unlock(&pv_chunks_mutex);
1553 	pv = &pc->pc_pventry[0];
1554 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1555 	PV_STAT(atomic_add_long(&pv_entry_count, 1));
1556 	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
1557 	return (pv);
1558 }
1559 
1560 /*
1561  * First find and then remove the pv entry for the specified pmap and virtual
1562  * address from the specified pv list.  Returns the pv entry if found and NULL
1563  * otherwise.  This operation can be performed on pv lists for either 4KB or
1564  * 2MB page mappings.
1565  */
1566 static __inline pv_entry_t
1567 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1568 {
1569 	pv_entry_t pv;
1570 
1571 	rw_assert(&pvh_global_lock, RA_LOCKED);
1572 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
1573 		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1574 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
1575 			pvh->pv_gen++;
1576 			break;
1577 		}
1578 	}
1579 	return (pv);
1580 }
1581 
1582 /*
1583  * First find and then destroy the pv entry for the specified pmap and virtual
1584  * address.  This operation can be performed on pv lists for either 4KB or 2MB
1585  * page mappings.
1586  */
1587 static void
1588 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1589 {
1590 	pv_entry_t pv;
1591 
1592 	pv = pmap_pvh_remove(pvh, pmap, va);
1593 	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
1594 	free_pv_entry(pmap, pv);
1595 }
1596 
1597 /*
1598  * Conditionally create the PV entry for a 4KB page mapping if the required
1599  * memory can be allocated without resorting to reclamation.
1600  */
1601 static boolean_t
1602 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
1603     struct rwlock **lockp)
1604 {
1605 	pv_entry_t pv;
1606 
1607 	rw_assert(&pvh_global_lock, RA_LOCKED);
1608 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1609 	/* Pass NULL instead of the lock pointer to disable reclamation. */
1610 	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
1611 		pv->pv_va = va;
1612 		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1613 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
1614 		m->md.pv_gen++;
1615 		return (TRUE);
1616 	} else
1617 		return (FALSE);
1618 }
1619 
1620 /*
1621  * pmap_remove_l3: do the things to unmap a page in a process
1622  */
1623 static int
1624 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
1625     pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
1626 {
1627 	pt_entry_t old_l3;
1628 	vm_page_t m;
1629 
1630 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1631 	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
1632 		cpu_dcache_wb_range(va, L3_SIZE);
1633 	old_l3 = pmap_load_clear(l3);
1634 	PTE_SYNC(l3);
1635 	if (old_l3 & ATTR_SW_WIRED)
1636 		pmap->pm_stats.wired_count -= 1;
1637 	pmap_resident_count_dec(pmap, 1);
1638 	if (old_l3 & ATTR_SW_MANAGED) {
1639 		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
1640 		if (pmap_page_dirty(old_l3))
1641 			vm_page_dirty(m);
1642 		if (old_l3 & ATTR_AF)
1643 			vm_page_aflag_set(m, PGA_REFERENCED);
1644 		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1645 		pmap_pvh_free(&m->md, pmap, va);
1646 	}
1647 	return (pmap_unuse_l3(pmap, va, l2e, free));
1648 }
1649 
1650 /*
1651  *	Remove the given range of addresses from the specified map.
1652  *
1653  *	It is assumed that the start and end are properly
1654  *	rounded to the page size.
1655  */
1656 void
1657 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1658 {
1659 	struct rwlock *lock;
1660 	vm_offset_t va, va_next;
1661 	pd_entry_t *l1, *l2;
1662 	pt_entry_t l3_paddr, *l3;
1663 	struct spglist free;
1664 	int anyvalid;
1665 
1666 	/*
1667 	 * Perform an unsynchronized read.  This is, however, safe.
1668 	 */
1669 	if (pmap->pm_stats.resident_count == 0)
1670 		return;
1671 
1672 	anyvalid = 0;
1673 	SLIST_INIT(&free);
1674 
1675 	rw_rlock(&pvh_global_lock);
1676 	PMAP_LOCK(pmap);
1677 
1678 	lock = NULL;
1679 	for (; sva < eva; sva = va_next) {
1680 
1681 		if (pmap->pm_stats.resident_count == 0)
1682 			break;
1683 
1684 		l1 = pmap_l1(pmap, sva);
1685 		if (*l1 == 0) {
1686 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1687 			if (va_next < sva)
1688 				va_next = eva;
1689 			continue;
1690 		}
1691 
1692 		/*
1693 		 * Calculate index for next page table.
1694 		 */
1695 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1696 		if (va_next < sva)
1697 			va_next = eva;
1698 
1699 		l2 = pmap_l1_to_l2(l1, sva);
1700 		if (l2 == NULL)
1701 			continue;
1702 
1703 		l3_paddr = *l2;
1704 
1705 		/*
1706 		 * Weed out invalid mappings.
1707 		 */
1708 		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
1709 			continue;
1710 
1711 		/*
1712 		 * Limit our scan to either the end of the va represented
1713 		 * by the current page table page, or to the end of the
1714 		 * range being removed.
1715 		 */
1716 		if (va_next > eva)
1717 			va_next = eva;
1718 
1719 		va = va_next;
1720 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
1721 		    sva += L3_SIZE) {
1722 			if (l3 == NULL)
1723 				panic("l3 == NULL");
1724 			if (*l3 == 0) {
1725 				if (va != va_next) {
1726 					pmap_invalidate_range(pmap, va, sva);
1727 					va = va_next;
1728 				}
1729 				continue;
1730 			}
1731 			if (va == va_next)
1732 				va = sva;
1733 			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
1734 			    &lock)) {
1735 				sva += L3_SIZE;
1736 				break;
1737 			}
1738 		}
1739 		if (va != va_next)
1740 			pmap_invalidate_range(pmap, va, sva);
1741 	}
1742 	if (lock != NULL)
1743 		rw_wunlock(lock);
1744 	if (anyvalid)
1745 		pmap_invalidate_all(pmap);
1746 	rw_runlock(&pvh_global_lock);
1747 	PMAP_UNLOCK(pmap);
1748 	pmap_free_zero_pages(&free);
1749 }
1750 
1751 /*
1752  *	Routine:	pmap_remove_all
1753  *	Function:
1754  *		Removes this physical page from
1755  *		all physical maps in which it resides.
1756  *		Reflects back modify bits to the pager.
1757  *
1758  *	Notes:
1759  *		Original versions of this routine were very
1760  *		inefficient because they iteratively called
1761  *		pmap_remove (slow...)
1762  */
1763 
1764 void
1765 pmap_remove_all(vm_page_t m)
1766 {
1767 	pv_entry_t pv;
1768 	pmap_t pmap;
1769 	pt_entry_t *l3, tl3;
1770 	pd_entry_t *l2;
1771 	struct spglist free;
1772 
1773 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1774 	    ("pmap_remove_all: page %p is not managed", m));
1775 	SLIST_INIT(&free);
1776 	rw_wlock(&pvh_global_lock);
1777 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1778 		pmap = PV_PMAP(pv);
1779 		PMAP_LOCK(pmap);
1780 		pmap_resident_count_dec(pmap, 1);
1781 		l2 = pmap_l2(pmap, pv->pv_va);
1782 		KASSERT((*l2 & ATTR_DESCR_MASK) == L2_TABLE,
1783 		    ("pmap_remove_all: found a table when expecting "
1784 		     "a block in %p's pv list", m));
1785 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
1786 		if (pmap_is_current(pmap) &&
1787 		    pmap_l3_valid_cacheable(pmap_load(l3)))
1788 			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
1789 		tl3 = pmap_load_clear(l3);
1790 		PTE_SYNC(l3);
1791 		if (tl3 & ATTR_SW_WIRED)
1792 			pmap->pm_stats.wired_count--;
1793 		if ((tl3 & ATTR_AF) != 0)
1794 			vm_page_aflag_set(m, PGA_REFERENCED);
1795 
1796 		/*
1797 		 * Update the vm_page_t clean and reference bits.
1798 		 */
1799 		if (pmap_page_dirty(tl3))
1800 			vm_page_dirty(m);
1801 		pmap_unuse_l3(pmap, pv->pv_va, *l2, &free);
1802 		pmap_invalidate_page(pmap, pv->pv_va);
1803 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
1804 		m->md.pv_gen++;
1805 		free_pv_entry(pmap, pv);
1806 		PMAP_UNLOCK(pmap);
1807 	}
1808 	vm_page_aflag_clear(m, PGA_WRITEABLE);
1809 	rw_wunlock(&pvh_global_lock);
1810 	pmap_free_zero_pages(&free);
1811 }
1812 
1813 /*
1814  *	Set the physical protection on the
1815  *	specified range of this map as requested.
1816  */
1817 void
1818 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1819 {
1820 	vm_offset_t va, va_next;
1821 	pd_entry_t *l1, *l2;
1822 	pt_entry_t *l3p, l3;
1823 
1824 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1825 		pmap_remove(pmap, sva, eva);
1826 		return;
1827 	}
1828 
1829 	if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
1830 		return;
1831 
1832 	PMAP_LOCK(pmap);
1833 	for (; sva < eva; sva = va_next) {
1834 
1835 		l1 = pmap_l1(pmap, sva);
1836 		if (*l1 == 0) {
1837 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1838 			if (va_next < sva)
1839 				va_next = eva;
1840 			continue;
1841 		}
1842 
1843 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1844 		if (va_next < sva)
1845 			va_next = eva;
1846 
1847 		l2 = pmap_l1_to_l2(l1, sva);
1848 		if (l2 == NULL || (*l2 & ATTR_DESCR_MASK) != L2_TABLE)
1849 			continue;
1850 
1851 		if (va_next > eva)
1852 			va_next = eva;
1853 
1854 		va = va_next;
1855 		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
1856 		    sva += L3_SIZE) {
1857 			l3 = pmap_load(l3p);
1858 			if (pmap_l3_valid(l3)) {
1859 				pmap_set(l3p, ATTR_AP(ATTR_AP_RO));
1860 				PTE_SYNC(l3p);
1861 			}
1862 		}
1863 	}
1864 	PMAP_UNLOCK(pmap);
1865 
1866 	/* TODO: Only invalidate entries we are touching */
1867 	pmap_invalidate_all(pmap);
1868 }
1869 
1870 /*
1871  *	Insert the given physical page (p) at
1872  *	the specified virtual address (v) in the
1873  *	target physical map with the protection requested.
1874  *
1875  *	If specified, the page will be wired down, meaning
1876  *	that the related pte can not be reclaimed.
1877  *
1878  *	NB:  This is the only routine which MAY NOT lazy-evaluate
1879  *	or lose information.  That is, this routine must actually
1880  *	insert this page into the given map NOW.
1881  */
1882 int
1883 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1884     u_int flags, int8_t psind __unused)
1885 {
1886 	struct rwlock *lock;
1887 	pd_entry_t *l1, *l2;
1888 	pt_entry_t new_l3, orig_l3;
1889 	pt_entry_t *l3;
1890 	pv_entry_t pv;
1891 	vm_paddr_t opa, pa, l2_pa, l3_pa;
1892 	vm_page_t mpte, om, l2_m, l3_m;
1893 	boolean_t nosleep;
1894 
1895 	va = trunc_page(va);
1896 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1897 		VM_OBJECT_ASSERT_LOCKED(m->object);
1898 	pa = VM_PAGE_TO_PHYS(m);
1899 	new_l3 = (pt_entry_t)(pa | ATTR_AF | L3_PAGE);
1900 	if ((prot & VM_PROT_WRITE) == 0)
1901 		new_l3 |= ATTR_AP(ATTR_AP_RO);
1902 	if ((flags & PMAP_ENTER_WIRED) != 0)
1903 		new_l3 |= ATTR_SW_WIRED;
1904 	if ((va >> 63) == 0)
1905 		new_l3 |= ATTR_AP(ATTR_AP_USER);
1906 	new_l3 |= ATTR_IDX(m->md.pv_memattr);
1907 
1908 	mpte = NULL;
1909 
1910 	lock = NULL;
1911 	rw_rlock(&pvh_global_lock);
1912 	PMAP_LOCK(pmap);
1913 
1914 	if (va < VM_MAXUSER_ADDRESS) {
1915 		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
1916 		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
1917 		if (mpte == NULL && nosleep) {
1918 			if (lock != NULL)
1919 				rw_wunlock(lock);
1920 			rw_runlock(&pvh_global_lock);
1921 			PMAP_UNLOCK(pmap);
1922 			return (KERN_RESOURCE_SHORTAGE);
1923 		}
1924 		l3 = pmap_l3(pmap, va);
1925 	} else {
1926 		l3 = pmap_l3(pmap, va);
1927 		/* TODO: This is not optimal, but should mostly work */
1928 		if (l3 == NULL) {
1929 			l2 = pmap_l2(pmap, va);
1930 
1931 			if (l2 == NULL) {
1932 				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
1933 				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
1934 				    VM_ALLOC_ZERO);
1935 				if (l2_m == NULL)
1936 					panic("pmap_enter: l2 pte_m == NULL");
1937 				if ((l2_m->flags & PG_ZERO) == 0)
1938 					pmap_zero_page(l2_m);
1939 
1940 				l2_pa = VM_PAGE_TO_PHYS(l2_m);
1941 				l1 = pmap_l1(pmap, va);
1942 				pmap_load_store(l1, l2_pa | L1_TABLE);
1943 				PTE_SYNC(l1);
1944 				l2 = pmap_l1_to_l2(l1, va);
1945 			}
1946 
1947 			KASSERT(l2 != NULL,
1948 			    ("No l2 table after allocating one"));
1949 
1950 			l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
1951 			    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1952 			if (l3_m == NULL)
1953 				panic("pmap_enter: l3 pte_m == NULL");
1954 			if ((l3_m->flags & PG_ZERO) == 0)
1955 				pmap_zero_page(l3_m);
1956 
1957 			l3_pa = VM_PAGE_TO_PHYS(l3_m);
1958 			pmap_load_store(l2, l3_pa | L2_TABLE);
1959 			PTE_SYNC(l2);
1960 			l3 = pmap_l2_to_l3(l2, va);
1961 		}
1962 	}
1963 
1964 	om = NULL;
1965 	orig_l3 = pmap_load(l3);
1966 	opa = orig_l3 & ~ATTR_MASK;
1967 
1968 	/*
1969 	 * Is the specified virtual address already mapped?
1970 	 */
1971 	if (pmap_l3_valid(orig_l3)) {
1972 		/*
1973 		 * Wiring change, just update stats. We don't worry about
1974 		 * wiring PT pages as they remain resident as long as there
1975 		 * are valid mappings in them. Hence, if a user page is wired,
1976 		 * the PT page will be also.
1977 		 */
1978 		if ((flags & PMAP_ENTER_WIRED) != 0 &&
1979 		    (orig_l3 & ATTR_SW_WIRED) == 0)
1980 			pmap->pm_stats.wired_count++;
1981 		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
1982 		    (orig_l3 & ATTR_SW_WIRED) != 0)
1983 			pmap->pm_stats.wired_count--;
1984 
1985 		/*
1986 		 * Remove the extra PT page reference.
1987 		 */
1988 		if (mpte != NULL) {
1989 			mpte->wire_count--;
1990 			KASSERT(mpte->wire_count > 0,
1991 			    ("pmap_enter: missing reference to page table page,"
1992 			     " va: 0x%lx", va));
1993 		}
1994 
1995 		/*
1996 		 * Has the physical page changed?
1997 		 */
1998 		if (opa == pa) {
1999 			/*
2000 			 * No, might be a protection or wiring change.
2001 			 */
2002 			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2003 				new_l3 |= ATTR_SW_MANAGED;
2004 				if ((new_l3 & ATTR_AP(ATTR_AP_RW)) ==
2005 				    ATTR_AP(ATTR_AP_RW)) {
2006 					vm_page_aflag_set(m, PGA_WRITEABLE);
2007 				}
2008 			}
2009 			goto validate;
2010 		}
2011 
2012 		/* Flush the cache, there might be uncommitted data in it */
2013 		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
2014 			cpu_dcache_wb_range(va, L3_SIZE);
2015 	} else {
2016 		/*
2017 		 * Increment the counters.
2018 		 */
2019 		if ((new_l3 & ATTR_SW_WIRED) != 0)
2020 			pmap->pm_stats.wired_count++;
2021 		pmap_resident_count_inc(pmap, 1);
2022 	}
2023 	/*
2024 	 * Enter on the PV list if part of our managed memory.
2025 	 */
2026 	if ((m->oflags & VPO_UNMANAGED) == 0) {
2027 		new_l3 |= ATTR_SW_MANAGED;
2028 		pv = get_pv_entry(pmap, &lock);
2029 		pv->pv_va = va;
2030 		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
2031 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2032 		m->md.pv_gen++;
2033 		if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
2034 			vm_page_aflag_set(m, PGA_WRITEABLE);
2035 	}
2036 
2037 	/*
2038 	 * Update the L3 entry.
2039 	 */
2040 	if (orig_l3 != 0) {
2041 validate:
2042 		orig_l3 = pmap_load_store(l3, new_l3);
2043 		PTE_SYNC(l3);
2044 		opa = orig_l3 & ~ATTR_MASK;
2045 
2046 		if (opa != pa) {
2047 			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2048 				om = PHYS_TO_VM_PAGE(opa);
2049 				if (pmap_page_dirty(orig_l3))
2050 					vm_page_dirty(om);
2051 				if ((orig_l3 & ATTR_AF) != 0)
2052 					vm_page_aflag_set(om, PGA_REFERENCED);
2053 				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
2054 				pmap_pvh_free(&om->md, pmap, va);
2055 			}
2056 		} else if (pmap_page_dirty(orig_l3)) {
2057 			if ((orig_l3 & ATTR_SW_MANAGED) != 0)
2058 				vm_page_dirty(m);
2059 		}
2060 		if ((orig_l3 & ATTR_AF) != 0)
2061 			pmap_invalidate_page(pmap, va);
2062 	} else {
2063 		pmap_load_store(l3, new_l3);
2064 		PTE_SYNC(l3);
2065 	}
2066 	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
2067 	    cpu_icache_sync_range(va, PAGE_SIZE);
2068 
2069 	if (lock != NULL)
2070 		rw_wunlock(lock);
2071 	rw_runlock(&pvh_global_lock);
2072 	PMAP_UNLOCK(pmap);
2073 	return (KERN_SUCCESS);
2074 }
2075 
2076 /*
2077  * Maps a sequence of resident pages belonging to the same object.
2078  * The sequence begins with the given page m_start.  This page is
2079  * mapped at the given virtual address start.  Each subsequent page is
2080  * mapped at a virtual address that is offset from start by the same
2081  * amount as the page is offset from m_start within the object.  The
2082  * last page in the sequence is the page with the largest offset from
2083  * m_start that can be mapped at a virtual address less than the given
2084  * virtual address end.  Not every virtual page between start and end
2085  * is mapped; only those for which a resident page exists with the
2086  * corresponding offset from m_start are mapped.
2087  */
2088 void
2089 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2090     vm_page_t m_start, vm_prot_t prot)
2091 {
2092 	struct rwlock *lock;
2093 	vm_offset_t va;
2094 	vm_page_t m, mpte;
2095 	vm_pindex_t diff, psize;
2096 
2097 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
2098 
2099 	psize = atop(end - start);
2100 	mpte = NULL;
2101 	m = m_start;
2102 	lock = NULL;
2103 	rw_rlock(&pvh_global_lock);
2104 	PMAP_LOCK(pmap);
2105 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2106 		va = start + ptoa(diff);
2107 		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
2108 		m = TAILQ_NEXT(m, listq);
2109 	}
2110 	if (lock != NULL)
2111 		rw_wunlock(lock);
2112 	rw_runlock(&pvh_global_lock);
2113 	PMAP_UNLOCK(pmap);
2114 }
2115 
2116 /*
2117  * this code makes some *MAJOR* assumptions:
2118  * 1. Current pmap & pmap exists.
2119  * 2. Not wired.
2120  * 3. Read access.
2121  * 4. No page table pages.
2122  * but is *MUCH* faster than pmap_enter...
2123  */
2124 
2125 void
2126 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2127 {
2128 	struct rwlock *lock;
2129 
2130 	lock = NULL;
2131 	rw_rlock(&pvh_global_lock);
2132 	PMAP_LOCK(pmap);
2133 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
2134 	if (lock != NULL)
2135 		rw_wunlock(lock);
2136 	rw_runlock(&pvh_global_lock);
2137 	PMAP_UNLOCK(pmap);
2138 }
2139 
2140 static vm_page_t
2141 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2142     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
2143 {
2144 	struct spglist free;
2145 	pd_entry_t *l2;
2146 	pt_entry_t *l3;
2147 	vm_paddr_t pa;
2148 
2149 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2150 	    (m->oflags & VPO_UNMANAGED) != 0,
2151 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2152 	rw_assert(&pvh_global_lock, RA_LOCKED);
2153 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2154 
2155 	/*
2156 	 * In the case that a page table page is not
2157 	 * resident, we are creating it here.
2158 	 */
2159 	if (va < VM_MAXUSER_ADDRESS) {
2160 		vm_pindex_t l2pindex;
2161 
2162 		/*
2163 		 * Calculate pagetable page index
2164 		 */
2165 		l2pindex = pmap_l2_pindex(va);
2166 		if (mpte && (mpte->pindex == l2pindex)) {
2167 			mpte->wire_count++;
2168 		} else {
2169 			/*
2170 			 * Get the l2 entry
2171 			 */
2172 			l2 = pmap_l2(pmap, va);
2173 
2174 			/*
2175 			 * If the page table page is mapped, we just increment
2176 			 * the hold count, and activate it.  Otherwise, we
2177 			 * attempt to allocate a page table page.  If this
2178 			 * attempt fails, we don't retry.  Instead, we give up.
2179 			 */
2180 			if (l2 != NULL && *l2 != 0) {
2181 				mpte = PHYS_TO_VM_PAGE(*l2 & ~ATTR_MASK);
2182 				mpte->wire_count++;
2183 			} else {
2184 				/*
2185 				 * Pass NULL instead of the PV list lock
2186 				 * pointer, because we don't intend to sleep.
2187 				 */
2188 				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
2189 				if (mpte == NULL)
2190 					return (mpte);
2191 			}
2192 		}
2193 		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
2194 		l3 = &l3[pmap_l3_index(va)];
2195 	} else {
2196 		mpte = NULL;
2197 		l3 = pmap_l3(kernel_pmap, va);
2198 	}
2199 	if (l3 == NULL)
2200 		panic("pmap_enter_quick_locked: No l3");
2201 	if (*l3) {
2202 		if (mpte != NULL) {
2203 			mpte->wire_count--;
2204 			mpte = NULL;
2205 		}
2206 		return (mpte);
2207 	}
2208 
2209 	/*
2210 	 * Enter on the PV list if part of our managed memory.
2211 	 */
2212 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2213 	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
2214 		if (mpte != NULL) {
2215 			SLIST_INIT(&free);
2216 			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
2217 				pmap_invalidate_page(pmap, va);
2218 				pmap_free_zero_pages(&free);
2219 			}
2220 			mpte = NULL;
2221 		}
2222 		return (mpte);
2223 	}
2224 
2225 	/*
2226 	 * Increment counters
2227 	 */
2228 	pmap_resident_count_inc(pmap, 1);
2229 
2230 	pa = VM_PAGE_TO_PHYS(m) | ATTR_AF | ATTR_IDX(m->md.pv_memattr) |
2231 	    ATTR_AP(ATTR_AP_RW) | L3_PAGE;
2232 
2233 	/*
2234 	 * Now validate mapping with RO protection
2235 	 */
2236 	if ((m->oflags & VPO_UNMANAGED) == 0)
2237 		pa |= ATTR_SW_MANAGED;
2238 	pmap_load_store(l3, pa);
2239 	PTE_SYNC(l3);
2240 	pmap_invalidate_page(pmap, va);
2241 	return (mpte);
2242 }
2243 
2244 /*
2245  * This code maps large physical mmap regions into the
2246  * processor address space.  Note that some shortcuts
2247  * are taken, but the code works.
2248  */
2249 void
2250 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2251     vm_pindex_t pindex, vm_size_t size)
2252 {
2253 
2254 	panic("pmap_object_init_pt");
2255 }
2256 
2257 /*
2258  *	Clear the wired attribute from the mappings for the specified range of
2259  *	addresses in the given pmap.  Every valid mapping within that range
2260  *	must have the wired attribute set.  In contrast, invalid mappings
2261  *	cannot have the wired attribute set, so they are ignored.
2262  *
2263  *	The wired attribute of the page table entry is not a hardware feature,
2264  *	so there is no need to invalidate any TLB entries.
2265  */
2266 void
2267 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2268 {
2269 	vm_offset_t va_next;
2270 	pd_entry_t *l1, *l2;
2271 	pt_entry_t *l3;
2272 	boolean_t pv_lists_locked;
2273 
2274 	pv_lists_locked = FALSE;
2275 	PMAP_LOCK(pmap);
2276 	for (; sva < eva; sva = va_next) {
2277 		l1 = pmap_l1(pmap, sva);
2278 		if (*l1 == 0) {
2279 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2280 			if (va_next < sva)
2281 				va_next = eva;
2282 			continue;
2283 		}
2284 
2285 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2286 		if (va_next < sva)
2287 			va_next = eva;
2288 
2289 		l2 = pmap_l1_to_l2(l1, sva);
2290 		if (*l2 == 0)
2291 			continue;
2292 
2293 		if (va_next > eva)
2294 			va_next = eva;
2295 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2296 		    sva += L3_SIZE) {
2297 			if (*l3 == 0)
2298 				continue;
2299 			if ((*l3 & ATTR_SW_WIRED) == 0)
2300 				panic("pmap_unwire: l3 %#jx is missing "
2301 				    "ATTR_SW_WIRED", (uintmax_t)*l3);
2302 
2303 			/*
2304 			 * PG_W must be cleared atomically.  Although the pmap
2305 			 * lock synchronizes access to PG_W, another processor
2306 			 * could be setting PG_M and/or PG_A concurrently.
2307 			 */
2308 			atomic_clear_long(l3, ATTR_SW_WIRED);
2309 			pmap->pm_stats.wired_count--;
2310 		}
2311 	}
2312 	if (pv_lists_locked)
2313 		rw_runlock(&pvh_global_lock);
2314 	PMAP_UNLOCK(pmap);
2315 }
2316 
2317 /*
2318  *	Copy the range specified by src_addr/len
2319  *	from the source map to the range dst_addr/len
2320  *	in the destination map.
2321  *
2322  *	This routine is only advisory and need not do anything.
2323  */
2324 
2325 void
2326 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2327     vm_offset_t src_addr)
2328 {
2329 }
2330 
2331 /*
2332  *	pmap_zero_page zeros the specified hardware page by mapping
2333  *	the page into KVM and using bzero to clear its contents.
2334  */
2335 void
2336 pmap_zero_page(vm_page_t m)
2337 {
2338 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2339 
2340 	pagezero((void *)va);
2341 }
2342 
2343 /*
2344  *	pmap_zero_page_area zeros the specified hardware page by mapping
2345  *	the page into KVM and using bzero to clear its contents.
2346  *
2347  *	off and size may not cover an area beyond a single hardware page.
2348  */
2349 void
2350 pmap_zero_page_area(vm_page_t m, int off, int size)
2351 {
2352 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2353 
2354 	if (off == 0 && size == PAGE_SIZE)
2355 		pagezero((void *)va);
2356 	else
2357 		bzero((char *)va + off, size);
2358 }
2359 
2360 /*
2361  *	pmap_zero_page_idle zeros the specified hardware page by mapping
2362  *	the page into KVM and using bzero to clear its contents.  This
2363  *	is intended to be called from the vm_pagezero process only and
2364  *	outside of Giant.
2365  */
2366 void
2367 pmap_zero_page_idle(vm_page_t m)
2368 {
2369 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2370 
2371 	pagezero((void *)va);
2372 }
2373 
2374 /*
2375  *	pmap_copy_page copies the specified (machine independent)
2376  *	page by mapping the page into virtual memory and using
2377  *	bcopy to copy the page, one machine dependent page at a
2378  *	time.
2379  */
2380 void
2381 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2382 {
2383 	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
2384 	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
2385 
2386 	pagecopy((void *)src, (void *)dst);
2387 }
2388 
2389 int unmapped_buf_allowed = 1;
2390 
2391 void
2392 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2393     vm_offset_t b_offset, int xfersize)
2394 {
2395 	void *a_cp, *b_cp;
2396 	vm_page_t m_a, m_b;
2397 	vm_paddr_t p_a, p_b;
2398 	vm_offset_t a_pg_offset, b_pg_offset;
2399 	int cnt;
2400 
2401 	while (xfersize > 0) {
2402 		a_pg_offset = a_offset & PAGE_MASK;
2403 		m_a = ma[a_offset >> PAGE_SHIFT];
2404 		p_a = m_a->phys_addr;
2405 		b_pg_offset = b_offset & PAGE_MASK;
2406 		m_b = mb[b_offset >> PAGE_SHIFT];
2407 		p_b = m_b->phys_addr;
2408 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2409 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2410 		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
2411 			panic("!DMAP a %lx", p_a);
2412 		} else {
2413 			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
2414 		}
2415 		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
2416 			panic("!DMAP b %lx", p_b);
2417 		} else {
2418 			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
2419 		}
2420 		bcopy(a_cp, b_cp, cnt);
2421 		a_offset += cnt;
2422 		b_offset += cnt;
2423 		xfersize -= cnt;
2424 	}
2425 }
2426 
2427 /*
2428  * Returns true if the pmap's pv is one of the first
2429  * 16 pvs linked to from this page.  This count may
2430  * be changed upwards or downwards in the future; it
2431  * is only necessary that true be returned for a small
2432  * subset of pmaps for proper page aging.
2433  */
2434 boolean_t
2435 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2436 {
2437 	struct rwlock *lock;
2438 	pv_entry_t pv;
2439 	int loops = 0;
2440 	boolean_t rv;
2441 
2442 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2443 	    ("pmap_page_exists_quick: page %p is not managed", m));
2444 	rv = FALSE;
2445 	rw_rlock(&pvh_global_lock);
2446 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2447 	rw_rlock(lock);
2448 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2449 		if (PV_PMAP(pv) == pmap) {
2450 			rv = TRUE;
2451 			break;
2452 		}
2453 		loops++;
2454 		if (loops >= 16)
2455 			break;
2456 	}
2457 	rw_runlock(lock);
2458 	rw_runlock(&pvh_global_lock);
2459 	return (rv);
2460 }
2461 
2462 /*
2463  *	pmap_page_wired_mappings:
2464  *
2465  *	Return the number of managed mappings to the given physical page
2466  *	that are wired.
2467  */
2468 int
2469 pmap_page_wired_mappings(vm_page_t m)
2470 {
2471 	struct rwlock *lock;
2472 	pmap_t pmap;
2473 	pt_entry_t *l3;
2474 	pv_entry_t pv;
2475 	int count, md_gen;
2476 
2477 	if ((m->oflags & VPO_UNMANAGED) != 0)
2478 		return (0);
2479 	rw_rlock(&pvh_global_lock);
2480 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2481 	rw_rlock(lock);
2482 restart:
2483 	count = 0;
2484 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2485 		pmap = PV_PMAP(pv);
2486 		if (!PMAP_TRYLOCK(pmap)) {
2487 			md_gen = m->md.pv_gen;
2488 			rw_runlock(lock);
2489 			PMAP_LOCK(pmap);
2490 			rw_rlock(lock);
2491 			if (md_gen != m->md.pv_gen) {
2492 				PMAP_UNLOCK(pmap);
2493 				goto restart;
2494 			}
2495 		}
2496 		l3 = pmap_l3(pmap, pv->pv_va);
2497 		if (l3 != NULL && (*l3 & ATTR_SW_WIRED) != 0)
2498 			count++;
2499 		PMAP_UNLOCK(pmap);
2500 	}
2501 	rw_runlock(lock);
2502 	rw_runlock(&pvh_global_lock);
2503 	return (count);
2504 }
2505 
2506 /*
2507  * Destroy all managed, non-wired mappings in the given user-space
2508  * pmap.  This pmap cannot be active on any processor besides the
2509  * caller.
2510  *
2511  * This function cannot be applied to the kernel pmap.  Moreover, it
2512  * is not intended for general use.  It is only to be used during
2513  * process termination.  Consequently, it can be implemented in ways
2514  * that make it faster than pmap_remove().  First, it can more quickly
2515  * destroy mappings by iterating over the pmap's collection of PV
2516  * entries, rather than searching the page table.  Second, it doesn't
2517  * have to test and clear the page table entries atomically, because
2518  * no processor is currently accessing the user address space.  In
2519  * particular, a page table entry's dirty bit won't change state once
2520  * this function starts.
2521  */
2522 void
2523 pmap_remove_pages(pmap_t pmap)
2524 {
2525 	pd_entry_t ptepde, *l2;
2526 	pt_entry_t *l3, tl3;
2527 	struct spglist free;
2528 	vm_page_t m;
2529 	pv_entry_t pv;
2530 	struct pv_chunk *pc, *npc;
2531 	struct rwlock *lock;
2532 	int64_t bit;
2533 	uint64_t inuse, bitmask;
2534 	int allfree, field, freed, idx;
2535 	vm_paddr_t pa;
2536 
2537 	lock = NULL;
2538 
2539 	SLIST_INIT(&free);
2540 	rw_rlock(&pvh_global_lock);
2541 	PMAP_LOCK(pmap);
2542 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2543 		allfree = 1;
2544 		freed = 0;
2545 		for (field = 0; field < _NPCM; field++) {
2546 			inuse = ~pc->pc_map[field] & pc_freemask[field];
2547 			while (inuse != 0) {
2548 				bit = ffsl(inuse) - 1;
2549 				bitmask = 1UL << bit;
2550 				idx = field * 64 + bit;
2551 				pv = &pc->pc_pventry[idx];
2552 				inuse &= ~bitmask;
2553 
2554 				l2 = pmap_l2(pmap, pv->pv_va);
2555 				ptepde = pmap_load(l2);
2556 				l3 = pmap_l2_to_l3(l2, pv->pv_va);
2557 				tl3 = pmap_load(l3);
2558 
2559 /*
2560  * We cannot remove wired pages from a process' mapping at this time
2561  */
2562 				if (tl3 & ATTR_SW_WIRED) {
2563 					allfree = 0;
2564 					continue;
2565 				}
2566 
2567 				pa = tl3 & ~ATTR_MASK;
2568 
2569 				m = PHYS_TO_VM_PAGE(pa);
2570 				KASSERT(m->phys_addr == pa,
2571 				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
2572 				    m, (uintmax_t)m->phys_addr,
2573 				    (uintmax_t)tl3));
2574 
2575 				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
2576 				    m < &vm_page_array[vm_page_array_size],
2577 				    ("pmap_remove_pages: bad l3 %#jx",
2578 				    (uintmax_t)tl3));
2579 
2580 				if (pmap_is_current(pmap) &&
2581 				    pmap_l3_valid_cacheable(pmap_load(l3)))
2582 					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2583 				pmap_load_clear(l3);
2584 				PTE_SYNC(l3);
2585 
2586 				/*
2587 				 * Update the vm_page_t clean/reference bits.
2588 				 */
2589 				if ((tl3 & ATTR_AP_RW_BIT) ==
2590 				    ATTR_AP(ATTR_AP_RW))
2591 					vm_page_dirty(m);
2592 
2593 				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
2594 
2595 				/* Mark free */
2596 				pc->pc_map[field] |= bitmask;
2597 
2598 				pmap_resident_count_dec(pmap, 1);
2599 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2600 				m->md.pv_gen++;
2601 
2602 				pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free);
2603 				freed++;
2604 			}
2605 		}
2606 		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
2607 		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
2608 		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
2609 		if (allfree) {
2610 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2611 			free_pv_chunk(pc);
2612 		}
2613 	}
2614 	pmap_invalidate_all(pmap);
2615 	if (lock != NULL)
2616 		rw_wunlock(lock);
2617 	rw_runlock(&pvh_global_lock);
2618 	PMAP_UNLOCK(pmap);
2619 	pmap_free_zero_pages(&free);
2620 }
2621 
2622 /*
2623  * This is used to check if a page has been accessed or modified. As we
2624  * don't have a bit to see if it has been modified we have to assume it
2625  * has been if the page is read/write.
2626  */
2627 static boolean_t
2628 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
2629 {
2630 	struct rwlock *lock;
2631 	pv_entry_t pv;
2632 	pt_entry_t *l3, mask, value;
2633 	pmap_t pmap;
2634 	int md_gen;
2635 	boolean_t rv;
2636 
2637 	rv = FALSE;
2638 	rw_rlock(&pvh_global_lock);
2639 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2640 	rw_rlock(lock);
2641 restart:
2642 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2643 		pmap = PV_PMAP(pv);
2644 		if (!PMAP_TRYLOCK(pmap)) {
2645 			md_gen = m->md.pv_gen;
2646 			rw_runlock(lock);
2647 			PMAP_LOCK(pmap);
2648 			rw_rlock(lock);
2649 			if (md_gen != m->md.pv_gen) {
2650 				PMAP_UNLOCK(pmap);
2651 				goto restart;
2652 			}
2653 		}
2654 		l3 = pmap_l3(pmap, pv->pv_va);
2655 		mask = 0;
2656 		value = 0;
2657 		if (modified) {
2658 			mask |= ATTR_AP_RW_BIT;
2659 			value |= ATTR_AP(ATTR_AP_RW);
2660 		}
2661 		if (accessed) {
2662 			mask |= ATTR_AF | ATTR_DESCR_MASK;
2663 			value |= ATTR_AF | L3_PAGE;
2664 		}
2665 		rv = (pmap_load(l3) & mask) == value;
2666 		PMAP_UNLOCK(pmap);
2667 		if (rv)
2668 			goto out;
2669 	}
2670 out:
2671 	rw_runlock(lock);
2672 	rw_runlock(&pvh_global_lock);
2673 	return (rv);
2674 }
2675 
2676 /*
2677  *	pmap_is_modified:
2678  *
2679  *	Return whether or not the specified physical page was modified
2680  *	in any physical maps.
2681  */
2682 boolean_t
2683 pmap_is_modified(vm_page_t m)
2684 {
2685 
2686 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2687 	    ("pmap_is_modified: page %p is not managed", m));
2688 
2689 	/*
2690 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2691 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2692 	 * is clear, no PTEs can have PG_M set.
2693 	 */
2694 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2695 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2696 		return (FALSE);
2697 	return (pmap_page_test_mappings(m, FALSE, TRUE));
2698 }
2699 
2700 /*
2701  *	pmap_is_prefaultable:
2702  *
2703  *	Return whether or not the specified virtual address is eligible
2704  *	for prefault.
2705  */
2706 boolean_t
2707 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2708 {
2709 	pt_entry_t *l3;
2710 	boolean_t rv;
2711 
2712 	rv = FALSE;
2713 	PMAP_LOCK(pmap);
2714 	l3 = pmap_l3(pmap, addr);
2715 	if (l3 != NULL && *l3 != 0) {
2716 		rv = TRUE;
2717 	}
2718 	PMAP_UNLOCK(pmap);
2719 	return (rv);
2720 }
2721 
2722 /*
2723  *	pmap_is_referenced:
2724  *
2725  *	Return whether or not the specified physical page was referenced
2726  *	in any physical maps.
2727  */
2728 boolean_t
2729 pmap_is_referenced(vm_page_t m)
2730 {
2731 
2732 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2733 	    ("pmap_is_referenced: page %p is not managed", m));
2734 	return (pmap_page_test_mappings(m, TRUE, FALSE));
2735 }
2736 
2737 /*
2738  * Clear the write and modified bits in each of the given page's mappings.
2739  */
2740 void
2741 pmap_remove_write(vm_page_t m)
2742 {
2743 	pmap_t pmap;
2744 	struct rwlock *lock;
2745 	pv_entry_t pv;
2746 	pt_entry_t *l3, oldl3;
2747 	int md_gen;
2748 
2749 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2750 	    ("pmap_remove_write: page %p is not managed", m));
2751 
2752 	/*
2753 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2754 	 * set by another thread while the object is locked.  Thus,
2755 	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2756 	 */
2757 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2758 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2759 		return;
2760 	rw_rlock(&pvh_global_lock);
2761 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2762 retry_pv_loop:
2763 	rw_wlock(lock);
2764 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2765 		pmap = PV_PMAP(pv);
2766 		if (!PMAP_TRYLOCK(pmap)) {
2767 			md_gen = m->md.pv_gen;
2768 			rw_wunlock(lock);
2769 			PMAP_LOCK(pmap);
2770 			rw_wlock(lock);
2771 			if (md_gen != m->md.pv_gen) {
2772 				PMAP_UNLOCK(pmap);
2773 				rw_wunlock(lock);
2774 				goto retry_pv_loop;
2775 			}
2776 		}
2777 		l3 = pmap_l3(pmap, pv->pv_va);
2778 retry:
2779 		oldl3 = *l3;
2780 		if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
2781 			if (!atomic_cmpset_long(l3, oldl3,
2782 			    oldl3 | ATTR_AP(ATTR_AP_RO)))
2783 				goto retry;
2784 			if ((oldl3 & ATTR_AF) != 0)
2785 				vm_page_dirty(m);
2786 			pmap_invalidate_page(pmap, pv->pv_va);
2787 		}
2788 		PMAP_UNLOCK(pmap);
2789 	}
2790 	rw_wunlock(lock);
2791 	vm_page_aflag_clear(m, PGA_WRITEABLE);
2792 	rw_runlock(&pvh_global_lock);
2793 }
2794 
2795 static __inline boolean_t
2796 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
2797 {
2798 
2799 	return (FALSE);
2800 }
2801 
2802 #define	PMAP_TS_REFERENCED_MAX	5
2803 
2804 /*
2805  *	pmap_ts_referenced:
2806  *
2807  *	Return a count of reference bits for a page, clearing those bits.
2808  *	It is not necessary for every reference bit to be cleared, but it
2809  *	is necessary that 0 only be returned when there are truly no
2810  *	reference bits set.
2811  *
2812  *	XXX: The exact number of bits to check and clear is a matter that
2813  *	should be tested and standardized at some point in the future for
2814  *	optimal aging of shared pages.
2815  */
2816 int
2817 pmap_ts_referenced(vm_page_t m)
2818 {
2819 	pv_entry_t pv, pvf;
2820 	pmap_t pmap;
2821 	struct rwlock *lock;
2822 	pd_entry_t *l2;
2823 	pt_entry_t *l3;
2824 	vm_paddr_t pa;
2825 	int cleared, md_gen, not_cleared;
2826 	struct spglist free;
2827 
2828 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2829 	    ("pmap_ts_referenced: page %p is not managed", m));
2830 	SLIST_INIT(&free);
2831 	cleared = 0;
2832 	pa = VM_PAGE_TO_PHYS(m);
2833 	lock = PHYS_TO_PV_LIST_LOCK(pa);
2834 	rw_rlock(&pvh_global_lock);
2835 	rw_wlock(lock);
2836 retry:
2837 	not_cleared = 0;
2838 	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
2839 		goto out;
2840 	pv = pvf;
2841 	do {
2842 		if (pvf == NULL)
2843 			pvf = pv;
2844 		pmap = PV_PMAP(pv);
2845 		if (!PMAP_TRYLOCK(pmap)) {
2846 			md_gen = m->md.pv_gen;
2847 			rw_wunlock(lock);
2848 			PMAP_LOCK(pmap);
2849 			rw_wlock(lock);
2850 			if (md_gen != m->md.pv_gen) {
2851 				PMAP_UNLOCK(pmap);
2852 				goto retry;
2853 			}
2854 		}
2855 		l2 = pmap_l2(pmap, pv->pv_va);
2856 		KASSERT((*l2 & ATTR_DESCR_MASK) == L2_TABLE,
2857 		    ("pmap_ts_referenced: found an invalid l2 table"));
2858 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
2859 		if ((*l3 & ATTR_AF) != 0) {
2860 			if (safe_to_clear_referenced(pmap, *l3)) {
2861 				/*
2862 				 * TODO: We don't handle the access flag
2863 				 * at all. We need to be able to set it in
2864 				 * the exception handler.
2865 				 */
2866 				panic("TODO: safe_to_clear_referenced\n");
2867 			} else if ((*l3 & ATTR_SW_WIRED) == 0) {
2868 				/*
2869 				 * Wired pages cannot be paged out so
2870 				 * doing accessed bit emulation for
2871 				 * them is wasted effort. We do the
2872 				 * hard work for unwired pages only.
2873 				 */
2874 				pmap_remove_l3(pmap, l3, pv->pv_va,
2875 				    *l2, &free, &lock);
2876 				pmap_invalidate_page(pmap, pv->pv_va);
2877 				cleared++;
2878 				if (pvf == pv)
2879 					pvf = NULL;
2880 				pv = NULL;
2881 				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
2882 				    ("inconsistent pv lock %p %p for page %p",
2883 				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
2884 			} else
2885 				not_cleared++;
2886 		}
2887 		PMAP_UNLOCK(pmap);
2888 		/* Rotate the PV list if it has more than one entry. */
2889 		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
2890 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2891 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2892 			m->md.pv_gen++;
2893 		}
2894 	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
2895 	    not_cleared < PMAP_TS_REFERENCED_MAX);
2896 out:
2897 	rw_wunlock(lock);
2898 	rw_runlock(&pvh_global_lock);
2899 	pmap_free_zero_pages(&free);
2900 	return (cleared + not_cleared);
2901 }
2902 
2903 /*
2904  *	Apply the given advice to the specified range of addresses within the
2905  *	given pmap.  Depending on the advice, clear the referenced and/or
2906  *	modified flags in each mapping and set the mapped page's dirty field.
2907  */
2908 void
2909 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2910 {
2911 }
2912 
2913 /*
2914  *	Clear the modify bits on the specified physical page.
2915  */
2916 void
2917 pmap_clear_modify(vm_page_t m)
2918 {
2919 
2920 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2921 	    ("pmap_clear_modify: page %p is not managed", m));
2922 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2923 	KASSERT(!vm_page_xbusied(m),
2924 	    ("pmap_clear_modify: page %p is exclusive busied", m));
2925 
2926 	/*
2927 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
2928 	 * If the object containing the page is locked and the page is not
2929 	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
2930 	 */
2931 	if ((m->aflags & PGA_WRITEABLE) == 0)
2932 		return;
2933 	panic("pmap_clear_modify");
2934 }
2935 
2936 /*
2937  * Sets the memory attribute for the specified page.
2938  */
2939 void
2940 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
2941 {
2942 
2943 	panic("pmap_page_set_memattr");
2944 }
2945 
2946 /*
2947  * perform the pmap work for mincore
2948  */
2949 int
2950 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
2951 {
2952 
2953 	panic("pmap_mincore");
2954 }
2955 
2956 void
2957 pmap_activate(struct thread *td)
2958 {
2959 	pmap_t	pmap;
2960 
2961 	critical_enter();
2962 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
2963 	td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
2964 	__asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr));
2965 	critical_exit();
2966 }
2967 
2968 void
2969 pmap_sync_icache(pmap_t pm, vm_offset_t va, vm_size_t sz)
2970 {
2971 
2972 	panic("pmap_sync_icache");
2973 }
2974 
2975 /*
2976  *	Increase the starting virtual address of the given mapping if a
2977  *	different alignment might result in more superpage mappings.
2978  */
2979 void
2980 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
2981     vm_offset_t *addr, vm_size_t size)
2982 {
2983 }
2984 
2985 /**
2986  * Get the kernel virtual address of a set of physical pages. If there are
2987  * physical addresses not covered by the DMAP perform a transient mapping
2988  * that will be removed when calling pmap_unmap_io_transient.
2989  *
2990  * \param page        The pages the caller wishes to obtain the virtual
2991  *                    address on the kernel memory map.
2992  * \param vaddr       On return contains the kernel virtual memory address
2993  *                    of the pages passed in the page parameter.
2994  * \param count       Number of pages passed in.
2995  * \param can_fault   TRUE if the thread using the mapped pages can take
2996  *                    page faults, FALSE otherwise.
2997  *
2998  * \returns TRUE if the caller must call pmap_unmap_io_transient when
2999  *          finished or FALSE otherwise.
3000  *
3001  */
3002 boolean_t
3003 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3004     boolean_t can_fault)
3005 {
3006 	vm_paddr_t paddr;
3007 	boolean_t needs_mapping;
3008 	int error, i;
3009 
3010 	/*
3011 	 * Allocate any KVA space that we need, this is done in a separate
3012 	 * loop to prevent calling vmem_alloc while pinned.
3013 	 */
3014 	needs_mapping = FALSE;
3015 	for (i = 0; i < count; i++) {
3016 		paddr = VM_PAGE_TO_PHYS(page[i]);
3017 		if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) {
3018 			error = vmem_alloc(kernel_arena, PAGE_SIZE,
3019 			    M_BESTFIT | M_WAITOK, &vaddr[i]);
3020 			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
3021 			needs_mapping = TRUE;
3022 		} else {
3023 			vaddr[i] = PHYS_TO_DMAP(paddr);
3024 		}
3025 	}
3026 
3027 	/* Exit early if everything is covered by the DMAP */
3028 	if (!needs_mapping)
3029 		return (FALSE);
3030 
3031 	/*
3032 	 * NB:  The sequence of updating a page table followed by accesses
3033 	 * to the corresponding pages used in the !DMAP case is subject to
3034 	 * the situation described in the "AMD64 Architecture Programmer's
3035 	 * Manual Volume 2: System Programming" rev. 3.23, "7.3.1 Special
3036 	 * Coherency Considerations".  Therefore, issuing the INVLPG right
3037 	 * after modifying the PTE bits is crucial.
3038 	 */
3039 	if (!can_fault)
3040 		sched_pin();
3041 	for (i = 0; i < count; i++) {
3042 		paddr = VM_PAGE_TO_PHYS(page[i]);
3043 		if (paddr >= DMAP_MAX_PHYSADDR) {
3044 			panic(
3045 			   "pmap_map_io_transient: TODO: Map out of DMAP data");
3046 		}
3047 	}
3048 
3049 	return (needs_mapping);
3050 }
3051 
3052 void
3053 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3054     boolean_t can_fault)
3055 {
3056 	vm_paddr_t paddr;
3057 	int i;
3058 
3059 	if (!can_fault)
3060 		sched_unpin();
3061 	for (i = 0; i < count; i++) {
3062 		paddr = VM_PAGE_TO_PHYS(page[i]);
3063 		if (paddr >= DMAP_MAX_PHYSADDR) {
3064 			panic("pmap_unmap_io_transient: TODO: Unmap data");
3065 		}
3066 	}
3067 }
3068