1 /*-
2  * Copyright (c) 1991 Regents of the University of California.
3  * All rights reserved.
4  * Copyright (c) 1994 John S. Dyson
5  * All rights reserved.
6  * Copyright (c) 1994 David Greenman
7  * All rights reserved.
8  * Copyright (c) 2003 Peter Wemm
9  * All rights reserved.
10  * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
11  * All rights reserved.
12  * Copyright (c) 2014 Andrew Turner
13  * All rights reserved.
14  * Copyright (c) 2014 The FreeBSD Foundation
15  * All rights reserved.
16  *
17  * This code is derived from software contributed to Berkeley by
18  * the Systems Programming Group of the University of Utah Computer
19  * Science Department and William Jolitz of UUNET Technologies Inc.
20  *
21  * This software was developed by Andrew Turner under sponsorship from
22  * the FreeBSD Foundation.
23  *
24  * Redistribution and use in source and binary forms, with or without
25  * modification, are permitted provided that the following conditions
26  * are met:
27  * 1. Redistributions of source code must retain the above copyright
28  *    notice, this list of conditions and the following disclaimer.
29  * 2. Redistributions in binary form must reproduce the above copyright
30  *    notice, this list of conditions and the following disclaimer in the
31  *    documentation and/or other materials provided with the distribution.
32  * 3. All advertising materials mentioning features or use of this software
33  *    must display the following acknowledgement:
34  *	This product includes software developed by the University of
35  *	California, Berkeley and its contributors.
36  * 4. Neither the name of the University nor the names of its contributors
37  *    may be used to endorse or promote products derived from this software
38  *    without specific prior written permission.
39  *
40  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
50  * SUCH DAMAGE.
51  *
52  *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
53  */
54 /*-
55  * Copyright (c) 2003 Networks Associates Technology, Inc.
56  * All rights reserved.
57  *
58  * This software was developed for the FreeBSD Project by Jake Burkholder,
59  * Safeport Network Services, and Network Associates Laboratories, the
60  * Security Research Division of Network Associates, Inc. under
61  * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
62  * CHATS research program.
63  *
64  * Redistribution and use in source and binary forms, with or without
65  * modification, are permitted provided that the following conditions
66  * are met:
67  * 1. Redistributions of source code must retain the above copyright
68  *    notice, this list of conditions and the following disclaimer.
69  * 2. Redistributions in binary form must reproduce the above copyright
70  *    notice, this list of conditions and the following disclaimer in the
71  *    documentation and/or other materials provided with the distribution.
72  *
73  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
74  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
75  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
76  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
77  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
78  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
79  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
80  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
81  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
82  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
83  * SUCH DAMAGE.
84  */
85 
86 #include <sys/cdefs.h>
87 __FBSDID("$FreeBSD$");
88 
89 /*
90  *	Manages physical address maps.
91  *
92  *	Since the information managed by this module is
93  *	also stored by the logical address mapping module,
94  *	this module may throw away valid virtual-to-physical
95  *	mappings at almost any time.  However, invalidations
96  *	of virtual-to-physical mappings must be done as
97  *	requested.
98  *
99  *	In order to cope with hardware architectures which
100  *	make virtual-to-physical map invalidates expensive,
101  *	this module may delay invalidation or reduced-protection
102  *	operations until such time as they are actually
103  *	necessary.  This module is given full information as
104  *	to which processors are currently using which maps,
105  *	and to when physical maps must be made correct.
106  */
107 
108 #include <sys/param.h>
109 #include <sys/bus.h>
110 #include <sys/systm.h>
111 #include <sys/kernel.h>
112 #include <sys/ktr.h>
113 #include <sys/lock.h>
114 #include <sys/malloc.h>
115 #include <sys/mman.h>
116 #include <sys/msgbuf.h>
117 #include <sys/mutex.h>
118 #include <sys/proc.h>
119 #include <sys/rwlock.h>
120 #include <sys/sx.h>
121 #include <sys/vmem.h>
122 #include <sys/vmmeter.h>
123 #include <sys/sched.h>
124 #include <sys/sysctl.h>
125 #include <sys/_unrhdr.h>
126 #include <sys/smp.h>
127 
128 #include <vm/vm.h>
129 #include <vm/vm_param.h>
130 #include <vm/vm_kern.h>
131 #include <vm/vm_page.h>
132 #include <vm/vm_map.h>
133 #include <vm/vm_object.h>
134 #include <vm/vm_extern.h>
135 #include <vm/vm_pageout.h>
136 #include <vm/vm_pager.h>
137 #include <vm/vm_radix.h>
138 #include <vm/vm_reserv.h>
139 #include <vm/uma.h>
140 
141 #include <machine/machdep.h>
142 #include <machine/md_var.h>
143 #include <machine/pcb.h>
144 
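/*
 * NPDEPG is the number of entries in a page-table page.  NUPDE is the total
 * number of L3 page-table pages a pmap can reference through its L2 entries,
 * and NUSERPGTBLS adds the L2 pages themselves.  These bound the page-table
 * page indices used by _pmap_alloc_l3().
 */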
145 #define	NPDEPG		(PAGE_SIZE/(sizeof (pd_entry_t)))
146 #define	NUPDE			(NPDEPG * NPDEPG)
147 #define	NUSERPGTBLS		(NUPDE + NPDEPG)
148 
149 #if !defined(DIAGNOSTIC)
150 #ifdef __GNUC_GNU_INLINE__
151 #define PMAP_INLINE	__attribute__((__gnu_inline__)) inline
152 #else
153 #define PMAP_INLINE	extern inline
154 #endif
155 #else
156 #define PMAP_INLINE
157 #endif
158 
159 /*
160  * Memory attribute indices into the mair_el1 register, which is set up in locore.S.
161  */
162 #define	DEVICE_MEMORY	0
163 #define	UNCACHED_MEMORY	1
164 #define	CACHED_MEMORY	2
165 
166 
167 #ifdef PV_STATS
168 #define PV_STAT(x)	do { x ; } while (0)
169 #else
170 #define PV_STAT(x)	do { } while (0)
171 #endif
172 
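/* Compute the index of the L3 page-table page that maps a virtual address. */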
173 #define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
174 
175 #define	NPV_LIST_LOCKS	MAXCPU
176 
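/*
 * PV list locks are taken from a pool hashed on the physical address of the
 * page.  The macros below look up, switch, or release the lock covering a
 * given physical address or vm_page.
 */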
177 #define	PHYS_TO_PV_LIST_LOCK(pa)	\
178 			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])
179 
180 #define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
181 	struct rwlock **_lockp = (lockp);		\
182 	struct rwlock *_new_lock;			\
183 							\
184 	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
185 	if (_new_lock != *_lockp) {			\
186 		if (*_lockp != NULL)			\
187 			rw_wunlock(*_lockp);		\
188 		*_lockp = _new_lock;			\
189 		rw_wlock(*_lockp);			\
190 	}						\
191 } while (0)
192 
193 #define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
194 			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))
195 
196 #define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
197 	struct rwlock **_lockp = (lockp);		\
198 							\
199 	if (*_lockp != NULL) {				\
200 		rw_wunlock(*_lockp);			\
201 		*_lockp = NULL;				\
202 	}						\
203 } while (0)
204 
205 #define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
206 			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))
207 
208 struct pmap kernel_pmap_store;
209 
210 vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
211 vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
212 vm_offset_t kernel_vm_end = 0;
213 
214 struct msgbuf *msgbufp = NULL;
215 
216 static struct rwlock_padalign pvh_global_lock;
217 
218 vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
219 
220 /*
221  * Data for the pv entry allocation mechanism
222  */
223 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
224 static struct mtx pv_chunks_mutex;
225 static struct rwlock pv_list_locks[NPV_LIST_LOCKS];
226 
227 static void	free_pv_chunk(struct pv_chunk *pc);
228 static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
229 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
230 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
231 static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
232 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
233 		    vm_offset_t va);
234 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
235     vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
236 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
237     pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp);
238 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
239     vm_page_t m, struct rwlock **lockp);
240 
241 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
242 		struct rwlock **lockp);
243 
244 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
245     struct spglist *free);
246 static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
247 
248 /*
249  * These load the old table data and store the new value.
250  * They need to be atomic as the System MMU may write to the table at
251  * the same time as the CPU.
252  */
253 #define	pmap_load_store(table, entry) atomic_swap_64(table, entry)
254 #define	pmap_set(table, mask) atomic_set_64(table, mask)
255 #define	pmap_load_clear(table) atomic_swap_64(table, 0)
256 #define	pmap_load(table) (*table)
257 
258 /********************/
259 /* Inline functions */
260 /********************/
261 
262 static __inline void
263 pagecopy(void *s, void *d)
264 {
265 
266 	memcpy(d, s, PAGE_SIZE);
267 }
268 
269 static __inline void
270 pagezero(void *p)
271 {
272 
273 	bzero(p, PAGE_SIZE);
274 }
275 
276 #define	pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
277 #define	pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
278 #define	pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)
279 
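/*
 * Page-table walking helpers: pmap_l1() returns the L1 entry for a virtual
 * address, the _to_ variants step from one level's entry to the next level's
 * table via the direct map, and pmap_l2()/pmap_l3() combine the two,
 * returning NULL when an intermediate entry is not a table descriptor.
 */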
280 static __inline pd_entry_t *
281 pmap_l1(pmap_t pmap, vm_offset_t va)
282 {
283 
284 	return (&pmap->pm_l1[pmap_l1_index(va)]);
285 }
286 
287 static __inline pd_entry_t *
288 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
289 {
290 	pd_entry_t *l2;
291 
292 	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
293 	return (&l2[pmap_l2_index(va)]);
294 }
295 
296 static __inline pd_entry_t *
297 pmap_l2(pmap_t pmap, vm_offset_t va)
298 {
299 	pd_entry_t *l1;
300 
301 	l1 = pmap_l1(pmap, va);
302 	if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
303 		return (NULL);
304 
305 	return (pmap_l1_to_l2(l1, va));
306 }
307 
308 static __inline pt_entry_t *
309 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
310 {
311 	pt_entry_t *l3;
312 
313 	l3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
314 	return (&l3[pmap_l3_index(va)]);
315 }
316 
317 static __inline pt_entry_t *
318 pmap_l3(pmap_t pmap, vm_offset_t va)
319 {
320 	pd_entry_t *l2;
321 
322 	l2 = pmap_l2(pmap, va);
323 	if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
324 		return (NULL);
325 
326 	return (pmap_l2_to_l3(l2, va));
327 }
328 
329 bool
330 pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2,
331     pt_entry_t **l3)
332 {
333 	pd_entry_t *l1p, *l2p;
334 
335 	if (pmap->pm_l1 == NULL)
336 		return (false);
337 
338 	l1p = pmap_l1(pmap, va);
339 	*l1 = l1p;
340 
341 	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
342 		*l2 = NULL;
343 		*l3 = NULL;
344 		return (true);
345 	}
346 
347 	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
348 		return (false);
349 
350 	l2p = pmap_l1_to_l2(l1p, va);
351 	*l2 = l2p;
352 
353 	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
354 		*l3 = NULL;
355 		return (true);
356 	}
357 
358 	*l3 = pmap_l2_to_l3(l2p, va);
359 
360 	return (true);
361 }
362 
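/*
 * Return non-zero if the pmap is either the kernel pmap or the pmap of the
 * current process.
 */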
363 static __inline int
364 pmap_is_current(pmap_t pmap)
365 {
366 
367 	return ((pmap == pmap_kernel()) ||
368 	    (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
369 }
370 
371 static __inline int
372 pmap_l3_valid(pt_entry_t l3)
373 {
374 
375 	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
376 }
377 
378 static __inline int
379 pmap_l3_valid_cacheable(pt_entry_t l3)
380 {
381 
382 	return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
383 	    ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
384 }
385 
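/*
 * Write the cache lines covering a page-table entry back to memory, for the
 * benefit of table walkers that may not snoop the data cache.
 */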
386 #define	PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))
387 
388 /*
389  * Checks if the page is dirty. We currently lack proper tracking of this on
390  * arm64, so for now assume that an accessed page mapped read/write is dirty.
391  */
392 static inline int
393 pmap_page_dirty(pt_entry_t pte)
394 {
395 
396 	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
397 	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
398 }
399 
400 static __inline void
401 pmap_resident_count_inc(pmap_t pmap, int count)
402 {
403 
404 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
405 	pmap->pm_stats.resident_count += count;
406 }
407 
408 static __inline void
409 pmap_resident_count_dec(pmap_t pmap, int count)
410 {
411 
412 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
413 	KASSERT(pmap->pm_stats.resident_count >= count,
414 	    ("pmap %p resident count underflow %ld %d", pmap,
415 	    pmap->pm_stats.resident_count, count));
416 	pmap->pm_stats.resident_count -= count;
417 }
418 
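/*
 * Walk the bootstrap L1 table for the given virtual address and return the
 * L2 table set up by locore (mapped at init_pt_va), along with the L1 and
 * L2 slots for that address.
 */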
419 static pt_entry_t *
420 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
421     u_int *l2_slot)
422 {
423 	pt_entry_t *l2;
424 	pd_entry_t *l1;
425 
426 	l1 = (pd_entry_t *)l1pt;
427 	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;
428 
429 	/* Check that locore used an L1 table mapping */
430 	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
431 	   ("Invalid bootstrap L1 table"));
432 	/* Find the address of the L2 table */
433 	l2 = (pt_entry_t *)init_pt_va;
434 	*l2_slot = pmap_l2_index(va);
435 
436 	return (l2);
437 }
438 
439 static vm_paddr_t
440 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
441 {
442 	u_int l1_slot, l2_slot;
443 	pt_entry_t *l2;
444 
445 	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);
446 
447 	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
448 }
449 
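/*
 * Create the direct map: cover DMAP_MIN_ADDRESS to DMAP_MAX_ADDRESS with
 * cacheable L1 block mappings, starting at the kernel's physical address
 * rounded down to an L1 block boundary.
 */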
450 static void
451 pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart)
452 {
453 	vm_offset_t va;
454 	vm_paddr_t pa;
455 	pd_entry_t *l1;
456 	u_int l1_slot;
457 
458 	pa = dmap_phys_base = kernstart & ~L1_OFFSET;
459 	va = DMAP_MIN_ADDRESS;
460 	l1 = (pd_entry_t *)l1pt;
461 	l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS);
462 
463 	for (; va < DMAP_MAX_ADDRESS;
464 	    pa += L1_SIZE, va += L1_SIZE, l1_slot++) {
465 		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
466 
467 		pmap_load_store(&l1[l1_slot],
468 		    (pa & ~L1_OFFSET) | ATTR_DEFAULT |
469 		    ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
470 	}
471 
472 	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
473 	cpu_tlb_flushID();
474 }
475 
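/*
 * Carve L2 page-table pages out of memory starting at l2_start and point the
 * kernel L1 entries at them for every L1 slot from va up to
 * VM_MAX_KERNEL_ADDRESS.  Returns the first free address after the new tables.
 */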
476 static vm_offset_t
477 pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
478 {
479 	vm_offset_t l2pt;
480 	vm_paddr_t pa;
481 	pd_entry_t *l1;
482 	u_int l1_slot;
483 
484 	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));
485 
486 	l1 = (pd_entry_t *)l1pt;
487 	l1_slot = pmap_l1_index(va);
488 	l2pt = l2_start;
489 
490 	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
491 		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));
492 
493 		pa = pmap_early_vtophys(l1pt, l2pt);
494 		pmap_load_store(&l1[l1_slot],
495 		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
496 		l2pt += PAGE_SIZE;
497 	}
498 
499 	/* Clean the L2 page table */
500 	memset((void *)l2_start, 0, l2pt - l2_start);
501 	cpu_dcache_wb_range(l2_start, l2pt - l2_start);
502 
503 	/* Flush the l1 table to ram */
504 	cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE);
505 
506 	return (l2pt);
507 }
508 
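/*
 * As above, but fill the kernel L2 entries from va to VM_MAX_KERNEL_ADDRESS
 * with L3 page-table pages carved out of memory starting at l3_start.
 */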
509 static vm_offset_t
510 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
511 {
512 	vm_offset_t l2pt, l3pt;
513 	vm_paddr_t pa;
514 	pd_entry_t *l2;
515 	u_int l2_slot;
516 
517 	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));
518 
519 	l2 = pmap_l2(kernel_pmap, va);
520 	l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1));
521 	l2pt = (vm_offset_t)l2;
522 	l2_slot = pmap_l2_index(va);
523 	l3pt = l3_start;
524 
525 	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
526 		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));
527 
528 		pa = pmap_early_vtophys(l1pt, l3pt);
529 		pmap_load_store(&l2[l2_slot],
530 		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
531 		l3pt += PAGE_SIZE;
532 	}
533 
534 	/* Clean the L3 page tables */
535 	memset((void *)l3_start, 0, l3pt - l3_start);
536 	cpu_dcache_wb_range(l3_start, l3pt - l3_start);
537 
538 	cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);
539 
540 	return (l3pt);
541 }
542 
543 /*
544  *	Bootstrap the system enough to run with virtual memory.
545  */
546 void
547 pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
548 {
549 	u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
550 	uint64_t kern_delta;
551 	pt_entry_t *l2;
552 	vm_offset_t va, freemempos;
553 	vm_offset_t dpcpu, msgbufpv;
554 	vm_paddr_t pa, min_pa;
555 	int i;
556 
557 	kern_delta = KERNBASE - kernstart;
558 	physmem = 0;
559 
560 	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
561 	printf("%lx\n", l1pt);
562 	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);
563 
564 	/* Set this early so we can use the pagetable walking functions */
565 	kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
566 	PMAP_LOCK_INIT(kernel_pmap);
567 
568  	/*
569 	 * Initialize the global pv list lock.
570 	 */
571 	rw_init(&pvh_global_lock, "pmap pv global");
572 
573 	/* Assume the address we were loaded to is a valid physical address */
574 	min_pa = KERNBASE - kern_delta;
575 
576 	/*
577 	 * Find the minimum physical address. physmap is sorted,
578 	 * but may contain empty ranges.
579 	 */
580 	for (i = 0; i < (physmap_idx * 2); i += 2) {
581 		if (physmap[i] == physmap[i + 1])
582 			continue;
583 		if (physmap[i] <= min_pa)
584 			min_pa = physmap[i];
585 		break;
586 	}
587 
588 	/* Create a direct map region early so we can use it for pa -> va */
589 	pmap_bootstrap_dmap(l1pt, min_pa);
590 
591 	va = KERNBASE;
592 	pa = KERNBASE - kern_delta;
593 
594 	/*
595 	 * Start to initialise phys_avail by copying from physmap
596 	 * up to the physical address KERNBASE points at.
597 	 */
598 	map_slot = avail_slot = 0;
599 	for (; map_slot < (physmap_idx * 2); map_slot += 2) {
600 		if (physmap[map_slot] == physmap[map_slot + 1])
601 			continue;
602 
603 		if (physmap[map_slot] <= pa &&
604 		    physmap[map_slot + 1] > pa)
605 			break;
606 
607 		phys_avail[avail_slot] = physmap[map_slot];
608 		phys_avail[avail_slot + 1] = physmap[map_slot + 1];
609 		physmem += (phys_avail[avail_slot + 1] -
610 		    phys_avail[avail_slot]) >> PAGE_SHIFT;
611 		avail_slot += 2;
612 	}
613 
614 	/* Add the memory before the kernel */
615 	if (physmap[avail_slot] < pa) {
616 		phys_avail[avail_slot] = physmap[map_slot];
617 		phys_avail[avail_slot + 1] = pa;
618 		physmem += (phys_avail[avail_slot + 1] -
619 		    phys_avail[avail_slot]) >> PAGE_SHIFT;
620 		avail_slot += 2;
621 	}
622 	used_map_slot = map_slot;
623 
624 	/*
625 	 * Read the page table to find out what is already mapped.
626 	 * This assumes we have mapped a block of memory from KERNBASE
627 	 * using a single L1 entry.
628 	 */
629 	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);
630 
631 	/* Sanity check the index, KERNBASE should be the first VA */
632 	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));
633 
634 	/* Find how many pages we have mapped */
635 	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
636 		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
637 			break;
638 
639 		/* Check locore used L2 blocks */
640 		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
641 		    ("Invalid bootstrap L2 table"));
642 		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
643 		    ("Incorrect PA in L2 table"));
644 
645 		va += L2_SIZE;
646 		pa += L2_SIZE;
647 	}
648 
649 	va = roundup2(va, L1_SIZE);
650 
651 	freemempos = KERNBASE + kernlen;
652 	freemempos = roundup2(freemempos, PAGE_SIZE);
653 	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
654 	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
655 	/* And the l3 tables for the early devmap */
656 	freemempos = pmap_bootstrap_l3(l1pt,
657 	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);
658 
659 	cpu_tlb_flushID();
660 
661 #define alloc_pages(var, np)						\
662 	(var) = freemempos;						\
663 	freemempos += (np * PAGE_SIZE);					\
664 	memset((char *)(var), 0, ((np) * PAGE_SIZE));
665 
666 	/* Allocate dynamic per-cpu area. */
667 	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
668 	dpcpu_init((void *)dpcpu, 0);
669 
670 	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
671 	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
672 	msgbufp = (void *)msgbufpv;
673 
674 	virtual_avail = roundup2(freemempos, L1_SIZE);
675 	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
676 	kernel_vm_end = virtual_avail;
677 
678 	pa = pmap_early_vtophys(l1pt, freemempos);
679 
680 	/* Finish initialising physmap */
681 	map_slot = used_map_slot;
682 	for (; avail_slot < (PHYS_AVAIL_SIZE - 2) &&
683 	    map_slot < (physmap_idx * 2); map_slot += 2) {
684 		if (physmap[map_slot] == physmap[map_slot + 1])
685 			continue;
686 
687 		/* Have we used the current range? */
688 		if (physmap[map_slot + 1] <= pa)
689 			continue;
690 
691 		/* Do we need to split the entry? */
692 		if (physmap[map_slot] < pa) {
693 			phys_avail[avail_slot] = pa;
694 			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
695 		} else {
696 			phys_avail[avail_slot] = physmap[map_slot];
697 			phys_avail[avail_slot + 1] = physmap[map_slot + 1];
698 		}
699 		physmem += (phys_avail[avail_slot + 1] -
700 		    phys_avail[avail_slot]) >> PAGE_SHIFT;
701 
702 		avail_slot += 2;
703 	}
704 	phys_avail[avail_slot] = 0;
705 	phys_avail[avail_slot + 1] = 0;
706 
707 	/*
708 	 * Maxmem isn't the "maximum memory", it's one larger than the
709 	 * highest page of the physical address space.  It should be
710 	 * called something like "Maxphyspage".
711 	 */
712 	Maxmem = atop(phys_avail[avail_slot - 1]);
713 
714 	cpu_tlb_flushID();
715 }
716 
717 /*
718  *	Initialize a vm_page's machine-dependent fields.
719  */
720 void
721 pmap_page_init(vm_page_t m)
722 {
723 
724 	TAILQ_INIT(&m->md.pv_list);
725 	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
726 }
727 
728 /*
729  *	Initialize the pmap module.
730  *	Called by vm_init, to initialize any structures that the pmap
731  *	system needs to map virtual memory.
732  */
733 void
734 pmap_init(void)
735 {
736 	int i;
737 
738 	/*
739 	 * Initialize the pv chunk list mutex.
740 	 */
741 	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);
742 
743 	/*
744 	 * Initialize the pool of pv list locks.
745 	 */
746 	for (i = 0; i < NPV_LIST_LOCKS; i++)
747 		rw_init(&pv_list_locks[i], "pmap pv list");
748 }
749 
750 /*
751  * Normal, non-SMP, invalidation functions.
752  * We inline these within pmap.c for speed.
753  */
754 PMAP_INLINE void
755 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
756 {
757 
758 	sched_pin();
759 	__asm __volatile(
760 	    "dsb  sy		\n"
761 	    "tlbi vaae1is, %0	\n"
762 	    "dsb  sy		\n"
763 	    "isb		\n"
764 	    : : "r"(va >> PAGE_SHIFT));
765 	sched_unpin();
766 }
767 
768 PMAP_INLINE void
769 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
770 {
771 	vm_offset_t addr;
772 
773 	sched_pin();
774 	sva >>= PAGE_SHIFT;
775 	eva >>= PAGE_SHIFT;
776 	__asm __volatile("dsb	sy");
777 	for (addr = sva; addr < eva; addr++) {
778 		__asm __volatile(
779 		    "tlbi vaae1is, %0" : : "r"(addr));
780 	}
781 	__asm __volatile(
782 	    "dsb  sy	\n"
783 	    "isb	\n");
784 	sched_unpin();
785 }
786 
787 PMAP_INLINE void
788 pmap_invalidate_all(pmap_t pmap)
789 {
790 
791 	sched_pin();
792 	__asm __volatile(
793 	    "dsb  sy		\n"
794 	    "tlbi vmalle1is	\n"
795 	    "dsb  sy		\n"
796 	    "isb		\n");
797 	sched_unpin();
798 }
799 
800 /*
801  *	Routine:	pmap_extract
802  *	Function:
803  *		Extract the physical page address associated
804  *		with the given map/virtual_address pair.
805  */
806 vm_paddr_t
807 pmap_extract(pmap_t pmap, vm_offset_t va)
808 {
809 	pd_entry_t *l2p, l2;
810 	pt_entry_t *l3p, l3;
811 	vm_paddr_t pa;
812 
813 	pa = 0;
814 	PMAP_LOCK(pmap);
815 	/*
816 	 * Start with the l2 table. We are unable to allocate
817 	 * pages in the l1 table.
818 	 */
819 	l2p = pmap_l2(pmap, va);
820 	if (l2p != NULL) {
821 		l2 = pmap_load(l2p);
822 		if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
823 			l3p = pmap_l2_to_l3(l2p, va);
824 			if (l3p != NULL) {
825 				l3 = pmap_load(l3p);
826 
827 				if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
828 					pa = (l3 & ~ATTR_MASK) |
829 					    (va & L3_OFFSET);
830 			}
831 		} else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
832 			pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
833 	}
834 	PMAP_UNLOCK(pmap);
835 	return (pa);
836 }
837 
838 /*
839  *	Routine:	pmap_extract_and_hold
840  *	Function:
841  *		Atomically extract and hold the physical page
842  *		with the given pmap and virtual address pair
843  *		if that mapping permits the given protection.
844  */
845 vm_page_t
846 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
847 {
848 	pt_entry_t *l3p, l3;
849 	vm_paddr_t pa;
850 	vm_page_t m;
851 
852 	pa = 0;
853 	m = NULL;
854 	PMAP_LOCK(pmap);
855 retry:
856 	l3p = pmap_l3(pmap, va);
857 	if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
858 		if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
859 		    ((prot & VM_PROT_WRITE) == 0)) {
860 			if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa))
861 				goto retry;
862 			m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK);
863 			vm_page_hold(m);
864 		}
865 	}
866 	PA_UNLOCK_COND(pa);
867 	PMAP_UNLOCK(pmap);
868 	return (m);
869 }
870 
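/*
 *	Routine:	pmap_kextract
 *	Function:
 *		Extract the physical address backing the given kernel
 *		virtual address, using the direct map when possible.
 */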
871 vm_paddr_t
872 pmap_kextract(vm_offset_t va)
873 {
874 	pd_entry_t *l2p, l2;
875 	pt_entry_t *l3;
876 	vm_paddr_t pa;
877 
878 	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
879 		pa = DMAP_TO_PHYS(va);
880 	} else {
881 		l2p = pmap_l2(kernel_pmap, va);
882 		if (l2p == NULL)
883 			panic("pmap_kextract: No l2");
884 		l2 = pmap_load(l2p);
885 		if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
886 			return ((l2 & ~ATTR_MASK) |
887 			    (va & L2_OFFSET));
888 
889 		l3 = pmap_l2_to_l3(l2p, va);
890 		if (l3 == NULL)
891 			panic("pmap_kextract: No l3...");
892 		pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK);
893 	}
894 	return (pa);
895 }
896 
897 /***************************************************
898  * Low level mapping routines.....
899  ***************************************************/
900 
901 void
902 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
903 {
904 	pt_entry_t *l3;
905 	vm_offset_t va;
906 
907 	KASSERT((pa & L3_OFFSET) == 0,
908 	   ("pmap_kenter_device: Invalid physical address"));
909 	KASSERT((sva & L3_OFFSET) == 0,
910 	   ("pmap_kenter_device: Invalid virtual address"));
911 	KASSERT((size & PAGE_MASK) == 0,
912 	    ("pmap_kenter_device: Mapping is not page-sized"));
913 
914 	va = sva;
915 	while (size != 0) {
916 		l3 = pmap_l3(kernel_pmap, va);
917 		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
918 		pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT |
919 		    ATTR_IDX(DEVICE_MEMORY) | L3_PAGE);
920 		PTE_SYNC(l3);
921 
922 		va += PAGE_SIZE;
923 		pa += PAGE_SIZE;
924 		size -= PAGE_SIZE;
925 	}
926 	pmap_invalidate_range(kernel_pmap, sva, va);
927 }
928 
929 /*
930  * Remove a page from the kernel pagetables.
931  * Note: not SMP coherent.
932  */
933 PMAP_INLINE void
934 pmap_kremove(vm_offset_t va)
935 {
936 	pt_entry_t *l3;
937 
938 	l3 = pmap_l3(kernel_pmap, va);
939 	KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
940 
941 	if (pmap_l3_valid_cacheable(pmap_load(l3)))
942 		cpu_dcache_wb_range(va, L3_SIZE);
943 	pmap_load_clear(l3);
944 	PTE_SYNC(l3);
945 	pmap_invalidate_page(kernel_pmap, va);
946 }
947 
948 void
949 pmap_kremove_device(vm_offset_t sva, vm_size_t size)
950 {
951 	pt_entry_t *l3;
952 	vm_offset_t va;
953 
954 	KASSERT((sva & L3_OFFSET) == 0,
955 	   ("pmap_kremove_device: Invalid virtual address"));
956 	KASSERT((size & PAGE_MASK) == 0,
957 	    ("pmap_kremove_device: Mapping is not page-sized"));
958 
959 	va = sva;
960 	while (size != 0) {
961 		l3 = pmap_l3(kernel_pmap, va);
962 		KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va));
963 		pmap_load_clear(l3);
964 		PTE_SYNC(l3);
965 
966 		va += PAGE_SIZE;
967 		size -= PAGE_SIZE;
968 	}
969 	pmap_invalidate_range(kernel_pmap, sva, va);
970 }
971 
972 /*
973  *	Used to map a range of physical addresses into kernel
974  *	virtual address space.
975  *
976  *	The value passed in '*virt' is a suggested virtual address for
977  *	the mapping. Architectures which can support a direct-mapped
978  *	physical to virtual region can return the appropriate address
979  *	within that region, leaving '*virt' unchanged. Other
980  *	architectures should map the pages starting at '*virt' and
981  *	update '*virt' with the first usable address after the mapped
982  *	region.
983  */
984 vm_offset_t
985 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
986 {
987 	return (PHYS_TO_DMAP(start));
988 }
989 
990 
991 /*
992  * Add a list of wired pages to the kva
993  * this routine is only used for temporary
994  * kernel mappings that do not need to have
995  * page modification or references recorded.
996  * Note that old mappings are simply written
997  * over.  The page *must* be wired.
998  * Note: SMP coherent.  Uses a ranged shootdown IPI.
999  */
1000 void
1001 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
1002 {
1003 	pt_entry_t *l3, pa;
1004 	vm_offset_t va;
1005 	vm_page_t m;
1006 	int i;
1007 
1008 	va = sva;
1009 	for (i = 0; i < count; i++) {
1010 		m = ma[i];
1011 		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
1012 		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
1013 		l3 = pmap_l3(kernel_pmap, va);
1014 		pmap_load_store(l3, pa);
1015 		PTE_SYNC(l3);
1016 
1017 		va += L3_SIZE;
1018 	}
1019 	pmap_invalidate_range(kernel_pmap, sva, va);
1020 }
1021 
1022 /*
1023  * This routine tears out page mappings from the
1024  * kernel -- it is meant only for temporary mappings.
1025  * Note: SMP coherent.  Uses a ranged shootdown IPI.
1026  */
1027 void
1028 pmap_qremove(vm_offset_t sva, int count)
1029 {
1030 	pt_entry_t *l3;
1031 	vm_offset_t va;
1032 
1033 	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));
1034 
1035 	va = sva;
1036 	while (count-- > 0) {
1037 		l3 = pmap_l3(kernel_pmap, va);
1038 		KASSERT(l3 != NULL, ("pmap_kremove: Invalid address"));
1039 		KASSERT(l3 != NULL, ("pmap_qremove: Invalid address"));
1040 		if (pmap_l3_valid_cacheable(pmap_load(l3)))
1041 			cpu_dcache_wb_range(va, L3_SIZE);
1042 		pmap_load_clear(l3);
1043 		PTE_SYNC(l3);
1044 
1045 		va += PAGE_SIZE;
1046 	}
1047 	pmap_invalidate_range(kernel_pmap, sva, va);
1048 }
1049 
1050 /***************************************************
1051  * Page table page management routines.....
1052  ***************************************************/
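/*
 * Free each page on the given list.  Used to release page-table pages that
 * were collected while mappings were being torn down.
 */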
1053 static __inline void
1054 pmap_free_zero_pages(struct spglist *free)
1055 {
1056 	vm_page_t m;
1057 
1058 	while ((m = SLIST_FIRST(free)) != NULL) {
1059 		SLIST_REMOVE_HEAD(free, plinks.s.ss);
1060 		/* Preserve the page's PG_ZERO setting. */
1061 		vm_page_free_toq(m);
1062 	}
1063 }
1064 
1065 /*
1066  * Schedule the specified unused page table page to be freed.  Specifically,
1067  * add the page to the specified list of pages that will be released to the
1068  * physical memory manager after the TLB has been updated.
1069  */
1070 static __inline void
1071 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
1072     boolean_t set_PG_ZERO)
1073 {
1074 
1075 	if (set_PG_ZERO)
1076 		m->flags |= PG_ZERO;
1077 	else
1078 		m->flags &= ~PG_ZERO;
1079 	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
1080 }
1081 
1082 /*
1083  * Decrements a page table page's wire count, which is used to record the
1084  * number of valid page table entries within the page.  If the wire count
1085  * drops to zero, then the page table page is unmapped.  Returns TRUE if the
1086  * page table page was unmapped and FALSE otherwise.
1087  */
1088 static inline boolean_t
1089 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1090 {
1091 
1092 	--m->wire_count;
1093 	if (m->wire_count == 0) {
1094 		_pmap_unwire_l3(pmap, va, m, free);
1095 		return (TRUE);
1096 	} else
1097 		return (FALSE);
1098 }
1099 
1100 static void
1101 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
1102 {
1103 
1104 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1105 	/*
1106 	 * unmap the page table page
1107 	 */
1108 	if (m->pindex >= NUPDE) {
1109 		/* PD page */
1110 		pd_entry_t *l1;
1111 		l1 = pmap_l1(pmap, va);
1112 		pmap_load_clear(l1);
1113 		PTE_SYNC(l1);
1114 	} else {
1115 		/* PTE page */
1116 		pd_entry_t *l2;
1117 		l2 = pmap_l2(pmap, va);
1118 		pmap_load_clear(l2);
1119 		PTE_SYNC(l2);
1120 	}
1121 	pmap_resident_count_dec(pmap, 1);
1122 	if (m->pindex < NUPDE) {
1123 		/* We just released a PT, unhold the matching PD */
1124 		vm_page_t pdpg;
1125 
1126 		pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK);
1127 		pmap_unwire_l3(pmap, va, pdpg, free);
1128 	}
1129 	pmap_invalidate_page(pmap, va);
1130 
1131 	/*
1132 	 * This is a release store so that the ordinary store unmapping
1133 	 * the page table page is globally performed before TLB shoot-
1134 	 * down is begun.
1135 	 */
1136 	atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1);
1137 
1138 	/*
1139 	 * Put page on a list so that it is released after
1140 	 * *ALL* TLB shootdown is done
1141 	 */
1142 	pmap_add_delayed_free_list(m, free, TRUE);
1143 }
1144 
1145 /*
1146  * After removing an l3 entry, this routine is used to
1147  * conditionally free the page, and manage the hold/wire counts.
1148  */
1149 static int
1150 pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
1151     struct spglist *free)
1152 {
1153 	vm_page_t mpte;
1154 
1155 	if (va >= VM_MAXUSER_ADDRESS)
1156 		return (0);
1157 	KASSERT(ptepde != 0, ("pmap_unuse_l3: ptepde != 0"));
1158 	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
1159 	return (pmap_unwire_l3(pmap, va, mpte, free));
1160 }
1161 
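/*
 * Initialize the first pmap.  Unlike pmap_pinit(), it reuses the kernel
 * pmap's L1 table rather than allocating its own.
 */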
1162 void
1163 pmap_pinit0(pmap_t pmap)
1164 {
1165 
1166 	PMAP_LOCK_INIT(pmap);
1167 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1168 	pmap->pm_l1 = kernel_pmap->pm_l1;
1169 }
1170 
1171 int
1172 pmap_pinit(pmap_t pmap)
1173 {
1174 	vm_paddr_t l1phys;
1175 	vm_page_t l1pt;
1176 
1177 	/*
1178 	 * allocate the l1 page
1179 	 */
1180 	while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL |
1181 	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
1182 		VM_WAIT;
1183 
1184 	l1phys = VM_PAGE_TO_PHYS(l1pt);
1185 	pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys);
1186 
1187 	if ((l1pt->flags & PG_ZERO) == 0)
1188 		pagezero(pmap->pm_l1);
1189 
1190 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
1191 
1192 	return (1);
1193 }
1194 
1195 /*
1196  * This routine is called if the desired page table page does not exist.
1197  *
1198  * If page table page allocation fails, this routine may sleep before
1199  * returning NULL.  It sleeps only if a lock pointer was given.
1200  *
1201  * Note: If a page allocation fails at page table level two or three,
1202  * one or two pages may be held during the wait, only to be released
1203  * afterwards.  This conservative approach is easily argued to avoid
1204  * race conditions.
1205  */
1206 static vm_page_t
1207 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
1208 {
1209 	vm_page_t m, /*pdppg, */pdpg;
1210 
1211 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1212 
1213 	/*
1214 	 * Allocate a page table page.
1215 	 */
1216 	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
1217 	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
1218 		if (lockp != NULL) {
1219 			RELEASE_PV_LIST_LOCK(lockp);
1220 			PMAP_UNLOCK(pmap);
1221 			rw_runlock(&pvh_global_lock);
1222 			VM_WAIT;
1223 			rw_rlock(&pvh_global_lock);
1224 			PMAP_LOCK(pmap);
1225 		}
1226 
1227 		/*
1228 		 * Indicate the need to retry.  While waiting, the page table
1229 		 * page may have been allocated.
1230 		 */
1231 		return (NULL);
1232 	}
1233 	if ((m->flags & PG_ZERO) == 0)
1234 		pmap_zero_page(m);
1235 
1236 	/*
1237 	 * Map the pagetable page into the process address space, if
1238 	 * it isn't already there.
1239 	 */
1240 
1241 	if (ptepindex >= NUPDE) {
1242 		pd_entry_t *l1;
1243 		vm_pindex_t l1index;
1244 
1245 		l1index = ptepindex - NUPDE;
1246 		l1 = &pmap->pm_l1[l1index];
1247 		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
1248 		PTE_SYNC(l1);
1249 
1250 	} else {
1251 		vm_pindex_t l1index;
1252 		pd_entry_t *l1, *l2;
1253 
1254 		l1index = ptepindex >> (L1_SHIFT - L2_SHIFT);
1255 		l1 = &pmap->pm_l1[l1index];
1256 		if (pmap_load(l1) == 0) {
1257 			/* recurse for allocating page dir */
1258 			if (_pmap_alloc_l3(pmap, NUPDE + l1index,
1259 			    lockp) == NULL) {
1260 				--m->wire_count;
1261 				atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1262 				vm_page_free_zero(m);
1263 				return (NULL);
1264 			}
1265 		} else {
1266 			pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK);
1267 			pdpg->wire_count++;
1268 		}
1269 
1270 		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
1271 		l2 = &l2[ptepindex & Ln_ADDR_MASK];
1272 		pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
1273 		PTE_SYNC(l2);
1274 	}
1275 
1276 	pmap_resident_count_inc(pmap, 1);
1277 
1278 	return (m);
1279 }
1280 
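/*
 * Return the page-table page that will hold the L3 entry for va, calling
 * _pmap_alloc_l3() to allocate it if it does not already exist.  The wire
 * count of an existing page is bumped to record the new reference.
 */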
1281 static vm_page_t
1282 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
1283 {
1284 	vm_pindex_t ptepindex;
1285 	pd_entry_t *l2;
1286 	vm_page_t m;
1287 
1288 	/*
1289 	 * Calculate pagetable page index
1290 	 */
1291 	ptepindex = pmap_l2_pindex(va);
1292 retry:
1293 	/*
1294 	 * Get the page directory entry
1295 	 */
1296 	l2 = pmap_l2(pmap, va);
1297 
1298 	/*
1299 	 * If the page table page is mapped, we just increment the
1300 	 * hold count, and activate it.
1301 	 */
1302 	if (l2 != NULL && pmap_load(l2) != 0) {
1303 		m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
1304 		m->wire_count++;
1305 	} else {
1306 		/*
1307 		 * Here if the pte page isn't mapped, or if it has been
1308 		 * deallocated.
1309 		 */
1310 		m = _pmap_alloc_l3(pmap, ptepindex, lockp);
1311 		if (m == NULL && lockp != NULL)
1312 			goto retry;
1313 	}
1314 	return (m);
1315 }
1316 
1317 
1318 /***************************************************
1319  * Pmap allocation/deallocation routines.
1320  ***************************************************/
1321 
1322 /*
1323  * Release any resources held by the given physical map.
1324  * Called when a pmap initialized by pmap_pinit is being released.
1325  * Should only be called if the map contains no valid mappings.
1326  */
1327 void
1328 pmap_release(pmap_t pmap)
1329 {
1330 	vm_page_t m;
1331 
1332 	KASSERT(pmap->pm_stats.resident_count == 0,
1333 	    ("pmap_release: pmap resident count %ld != 0",
1334 	    pmap->pm_stats.resident_count));
1335 
1336 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1));
1337 
1338 	m->wire_count--;
1339 	atomic_subtract_int(&vm_cnt.v_wire_count, 1);
1340 	vm_page_free_zero(m);
1341 }
1342 
1343 #if 0
1344 static int
1345 kvm_size(SYSCTL_HANDLER_ARGS)
1346 {
1347 	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;
1348 
1349 	return sysctl_handle_long(oidp, &ksize, 0, req);
1350 }
1351 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD,
1352     0, 0, kvm_size, "LU", "Size of KVM");
1353 
1354 static int
1355 kvm_free(SYSCTL_HANDLER_ARGS)
1356 {
1357 	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;
1358 
1359 	return sysctl_handle_long(oidp, &kfree, 0, req);
1360 }
1361 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
1362     0, 0, kvm_free, "LU", "Amount of KVM free");
1363 #endif /* 0 */
1364 
1365 /*
1366  * grow the number of kernel page table entries, if needed
1367  */
1368 void
1369 pmap_growkernel(vm_offset_t addr)
1370 {
1371 	vm_paddr_t paddr;
1372 	vm_page_t nkpg;
1373 	pd_entry_t *l1, *l2;
1374 
1375 	mtx_assert(&kernel_map->system_mtx, MA_OWNED);
1376 
1377 	addr = roundup2(addr, L2_SIZE);
1378 	if (addr - 1 >= kernel_map->max_offset)
1379 		addr = kernel_map->max_offset;
1380 	while (kernel_vm_end < addr) {
1381 		l1 = pmap_l1(kernel_pmap, kernel_vm_end);
1382 		if (pmap_load(l1) == 0) {
1383 			/* We need a new PDP entry */
1384 			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
1385 			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
1386 			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
1387 			if (nkpg == NULL)
1388 				panic("pmap_growkernel: no memory to grow kernel");
1389 			if ((nkpg->flags & PG_ZERO) == 0)
1390 				pmap_zero_page(nkpg);
1391 			paddr = VM_PAGE_TO_PHYS(nkpg);
1392 			pmap_load_store(l1, paddr | L1_TABLE);
1393 			PTE_SYNC(l1);
1394 			continue; /* try again */
1395 		}
1396 		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
1397 		if ((pmap_load(l2) & ATTR_AF) != 0) {
1398 			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1399 			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1400 				kernel_vm_end = kernel_map->max_offset;
1401 				break;
1402 			}
1403 			continue;
1404 		}
1405 
1406 		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
1407 		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
1408 		    VM_ALLOC_ZERO);
1409 		if (nkpg == NULL)
1410 			panic("pmap_growkernel: no memory to grow kernel");
1411 		if ((nkpg->flags & PG_ZERO) == 0)
1412 			pmap_zero_page(nkpg);
1413 		paddr = VM_PAGE_TO_PHYS(nkpg);
1414 		pmap_load_store(l2, paddr | L2_TABLE);
1415 		PTE_SYNC(l2);
1416 		pmap_invalidate_page(kernel_pmap, kernel_vm_end);
1417 
1418 		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
1419 		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
1420 			kernel_vm_end = kernel_map->max_offset;
1421 			break;
1422 		}
1423 	}
1424 }
1425 
1426 
1427 /***************************************************
1428  * page management routines.
1429  ***************************************************/
1430 
1431 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
1432 CTASSERT(_NPCM == 3);
1433 CTASSERT(_NPCPV == 168);
1434 
1435 static __inline struct pv_chunk *
1436 pv_to_chunk(pv_entry_t pv)
1437 {
1438 
1439 	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
1440 }
1441 
1442 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)
1443 
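/*
 * The free-entry bitmap of a completely free pv chunk: 168 entries spread
 * over three 64-bit words, with the unused high bits of the last word clear.
 */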
1444 #define	PC_FREE0	0xfffffffffffffffful
1445 #define	PC_FREE1	0xfffffffffffffffful
1446 #define	PC_FREE2	0x000000fffffffffful
1447 
1448 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };
1449 
1450 #if 0
1451 #ifdef PV_STATS
1452 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;
1453 
1454 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
1455 	"Current number of pv entry chunks");
1456 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
1457 	"Current number of pv entry chunks allocated");
1458 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
1459 	"Current number of pv entry chunks frees");
1460 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
1461 	"Number of times tried to get a chunk page but failed.");
1462 
1463 static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
1464 static int pv_entry_spare;
1465 
1466 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
1467 	"Current number of pv entry frees");
1468 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
1469 	"Current number of pv entry allocs");
1470 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
1471 	"Current number of pv entries");
1472 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
1473 	"Current number of spare pv entries");
1474 #endif
1475 #endif /* 0 */
1476 
1477 /*
1478  * We are in a serious low memory condition.  Resort to
1479  * drastic measures to free some pages so we can allocate
1480  * another pv entry chunk.
1481  *
1482  * Returns NULL if PV entries were reclaimed from the specified pmap.
1483  *
1484  * We do not, however, unmap 2mpages because subsequent accesses will
1485  * allocate per-page pv entries until repromotion occurs, thereby
1486  * exacerbating the shortage of free pv entries.
1487  */
1488 static vm_page_t
1489 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
1490 {
1491 
1492 	panic("ARM64TODO: reclaim_pv_chunk");
1493 }
1494 
1495 /*
1496  * free the pv_entry back to the free list
1497  */
1498 static void
1499 free_pv_entry(pmap_t pmap, pv_entry_t pv)
1500 {
1501 	struct pv_chunk *pc;
1502 	int idx, field, bit;
1503 
1504 	rw_assert(&pvh_global_lock, RA_LOCKED);
1505 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1506 	PV_STAT(atomic_add_long(&pv_entry_frees, 1));
1507 	PV_STAT(atomic_add_int(&pv_entry_spare, 1));
1508 	PV_STAT(atomic_subtract_long(&pv_entry_count, 1));
1509 	pc = pv_to_chunk(pv);
1510 	idx = pv - &pc->pc_pventry[0];
1511 	field = idx / 64;
1512 	bit = idx % 64;
1513 	pc->pc_map[field] |= 1ul << bit;
1514 	if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 ||
1515 	    pc->pc_map[2] != PC_FREE2) {
1516 		/* 98% of the time, pc is already at the head of the list. */
1517 		if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) {
1518 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1519 			TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1520 		}
1521 		return;
1522 	}
1523 	TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1524 	free_pv_chunk(pc);
1525 }
1526 
1527 static void
1528 free_pv_chunk(struct pv_chunk *pc)
1529 {
1530 	vm_page_t m;
1531 
1532 	mtx_lock(&pv_chunks_mutex);
1533  	TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
1534 	mtx_unlock(&pv_chunks_mutex);
1535 	PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
1536 	PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
1537 	PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
1538 	/* entire chunk is free, return it */
1539 	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc));
1540 	dump_drop_page(m->phys_addr);
1541 	vm_page_unwire(m, PQ_NONE);
1542 	vm_page_free(m);
1543 }
1544 
1545 /*
1546  * Returns a new PV entry, allocating a new PV chunk from the system when
1547  * needed.  If this PV chunk allocation fails and a PV list lock pointer was
1548  * given, a PV chunk is reclaimed from an arbitrary pmap.  Otherwise, NULL is
1549  * returned.
1550  *
1551  * The given PV list lock may be released.
1552  */
1553 static pv_entry_t
1554 get_pv_entry(pmap_t pmap, struct rwlock **lockp)
1555 {
1556 	int bit, field;
1557 	pv_entry_t pv;
1558 	struct pv_chunk *pc;
1559 	vm_page_t m;
1560 
1561 	rw_assert(&pvh_global_lock, RA_LOCKED);
1562 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1563 	PV_STAT(atomic_add_long(&pv_entry_allocs, 1));
1564 retry:
1565 	pc = TAILQ_FIRST(&pmap->pm_pvchunk);
1566 	if (pc != NULL) {
1567 		for (field = 0; field < _NPCM; field++) {
1568 			if (pc->pc_map[field]) {
1569 				bit = ffsl(pc->pc_map[field]) - 1;
1570 				break;
1571 			}
1572 		}
1573 		if (field < _NPCM) {
1574 			pv = &pc->pc_pventry[field * 64 + bit];
1575 			pc->pc_map[field] &= ~(1ul << bit);
1576 			/* If this was the last item, move it to tail */
1577 			if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 &&
1578 			    pc->pc_map[2] == 0) {
1579 				TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
1580 				TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc,
1581 				    pc_list);
1582 			}
1583 			PV_STAT(atomic_add_long(&pv_entry_count, 1));
1584 			PV_STAT(atomic_subtract_int(&pv_entry_spare, 1));
1585 			return (pv);
1586 		}
1587 	}
1588 	/* No free items, allocate another chunk */
1589 	m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ |
1590 	    VM_ALLOC_WIRED);
1591 	if (m == NULL) {
1592 		if (lockp == NULL) {
1593 			PV_STAT(pc_chunk_tryfail++);
1594 			return (NULL);
1595 		}
1596 		m = reclaim_pv_chunk(pmap, lockp);
1597 		if (m == NULL)
1598 			goto retry;
1599 	}
1600 	PV_STAT(atomic_add_int(&pc_chunk_count, 1));
1601 	PV_STAT(atomic_add_int(&pc_chunk_allocs, 1));
1602 	dump_add_page(m->phys_addr);
1603 	pc = (void *)PHYS_TO_DMAP(m->phys_addr);
1604 	pc->pc_pmap = pmap;
1605 	pc->pc_map[0] = PC_FREE0 & ~1ul;	/* preallocated bit 0 */
1606 	pc->pc_map[1] = PC_FREE1;
1607 	pc->pc_map[2] = PC_FREE2;
1608 	mtx_lock(&pv_chunks_mutex);
1609 	TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru);
1610 	mtx_unlock(&pv_chunks_mutex);
1611 	pv = &pc->pc_pventry[0];
1612 	TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list);
1613 	PV_STAT(atomic_add_long(&pv_entry_count, 1));
1614 	PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1));
1615 	return (pv);
1616 }
1617 
1618 /*
1619  * First find and then remove the pv entry for the specified pmap and virtual
1620  * address from the specified pv list.  Returns the pv entry if found and NULL
1621  * otherwise.  This operation can be performed on pv lists for either 4KB or
1622  * 2MB page mappings.
1623  */
1624 static __inline pv_entry_t
1625 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1626 {
1627 	pv_entry_t pv;
1628 
1629 	rw_assert(&pvh_global_lock, RA_LOCKED);
1630 	TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) {
1631 		if (pmap == PV_PMAP(pv) && va == pv->pv_va) {
1632 			TAILQ_REMOVE(&pvh->pv_list, pv, pv_next);
1633 			pvh->pv_gen++;
1634 			break;
1635 		}
1636 	}
1637 	return (pv);
1638 }
1639 
1640 /*
1641  * First find and then destroy the pv entry for the specified pmap and virtual
1642  * address.  This operation can be performed on pv lists for either 4KB or 2MB
1643  * page mappings.
1644  */
1645 static void
1646 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va)
1647 {
1648 	pv_entry_t pv;
1649 
1650 	pv = pmap_pvh_remove(pvh, pmap, va);
1651 	KASSERT(pv != NULL, ("pmap_pvh_free: pv not found"));
1652 	free_pv_entry(pmap, pv);
1653 }
1654 
1655 /*
1656  * Conditionally create the PV entry for a 4KB page mapping if the required
1657  * memory can be allocated without resorting to reclamation.
1658  */
1659 static boolean_t
1660 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m,
1661     struct rwlock **lockp)
1662 {
1663 	pv_entry_t pv;
1664 
1665 	rw_assert(&pvh_global_lock, RA_LOCKED);
1666 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1667 	/* Pass NULL instead of the lock pointer to disable reclamation. */
1668 	if ((pv = get_pv_entry(pmap, NULL)) != NULL) {
1669 		pv->pv_va = va;
1670 		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1671 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
1672 		m->md.pv_gen++;
1673 		return (TRUE);
1674 	} else
1675 		return (FALSE);
1676 }
1677 
1678 /*
1679  * pmap_remove_l3: do the things to unmap a page in a process
1680  */
1681 static int
1682 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va,
1683     pd_entry_t l2e, struct spglist *free, struct rwlock **lockp)
1684 {
1685 	pt_entry_t old_l3;
1686 	vm_page_t m;
1687 
1688 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
1689 	if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3)))
1690 		cpu_dcache_wb_range(va, L3_SIZE);
1691 	old_l3 = pmap_load_clear(l3);
1692 	PTE_SYNC(l3);
1693 	pmap_invalidate_page(pmap, va);
1694 	if (old_l3 & ATTR_SW_WIRED)
1695 		pmap->pm_stats.wired_count -= 1;
1696 	pmap_resident_count_dec(pmap, 1);
1697 	if (old_l3 & ATTR_SW_MANAGED) {
1698 		m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK);
1699 		if (pmap_page_dirty(old_l3))
1700 			vm_page_dirty(m);
1701 		if (old_l3 & ATTR_AF)
1702 			vm_page_aflag_set(m, PGA_REFERENCED);
1703 		CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
1704 		pmap_pvh_free(&m->md, pmap, va);
1705 	}
1706 	return (pmap_unuse_l3(pmap, va, l2e, free));
1707 }
1708 
1709 /*
1710  *	Remove the given range of addresses from the specified map.
1711  *
1712  *	It is assumed that the start and end are properly
1713  *	rounded to the page size.
1714  */
1715 void
1716 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
1717 {
1718 	struct rwlock *lock;
1719 	vm_offset_t va, va_next;
1720 	pd_entry_t *l1, *l2;
1721 	pt_entry_t l3_paddr, *l3;
1722 	struct spglist free;
1723 	int anyvalid;
1724 
1725 	/*
1726 	 * Perform an unsynchronized read.  This is, however, safe.
1727 	 */
1728 	if (pmap->pm_stats.resident_count == 0)
1729 		return;
1730 
1731 	anyvalid = 0;
1732 	SLIST_INIT(&free);
1733 
1734 	rw_rlock(&pvh_global_lock);
1735 	PMAP_LOCK(pmap);
1736 
1737 	lock = NULL;
1738 	for (; sva < eva; sva = va_next) {
1739 
1740 		if (pmap->pm_stats.resident_count == 0)
1741 			break;
1742 
1743 		l1 = pmap_l1(pmap, sva);
1744 		if (pmap_load(l1) == 0) {
1745 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1746 			if (va_next < sva)
1747 				va_next = eva;
1748 			continue;
1749 		}
1750 
1751 		/*
1752 		 * Calculate index for next page table.
1753 		 */
1754 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1755 		if (va_next < sva)
1756 			va_next = eva;
1757 
1758 		l2 = pmap_l1_to_l2(l1, sva);
1759 		if (l2 == NULL)
1760 			continue;
1761 
1762 		l3_paddr = pmap_load(l2);
1763 
1764 		/*
1765 		 * Weed out invalid mappings.
1766 		 */
1767 		if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE)
1768 			continue;
1769 
1770 		/*
1771 		 * Limit our scan to either the end of the va represented
1772 		 * by the current page table page, or to the end of the
1773 		 * range being removed.
1774 		 */
1775 		if (va_next > eva)
1776 			va_next = eva;
1777 
1778 		va = va_next;
1779 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
1780 		    sva += L3_SIZE) {
1781 			if (l3 == NULL)
1782 				panic("l3 == NULL");
1783 			if (pmap_load(l3) == 0) {
1784 				if (va != va_next) {
1785 					pmap_invalidate_range(pmap, va, sva);
1786 					va = va_next;
1787 				}
1788 				continue;
1789 			}
1790 			if (va == va_next)
1791 				va = sva;
1792 			if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free,
1793 			    &lock)) {
1794 				sva += L3_SIZE;
1795 				break;
1796 			}
1797 		}
1798 		if (va != va_next)
1799 			pmap_invalidate_range(pmap, va, sva);
1800 	}
1801 	if (lock != NULL)
1802 		rw_wunlock(lock);
1803 	if (anyvalid)
1804 		pmap_invalidate_all(pmap);
1805 	rw_runlock(&pvh_global_lock);
1806 	PMAP_UNLOCK(pmap);
1807 	pmap_free_zero_pages(&free);
1808 }
1809 
1810 /*
1811  *	Routine:	pmap_remove_all
1812  *	Function:
1813  *		Removes this physical page from
1814  *		all physical maps in which it resides.
1815  *		Reflects back modify bits to the pager.
1816  *
1817  *	Notes:
1818  *		Original versions of this routine were very
1819  *		inefficient because they iteratively called
1820  *		pmap_remove (slow...)
1821  */
1822 
1823 void
1824 pmap_remove_all(vm_page_t m)
1825 {
1826 	pv_entry_t pv;
1827 	pmap_t pmap;
1828 	pt_entry_t *l3, tl3;
1829 	pd_entry_t *l2, tl2;
1830 	struct spglist free;
1831 
1832 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
1833 	    ("pmap_remove_all: page %p is not managed", m));
1834 	SLIST_INIT(&free);
1835 	rw_wlock(&pvh_global_lock);
1836 	while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) {
1837 		pmap = PV_PMAP(pv);
1838 		PMAP_LOCK(pmap);
1839 		pmap_resident_count_dec(pmap, 1);
1840 		l2 = pmap_l2(pmap, pv->pv_va);
1841 		KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found"));
1842 		tl2 = pmap_load(l2);
1843 		KASSERT((tl2 & ATTR_DESCR_MASK) == L2_TABLE,
1844 		    ("pmap_remove_all: found a block when expecting "
1845 		     "a table in %p's pv list", m));
1846 		l3 = pmap_l2_to_l3(l2, pv->pv_va);
1847 		if (pmap_is_current(pmap) &&
1848 		    pmap_l3_valid_cacheable(pmap_load(l3)))
1849 			cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
1850 		tl3 = pmap_load_clear(l3);
1851 		PTE_SYNC(l3);
1852 		pmap_invalidate_page(pmap, pv->pv_va);
1853 		if (tl3 & ATTR_SW_WIRED)
1854 			pmap->pm_stats.wired_count--;
1855 		if ((tl3 & ATTR_AF) != 0)
1856 			vm_page_aflag_set(m, PGA_REFERENCED);
1857 
1858 		/*
1859 		 * Update the vm_page_t clean and reference bits.
1860 		 */
1861 		if (pmap_page_dirty(tl3))
1862 			vm_page_dirty(m);
1863 		pmap_unuse_l3(pmap, pv->pv_va, tl2, &free);
1864 		TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
1865 		m->md.pv_gen++;
1866 		free_pv_entry(pmap, pv);
1867 		PMAP_UNLOCK(pmap);
1868 	}
1869 	vm_page_aflag_clear(m, PGA_WRITEABLE);
1870 	rw_wunlock(&pvh_global_lock);
1871 	pmap_free_zero_pages(&free);
1872 }
1873 
1874 /*
1875  *	Set the physical protection on the
1876  *	specified range of this map as requested.
1877  */
1878 void
1879 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot)
1880 {
1881 	vm_offset_t va_next;
1882 	pd_entry_t *l1, *l2;
1883 	pt_entry_t *l3p, l3;
1884 
1885 	if ((prot & VM_PROT_READ) == VM_PROT_NONE) {
1886 		pmap_remove(pmap, sva, eva);
1887 		return;
1888 	}
1889 
1890 	if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE)
1891 		return;
1892 
1893 	PMAP_LOCK(pmap);
1894 	for (; sva < eva; sva = va_next) {
1895 
1896 		l1 = pmap_l1(pmap, sva);
1897 		if (pmap_load(l1) == 0) {
1898 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
1899 			if (va_next < sva)
1900 				va_next = eva;
1901 			continue;
1902 		}
1903 
1904 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
1905 		if (va_next < sva)
1906 			va_next = eva;
1907 
1908 		l2 = pmap_l1_to_l2(l1, sva);
1909 		if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
1910 			continue;
1911 
1912 		if (va_next > eva)
1913 			va_next = eva;
1914 
1916 		for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++,
1917 		    sva += L3_SIZE) {
1918 			l3 = pmap_load(l3p);
1919 			if (pmap_l3_valid(l3)) {
1920 				pmap_set(l3p, ATTR_AP(ATTR_AP_RO));
1921 				PTE_SYNC(l3p);
1922 				/* XXX: Use pmap_invalidate_range */
1923 				pmap_invalidate_page(pmap, sva);
1924 			}
1925 		}
1926 	}
1927 	PMAP_UNLOCK(pmap);
1928 
1929 	/* TODO: Only invalidate entries we are touching */
1930 	pmap_invalidate_all(pmap);
1931 }
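
#if 0
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * downgrade a range to read-only, e.g. before copy-on-write processing.
 * Requesting no read access at all is folded into pmap_remove() above.
 */
static void
example_write_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	pmap_protect(pmap, sva, eva, VM_PROT_READ);
}
#endif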
1932 
1933 /*
1934  *	Insert the given physical page (p) at
1935  *	the specified virtual address (v) in the
1936  *	target physical map with the protection requested.
1937  *
1938  *	If specified, the page will be wired down, meaning
1939  *	that the related pte can not be reclaimed.
1940  *
1941  *	NB:  This is the only routine which MAY NOT lazy-evaluate
1942  *	or lose information.  That is, this routine must actually
1943  *	insert this page into the given map NOW.
1944  */
1945 int
1946 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot,
1947     u_int flags, int8_t psind __unused)
1948 {
1949 	struct rwlock *lock;
1950 	pd_entry_t *l1, *l2;
1951 	pt_entry_t new_l3, orig_l3;
1952 	pt_entry_t *l3;
1953 	pv_entry_t pv;
1954 	vm_paddr_t opa, pa, l2_pa, l3_pa;
1955 	vm_page_t mpte, om, l2_m, l3_m;
1956 	boolean_t nosleep;
1957 
1958 	va = trunc_page(va);
1959 	if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m))
1960 		VM_OBJECT_ASSERT_LOCKED(m->object);
1961 	pa = VM_PAGE_TO_PHYS(m);
1962 	new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
1963 	    L3_PAGE);
1964 	if ((prot & VM_PROT_WRITE) == 0)
1965 		new_l3 |= ATTR_AP(ATTR_AP_RO);
1966 	if ((flags & PMAP_ENTER_WIRED) != 0)
1967 		new_l3 |= ATTR_SW_WIRED;
1968 	if ((va >> 63) == 0)
1969 		new_l3 |= ATTR_AP(ATTR_AP_USER);
1970 
1971 	CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa);
1972 
1973 	mpte = NULL;
1974 
1975 	lock = NULL;
1976 	rw_rlock(&pvh_global_lock);
1977 	PMAP_LOCK(pmap);
1978 
1979 	if (va < VM_MAXUSER_ADDRESS) {
1980 		nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0;
1981 		mpte = pmap_alloc_l3(pmap, va, nosleep ? NULL : &lock);
1982 		if (mpte == NULL && nosleep) {
1983 			CTR0(KTR_PMAP, "pmap_enter: mpte == NULL");
1984 			if (lock != NULL)
1985 				rw_wunlock(lock);
1986 			rw_runlock(&pvh_global_lock);
1987 			PMAP_UNLOCK(pmap);
1988 			return (KERN_RESOURCE_SHORTAGE);
1989 		}
1990 		l3 = pmap_l3(pmap, va);
1991 	} else {
1992 		l3 = pmap_l3(pmap, va);
1993 		/* TODO: This is not optimal, but should mostly work */
1994 		if (l3 == NULL) {
1995 			l2 = pmap_l2(pmap, va);
1996 
1997 			if (l2 == NULL) {
1998 				l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
1999 				    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
2000 				    VM_ALLOC_ZERO);
2001 				if (l2_m == NULL)
2002 					panic("pmap_enter: l2 pte_m == NULL");
2003 				if ((l2_m->flags & PG_ZERO) == 0)
2004 					pmap_zero_page(l2_m);
2005 
2006 				l2_pa = VM_PAGE_TO_PHYS(l2_m);
2007 				l1 = pmap_l1(pmap, va);
2008 				pmap_load_store(l1, l2_pa | L1_TABLE);
2009 				PTE_SYNC(l1);
2010 				l2 = pmap_l1_to_l2(l1, va);
2011 			}
2012 
2013 			KASSERT(l2 != NULL,
2014 			    ("No l2 table after allocating one"));
2015 
2016 			l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
2017 			    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO);
2018 			if (l3_m == NULL)
2019 				panic("pmap_enter: l3 pte_m == NULL");
2020 			if ((l3_m->flags & PG_ZERO) == 0)
2021 				pmap_zero_page(l3_m);
2022 
2023 			l3_pa = VM_PAGE_TO_PHYS(l3_m);
2024 			pmap_load_store(l2, l3_pa | L2_TABLE);
2025 			PTE_SYNC(l2);
2026 			l3 = pmap_l2_to_l3(l2, va);
2027 		}
2028 		pmap_invalidate_page(pmap, va);
2029 	}
2030 
2031 	om = NULL;
2032 	orig_l3 = pmap_load(l3);
2033 	opa = orig_l3 & ~ATTR_MASK;
2034 
2035 	/*
2036 	 * Is the specified virtual address already mapped?
2037 	 */
2038 	if (pmap_l3_valid(orig_l3)) {
2039 		/*
2040 		 * Wiring change, just update stats. We don't worry about
2041 		 * wiring PT pages as they remain resident as long as there
2042 		 * are valid mappings in them. Hence, if a user page is wired,
2043 		 * the PT page will be also.
2044 		 */
2045 		if ((flags & PMAP_ENTER_WIRED) != 0 &&
2046 		    (orig_l3 & ATTR_SW_WIRED) == 0)
2047 			pmap->pm_stats.wired_count++;
2048 		else if ((flags & PMAP_ENTER_WIRED) == 0 &&
2049 		    (orig_l3 & ATTR_SW_WIRED) != 0)
2050 			pmap->pm_stats.wired_count--;
2051 
2052 		/*
2053 		 * Remove the extra PT page reference.
2054 		 */
2055 		if (mpte != NULL) {
2056 			mpte->wire_count--;
2057 			KASSERT(mpte->wire_count > 0,
2058 			    ("pmap_enter: missing reference to page table page,"
2059 			     " va: 0x%lx", va));
2060 		}
2061 
2062 		/*
2063 		 * Has the physical page changed?
2064 		 */
2065 		if (opa == pa) {
2066 			/*
2067 			 * No, might be a protection or wiring change.
2068 			 */
2069 			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2070 				new_l3 |= ATTR_SW_MANAGED;
2071 				if ((new_l3 & ATTR_AP_RW_BIT) ==
2072 				    ATTR_AP(ATTR_AP_RW)) {
2073 					vm_page_aflag_set(m, PGA_WRITEABLE);
2074 				}
2075 			}
2076 			goto validate;
2077 		}
2078 
2079 		/* Flush the cache, there might be uncommitted data in it */
2080 		if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3))
2081 			cpu_dcache_wb_range(va, L3_SIZE);
2082 	} else {
2083 		/*
2084 		 * Increment the counters.
2085 		 */
2086 		if ((new_l3 & ATTR_SW_WIRED) != 0)
2087 			pmap->pm_stats.wired_count++;
2088 		pmap_resident_count_inc(pmap, 1);
2089 	}
2090 	/*
2091 	 * Enter on the PV list if part of our managed memory.
2092 	 */
2093 	if ((m->oflags & VPO_UNMANAGED) == 0) {
2094 		new_l3 |= ATTR_SW_MANAGED;
2095 		pv = get_pv_entry(pmap, &lock);
2096 		pv->pv_va = va;
2097 		CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa);
2098 		TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2099 		m->md.pv_gen++;
2100 		if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW))
2101 			vm_page_aflag_set(m, PGA_WRITEABLE);
2102 	}
2103 
2104 	/*
2105 	 * Update the L3 entry.
2106 	 */
2107 	if (orig_l3 != 0) {
2108 validate:
2109 		orig_l3 = pmap_load_store(l3, new_l3);
2110 		PTE_SYNC(l3);
2111 		opa = orig_l3 & ~ATTR_MASK;
2112 
2113 		if (opa != pa) {
2114 			if ((orig_l3 & ATTR_SW_MANAGED) != 0) {
2115 				om = PHYS_TO_VM_PAGE(opa);
2116 				if (pmap_page_dirty(orig_l3))
2117 					vm_page_dirty(om);
2118 				if ((orig_l3 & ATTR_AF) != 0)
2119 					vm_page_aflag_set(om, PGA_REFERENCED);
2120 				CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa);
2121 				pmap_pvh_free(&om->md, pmap, va);
2122 			}
2123 		} else if (pmap_page_dirty(orig_l3)) {
2124 			if ((orig_l3 & ATTR_SW_MANAGED) != 0)
2125 				vm_page_dirty(m);
2126 		}
2127 	} else {
2128 		pmap_load_store(l3, new_l3);
2129 		PTE_SYNC(l3);
2130 	}
2131 	pmap_invalidate_page(pmap, va);
2132 	if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap))
2133 		cpu_icache_sync_range(va, PAGE_SIZE);
2134 
2135 	if (lock != NULL)
2136 		rw_wunlock(lock);
2137 	rw_runlock(&pvh_global_lock);
2138 	PMAP_UNLOCK(pmap);
2139 	return (KERN_SUCCESS);
2140 }
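
#if 0
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * a wired, no-sleep insertion and the fallback a caller might perform when
 * no L3 page table page could be allocated.
 */
static int
example_enter_wired(pmap_t pmap, vm_offset_t va, vm_page_t m)
{
	int rv;

	rv = pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE,
	    PMAP_ENTER_WIRED | PMAP_ENTER_NOSLEEP, 0);
	if (rv == KERN_RESOURCE_SHORTAGE) {
		/* Retry and allow pmap_enter() to sleep for the L3 page. */
		rv = pmap_enter(pmap, va, m, VM_PROT_READ | VM_PROT_WRITE,
		    PMAP_ENTER_WIRED, 0);
	}
	return (rv);
}
#endif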
2141 
2142 /*
2143  * Maps a sequence of resident pages belonging to the same object.
2144  * The sequence begins with the given page m_start.  This page is
2145  * mapped at the given virtual address start.  Each subsequent page is
2146  * mapped at a virtual address that is offset from start by the same
2147  * amount as the page is offset from m_start within the object.  The
2148  * last page in the sequence is the page with the largest offset from
2149  * m_start that can be mapped at a virtual address less than the given
2150  * virtual address end.  Not every virtual page between start and end
2151  * is mapped; only those for which a resident page exists with the
2152  * corresponding offset from m_start are mapped.
2153  */
2154 void
2155 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end,
2156     vm_page_t m_start, vm_prot_t prot)
2157 {
2158 	struct rwlock *lock;
2159 	vm_offset_t va;
2160 	vm_page_t m, mpte;
2161 	vm_pindex_t diff, psize;
2162 
2163 	VM_OBJECT_ASSERT_LOCKED(m_start->object);
2164 
2165 	psize = atop(end - start);
2166 	mpte = NULL;
2167 	m = m_start;
2168 	lock = NULL;
2169 	rw_rlock(&pvh_global_lock);
2170 	PMAP_LOCK(pmap);
2171 	while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) {
2172 		va = start + ptoa(diff);
2173 		mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock);
2174 		m = TAILQ_NEXT(m, listq);
2175 	}
2176 	if (lock != NULL)
2177 		rw_wunlock(lock);
2178 	rw_runlock(&pvh_global_lock);
2179 	PMAP_UNLOCK(pmap);
2180 }
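
/*
 * For example, if m_start has pindex 10 and is mapped at "start", then a
 * resident page with pindex 13 is mapped at start + ptoa(3), and pages whose
 * offset from m_start is atop(end - start) pages or more are not mapped at
 * all.
 */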
2181 
2182 /*
2183  * This code makes some *MAJOR* assumptions:
2184  * 1. The current pmap and the target pmap exist.
2185  * 2. Not wired.
2186  * 3. Read access.
2187  * 4. No page table pages.
2188  * but is *MUCH* faster than pmap_enter...
2189  */
2190 
2191 void
2192 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot)
2193 {
2194 	struct rwlock *lock;
2195 
2196 	lock = NULL;
2197 	rw_rlock(&pvh_global_lock);
2198 	PMAP_LOCK(pmap);
2199 	(void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock);
2200 	if (lock != NULL)
2201 		rw_wunlock(lock);
2202 	rw_runlock(&pvh_global_lock);
2203 	PMAP_UNLOCK(pmap);
2204 }
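
#if 0
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * opportunistically enter a run of resident pages read-only, ignoring
 * failures, as a fault-ahead style optimization.
 */
static void
example_prefault(pmap_t pmap, vm_offset_t va, vm_page_t ma[], int count)
{
	int i;

	for (i = 0; i < count; i++)
		pmap_enter_quick(pmap, va + ptoa(i), ma[i], VM_PROT_READ);
}
#endif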
2205 
2206 static vm_page_t
2207 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m,
2208     vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp)
2209 {
2210 	struct spglist free;
2211 	pd_entry_t *l2;
2212 	pt_entry_t *l3;
2213 	vm_paddr_t pa;
2214 
2215 	KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva ||
2216 	    (m->oflags & VPO_UNMANAGED) != 0,
2217 	    ("pmap_enter_quick_locked: managed mapping within the clean submap"));
2218 	rw_assert(&pvh_global_lock, RA_LOCKED);
2219 	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
2220 
2221 	CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va);
2222 	/*
2223 	 * In the case that a page table page is not
2224 	 * resident, we are creating it here.
2225 	 */
2226 	if (va < VM_MAXUSER_ADDRESS) {
2227 		vm_pindex_t l2pindex;
2228 
2229 		/*
2230 		 * Calculate pagetable page index
2231 		 */
2232 		l2pindex = pmap_l2_pindex(va);
2233 		if (mpte && (mpte->pindex == l2pindex)) {
2234 			mpte->wire_count++;
2235 		} else {
2236 			/*
2237 			 * Get the l2 entry
2238 			 */
2239 			l2 = pmap_l2(pmap, va);
2240 
2241 			/*
2242 			 * If the page table page is mapped, we just increment
2243 			 * the hold count, and activate it.  Otherwise, we
2244 			 * attempt to allocate a page table page.  If this
2245 			 * attempt fails, we don't retry.  Instead, we give up.
2246 			 */
2247 			if (l2 != NULL && pmap_load(l2) != 0) {
2248 				mpte =
2249 				    PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK);
2250 				mpte->wire_count++;
2251 			} else {
2252 				/*
2253 				 * Pass NULL instead of the PV list lock
2254 				 * pointer, because we don't intend to sleep.
2255 				 */
2256 				mpte = _pmap_alloc_l3(pmap, l2pindex, NULL);
2257 				if (mpte == NULL)
2258 					return (mpte);
2259 			}
2260 		}
2261 		l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte));
2262 		l3 = &l3[pmap_l3_index(va)];
2263 	} else {
2264 		mpte = NULL;
2265 		l3 = pmap_l3(kernel_pmap, va);
2266 	}
2267 	if (l3 == NULL)
2268 		panic("pmap_enter_quick_locked: No l3");
2269 	if (pmap_load(l3) != 0) {
2270 		if (mpte != NULL) {
2271 			mpte->wire_count--;
2272 			mpte = NULL;
2273 		}
2274 		return (mpte);
2275 	}
2276 
2277 	/*
2278 	 * Enter on the PV list if part of our managed memory.
2279 	 */
2280 	if ((m->oflags & VPO_UNMANAGED) == 0 &&
2281 	    !pmap_try_insert_pv_entry(pmap, va, m, lockp)) {
2282 		if (mpte != NULL) {
2283 			SLIST_INIT(&free);
2284 			if (pmap_unwire_l3(pmap, va, mpte, &free)) {
2285 				pmap_invalidate_page(pmap, va);
2286 				pmap_free_zero_pages(&free);
2287 			}
2288 			mpte = NULL;
2289 		}
2290 		return (mpte);
2291 	}
2292 
2293 	/*
2294 	 * Increment counters
2295 	 */
2296 	pmap_resident_count_inc(pmap, 1);
2297 
2298 	pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) |
2299 	    ATTR_AP(ATTR_AP_RO) | L3_PAGE;
2300 
2301 	/*
2302 	 * Now validate mapping with RO protection
2303 	 */
2304 	if ((m->oflags & VPO_UNMANAGED) == 0)
2305 		pa |= ATTR_SW_MANAGED;
2306 	pmap_load_store(l3, pa);
2307 	PTE_SYNC(l3);
2308 	pmap_invalidate_page(pmap, va);
2309 	return (mpte);
2310 }
2311 
2312 /*
2313  * This code maps large physical mmap regions into the
2314  * processor address space.  Note that some shortcuts
2315  * are taken, but the code works.
2316  */
2317 void
2318 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object,
2319     vm_pindex_t pindex, vm_size_t size)
2320 {
2321 
2322 	VM_OBJECT_ASSERT_WLOCKED(object);
2323 	KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG,
2324 	    ("pmap_object_init_pt: non-device object"));
2325 }
2326 
2327 /*
2328  *	Clear the wired attribute from the mappings for the specified range of
2329  *	addresses in the given pmap.  Every valid mapping within that range
2330  *	must have the wired attribute set.  In contrast, invalid mappings
2331  *	cannot have the wired attribute set, so they are ignored.
2332  *
2333  *	The wired attribute of the page table entry is not a hardware feature,
2334  *	so there is no need to invalidate any TLB entries.
2335  */
2336 void
2337 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
2338 {
2339 	vm_offset_t va_next;
2340 	pd_entry_t *l1, *l2;
2341 	pt_entry_t *l3;
2342 	boolean_t pv_lists_locked;
2343 
2344 	pv_lists_locked = FALSE;
2345 	PMAP_LOCK(pmap);
2346 	for (; sva < eva; sva = va_next) {
2347 		l1 = pmap_l1(pmap, sva);
2348 		if (pmap_load(l1) == 0) {
2349 			va_next = (sva + L1_SIZE) & ~L1_OFFSET;
2350 			if (va_next < sva)
2351 				va_next = eva;
2352 			continue;
2353 		}
2354 
2355 		va_next = (sva + L2_SIZE) & ~L2_OFFSET;
2356 		if (va_next < sva)
2357 			va_next = eva;
2358 
2359 		l2 = pmap_l1_to_l2(l1, sva);
2360 		if (pmap_load(l2) == 0)
2361 			continue;
2362 
2363 		if (va_next > eva)
2364 			va_next = eva;
2365 		for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++,
2366 		    sva += L3_SIZE) {
2367 			if (pmap_load(l3) == 0)
2368 				continue;
2369 			if ((pmap_load(l3) & ATTR_SW_WIRED) == 0)
2370 				panic("pmap_unwire: l3 %#jx is missing "
2371 				    "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3));
2372 
2373 			/*
2374 			 * ATTR_SW_WIRED must be cleared atomically.  Although the
2375 			 * pmap lock synchronizes access to it, another processor could
2376 			 * be modifying other attributes of the entry concurrently.
2377 			 */
2378 			atomic_clear_long(l3, ATTR_SW_WIRED);
2379 			pmap->pm_stats.wired_count--;
2380 		}
2381 	}
2382 	if (pv_lists_locked)
2383 		rw_runlock(&pvh_global_lock);
2384 	PMAP_UNLOCK(pmap);
2385 }
2386 
2387 /*
2388  *	Copy the range specified by src_addr/len
2389  *	from the source map to the range dst_addr/len
2390  *	in the destination map.
2391  *
2392  *	This routine is only advisory and need not do anything.
2393  */
2394 
2395 void
2396 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len,
2397     vm_offset_t src_addr)
2398 {
2399 }
2400 
2401 /*
2402  *	pmap_zero_page zeros the specified hardware page through its
2403  *	direct map (DMAP) address.
2404  */
2405 void
2406 pmap_zero_page(vm_page_t m)
2407 {
2408 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2409 
2410 	pagezero((void *)va);
2411 }
2412 
2413 /*
2414  *	pmap_zero_page_area zeros the requested region of the specified
2415  *	hardware page through its direct map (DMAP) address.
2416  *
2417  *	off and size may not cover an area beyond a single hardware page.
2418  */
2419 void
2420 pmap_zero_page_area(vm_page_t m, int off, int size)
2421 {
2422 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2423 
2424 	if (off == 0 && size == PAGE_SIZE)
2425 		pagezero((void *)va);
2426 	else
2427 		bzero((char *)va + off, size);
2428 }
2429 
2430 /*
2431  *	pmap_zero_page_idle zeros the specified hardware page through
2432  *	its direct map (DMAP) address.  This
2433  *	is intended to be called from the vm_pagezero process only and
2434  *	outside of Giant.
2435  */
2436 void
2437 pmap_zero_page_idle(vm_page_t m)
2438 {
2439 	vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
2440 
2441 	pagezero((void *)va);
2442 }
2443 
2444 /*
2445  *	pmap_copy_page copies the specified (machine independent)
2446  *	page using the direct map (DMAP) addresses of the source
2447  *	and destination pages, one machine dependent page at a
2448  *	time.
2449  */
2450 void
2451 pmap_copy_page(vm_page_t msrc, vm_page_t mdst)
2452 {
2453 	vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc));
2454 	vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst));
2455 
2456 	pagecopy((void *)src, (void *)dst);
2457 }
2458 
2459 int unmapped_buf_allowed = 1;
2460 
2461 void
2462 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[],
2463     vm_offset_t b_offset, int xfersize)
2464 {
2465 	void *a_cp, *b_cp;
2466 	vm_page_t m_a, m_b;
2467 	vm_paddr_t p_a, p_b;
2468 	vm_offset_t a_pg_offset, b_pg_offset;
2469 	int cnt;
2470 
2471 	while (xfersize > 0) {
2472 		a_pg_offset = a_offset & PAGE_MASK;
2473 		m_a = ma[a_offset >> PAGE_SHIFT];
2474 		p_a = m_a->phys_addr;
2475 		b_pg_offset = b_offset & PAGE_MASK;
2476 		m_b = mb[b_offset >> PAGE_SHIFT];
2477 		p_b = m_b->phys_addr;
2478 		cnt = min(xfersize, PAGE_SIZE - a_pg_offset);
2479 		cnt = min(cnt, PAGE_SIZE - b_pg_offset);
2480 		if (__predict_false(!PHYS_IN_DMAP(p_a))) {
2481 			panic("!DMAP a %lx", p_a);
2482 		} else {
2483 			a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset;
2484 		}
2485 		if (__predict_false(!PHYS_IN_DMAP(p_b))) {
2486 			panic("!DMAP b %lx", p_b);
2487 		} else {
2488 			b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset;
2489 		}
2490 		bcopy(a_cp, b_cp, cnt);
2491 		a_offset += cnt;
2492 		b_offset += cnt;
2493 		xfersize -= cnt;
2494 	}
2495 }
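
/*
 * For example, a 6-byte transfer whose "a" side starts 4 bytes before a page
 * boundary is performed as two bcopy() calls of 4 and 2 bytes: the min()
 * clamping above never lets a single copy cross a page boundary on either
 * side.
 */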
2496 
2497 vm_offset_t
2498 pmap_quick_enter_page(vm_page_t m)
2499 {
2500 
2501 	return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)));
2502 }
2503 
2504 void
2505 pmap_quick_remove_page(vm_offset_t addr)
2506 {
2507 }
2508 
2509 /*
2510  * Returns true if the pmap's pv is one of the first
2511  * 16 pvs linked to from this page.  This count may
2512  * be changed upwards or downwards in the future; it
2513  * is only necessary that true be returned for a small
2514  * subset of pmaps for proper page aging.
2515  */
2516 boolean_t
2517 pmap_page_exists_quick(pmap_t pmap, vm_page_t m)
2518 {
2519 	struct rwlock *lock;
2520 	pv_entry_t pv;
2521 	int loops = 0;
2522 	boolean_t rv;
2523 
2524 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2525 	    ("pmap_page_exists_quick: page %p is not managed", m));
2526 	rv = FALSE;
2527 	rw_rlock(&pvh_global_lock);
2528 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2529 	rw_rlock(lock);
2530 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2531 		if (PV_PMAP(pv) == pmap) {
2532 			rv = TRUE;
2533 			break;
2534 		}
2535 		loops++;
2536 		if (loops >= 16)
2537 			break;
2538 	}
2539 	rw_runlock(lock);
2540 	rw_runlock(&pvh_global_lock);
2541 	return (rv);
2542 }
2543 
2544 /*
2545  *	pmap_page_wired_mappings:
2546  *
2547  *	Return the number of managed mappings to the given physical page
2548  *	that are wired.
2549  */
2550 int
2551 pmap_page_wired_mappings(vm_page_t m)
2552 {
2553 	struct rwlock *lock;
2554 	pmap_t pmap;
2555 	pt_entry_t *l3;
2556 	pv_entry_t pv;
2557 	int count, md_gen;
2558 
2559 	if ((m->oflags & VPO_UNMANAGED) != 0)
2560 		return (0);
2561 	rw_rlock(&pvh_global_lock);
2562 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2563 	rw_rlock(lock);
2564 restart:
2565 	count = 0;
2566 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2567 		pmap = PV_PMAP(pv);
2568 		if (!PMAP_TRYLOCK(pmap)) {
2569 			md_gen = m->md.pv_gen;
2570 			rw_runlock(lock);
2571 			PMAP_LOCK(pmap);
2572 			rw_rlock(lock);
2573 			if (md_gen != m->md.pv_gen) {
2574 				PMAP_UNLOCK(pmap);
2575 				goto restart;
2576 			}
2577 		}
2578 		l3 = pmap_l3(pmap, pv->pv_va);
2579 		if (l3 != NULL && (pmap_load(l3) & ATTR_SW_WIRED) != 0)
2580 			count++;
2581 		PMAP_UNLOCK(pmap);
2582 	}
2583 	rw_runlock(lock);
2584 	rw_runlock(&pvh_global_lock);
2585 	return (count);
2586 }
2587 
2588 /*
2589  * Destroy all managed, non-wired mappings in the given user-space
2590  * pmap.  This pmap cannot be active on any processor besides the
2591  * caller.
2592  *
2593  * This function cannot be applied to the kernel pmap.  Moreover, it
2594  * is not intended for general use.  It is only to be used during
2595  * process termination.  Consequently, it can be implemented in ways
2596  * that make it faster than pmap_remove().  First, it can more quickly
2597  * destroy mappings by iterating over the pmap's collection of PV
2598  * entries, rather than searching the page table.  Second, it doesn't
2599  * have to test and clear the page table entries atomically, because
2600  * no processor is currently accessing the user address space.  In
2601  * particular, a page table entry's dirty bit won't change state once
2602  * this function starts.
2603  */
2604 void
2605 pmap_remove_pages(pmap_t pmap)
2606 {
2607 	pd_entry_t ptepde, *l2;
2608 	pt_entry_t *l3, tl3;
2609 	struct spglist free;
2610 	vm_page_t m;
2611 	pv_entry_t pv;
2612 	struct pv_chunk *pc, *npc;
2613 	struct rwlock *lock;
2614 	int64_t bit;
2615 	uint64_t inuse, bitmask;
2616 	int allfree, field, freed, idx;
2617 	vm_paddr_t pa;
2618 
2619 	lock = NULL;
2620 
2621 	SLIST_INIT(&free);
2622 	rw_rlock(&pvh_global_lock);
2623 	PMAP_LOCK(pmap);
2624 	TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) {
2625 		allfree = 1;
2626 		freed = 0;
2627 		for (field = 0; field < _NPCM; field++) {
2628 			inuse = ~pc->pc_map[field] & pc_freemask[field];
2629 			while (inuse != 0) {
2630 				bit = ffsl(inuse) - 1;
2631 				bitmask = 1UL << bit;
2632 				idx = field * 64 + bit;
2633 				pv = &pc->pc_pventry[idx];
2634 				inuse &= ~bitmask;
2635 
2636 				l2 = pmap_l2(pmap, pv->pv_va);
2637 				ptepde = pmap_load(l2);
2638 				l3 = pmap_l2_to_l3(l2, pv->pv_va);
2639 				tl3 = pmap_load(l3);
2640 
2641 /*
2642  * We cannot remove wired pages from a process' mapping at this time
2643  */
2644 				if (tl3 & ATTR_SW_WIRED) {
2645 					allfree = 0;
2646 					continue;
2647 				}
2648 
2649 				pa = tl3 & ~ATTR_MASK;
2650 
2651 				m = PHYS_TO_VM_PAGE(pa);
2652 				KASSERT(m->phys_addr == pa,
2653 				    ("vm_page_t %p phys_addr mismatch %016jx %016jx",
2654 				    m, (uintmax_t)m->phys_addr,
2655 				    (uintmax_t)tl3));
2656 
2657 				KASSERT((m->flags & PG_FICTITIOUS) != 0 ||
2658 				    m < &vm_page_array[vm_page_array_size],
2659 				    ("pmap_remove_pages: bad l3 %#jx",
2660 				    (uintmax_t)tl3));
2661 
2662 				if (pmap_is_current(pmap) &&
2663 				    pmap_l3_valid_cacheable(pmap_load(l3)))
2664 					cpu_dcache_wb_range(pv->pv_va, L3_SIZE);
2665 				pmap_load_clear(l3);
2666 				PTE_SYNC(l3);
2667 				pmap_invalidate_page(pmap, pv->pv_va);
2668 
2669 				/*
2670 				 * Update the vm_page_t clean/reference bits.
2671 				 */
2672 				if ((tl3 & ATTR_AP_RW_BIT) ==
2673 				    ATTR_AP(ATTR_AP_RW))
2674 					vm_page_dirty(m);
2675 
2676 				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m);
2677 
2678 				/* Mark free */
2679 				pc->pc_map[field] |= bitmask;
2680 
2681 				pmap_resident_count_dec(pmap, 1);
2682 				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2683 				m->md.pv_gen++;
2684 
2685 				pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free);
2686 				freed++;
2687 			}
2688 		}
2689 		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
2690 		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
2691 		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
2692 		if (allfree) {
2693 			TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
2694 			free_pv_chunk(pc);
2695 		}
2696 	}
2697 	pmap_invalidate_all(pmap);
2698 	if (lock != NULL)
2699 		rw_wunlock(lock);
2700 	rw_runlock(&pvh_global_lock);
2701 	PMAP_UNLOCK(pmap);
2702 	pmap_free_zero_pages(&free);
2703 }
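
/*
 * For example, if bit 3 of pc_map[0] is clear while the corresponding bit in
 * pc_freemask[0] is set, then "inuse" has bit 3 set, ffsl() yields 4, and the
 * pv entry at index 0 * 64 + 3 within the chunk is torn down; setting the bit
 * in pc_map[0] afterwards marks that pv entry free again.
 */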
2704 
2705 /*
2706  * This is used to check if a page has been accessed or modified.  Since
2707  * there is no hardware-managed dirty bit, a page is assumed to have been
2708  * modified if it is mapped read/write.
2709  */
2710 static boolean_t
2711 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified)
2712 {
2713 	struct rwlock *lock;
2714 	pv_entry_t pv;
2715 	pt_entry_t *l3, mask, value;
2716 	pmap_t pmap;
2717 	int md_gen;
2718 	boolean_t rv;
2719 
2720 	rv = FALSE;
2721 	rw_rlock(&pvh_global_lock);
2722 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2723 	rw_rlock(lock);
2724 restart:
2725 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2726 		pmap = PV_PMAP(pv);
2727 		if (!PMAP_TRYLOCK(pmap)) {
2728 			md_gen = m->md.pv_gen;
2729 			rw_runlock(lock);
2730 			PMAP_LOCK(pmap);
2731 			rw_rlock(lock);
2732 			if (md_gen != m->md.pv_gen) {
2733 				PMAP_UNLOCK(pmap);
2734 				goto restart;
2735 			}
2736 		}
2737 		l3 = pmap_l3(pmap, pv->pv_va);
2738 		mask = 0;
2739 		value = 0;
2740 		if (modified) {
2741 			mask |= ATTR_AP_RW_BIT;
2742 			value |= ATTR_AP(ATTR_AP_RW);
2743 		}
2744 		if (accessed) {
2745 			mask |= ATTR_AF | ATTR_DESCR_MASK;
2746 			value |= ATTR_AF | L3_PAGE;
2747 		}
2748 		rv = (pmap_load(l3) & mask) == value;
2749 		PMAP_UNLOCK(pmap);
2750 		if (rv)
2751 			goto out;
2752 	}
2753 out:
2754 	rw_runlock(lock);
2755 	rw_runlock(&pvh_global_lock);
2756 	return (rv);
2757 }
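
/*
 * For example, a valid L3 page descriptor with ATTR_AF set and a read/write
 * AP field tests positive above for both "accessed" and "modified", while
 * the same descriptor with a read-only AP field tests positive for
 * "accessed" only.
 */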
2758 
2759 /*
2760  *	pmap_is_modified:
2761  *
2762  *	Return whether or not the specified physical page was modified
2763  *	in any physical maps.
2764  */
2765 boolean_t
2766 pmap_is_modified(vm_page_t m)
2767 {
2768 
2769 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2770 	    ("pmap_is_modified: page %p is not managed", m));
2771 
2772 	/*
2773 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2774 	 * concurrently set while the object is locked.  Thus, if PGA_WRITEABLE
2775 	 * is clear, no PTEs can have PG_M set.
2776 	 */
2777 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2778 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2779 		return (FALSE);
2780 	return (pmap_page_test_mappings(m, FALSE, TRUE));
2781 }
2782 
2783 /*
2784  *	pmap_is_prefaultable:
2785  *
2786  *	Return whether or not the specified virtual address is eligible
2787  *	for prefault.
2788  */
2789 boolean_t
2790 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr)
2791 {
2792 	pt_entry_t *l3;
2793 	boolean_t rv;
2794 
2795 	rv = FALSE;
2796 	PMAP_LOCK(pmap);
2797 	l3 = pmap_l3(pmap, addr);
2798 	if (l3 != NULL && pmap_load(l3) != 0) {
2799 		rv = TRUE;
2800 	}
2801 	PMAP_UNLOCK(pmap);
2802 	return (rv);
2803 }
2804 
2805 /*
2806  *	pmap_is_referenced:
2807  *
2808  *	Return whether or not the specified physical page was referenced
2809  *	in any physical maps.
2810  */
2811 boolean_t
2812 pmap_is_referenced(vm_page_t m)
2813 {
2814 
2815 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2816 	    ("pmap_is_referenced: page %p is not managed", m));
2817 	return (pmap_page_test_mappings(m, TRUE, FALSE));
2818 }
2819 
2820 /*
2821  * Clear the write and modified bits in each of the given page's mappings.
2822  */
2823 void
2824 pmap_remove_write(vm_page_t m)
2825 {
2826 	pmap_t pmap;
2827 	struct rwlock *lock;
2828 	pv_entry_t pv;
2829 	pt_entry_t *l3, oldl3;
2830 	int md_gen;
2831 
2832 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2833 	    ("pmap_remove_write: page %p is not managed", m));
2834 
2835 	/*
2836 	 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be
2837 	 * set by another thread while the object is locked.  Thus,
2838 	 * if PGA_WRITEABLE is clear, no page table entries need updating.
2839 	 */
2840 	VM_OBJECT_ASSERT_WLOCKED(m->object);
2841 	if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0)
2842 		return;
2843 	rw_rlock(&pvh_global_lock);
2844 	lock = VM_PAGE_TO_PV_LIST_LOCK(m);
2845 retry_pv_loop:
2846 	rw_wlock(lock);
2847 	TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) {
2848 		pmap = PV_PMAP(pv);
2849 		if (!PMAP_TRYLOCK(pmap)) {
2850 			md_gen = m->md.pv_gen;
2851 			rw_wunlock(lock);
2852 			PMAP_LOCK(pmap);
2853 			rw_wlock(lock);
2854 			if (md_gen != m->md.pv_gen) {
2855 				PMAP_UNLOCK(pmap);
2856 				rw_wunlock(lock);
2857 				goto retry_pv_loop;
2858 			}
2859 		}
2860 		l3 = pmap_l3(pmap, pv->pv_va);
2861 retry:
2862 		oldl3 = pmap_load(l3);
2863 		if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) {
2864 			if (!atomic_cmpset_long(l3, oldl3,
2865 			    oldl3 | ATTR_AP(ATTR_AP_RO)))
2866 				goto retry;
2867 			if ((oldl3 & ATTR_AF) != 0)
2868 				vm_page_dirty(m);
2869 			pmap_invalidate_page(pmap, pv->pv_va);
2870 		}
2871 		PMAP_UNLOCK(pmap);
2872 	}
2873 	rw_wunlock(lock);
2874 	vm_page_aflag_clear(m, PGA_WRITEABLE);
2875 	rw_runlock(&pvh_global_lock);
2876 }
2877 
2878 static __inline boolean_t
2879 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte)
2880 {
2881 
2882 	return (FALSE);
2883 }
2884 
2885 #define	PMAP_TS_REFERENCED_MAX	5
2886 
2887 /*
2888  *	pmap_ts_referenced:
2889  *
2890  *	Return a count of reference bits for a page, clearing those bits.
2891  *	It is not necessary for every reference bit to be cleared, but it
2892  *	is necessary that 0 only be returned when there are truly no
2893  *	reference bits set.
2894  *
2895  *	XXX: The exact number of bits to check and clear is a matter that
2896  *	should be tested and standardized at some point in the future for
2897  *	optimal aging of shared pages.
2898  */
2899 int
2900 pmap_ts_referenced(vm_page_t m)
2901 {
2902 	pv_entry_t pv, pvf;
2903 	pmap_t pmap;
2904 	struct rwlock *lock;
2905 	pd_entry_t *l2p, l2;
2906 	pt_entry_t *l3;
2907 	vm_paddr_t pa;
2908 	int cleared, md_gen, not_cleared;
2909 	struct spglist free;
2910 
2911 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
2912 	    ("pmap_ts_referenced: page %p is not managed", m));
2913 	SLIST_INIT(&free);
2914 	cleared = 0;
2915 	pa = VM_PAGE_TO_PHYS(m);
2916 	lock = PHYS_TO_PV_LIST_LOCK(pa);
2917 	rw_rlock(&pvh_global_lock);
2918 	rw_wlock(lock);
2919 retry:
2920 	not_cleared = 0;
2921 	if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL)
2922 		goto out;
2923 	pv = pvf;
2924 	do {
2925 		if (pvf == NULL)
2926 			pvf = pv;
2927 		pmap = PV_PMAP(pv);
2928 		if (!PMAP_TRYLOCK(pmap)) {
2929 			md_gen = m->md.pv_gen;
2930 			rw_wunlock(lock);
2931 			PMAP_LOCK(pmap);
2932 			rw_wlock(lock);
2933 			if (md_gen != m->md.pv_gen) {
2934 				PMAP_UNLOCK(pmap);
2935 				goto retry;
2936 			}
2937 		}
2938 		l2p = pmap_l2(pmap, pv->pv_va);
2939 		KASSERT(l2p != NULL, ("pmap_ts_referenced: no l2 table found"));
2940 		l2 = pmap_load(l2p);
2941 		KASSERT((l2 & ATTR_DESCR_MASK) == L2_TABLE,
2942 		    ("pmap_ts_referenced: found an invalid l2 table"));
2943 		l3 = pmap_l2_to_l3(l2p, pv->pv_va);
2944 		if ((pmap_load(l3) & ATTR_AF) != 0) {
2945 			if (safe_to_clear_referenced(pmap, pmap_load(l3))) {
2946 				/*
2947 				 * TODO: We don't handle the access flag
2948 				 * at all. We need to be able to set it in
2949 				 * the exception handler.
2950 				 */
2951 				panic("ARM64TODO: safe_to_clear_referenced\n");
2952 			} else if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) {
2953 				/*
2954 				 * Wired pages cannot be paged out so
2955 				 * doing accessed bit emulation for
2956 				 * them is wasted effort. We do the
2957 				 * hard work for unwired pages only.
2958 				 */
2959 				pmap_remove_l3(pmap, l3, pv->pv_va, l2,
2960 				    &free, &lock);
2961 				pmap_invalidate_page(pmap, pv->pv_va);
2962 				cleared++;
2963 				if (pvf == pv)
2964 					pvf = NULL;
2965 				pv = NULL;
2966 				KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m),
2967 				    ("inconsistent pv lock %p %p for page %p",
2968 				    lock, VM_PAGE_TO_PV_LIST_LOCK(m), m));
2969 			} else
2970 				not_cleared++;
2971 		}
2972 		PMAP_UNLOCK(pmap);
2973 		/* Rotate the PV list if it has more than one entry. */
2974 		if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) {
2975 			TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
2976 			TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next);
2977 			m->md.pv_gen++;
2978 		}
2979 	} while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared +
2980 	    not_cleared < PMAP_TS_REFERENCED_MAX);
2981 out:
2982 	rw_wunlock(lock);
2983 	rw_runlock(&pvh_global_lock);
2984 	pmap_free_zero_pages(&free);
2985 	return (cleared + not_cleared);
2986 }
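
/*
 * For example, a page mapped by eight pmaps reports at most
 * PMAP_TS_REFERENCED_MAX (5) references per call; surviving pv entries are
 * rotated to the tail of the list above, so successive calls sample the
 * remaining mappings rather than the same ones each time.
 */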
2987 
2988 /*
2989  *	Apply the given advice to the specified range of addresses within the
2990  *	given pmap.  Depending on the advice, clear the referenced and/or
2991  *	modified flags in each mapping and set the mapped page's dirty field.
2992  */
2993 void
2994 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice)
2995 {
2996 }
2997 
2998 /*
2999  *	Clear the modify bits on the specified physical page.
3000  */
3001 void
3002 pmap_clear_modify(vm_page_t m)
3003 {
3004 
3005 	KASSERT((m->oflags & VPO_UNMANAGED) == 0,
3006 	    ("pmap_clear_modify: page %p is not managed", m));
3007 	VM_OBJECT_ASSERT_WLOCKED(m->object);
3008 	KASSERT(!vm_page_xbusied(m),
3009 	    ("pmap_clear_modify: page %p is exclusive busied", m));
3010 
3011 	/*
3012 	 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set.
3013 	 * If the object containing the page is locked and the page is not
3014 	 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set.
3015 	 */
3016 	if ((m->aflags & PGA_WRITEABLE) == 0)
3017 		return;
3018 
3019 	/* ARM64TODO: We lack support for tracking if a page is modified */
3020 }
3021 
3022 void *
3023 pmap_mapbios(vm_paddr_t pa, vm_size_t size)
3024 {
3025 
3026 	return ((void *)PHYS_TO_DMAP(pa));
3027 }
3028 
3029 void
3030 pmap_unmapbios(vm_paddr_t pa, vm_size_t size)
3031 {
3032 }
3033 
3034 /*
3035  * Sets the memory attribute for the specified page.
3036  */
3037 void
3038 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma)
3039 {
3040 
3041 	m->md.pv_memattr = ma;
3042 
3043 	/*
3044 	 * ARM64TODO: Implement the below (from the amd64 pmap)
3045 	 * If "m" is a normal page, update its direct mapping.  This update
3046 	 * can be relied upon to perform any cache operations that are
3047 	 * required for data coherence.
3048 	 */
3049 	if ((m->flags & PG_FICTITIOUS) == 0 &&
3050 	    PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m)))
3051 		panic("ARM64TODO: pmap_page_set_memattr");
3052 }
3053 
3054 /*
3055  * perform the pmap work for mincore
3056  */
3057 int
3058 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa)
3059 {
3060 	pd_entry_t *l1p, l1;
3061 	pd_entry_t *l2p, l2;
3062 	pt_entry_t *l3p, l3;
3063 	vm_paddr_t pa;
3064 	bool managed;
3065 	int val;
3066 
3067 	PMAP_LOCK(pmap);
3068 retry:
3069 	pa = 0;
3070 	val = 0;
3071 	managed = false;
3072 
3073 	l1p = pmap_l1(pmap, addr);
3074 	if (l1p == NULL) /* No l1 */
3075 		goto done;
3076 	l1 = pmap_load(l1p);
3077 	if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) {
3078 		pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET);
3079 		managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3080 		val = MINCORE_SUPER | MINCORE_INCORE;
3081 		if (pmap_page_dirty(l1))
3082 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3083 		if ((l1 & ATTR_AF) == ATTR_AF)
3084 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3085 		goto done;
3086 	}
3087 
3088 	l2p = pmap_l1_to_l2(l1p, addr);
3089 	if (l2p == NULL) /* No l2 */
3090 		goto done;
3091 	l2 = pmap_load(l2p);
3092 	if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) {
3093 		pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET);
3094 		managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3095 		val = MINCORE_SUPER | MINCORE_INCORE;
3096 		if (pmap_page_dirty(l2))
3097 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3098 		if ((l2 & ATTR_AF) == ATTR_AF)
3099 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3100 		goto done;
3101 	}
3102 
3103 	l3p = pmap_l2_to_l3(l2p, addr);
3104 	if (l3p == NULL) /* No l3 */
3105 		goto done;
3106 	l3 = pmap_load(l3p);
3107 	if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
3108 		pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
3109 		managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
3110 		val = MINCORE_INCORE;
3111 		if (pmap_page_dirty(l3))
3112 			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
3113 		if ((l3 & ATTR_AF) == ATTR_AF)
3114 			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
3115 	}
3116 
3117 done:
3118 	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
3119 	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
3120 		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
3121 		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
3122 			goto retry;
3123 	} else
3124 		PA_UNLOCK_COND(*locked_pa);
3125 	PMAP_UNLOCK(pmap);
3126 
3127 	return (val);
3128 }
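
/*
 * The bits assembled above surface to userland through mincore(2); e.g. a
 * hypothetical userspace check might look like:
 *
 *	char vec[1];
 *
 *	if (mincore(addr, PAGE_SIZE, vec) == 0 &&
 *	    (vec[0] & MINCORE_INCORE) != 0)
 *		printf("resident%s\n",
 *		    (vec[0] & MINCORE_SUPER) != 0 ? " (superpage)" : "");
 */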
3129 
3130 void
3131 pmap_activate(struct thread *td)
3132 {
3133 	pmap_t	pmap;
3134 
3135 	critical_enter();
3136 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
3137 	td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1);
3138 	__asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr));
3139 	pmap_invalidate_all(pmap);
3140 	critical_exit();
3141 }
3142 
3143 void
3144 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
3145 {
3146 
3147 	if (va >= VM_MIN_KERNEL_ADDRESS) {
3148 		cpu_icache_sync_range(va, sz);
3149 	} else {
3150 		u_int len, offset;
3151 		vm_paddr_t pa;
3152 
3153 		/* Find the length of data in this page to flush */
3154 		offset = va & PAGE_MASK;
3155 		len = imin(PAGE_SIZE - offset, sz);
3156 
3157 		while (sz != 0) {
3158 			/* Extract the physical address & find it in the DMAP */
3159 			pa = pmap_extract(pmap, va);
3160 			if (pa != 0)
3161 				cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
3162 
3163 			/* Move to the next page */
3164 			sz -= len;
3165 			va += len;
3166 			/* Set the length for the next iteration */
3167 			len = imin(PAGE_SIZE, sz);
3168 		}
3169 	}
3170 }
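
/*
 * For example, with 4K pages, a sync of 0x1800 bytes starting at offset
 * 0x800 within a user page is performed as two chunks of 0x800 and 0x1000
 * bytes, each resolved with pmap_extract() and synced through its DMAP
 * alias; unmapped pages (pa == 0) are simply skipped.
 */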
3171 
3172 /*
3173  *	Increase the starting virtual address of the given mapping if a
3174  *	different alignment might result in more superpage mappings.
3175  */
3176 void
3177 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
3178     vm_offset_t *addr, vm_size_t size)
3179 {
3180 }
3181 
3182 /**
3183  * Get the kernel virtual address of a set of physical pages. If there are
3184  * physical addresses not covered by the DMAP perform a transient mapping
3185  * that will be removed when calling pmap_unmap_io_transient.
3186  *
3187  * \param page        The pages for which the caller wishes to obtain
3188  *                    kernel virtual addresses.
3189  * \param vaddr       On return contains the kernel virtual memory address
3190  *                    of the pages passed in the page parameter.
3191  * \param count       Number of pages passed in.
3192  * \param can_fault   TRUE if the thread using the mapped pages can take
3193  *                    page faults, FALSE otherwise.
3194  *
3195  * \returns TRUE if the caller must call pmap_unmap_io_transient when
3196  *          finished or FALSE otherwise.
3197  *
3198  */
3199 boolean_t
3200 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3201     boolean_t can_fault)
3202 {
3203 	vm_paddr_t paddr;
3204 	boolean_t needs_mapping;
3205 	int error, i;
3206 
3207 	/*
3208 	 * Allocate any KVA space that we need, this is done in a separate
3209 	 * loop to prevent calling vmem_alloc while pinned.
3210 	 */
3211 	needs_mapping = FALSE;
3212 	for (i = 0; i < count; i++) {
3213 		paddr = VM_PAGE_TO_PHYS(page[i]);
3214 		if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) {
3215 			error = vmem_alloc(kernel_arena, PAGE_SIZE,
3216 			    M_BESTFIT | M_WAITOK, &vaddr[i]);
3217 			KASSERT(error == 0, ("vmem_alloc failed: %d", error));
3218 			needs_mapping = TRUE;
3219 		} else {
3220 			vaddr[i] = PHYS_TO_DMAP(paddr);
3221 		}
3222 	}
3223 
3224 	/* Exit early if everything is covered by the DMAP */
3225 	if (!needs_mapping)
3226 		return (FALSE);
3227 
3228 	if (!can_fault)
3229 		sched_pin();
3230 	for (i = 0; i < count; i++) {
3231 		paddr = VM_PAGE_TO_PHYS(page[i]);
3232 		if (paddr >= DMAP_MAX_PHYSADDR) {
3233 			panic(
3234 			   "pmap_map_io_transient: TODO: Map out of DMAP data");
3235 		}
3236 	}
3237 
3238 	return (needs_mapping);
3239 }
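
#if 0
/*
 * A minimal usage sketch (hypothetical caller, not part of this file):
 * map a set of pages for temporary kernel access and tear the mapping down
 * only when one was actually created.
 */
static void
example_io_transient(vm_page_t ma[], vm_offset_t va[], int count)
{
	boolean_t mapped;

	mapped = pmap_map_io_transient(ma, va, count, FALSE);
	/* ... access the pages through va[0 .. count - 1] ... */
	if (mapped)
		pmap_unmap_io_transient(ma, va, count, FALSE);
}
#endif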
3240 
3241 void
3242 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
3243     boolean_t can_fault)
3244 {
3245 	vm_paddr_t paddr;
3246 	int i;
3247 
3248 	if (!can_fault)
3249 		sched_unpin();
3250 	for (i = 0; i < count; i++) {
3251 		paddr = VM_PAGE_TO_PHYS(page[i]);
3252 		if (paddr >= DMAP_MAX_PHYSADDR) {
3253 			panic("ARM64TODO: pmap_unmap_io_transient: Unmap data");
3254 		}
3255 	}
3256 }
3257