1 /*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 2003 Peter Wemm 9 * All rights reserved. 10 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 11 * All rights reserved. 12 * Copyright (c) 2014 Andrew Turner 13 * All rights reserved. 14 * Copyright (c) 2014-2016 The FreeBSD Foundation 15 * All rights reserved. 16 * 17 * This code is derived from software contributed to Berkeley by 18 * the Systems Programming Group of the University of Utah Computer 19 * Science Department and William Jolitz of UUNET Technologies Inc. 20 * 21 * This software was developed by Andrew Turner under sponsorship from 22 * the FreeBSD Foundation. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 1. Redistributions of source code must retain the above copyright 28 * notice, this list of conditions and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 3. All advertising materials mentioning features or use of this software 33 * must display the following acknowledgement: 34 * This product includes software developed by the University of 35 * California, Berkeley and its contributors. 36 * 4. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 53 */ 54 /*- 55 * Copyright (c) 2003 Networks Associates Technology, Inc. 56 * All rights reserved. 57 * 58 * This software was developed for the FreeBSD Project by Jake Burkholder, 59 * Safeport Network Services, and Network Associates Laboratories, the 60 * Security Research Division of Network Associates, Inc. under 61 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 62 * CHATS research program. 63 * 64 * Redistribution and use in source and binary forms, with or without 65 * modification, are permitted provided that the following conditions 66 * are met: 67 * 1. Redistributions of source code must retain the above copyright 68 * notice, this list of conditions and the following disclaimer. 69 * 2. 
Redistributions in binary form must reproduce the above copyright 70 * notice, this list of conditions and the following disclaimer in the 71 * documentation and/or other materials provided with the distribution. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 */ 85 86 #include <sys/cdefs.h> 87 __FBSDID("$FreeBSD$"); 88 89 /* 90 * Manages physical address maps. 91 * 92 * Since the information managed by this module is 93 * also stored by the logical address mapping module, 94 * this module may throw away valid virtual-to-physical 95 * mappings at almost any time. However, invalidations 96 * of virtual-to-physical mappings must be done as 97 * requested. 98 * 99 * In order to cope with hardware architectures which 100 * make virtual-to-physical map invalidates expensive, 101 * this module may delay invalidate or reduced protection 102 * operations until such time as they are actually 103 * necessary. This module is given full information as 104 * to which processors are currently using which maps, 105 * and to when physical maps must be made correct. 106 */ 107 108 #include "opt_vm.h" 109 110 #include <sys/param.h> 111 #include <sys/bitstring.h> 112 #include <sys/bus.h> 113 #include <sys/systm.h> 114 #include <sys/kernel.h> 115 #include <sys/ktr.h> 116 #include <sys/lock.h> 117 #include <sys/malloc.h> 118 #include <sys/mman.h> 119 #include <sys/msgbuf.h> 120 #include <sys/mutex.h> 121 #include <sys/proc.h> 122 #include <sys/rwlock.h> 123 #include <sys/sx.h> 124 #include <sys/vmem.h> 125 #include <sys/vmmeter.h> 126 #include <sys/sched.h> 127 #include <sys/sysctl.h> 128 #include <sys/_unrhdr.h> 129 #include <sys/smp.h> 130 131 #include <vm/vm.h> 132 #include <vm/vm_param.h> 133 #include <vm/vm_kern.h> 134 #include <vm/vm_page.h> 135 #include <vm/vm_map.h> 136 #include <vm/vm_object.h> 137 #include <vm/vm_extern.h> 138 #include <vm/vm_pageout.h> 139 #include <vm/vm_pager.h> 140 #include <vm/vm_phys.h> 141 #include <vm/vm_radix.h> 142 #include <vm/vm_reserv.h> 143 #include <vm/uma.h> 144 145 #include <machine/machdep.h> 146 #include <machine/md_var.h> 147 #include <machine/pcb.h> 148 149 #include <arm/include/physmem.h> 150 151 #define NL0PG (PAGE_SIZE/(sizeof (pd_entry_t))) 152 #define NL1PG (PAGE_SIZE/(sizeof (pd_entry_t))) 153 #define NL2PG (PAGE_SIZE/(sizeof (pd_entry_t))) 154 #define NL3PG (PAGE_SIZE/(sizeof (pt_entry_t))) 155 156 #define NUL0E L0_ENTRIES 157 #define NUL1E (NUL0E * NL1PG) 158 #define NUL2E (NUL1E * NL2PG) 159 160 #if !defined(DIAGNOSTIC) 161 #ifdef __GNUC_GNU_INLINE__ 162 #define PMAP_INLINE __attribute__((__gnu_inline__)) inline 163 #else 164 #define PMAP_INLINE extern inline 165 #endif 166 #else 167 #define PMAP_INLINE 168 #endif 169 170 /* 171 * These are configured by the mair_el1 register. 
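 * Each value below is an attribute index (AttrIndx) into mair_el1 and is
 * applied to a mapping with ATTR_IDX().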
This is set up in locore.S 172 */ 173 #define DEVICE_MEMORY 0 174 #define UNCACHED_MEMORY 1 175 #define CACHED_MEMORY 2 176 177 178 #ifdef PV_STATS 179 #define PV_STAT(x) do { x ; } while (0) 180 #else 181 #define PV_STAT(x) do { } while (0) 182 #endif 183 184 #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) 185 #define pa_to_pvh(pa) (&pv_table[pmap_l2_pindex(pa)]) 186 187 #define NPV_LIST_LOCKS MAXCPU 188 189 #define PHYS_TO_PV_LIST_LOCK(pa) \ 190 (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) 191 192 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ 193 struct rwlock **_lockp = (lockp); \ 194 struct rwlock *_new_lock; \ 195 \ 196 _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ 197 if (_new_lock != *_lockp) { \ 198 if (*_lockp != NULL) \ 199 rw_wunlock(*_lockp); \ 200 *_lockp = _new_lock; \ 201 rw_wlock(*_lockp); \ 202 } \ 203 } while (0) 204 205 #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ 206 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) 207 208 #define RELEASE_PV_LIST_LOCK(lockp) do { \ 209 struct rwlock **_lockp = (lockp); \ 210 \ 211 if (*_lockp != NULL) { \ 212 rw_wunlock(*_lockp); \ 213 *_lockp = NULL; \ 214 } \ 215 } while (0) 216 217 #define VM_PAGE_TO_PV_LIST_LOCK(m) \ 218 PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) 219 220 struct pmap kernel_pmap_store; 221 222 /* Used for mapping ACPI memory before VM is initialized */ 223 #define PMAP_PREINIT_MAPPING_COUNT 32 224 #define PMAP_PREINIT_MAPPING_SIZE (PMAP_PREINIT_MAPPING_COUNT * L2_SIZE) 225 static vm_offset_t preinit_map_va; /* Start VA of pre-init mapping space */ 226 static int vm_initialized = 0; /* No need to use pre-init maps when set */ 227 228 /* 229 * Reserve a few L2 blocks starting from 'preinit_map_va' pointer. 230 * Always map entire L2 block for simplicity. 231 * VA of L2 block = preinit_map_va + i * L2_SIZE 232 */ 233 static struct pmap_preinit_mapping { 234 vm_paddr_t pa; 235 vm_offset_t va; 236 vm_size_t size; 237 } pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT]; 238 239 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 240 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 241 vm_offset_t kernel_vm_end = 0; 242 243 /* 244 * Data for the pv entry allocation mechanism. 245 */ 246 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 247 static struct mtx pv_chunks_mutex; 248 static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; 249 static struct md_page *pv_table; 250 static struct md_page pv_dummy; 251 252 vm_paddr_t dmap_phys_base; /* The start of the dmap region */ 253 vm_paddr_t dmap_phys_max; /* The limit of the dmap region */ 254 vm_offset_t dmap_max_addr; /* The virtual address limit of the dmap */ 255 256 /* This code assumes all L1 DMAP entries will be used */ 257 CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS); 258 CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS); 259 260 #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) 261 extern pt_entry_t pagetable_dmap[]; 262 263 #define PHYSMAP_SIZE (2 * (VM_PHYSSEG_MAX - 1)) 264 static vm_paddr_t physmap[PHYSMAP_SIZE]; 265 static u_int physmap_idx; 266 267 static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters"); 268 269 static int superpages_enabled = 1; 270 SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled, 271 CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0, 272 "Are large page mappings enabled?"); 273 274 /* 275 * Internal flags for pmap_enter()'s helper functions. 
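 * The values are taken from the high bits so that they cannot collide with
 * the MI PMAP_ENTER_* flags passed in by callers of pmap_enter().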
276 */ 277 #define PMAP_ENTER_NORECLAIM 0x1000000 /* Don't reclaim PV entries. */ 278 #define PMAP_ENTER_NOREPLACE 0x2000000 /* Don't replace mappings. */ 279 280 static void free_pv_chunk(struct pv_chunk *pc); 281 static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 282 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); 283 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); 284 static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 285 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 286 vm_offset_t va); 287 288 static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode); 289 static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode); 290 static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va); 291 static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, 292 vm_offset_t va, struct rwlock **lockp); 293 static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va); 294 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 295 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); 296 static int pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, 297 u_int flags, vm_page_t m, struct rwlock **lockp); 298 static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, 299 pd_entry_t l1e, struct spglist *free, struct rwlock **lockp); 300 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, 301 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp); 302 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 303 vm_page_t m, struct rwlock **lockp); 304 305 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, 306 struct rwlock **lockp); 307 308 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, 309 struct spglist *free); 310 static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); 311 static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va); 312 313 /* 314 * These load the old table data and store the new value. 315 * They need to be atomic as the System MMU may write to the table at 316 * the same time as the CPU. 
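 * A plain dereference is enough for pmap_load() because aligned 64-bit
 * loads are single-copy atomic on arm64.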
317 */ 318 #define pmap_load_store(table, entry) atomic_swap_64(table, entry) 319 #define pmap_set(table, mask) atomic_set_64(table, mask) 320 #define pmap_load_clear(table) atomic_swap_64(table, 0) 321 #define pmap_load(table) (*table) 322 323 /********************/ 324 /* Inline functions */ 325 /********************/ 326 327 static __inline void 328 pagecopy(void *s, void *d) 329 { 330 331 memcpy(d, s, PAGE_SIZE); 332 } 333 334 static __inline pd_entry_t * 335 pmap_l0(pmap_t pmap, vm_offset_t va) 336 { 337 338 return (&pmap->pm_l0[pmap_l0_index(va)]); 339 } 340 341 static __inline pd_entry_t * 342 pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va) 343 { 344 pd_entry_t *l1; 345 346 l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); 347 return (&l1[pmap_l1_index(va)]); 348 } 349 350 static __inline pd_entry_t * 351 pmap_l1(pmap_t pmap, vm_offset_t va) 352 { 353 pd_entry_t *l0; 354 355 l0 = pmap_l0(pmap, va); 356 if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE) 357 return (NULL); 358 359 return (pmap_l0_to_l1(l0, va)); 360 } 361 362 static __inline pd_entry_t * 363 pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va) 364 { 365 pd_entry_t *l2; 366 367 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 368 return (&l2[pmap_l2_index(va)]); 369 } 370 371 static __inline pd_entry_t * 372 pmap_l2(pmap_t pmap, vm_offset_t va) 373 { 374 pd_entry_t *l1; 375 376 l1 = pmap_l1(pmap, va); 377 if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE) 378 return (NULL); 379 380 return (pmap_l1_to_l2(l1, va)); 381 } 382 383 static __inline pt_entry_t * 384 pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va) 385 { 386 pt_entry_t *l3; 387 388 l3 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK); 389 return (&l3[pmap_l3_index(va)]); 390 } 391 392 /* 393 * Returns the lowest valid pde for a given virtual address. 394 * The next level may or may not point to a valid page or block. 395 */ 396 static __inline pd_entry_t * 397 pmap_pde(pmap_t pmap, vm_offset_t va, int *level) 398 { 399 pd_entry_t *l0, *l1, *l2, desc; 400 401 l0 = pmap_l0(pmap, va); 402 desc = pmap_load(l0) & ATTR_DESCR_MASK; 403 if (desc != L0_TABLE) { 404 *level = -1; 405 return (NULL); 406 } 407 408 l1 = pmap_l0_to_l1(l0, va); 409 desc = pmap_load(l1) & ATTR_DESCR_MASK; 410 if (desc != L1_TABLE) { 411 *level = 0; 412 return (l0); 413 } 414 415 l2 = pmap_l1_to_l2(l1, va); 416 desc = pmap_load(l2) & ATTR_DESCR_MASK; 417 if (desc != L2_TABLE) { 418 *level = 1; 419 return (l1); 420 } 421 422 *level = 2; 423 return (l2); 424 } 425 426 /* 427 * Returns the lowest valid pte block or table entry for a given virtual 428 * address. If there are no valid entries return NULL and set the level to 429 * the first invalid level. 
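 * For example, a 2MB block mapping is returned as its L2 entry with *level
 * set to 2, while a 4KB page mapping is returned as its L3 entry with
 * *level set to 3.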
 */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l1, *l2, desc;
	pt_entry_t *l3;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL) {
		*level = 0;
		return (NULL);
	}
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc == L1_BLOCK) {
		*level = 1;
		return (l1);
	}

	if (desc != L1_TABLE) {
		*level = 1;
		return (NULL);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc == L2_BLOCK) {
		*level = 2;
		return (l2);
	}

	if (desc != L2_TABLE) {
		*level = 2;
		return (NULL);
	}

	*level = 3;
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
		return (NULL);

	return (l3);
}

bool
pmap_ps_enabled(pmap_t pmap __unused)
{

	return (superpages_enabled != 0);
}

bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
    pd_entry_t **l2, pt_entry_t **l3)
{
	pd_entry_t *l0p, *l1p, *l2p;

	if (pmap->pm_l0 == NULL)
		return (false);

	l0p = pmap_l0(pmap, va);
	*l0 = l0p;

	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
		return (false);

	l1p = pmap_l0_to_l1(l0p, va);
	*l1 = l1p;

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
		*l2 = NULL;
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
		return (false);

	l2p = pmap_l1_to_l2(l1p, va);
	*l2 = l2p;

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) != L2_TABLE)
		return (false);

	*l3 = pmap_l2_to_l3(l2p, va);

	return (true);
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}


CTASSERT(L1_BLOCK == L2_BLOCK);

/*
 * Checks if the page is dirty. We currently lack proper tracking of this on
 * arm64, so for now assume that if a page is mapped read/write and has been
 * accessed, it is dirty.
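 * In the check below this means both ATTR_AF and a read/write access
 * permission (ATTR_AP_RW) are set in the pte.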
536 */ 537 static inline int 538 pmap_page_dirty(pt_entry_t pte) 539 { 540 541 return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == 542 (ATTR_AF | ATTR_AP(ATTR_AP_RW))); 543 } 544 545 static __inline void 546 pmap_resident_count_inc(pmap_t pmap, int count) 547 { 548 549 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 550 pmap->pm_stats.resident_count += count; 551 } 552 553 static __inline void 554 pmap_resident_count_dec(pmap_t pmap, int count) 555 { 556 557 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 558 KASSERT(pmap->pm_stats.resident_count >= count, 559 ("pmap %p resident count underflow %ld %d", pmap, 560 pmap->pm_stats.resident_count, count)); 561 pmap->pm_stats.resident_count -= count; 562 } 563 564 static pt_entry_t * 565 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, 566 u_int *l2_slot) 567 { 568 pt_entry_t *l2; 569 pd_entry_t *l1; 570 571 l1 = (pd_entry_t *)l1pt; 572 *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; 573 574 /* Check locore has used a table L1 map */ 575 KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, 576 ("Invalid bootstrap L1 table")); 577 /* Find the address of the L2 table */ 578 l2 = (pt_entry_t *)init_pt_va; 579 *l2_slot = pmap_l2_index(va); 580 581 return (l2); 582 } 583 584 static vm_paddr_t 585 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) 586 { 587 u_int l1_slot, l2_slot; 588 pt_entry_t *l2; 589 590 l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); 591 592 return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); 593 } 594 595 static vm_offset_t 596 pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa, 597 vm_offset_t freemempos) 598 { 599 pt_entry_t *l2; 600 vm_offset_t va; 601 vm_paddr_t l2_pa, pa; 602 u_int l1_slot, l2_slot, prev_l1_slot; 603 int i; 604 605 dmap_phys_base = min_pa & ~L1_OFFSET; 606 dmap_phys_max = 0; 607 dmap_max_addr = 0; 608 l2 = NULL; 609 prev_l1_slot = -1; 610 611 #define DMAP_TABLES ((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT) 612 memset(pagetable_dmap, 0, PAGE_SIZE * DMAP_TABLES); 613 614 for (i = 0; i < (physmap_idx * 2); i += 2) { 615 pa = physmap[i] & ~L2_OFFSET; 616 va = pa - dmap_phys_base + DMAP_MIN_ADDRESS; 617 618 /* Create L2 mappings at the start of the region */ 619 if ((pa & L1_OFFSET) != 0) { 620 l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); 621 if (l1_slot != prev_l1_slot) { 622 prev_l1_slot = l1_slot; 623 l2 = (pt_entry_t *)freemempos; 624 l2_pa = pmap_early_vtophys(kern_l1, 625 (vm_offset_t)l2); 626 freemempos += PAGE_SIZE; 627 628 pmap_load_store(&pagetable_dmap[l1_slot], 629 (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE); 630 631 memset(l2, 0, PAGE_SIZE); 632 } 633 KASSERT(l2 != NULL, 634 ("pmap_bootstrap_dmap: NULL l2 map")); 635 for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1]; 636 pa += L2_SIZE, va += L2_SIZE) { 637 /* 638 * We are on a boundary, stop to 639 * create a level 1 block 640 */ 641 if ((pa & L1_OFFSET) == 0) 642 break; 643 644 l2_slot = pmap_l2_index(va); 645 KASSERT(l2_slot != 0, ("...")); 646 pmap_load_store(&l2[l2_slot], 647 (pa & ~L2_OFFSET) | ATTR_DEFAULT | ATTR_XN | 648 ATTR_IDX(CACHED_MEMORY) | L2_BLOCK); 649 } 650 KASSERT(va == (pa - dmap_phys_base + DMAP_MIN_ADDRESS), 651 ("...")); 652 } 653 654 for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1] && 655 (physmap[i + 1] - pa) >= L1_SIZE; 656 pa += L1_SIZE, va += L1_SIZE) { 657 l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); 658 pmap_load_store(&pagetable_dmap[l1_slot], 659 (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN | 660 ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); 661 } 662 663 /* Create L2 mappings at the end of the region */ 664 
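		/*
		 * Anything not covered by the L1 blocks above is mapped
		 * here with L2 blocks, allocating a new L2 table when
		 * needed.
		 */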
if (pa < physmap[i + 1]) { 665 l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT); 666 if (l1_slot != prev_l1_slot) { 667 prev_l1_slot = l1_slot; 668 l2 = (pt_entry_t *)freemempos; 669 l2_pa = pmap_early_vtophys(kern_l1, 670 (vm_offset_t)l2); 671 freemempos += PAGE_SIZE; 672 673 pmap_load_store(&pagetable_dmap[l1_slot], 674 (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE); 675 676 memset(l2, 0, PAGE_SIZE); 677 } 678 KASSERT(l2 != NULL, 679 ("pmap_bootstrap_dmap: NULL l2 map")); 680 for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1]; 681 pa += L2_SIZE, va += L2_SIZE) { 682 l2_slot = pmap_l2_index(va); 683 pmap_load_store(&l2[l2_slot], 684 (pa & ~L2_OFFSET) | ATTR_DEFAULT | ATTR_XN | 685 ATTR_IDX(CACHED_MEMORY) | L2_BLOCK); 686 } 687 } 688 689 if (pa > dmap_phys_max) { 690 dmap_phys_max = pa; 691 dmap_max_addr = va; 692 } 693 } 694 695 cpu_tlb_flushID(); 696 697 return (freemempos); 698 } 699 700 static vm_offset_t 701 pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) 702 { 703 vm_offset_t l2pt; 704 vm_paddr_t pa; 705 pd_entry_t *l1; 706 u_int l1_slot; 707 708 KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); 709 710 l1 = (pd_entry_t *)l1pt; 711 l1_slot = pmap_l1_index(va); 712 l2pt = l2_start; 713 714 for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { 715 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); 716 717 pa = pmap_early_vtophys(l1pt, l2pt); 718 pmap_load_store(&l1[l1_slot], 719 (pa & ~Ln_TABLE_MASK) | L1_TABLE); 720 l2pt += PAGE_SIZE; 721 } 722 723 /* Clean the L2 page table */ 724 memset((void *)l2_start, 0, l2pt - l2_start); 725 726 return l2pt; 727 } 728 729 static vm_offset_t 730 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) 731 { 732 vm_offset_t l3pt; 733 vm_paddr_t pa; 734 pd_entry_t *l2; 735 u_int l2_slot; 736 737 KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); 738 739 l2 = pmap_l2(kernel_pmap, va); 740 l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE); 741 l2_slot = pmap_l2_index(va); 742 l3pt = l3_start; 743 744 for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) { 745 KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index")); 746 747 pa = pmap_early_vtophys(l1pt, l3pt); 748 pmap_load_store(&l2[l2_slot], 749 (pa & ~Ln_TABLE_MASK) | L2_TABLE); 750 l3pt += PAGE_SIZE; 751 } 752 753 /* Clean the L2 page table */ 754 memset((void *)l3_start, 0, l3pt - l3_start); 755 756 return l3pt; 757 } 758 759 /* 760 * Bootstrap the system enough to run with virtual memory. 761 */ 762 void 763 pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart, 764 vm_size_t kernlen) 765 { 766 u_int l1_slot, l2_slot; 767 uint64_t kern_delta; 768 pt_entry_t *l2; 769 vm_offset_t va, freemempos; 770 vm_offset_t dpcpu, msgbufpv; 771 vm_paddr_t start_pa, pa, min_pa; 772 int i; 773 774 kern_delta = KERNBASE - kernstart; 775 776 printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen); 777 printf("%lx\n", l1pt); 778 printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK); 779 780 /* Set this early so we can use the pagetable walking functions */ 781 kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt; 782 PMAP_LOCK_INIT(kernel_pmap); 783 784 /* Assume the address we were loaded to is a valid physical address */ 785 min_pa = KERNBASE - kern_delta; 786 787 physmap_idx = arm_physmem_avail(physmap, nitems(physmap)); 788 physmap_idx /= 2; 789 790 /* 791 * Find the minimum physical address. physmap is sorted, 792 * but may contain empty ranges. 
793 */ 794 for (i = 0; i < (physmap_idx * 2); i += 2) { 795 if (physmap[i] == physmap[i + 1]) 796 continue; 797 if (physmap[i] <= min_pa) 798 min_pa = physmap[i]; 799 } 800 801 freemempos = KERNBASE + kernlen; 802 freemempos = roundup2(freemempos, PAGE_SIZE); 803 804 /* Create a direct map region early so we can use it for pa -> va */ 805 freemempos = pmap_bootstrap_dmap(l1pt, min_pa, freemempos); 806 807 va = KERNBASE; 808 start_pa = pa = KERNBASE - kern_delta; 809 810 /* 811 * Read the page table to find out what is already mapped. 812 * This assumes we have mapped a block of memory from KERNBASE 813 * using a single L1 entry. 814 */ 815 l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); 816 817 /* Sanity check the index, KERNBASE should be the first VA */ 818 KASSERT(l2_slot == 0, ("The L2 index is non-zero")); 819 820 /* Find how many pages we have mapped */ 821 for (; l2_slot < Ln_ENTRIES; l2_slot++) { 822 if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) 823 break; 824 825 /* Check locore used L2 blocks */ 826 KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, 827 ("Invalid bootstrap L2 table")); 828 KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, 829 ("Incorrect PA in L2 table")); 830 831 va += L2_SIZE; 832 pa += L2_SIZE; 833 } 834 835 va = roundup2(va, L1_SIZE); 836 837 /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ 838 freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); 839 /* And the l3 tables for the early devmap */ 840 freemempos = pmap_bootstrap_l3(l1pt, 841 VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE), freemempos); 842 843 cpu_tlb_flushID(); 844 845 #define alloc_pages(var, np) \ 846 (var) = freemempos; \ 847 freemempos += (np * PAGE_SIZE); \ 848 memset((char *)(var), 0, ((np) * PAGE_SIZE)); 849 850 /* Allocate dynamic per-cpu area. */ 851 alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); 852 dpcpu_init((void *)dpcpu, 0); 853 854 /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ 855 alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); 856 msgbufp = (void *)msgbufpv; 857 858 /* Reserve some VA space for early BIOS/ACPI mapping */ 859 preinit_map_va = roundup2(freemempos, L2_SIZE); 860 861 virtual_avail = preinit_map_va + PMAP_PREINIT_MAPPING_SIZE; 862 virtual_avail = roundup2(virtual_avail, L1_SIZE); 863 virtual_end = VM_MAX_KERNEL_ADDRESS - (PMAP_MAPDEV_EARLY_SIZE); 864 kernel_vm_end = virtual_avail; 865 866 pa = pmap_early_vtophys(l1pt, freemempos); 867 868 arm_physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC); 869 870 cpu_tlb_flushID(); 871 } 872 873 /* 874 * Initialize a vm_page's machine-dependent fields. 875 */ 876 void 877 pmap_page_init(vm_page_t m) 878 { 879 880 TAILQ_INIT(&m->md.pv_list); 881 m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; 882 } 883 884 /* 885 * Initialize the pmap module. 886 * Called by vm_init, to initialize any structures that the pmap 887 * system needs to map virtual memory. 888 */ 889 void 890 pmap_init(void) 891 { 892 vm_size_t s; 893 int i, pv_npg; 894 895 /* 896 * Are large page mappings enabled? 897 */ 898 TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled); 899 if (superpages_enabled) { 900 KASSERT(MAXPAGESIZES > 1 && pagesizes[1] == 0, 901 ("pmap_init: can't assign to pagesizes[1]")); 902 pagesizes[1] = L2_SIZE; 903 } 904 905 /* 906 * Initialize the pv chunk list mutex. 907 */ 908 mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); 909 910 /* 911 * Initialize the pool of pv list locks. 
912 */ 913 for (i = 0; i < NPV_LIST_LOCKS; i++) 914 rw_init(&pv_list_locks[i], "pmap pv list"); 915 916 /* 917 * Calculate the size of the pv head table for superpages. 918 */ 919 pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE); 920 921 /* 922 * Allocate memory for the pv head table for superpages. 923 */ 924 s = (vm_size_t)(pv_npg * sizeof(struct md_page)); 925 s = round_page(s); 926 pv_table = (struct md_page *)kmem_malloc(s, M_WAITOK | M_ZERO); 927 for (i = 0; i < pv_npg; i++) 928 TAILQ_INIT(&pv_table[i].pv_list); 929 TAILQ_INIT(&pv_dummy.pv_list); 930 931 vm_initialized = 1; 932 } 933 934 static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0, 935 "2MB page mapping counters"); 936 937 static u_long pmap_l2_demotions; 938 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD, 939 &pmap_l2_demotions, 0, "2MB page demotions"); 940 941 static u_long pmap_l2_mappings; 942 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, mappings, CTLFLAG_RD, 943 &pmap_l2_mappings, 0, "2MB page mappings"); 944 945 static u_long pmap_l2_p_failures; 946 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD, 947 &pmap_l2_p_failures, 0, "2MB page promotion failures"); 948 949 static u_long pmap_l2_promotions; 950 SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD, 951 &pmap_l2_promotions, 0, "2MB page promotions"); 952 953 /* 954 * Invalidate a single TLB entry. 955 */ 956 static __inline void 957 pmap_invalidate_page(pmap_t pmap, vm_offset_t va) 958 { 959 960 sched_pin(); 961 __asm __volatile( 962 "dsb ishst \n" 963 "tlbi vaae1is, %0 \n" 964 "dsb ish \n" 965 "isb \n" 966 : : "r"(va >> PAGE_SHIFT)); 967 sched_unpin(); 968 } 969 970 static __inline void 971 pmap_invalidate_range_nopin(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 972 { 973 vm_offset_t addr; 974 975 dsb(ishst); 976 for (addr = sva; addr < eva; addr += PAGE_SIZE) { 977 __asm __volatile( 978 "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT)); 979 } 980 __asm __volatile( 981 "dsb ish \n" 982 "isb \n"); 983 } 984 985 static __inline void 986 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 987 { 988 989 sched_pin(); 990 pmap_invalidate_range_nopin(pmap, sva, eva); 991 sched_unpin(); 992 } 993 994 static __inline void 995 pmap_invalidate_all(pmap_t pmap) 996 { 997 998 sched_pin(); 999 __asm __volatile( 1000 "dsb ishst \n" 1001 "tlbi vmalle1is \n" 1002 "dsb ish \n" 1003 "isb \n"); 1004 sched_unpin(); 1005 } 1006 1007 /* 1008 * Routine: pmap_extract 1009 * Function: 1010 * Extract the physical page address associated 1011 * with the given map/virtual_address pair. 1012 */ 1013 vm_paddr_t 1014 pmap_extract(pmap_t pmap, vm_offset_t va) 1015 { 1016 pt_entry_t *pte, tpte; 1017 vm_paddr_t pa; 1018 int lvl; 1019 1020 pa = 0; 1021 PMAP_LOCK(pmap); 1022 /* 1023 * Find the block or page map for this virtual address. pmap_pte 1024 * will return either a valid block/page entry, or NULL. 
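 * The level it reports selects how many low-order bits of the VA are added
 * back as the offset within the block or page.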
1025 */ 1026 pte = pmap_pte(pmap, va, &lvl); 1027 if (pte != NULL) { 1028 tpte = pmap_load(pte); 1029 pa = tpte & ~ATTR_MASK; 1030 switch(lvl) { 1031 case 1: 1032 KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, 1033 ("pmap_extract: Invalid L1 pte found: %lx", 1034 tpte & ATTR_DESCR_MASK)); 1035 pa |= (va & L1_OFFSET); 1036 break; 1037 case 2: 1038 KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, 1039 ("pmap_extract: Invalid L2 pte found: %lx", 1040 tpte & ATTR_DESCR_MASK)); 1041 pa |= (va & L2_OFFSET); 1042 break; 1043 case 3: 1044 KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, 1045 ("pmap_extract: Invalid L3 pte found: %lx", 1046 tpte & ATTR_DESCR_MASK)); 1047 pa |= (va & L3_OFFSET); 1048 break; 1049 } 1050 } 1051 PMAP_UNLOCK(pmap); 1052 return (pa); 1053 } 1054 1055 /* 1056 * Routine: pmap_extract_and_hold 1057 * Function: 1058 * Atomically extract and hold the physical page 1059 * with the given pmap and virtual address pair 1060 * if that mapping permits the given protection. 1061 */ 1062 vm_page_t 1063 pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot) 1064 { 1065 pt_entry_t *pte, tpte; 1066 vm_offset_t off; 1067 vm_paddr_t pa; 1068 vm_page_t m; 1069 int lvl; 1070 1071 pa = 0; 1072 m = NULL; 1073 PMAP_LOCK(pmap); 1074 retry: 1075 pte = pmap_pte(pmap, va, &lvl); 1076 if (pte != NULL) { 1077 tpte = pmap_load(pte); 1078 1079 KASSERT(lvl > 0 && lvl <= 3, 1080 ("pmap_extract_and_hold: Invalid level %d", lvl)); 1081 CTASSERT(L1_BLOCK == L2_BLOCK); 1082 KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) || 1083 (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK), 1084 ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl, 1085 tpte & ATTR_DESCR_MASK)); 1086 if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) || 1087 ((prot & VM_PROT_WRITE) == 0)) { 1088 switch(lvl) { 1089 case 1: 1090 off = va & L1_OFFSET; 1091 break; 1092 case 2: 1093 off = va & L2_OFFSET; 1094 break; 1095 case 3: 1096 default: 1097 off = 0; 1098 } 1099 if (vm_page_pa_tryrelock(pmap, 1100 (tpte & ~ATTR_MASK) | off, &pa)) 1101 goto retry; 1102 m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off); 1103 vm_page_hold(m); 1104 } 1105 } 1106 PA_UNLOCK_COND(pa); 1107 PMAP_UNLOCK(pmap); 1108 return (m); 1109 } 1110 1111 vm_paddr_t 1112 pmap_kextract(vm_offset_t va) 1113 { 1114 pt_entry_t *pte, tpte; 1115 vm_paddr_t pa; 1116 int lvl; 1117 1118 if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) { 1119 pa = DMAP_TO_PHYS(va); 1120 } else { 1121 pa = 0; 1122 pte = pmap_pte(kernel_pmap, va, &lvl); 1123 if (pte != NULL) { 1124 tpte = pmap_load(pte); 1125 pa = tpte & ~ATTR_MASK; 1126 switch(lvl) { 1127 case 1: 1128 KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK, 1129 ("pmap_kextract: Invalid L1 pte found: %lx", 1130 tpte & ATTR_DESCR_MASK)); 1131 pa |= (va & L1_OFFSET); 1132 break; 1133 case 2: 1134 KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK, 1135 ("pmap_kextract: Invalid L2 pte found: %lx", 1136 tpte & ATTR_DESCR_MASK)); 1137 pa |= (va & L2_OFFSET); 1138 break; 1139 case 3: 1140 KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE, 1141 ("pmap_kextract: Invalid L3 pte found: %lx", 1142 tpte & ATTR_DESCR_MASK)); 1143 pa |= (va & L3_OFFSET); 1144 break; 1145 } 1146 } 1147 } 1148 return (pa); 1149 } 1150 1151 /*************************************************** 1152 * Low level mapping routines..... 
1153 ***************************************************/ 1154 1155 void 1156 pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode) 1157 { 1158 pd_entry_t *pde; 1159 pt_entry_t *pte, attr; 1160 vm_offset_t va; 1161 int lvl; 1162 1163 KASSERT((pa & L3_OFFSET) == 0, 1164 ("pmap_kenter: Invalid physical address")); 1165 KASSERT((sva & L3_OFFSET) == 0, 1166 ("pmap_kenter: Invalid virtual address")); 1167 KASSERT((size & PAGE_MASK) == 0, 1168 ("pmap_kenter: Mapping is not page-sized")); 1169 1170 attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE; 1171 if (mode == DEVICE_MEMORY) 1172 attr |= ATTR_XN; 1173 1174 va = sva; 1175 while (size != 0) { 1176 pde = pmap_pde(kernel_pmap, va, &lvl); 1177 KASSERT(pde != NULL, 1178 ("pmap_kenter: Invalid page entry, va: 0x%lx", va)); 1179 KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl)); 1180 1181 pte = pmap_l2_to_l3(pde, va); 1182 pmap_load_store(pte, (pa & ~L3_OFFSET) | attr); 1183 1184 va += PAGE_SIZE; 1185 pa += PAGE_SIZE; 1186 size -= PAGE_SIZE; 1187 } 1188 pmap_invalidate_range(kernel_pmap, sva, va); 1189 } 1190 1191 void 1192 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) 1193 { 1194 1195 pmap_kenter(sva, size, pa, DEVICE_MEMORY); 1196 } 1197 1198 /* 1199 * Remove a page from the kernel pagetables. 1200 */ 1201 PMAP_INLINE void 1202 pmap_kremove(vm_offset_t va) 1203 { 1204 pt_entry_t *pte; 1205 int lvl; 1206 1207 pte = pmap_pte(kernel_pmap, va, &lvl); 1208 KASSERT(pte != NULL, ("pmap_kremove: Invalid address")); 1209 KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl)); 1210 1211 pmap_load_clear(pte); 1212 pmap_invalidate_page(kernel_pmap, va); 1213 } 1214 1215 void 1216 pmap_kremove_device(vm_offset_t sva, vm_size_t size) 1217 { 1218 pt_entry_t *pte; 1219 vm_offset_t va; 1220 int lvl; 1221 1222 KASSERT((sva & L3_OFFSET) == 0, 1223 ("pmap_kremove_device: Invalid virtual address")); 1224 KASSERT((size & PAGE_MASK) == 0, 1225 ("pmap_kremove_device: Mapping is not page-sized")); 1226 1227 va = sva; 1228 while (size != 0) { 1229 pte = pmap_pte(kernel_pmap, va, &lvl); 1230 KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va)); 1231 KASSERT(lvl == 3, 1232 ("Invalid device pagetable level: %d != 3", lvl)); 1233 pmap_load_clear(pte); 1234 1235 va += PAGE_SIZE; 1236 size -= PAGE_SIZE; 1237 } 1238 pmap_invalidate_range(kernel_pmap, sva, va); 1239 } 1240 1241 /* 1242 * Used to map a range of physical addresses into kernel 1243 * virtual address space. 1244 * 1245 * The value passed in '*virt' is a suggested virtual address for 1246 * the mapping. Architectures which can support a direct-mapped 1247 * physical to virtual region can return the appropriate address 1248 * within that region, leaving '*virt' unchanged. Other 1249 * architectures should map the pages starting at '*virt' and 1250 * update '*virt' with the first usable address after the mapped 1251 * region. 1252 */ 1253 vm_offset_t 1254 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 1255 { 1256 return PHYS_TO_DMAP(start); 1257 } 1258 1259 1260 /* 1261 * Add a list of wired pages to the kva 1262 * this routine is only used for temporary 1263 * kernel mappings that do not need to have 1264 * page modification or references recorded. 1265 * Note that old mappings are simply written 1266 * over. The page *must* be wired. 1267 * Note: SMP coherent. Uses a ranged shootdown IPI. 
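 * (On arm64 the shootdown is performed with broadcast TLBI instructions in
 * pmap_invalidate_range() rather than with IPIs.)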
1268 */ 1269 void 1270 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1271 { 1272 pd_entry_t *pde; 1273 pt_entry_t *pte, pa; 1274 vm_offset_t va; 1275 vm_page_t m; 1276 int i, lvl; 1277 1278 va = sva; 1279 for (i = 0; i < count; i++) { 1280 pde = pmap_pde(kernel_pmap, va, &lvl); 1281 KASSERT(pde != NULL, 1282 ("pmap_qenter: Invalid page entry, va: 0x%lx", va)); 1283 KASSERT(lvl == 2, 1284 ("pmap_qenter: Invalid level %d", lvl)); 1285 1286 m = ma[i]; 1287 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | 1288 ATTR_IDX(m->md.pv_memattr) | L3_PAGE; 1289 if (m->md.pv_memattr == DEVICE_MEMORY) 1290 pa |= ATTR_XN; 1291 pte = pmap_l2_to_l3(pde, va); 1292 pmap_load_store(pte, pa); 1293 1294 va += L3_SIZE; 1295 } 1296 pmap_invalidate_range(kernel_pmap, sva, va); 1297 } 1298 1299 /* 1300 * This routine tears out page mappings from the 1301 * kernel -- it is meant only for temporary mappings. 1302 */ 1303 void 1304 pmap_qremove(vm_offset_t sva, int count) 1305 { 1306 pt_entry_t *pte; 1307 vm_offset_t va; 1308 int lvl; 1309 1310 KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); 1311 1312 va = sva; 1313 while (count-- > 0) { 1314 pte = pmap_pte(kernel_pmap, va, &lvl); 1315 KASSERT(lvl == 3, 1316 ("Invalid device pagetable level: %d != 3", lvl)); 1317 if (pte != NULL) { 1318 pmap_load_clear(pte); 1319 } 1320 1321 va += PAGE_SIZE; 1322 } 1323 pmap_invalidate_range(kernel_pmap, sva, va); 1324 } 1325 1326 /*************************************************** 1327 * Page table page management routines..... 1328 ***************************************************/ 1329 /* 1330 * Schedule the specified unused page table page to be freed. Specifically, 1331 * add the page to the specified list of pages that will be released to the 1332 * physical memory manager after the TLB has been updated. 1333 */ 1334 static __inline void 1335 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, 1336 boolean_t set_PG_ZERO) 1337 { 1338 1339 if (set_PG_ZERO) 1340 m->flags |= PG_ZERO; 1341 else 1342 m->flags &= ~PG_ZERO; 1343 SLIST_INSERT_HEAD(free, m, plinks.s.ss); 1344 } 1345 1346 /* 1347 * Decrements a page table page's wire count, which is used to record the 1348 * number of valid page table entries within the page. If the wire count 1349 * drops to zero, then the page table page is unmapped. Returns TRUE if the 1350 * page table page was unmapped and FALSE otherwise. 
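 * Unmapped page table pages are queued on the given "free" list (see
 * pmap_add_delayed_free_list()) and are only handed back to the physical
 * memory allocator once the caller has completed the TLB shootdown.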
1351 */ 1352 static inline boolean_t 1353 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1354 { 1355 1356 --m->wire_count; 1357 if (m->wire_count == 0) { 1358 _pmap_unwire_l3(pmap, va, m, free); 1359 return (TRUE); 1360 } else 1361 return (FALSE); 1362 } 1363 1364 static void 1365 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1366 { 1367 1368 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1369 /* 1370 * unmap the page table page 1371 */ 1372 if (m->pindex >= (NUL2E + NUL1E)) { 1373 /* l1 page */ 1374 pd_entry_t *l0; 1375 1376 l0 = pmap_l0(pmap, va); 1377 pmap_load_clear(l0); 1378 } else if (m->pindex >= NUL2E) { 1379 /* l2 page */ 1380 pd_entry_t *l1; 1381 1382 l1 = pmap_l1(pmap, va); 1383 pmap_load_clear(l1); 1384 } else { 1385 /* l3 page */ 1386 pd_entry_t *l2; 1387 1388 l2 = pmap_l2(pmap, va); 1389 pmap_load_clear(l2); 1390 } 1391 pmap_resident_count_dec(pmap, 1); 1392 if (m->pindex < NUL2E) { 1393 /* We just released an l3, unhold the matching l2 */ 1394 pd_entry_t *l1, tl1; 1395 vm_page_t l2pg; 1396 1397 l1 = pmap_l1(pmap, va); 1398 tl1 = pmap_load(l1); 1399 l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); 1400 pmap_unwire_l3(pmap, va, l2pg, free); 1401 } else if (m->pindex < (NUL2E + NUL1E)) { 1402 /* We just released an l2, unhold the matching l1 */ 1403 pd_entry_t *l0, tl0; 1404 vm_page_t l1pg; 1405 1406 l0 = pmap_l0(pmap, va); 1407 tl0 = pmap_load(l0); 1408 l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); 1409 pmap_unwire_l3(pmap, va, l1pg, free); 1410 } 1411 pmap_invalidate_page(pmap, va); 1412 1413 vm_wire_sub(1); 1414 1415 /* 1416 * Put page on a list so that it is released after 1417 * *ALL* TLB shootdown is done 1418 */ 1419 pmap_add_delayed_free_list(m, free, TRUE); 1420 } 1421 1422 /* 1423 * After removing a page table entry, this routine is used to 1424 * conditionally free the page, and manage the hold/wire counts. 1425 */ 1426 static int 1427 pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, 1428 struct spglist *free) 1429 { 1430 vm_page_t mpte; 1431 1432 if (va >= VM_MAXUSER_ADDRESS) 1433 return (0); 1434 KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1435 mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); 1436 return (pmap_unwire_l3(pmap, va, mpte, free)); 1437 } 1438 1439 void 1440 pmap_pinit0(pmap_t pmap) 1441 { 1442 1443 PMAP_LOCK_INIT(pmap); 1444 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1445 pmap->pm_l0 = kernel_pmap->pm_l0; 1446 pmap->pm_root.rt_root = 0; 1447 } 1448 1449 int 1450 pmap_pinit(pmap_t pmap) 1451 { 1452 vm_paddr_t l0phys; 1453 vm_page_t l0pt; 1454 1455 /* 1456 * allocate the l0 page 1457 */ 1458 while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 1459 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1460 vm_wait(NULL); 1461 1462 l0phys = VM_PAGE_TO_PHYS(l0pt); 1463 pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys); 1464 1465 if ((l0pt->flags & PG_ZERO) == 0) 1466 pagezero(pmap->pm_l0); 1467 1468 pmap->pm_root.rt_root = 0; 1469 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1470 1471 return (1); 1472 } 1473 1474 /* 1475 * This routine is called if the desired page table page does not exist. 1476 * 1477 * If page table page allocation fails, this routine may sleep before 1478 * returning NULL. It sleeps only if a lock pointer was given. 1479 * 1480 * Note: If a page allocation fails at page table level two or three, 1481 * one or two pages may be held during the wait, only to be released 1482 * afterwards. This conservative approach is easily argued to avoid 1483 * race conditions. 
1484 */ 1485 static vm_page_t 1486 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) 1487 { 1488 vm_page_t m, l1pg, l2pg; 1489 1490 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1491 1492 /* 1493 * Allocate a page table page. 1494 */ 1495 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1496 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1497 if (lockp != NULL) { 1498 RELEASE_PV_LIST_LOCK(lockp); 1499 PMAP_UNLOCK(pmap); 1500 vm_wait(NULL); 1501 PMAP_LOCK(pmap); 1502 } 1503 1504 /* 1505 * Indicate the need to retry. While waiting, the page table 1506 * page may have been allocated. 1507 */ 1508 return (NULL); 1509 } 1510 if ((m->flags & PG_ZERO) == 0) 1511 pmap_zero_page(m); 1512 1513 /* 1514 * Map the pagetable page into the process address space, if 1515 * it isn't already there. 1516 */ 1517 1518 if (ptepindex >= (NUL2E + NUL1E)) { 1519 pd_entry_t *l0; 1520 vm_pindex_t l0index; 1521 1522 l0index = ptepindex - (NUL2E + NUL1E); 1523 l0 = &pmap->pm_l0[l0index]; 1524 pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE); 1525 } else if (ptepindex >= NUL2E) { 1526 vm_pindex_t l0index, l1index; 1527 pd_entry_t *l0, *l1; 1528 pd_entry_t tl0; 1529 1530 l1index = ptepindex - NUL2E; 1531 l0index = l1index >> L0_ENTRIES_SHIFT; 1532 1533 l0 = &pmap->pm_l0[l0index]; 1534 tl0 = pmap_load(l0); 1535 if (tl0 == 0) { 1536 /* recurse for allocating page dir */ 1537 if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index, 1538 lockp) == NULL) { 1539 vm_page_unwire_noq(m); 1540 vm_page_free_zero(m); 1541 return (NULL); 1542 } 1543 } else { 1544 l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK); 1545 l1pg->wire_count++; 1546 } 1547 1548 l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK); 1549 l1 = &l1[ptepindex & Ln_ADDR_MASK]; 1550 pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); 1551 } else { 1552 vm_pindex_t l0index, l1index; 1553 pd_entry_t *l0, *l1, *l2; 1554 pd_entry_t tl0, tl1; 1555 1556 l1index = ptepindex >> Ln_ENTRIES_SHIFT; 1557 l0index = l1index >> L0_ENTRIES_SHIFT; 1558 1559 l0 = &pmap->pm_l0[l0index]; 1560 tl0 = pmap_load(l0); 1561 if (tl0 == 0) { 1562 /* recurse for allocating page dir */ 1563 if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1564 lockp) == NULL) { 1565 vm_page_unwire_noq(m); 1566 vm_page_free_zero(m); 1567 return (NULL); 1568 } 1569 tl0 = pmap_load(l0); 1570 l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1571 l1 = &l1[l1index & Ln_ADDR_MASK]; 1572 } else { 1573 l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK); 1574 l1 = &l1[l1index & Ln_ADDR_MASK]; 1575 tl1 = pmap_load(l1); 1576 if (tl1 == 0) { 1577 /* recurse for allocating page dir */ 1578 if (_pmap_alloc_l3(pmap, NUL2E + l1index, 1579 lockp) == NULL) { 1580 vm_page_unwire_noq(m); 1581 vm_page_free_zero(m); 1582 return (NULL); 1583 } 1584 } else { 1585 l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK); 1586 l2pg->wire_count++; 1587 } 1588 } 1589 1590 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 1591 l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1592 pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); 1593 } 1594 1595 pmap_resident_count_inc(pmap, 1); 1596 1597 return (m); 1598 } 1599 1600 static vm_page_t 1601 pmap_alloc_l2(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1602 { 1603 pd_entry_t *l1; 1604 vm_page_t l2pg; 1605 vm_pindex_t l2pindex; 1606 1607 retry: 1608 l1 = pmap_l1(pmap, va); 1609 if (l1 != NULL && (pmap_load(l1) & ATTR_DESCR_MASK) == L1_TABLE) { 1610 /* Add a reference to the L2 page. 
*/ 1611 l2pg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK); 1612 l2pg->wire_count++; 1613 } else { 1614 /* Allocate a L2 page. */ 1615 l2pindex = pmap_l2_pindex(va) >> Ln_ENTRIES_SHIFT; 1616 l2pg = _pmap_alloc_l3(pmap, NUL2E + l2pindex, lockp); 1617 if (l2pg == NULL && lockp != NULL) 1618 goto retry; 1619 } 1620 return (l2pg); 1621 } 1622 1623 static vm_page_t 1624 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1625 { 1626 vm_pindex_t ptepindex; 1627 pd_entry_t *pde, tpde; 1628 #ifdef INVARIANTS 1629 pt_entry_t *pte; 1630 #endif 1631 vm_page_t m; 1632 int lvl; 1633 1634 /* 1635 * Calculate pagetable page index 1636 */ 1637 ptepindex = pmap_l2_pindex(va); 1638 retry: 1639 /* 1640 * Get the page directory entry 1641 */ 1642 pde = pmap_pde(pmap, va, &lvl); 1643 1644 /* 1645 * If the page table page is mapped, we just increment the hold count, 1646 * and activate it. If we get a level 2 pde it will point to a level 3 1647 * table. 1648 */ 1649 switch (lvl) { 1650 case -1: 1651 break; 1652 case 0: 1653 #ifdef INVARIANTS 1654 pte = pmap_l0_to_l1(pde, va); 1655 KASSERT(pmap_load(pte) == 0, 1656 ("pmap_alloc_l3: TODO: l0 superpages")); 1657 #endif 1658 break; 1659 case 1: 1660 #ifdef INVARIANTS 1661 pte = pmap_l1_to_l2(pde, va); 1662 KASSERT(pmap_load(pte) == 0, 1663 ("pmap_alloc_l3: TODO: l1 superpages")); 1664 #endif 1665 break; 1666 case 2: 1667 tpde = pmap_load(pde); 1668 if (tpde != 0) { 1669 m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK); 1670 m->wire_count++; 1671 return (m); 1672 } 1673 break; 1674 default: 1675 panic("pmap_alloc_l3: Invalid level %d", lvl); 1676 } 1677 1678 /* 1679 * Here if the pte page isn't mapped, or if it has been deallocated. 1680 */ 1681 m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1682 if (m == NULL && lockp != NULL) 1683 goto retry; 1684 1685 return (m); 1686 } 1687 1688 /*************************************************** 1689 * Pmap allocation/deallocation routines. 1690 ***************************************************/ 1691 1692 /* 1693 * Release any resources held by the given physical map. 1694 * Called when a pmap initialized by pmap_pinit is being released. 1695 * Should only be called if the map contains no valid mappings. 
1696 */ 1697 void 1698 pmap_release(pmap_t pmap) 1699 { 1700 vm_page_t m; 1701 1702 KASSERT(pmap->pm_stats.resident_count == 0, 1703 ("pmap_release: pmap resident count %ld != 0", 1704 pmap->pm_stats.resident_count)); 1705 KASSERT(vm_radix_is_empty(&pmap->pm_root), 1706 ("pmap_release: pmap has reserved page table page(s)")); 1707 1708 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0)); 1709 1710 vm_page_unwire_noq(m); 1711 vm_page_free_zero(m); 1712 } 1713 1714 static int 1715 kvm_size(SYSCTL_HANDLER_ARGS) 1716 { 1717 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1718 1719 return sysctl_handle_long(oidp, &ksize, 0, req); 1720 } 1721 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1722 0, 0, kvm_size, "LU", "Size of KVM"); 1723 1724 static int 1725 kvm_free(SYSCTL_HANDLER_ARGS) 1726 { 1727 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1728 1729 return sysctl_handle_long(oidp, &kfree, 0, req); 1730 } 1731 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1732 0, 0, kvm_free, "LU", "Amount of KVM free"); 1733 1734 /* 1735 * grow the number of kernel page table entries, if needed 1736 */ 1737 void 1738 pmap_growkernel(vm_offset_t addr) 1739 { 1740 vm_paddr_t paddr; 1741 vm_page_t nkpg; 1742 pd_entry_t *l0, *l1, *l2; 1743 1744 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1745 1746 addr = roundup2(addr, L2_SIZE); 1747 if (addr - 1 >= vm_map_max(kernel_map)) 1748 addr = vm_map_max(kernel_map); 1749 while (kernel_vm_end < addr) { 1750 l0 = pmap_l0(kernel_pmap, kernel_vm_end); 1751 KASSERT(pmap_load(l0) != 0, 1752 ("pmap_growkernel: No level 0 kernel entry")); 1753 1754 l1 = pmap_l0_to_l1(l0, kernel_vm_end); 1755 if (pmap_load(l1) == 0) { 1756 /* We need a new PDP entry */ 1757 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1758 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1759 VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1760 if (nkpg == NULL) 1761 panic("pmap_growkernel: no memory to grow kernel"); 1762 if ((nkpg->flags & PG_ZERO) == 0) 1763 pmap_zero_page(nkpg); 1764 paddr = VM_PAGE_TO_PHYS(nkpg); 1765 pmap_load_store(l1, paddr | L1_TABLE); 1766 continue; /* try again */ 1767 } 1768 l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1769 if ((pmap_load(l2) & ATTR_AF) != 0) { 1770 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1771 if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { 1772 kernel_vm_end = vm_map_max(kernel_map); 1773 break; 1774 } 1775 continue; 1776 } 1777 1778 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1779 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1780 VM_ALLOC_ZERO); 1781 if (nkpg == NULL) 1782 panic("pmap_growkernel: no memory to grow kernel"); 1783 if ((nkpg->flags & PG_ZERO) == 0) 1784 pmap_zero_page(nkpg); 1785 paddr = VM_PAGE_TO_PHYS(nkpg); 1786 pmap_load_store(l2, paddr | L2_TABLE); 1787 pmap_invalidate_page(kernel_pmap, kernel_vm_end); 1788 1789 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1790 if (kernel_vm_end - 1 >= vm_map_max(kernel_map)) { 1791 kernel_vm_end = vm_map_max(kernel_map); 1792 break; 1793 } 1794 } 1795 } 1796 1797 1798 /*************************************************** 1799 * page management routines. 
1800 ***************************************************/ 1801 1802 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1803 CTASSERT(_NPCM == 3); 1804 CTASSERT(_NPCPV == 168); 1805 1806 static __inline struct pv_chunk * 1807 pv_to_chunk(pv_entry_t pv) 1808 { 1809 1810 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1811 } 1812 1813 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1814 1815 #define PC_FREE0 0xfffffffffffffffful 1816 #define PC_FREE1 0xfffffffffffffffful 1817 #define PC_FREE2 0x000000fffffffffful 1818 1819 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1820 1821 #if 0 1822 #ifdef PV_STATS 1823 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1824 1825 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1826 "Current number of pv entry chunks"); 1827 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1828 "Current number of pv entry chunks allocated"); 1829 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1830 "Current number of pv entry chunks frees"); 1831 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1832 "Number of times tried to get a chunk page but failed."); 1833 1834 static long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1835 static int pv_entry_spare; 1836 1837 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1838 "Current number of pv entry frees"); 1839 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1840 "Current number of pv entry allocs"); 1841 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1842 "Current number of pv entries"); 1843 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1844 "Current number of spare pv entries"); 1845 #endif 1846 #endif /* 0 */ 1847 1848 /* 1849 * We are in a serious low memory condition. Resort to 1850 * drastic measures to free some pages so we can allocate 1851 * another pv entry chunk. 1852 * 1853 * Returns NULL if PV entries were reclaimed from the specified pmap. 1854 * 1855 * We do not, however, unmap 2mpages because subsequent accesses will 1856 * allocate per-page pv entries until repromotion occurs, thereby 1857 * exacerbating the shortage of free pv entries. 
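 * Otherwise, the page backing an emptied pv chunk (or a page table page
 * freed as a side effect) is returned so that it can back a new chunk.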
1858 */ 1859 static vm_page_t 1860 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1861 { 1862 struct pv_chunk *pc, *pc_marker, *pc_marker_end; 1863 struct pv_chunk_header pc_marker_b, pc_marker_end_b; 1864 struct md_page *pvh; 1865 pd_entry_t *pde; 1866 pmap_t next_pmap, pmap; 1867 pt_entry_t *pte, tpte; 1868 pv_entry_t pv; 1869 vm_offset_t va; 1870 vm_page_t m, m_pc; 1871 struct spglist free; 1872 uint64_t inuse; 1873 int bit, field, freed, lvl; 1874 static int active_reclaims = 0; 1875 1876 PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED); 1877 KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL")); 1878 1879 pmap = NULL; 1880 m_pc = NULL; 1881 SLIST_INIT(&free); 1882 bzero(&pc_marker_b, sizeof(pc_marker_b)); 1883 bzero(&pc_marker_end_b, sizeof(pc_marker_end_b)); 1884 pc_marker = (struct pv_chunk *)&pc_marker_b; 1885 pc_marker_end = (struct pv_chunk *)&pc_marker_end_b; 1886 1887 mtx_lock(&pv_chunks_mutex); 1888 active_reclaims++; 1889 TAILQ_INSERT_HEAD(&pv_chunks, pc_marker, pc_lru); 1890 TAILQ_INSERT_TAIL(&pv_chunks, pc_marker_end, pc_lru); 1891 while ((pc = TAILQ_NEXT(pc_marker, pc_lru)) != pc_marker_end && 1892 SLIST_EMPTY(&free)) { 1893 next_pmap = pc->pc_pmap; 1894 if (next_pmap == NULL) { 1895 /* 1896 * The next chunk is a marker. However, it is 1897 * not our marker, so active_reclaims must be 1898 * > 1. Consequently, the next_chunk code 1899 * will not rotate the pv_chunks list. 1900 */ 1901 goto next_chunk; 1902 } 1903 mtx_unlock(&pv_chunks_mutex); 1904 1905 /* 1906 * A pv_chunk can only be removed from the pc_lru list 1907 * when both pv_chunks_mutex is owned and the 1908 * corresponding pmap is locked. 1909 */ 1910 if (pmap != next_pmap) { 1911 if (pmap != NULL && pmap != locked_pmap) 1912 PMAP_UNLOCK(pmap); 1913 pmap = next_pmap; 1914 /* Avoid deadlock and lock recursion. */ 1915 if (pmap > locked_pmap) { 1916 RELEASE_PV_LIST_LOCK(lockp); 1917 PMAP_LOCK(pmap); 1918 mtx_lock(&pv_chunks_mutex); 1919 continue; 1920 } else if (pmap != locked_pmap) { 1921 if (PMAP_TRYLOCK(pmap)) { 1922 mtx_lock(&pv_chunks_mutex); 1923 continue; 1924 } else { 1925 pmap = NULL; /* pmap is not locked */ 1926 mtx_lock(&pv_chunks_mutex); 1927 pc = TAILQ_NEXT(pc_marker, pc_lru); 1928 if (pc == NULL || 1929 pc->pc_pmap != next_pmap) 1930 continue; 1931 goto next_chunk; 1932 } 1933 } 1934 } 1935 1936 /* 1937 * Destroy every non-wired, 4 KB page mapping in the chunk. 
1938 */ 1939 freed = 0; 1940 for (field = 0; field < _NPCM; field++) { 1941 for (inuse = ~pc->pc_map[field] & pc_freemask[field]; 1942 inuse != 0; inuse &= ~(1UL << bit)) { 1943 bit = ffsl(inuse) - 1; 1944 pv = &pc->pc_pventry[field * 64 + bit]; 1945 va = pv->pv_va; 1946 pde = pmap_pde(pmap, va, &lvl); 1947 if (lvl != 2) 1948 continue; 1949 pte = pmap_l2_to_l3(pde, va); 1950 tpte = pmap_load(pte); 1951 if ((tpte & ATTR_SW_WIRED) != 0) 1952 continue; 1953 tpte = pmap_load_clear(pte); 1954 pmap_invalidate_page(pmap, va); 1955 m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK); 1956 if (pmap_page_dirty(tpte)) 1957 vm_page_dirty(m); 1958 if ((tpte & ATTR_AF) != 0) 1959 vm_page_aflag_set(m, PGA_REFERENCED); 1960 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1961 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 1962 m->md.pv_gen++; 1963 if (TAILQ_EMPTY(&m->md.pv_list) && 1964 (m->flags & PG_FICTITIOUS) == 0) { 1965 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 1966 if (TAILQ_EMPTY(&pvh->pv_list)) { 1967 vm_page_aflag_clear(m, 1968 PGA_WRITEABLE); 1969 } 1970 } 1971 pc->pc_map[field] |= 1UL << bit; 1972 pmap_unuse_pt(pmap, va, pmap_load(pde), &free); 1973 freed++; 1974 } 1975 } 1976 if (freed == 0) { 1977 mtx_lock(&pv_chunks_mutex); 1978 goto next_chunk; 1979 } 1980 /* Every freed mapping is for a 4 KB page. */ 1981 pmap_resident_count_dec(pmap, freed); 1982 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 1983 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 1984 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 1985 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1986 if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 && 1987 pc->pc_map[2] == PC_FREE2) { 1988 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1989 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1990 PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1991 /* Entire chunk is free; return it. */ 1992 m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1993 dump_drop_page(m_pc->phys_addr); 1994 mtx_lock(&pv_chunks_mutex); 1995 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1996 break; 1997 } 1998 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1999 mtx_lock(&pv_chunks_mutex); 2000 /* One freed pv entry in locked_pmap is sufficient. */ 2001 if (pmap == locked_pmap) 2002 break; 2003 2004 next_chunk: 2005 TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru); 2006 TAILQ_INSERT_AFTER(&pv_chunks, pc, pc_marker, pc_lru); 2007 if (active_reclaims == 1 && pmap != NULL) { 2008 /* 2009 * Rotate the pv chunks list so that we do not 2010 * scan the same pv chunks that could not be 2011 * freed (because they contained a wired 2012 * and/or superpage mapping) on every 2013 * invocation of reclaim_pv_chunk(). 2014 */ 2015 while ((pc = TAILQ_FIRST(&pv_chunks)) != pc_marker) { 2016 MPASS(pc->pc_pmap != NULL); 2017 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2018 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 2019 } 2020 } 2021 } 2022 TAILQ_REMOVE(&pv_chunks, pc_marker, pc_lru); 2023 TAILQ_REMOVE(&pv_chunks, pc_marker_end, pc_lru); 2024 active_reclaims--; 2025 mtx_unlock(&pv_chunks_mutex); 2026 if (pmap != NULL && pmap != locked_pmap) 2027 PMAP_UNLOCK(pmap); 2028 if (m_pc == NULL && !SLIST_EMPTY(&free)) { 2029 m_pc = SLIST_FIRST(&free); 2030 SLIST_REMOVE_HEAD(&free, plinks.s.ss); 2031 /* Recycle a freed page table page. 
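 * If no pv chunk page was freed outright, reuse one of the page table
 * pages released by pmap_unuse_pt() above; it is re-wired here so that
 * the caller can turn it into a new pv chunk.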
*/ 2032 m_pc->wire_count = 1; 2033 vm_wire_add(1); 2034 } 2035 vm_page_free_pages_toq(&free, false); 2036 return (m_pc); 2037 } 2038 2039 /* 2040 * free the pv_entry back to the free list 2041 */ 2042 static void 2043 free_pv_entry(pmap_t pmap, pv_entry_t pv) 2044 { 2045 struct pv_chunk *pc; 2046 int idx, field, bit; 2047 2048 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2049 PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 2050 PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 2051 PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 2052 pc = pv_to_chunk(pv); 2053 idx = pv - &pc->pc_pventry[0]; 2054 field = idx / 64; 2055 bit = idx % 64; 2056 pc->pc_map[field] |= 1ul << bit; 2057 if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 2058 pc->pc_map[2] != PC_FREE2) { 2059 /* 98% of the time, pc is already at the head of the list. */ 2060 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 2061 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2062 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2063 } 2064 return; 2065 } 2066 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2067 free_pv_chunk(pc); 2068 } 2069 2070 static void 2071 free_pv_chunk(struct pv_chunk *pc) 2072 { 2073 vm_page_t m; 2074 2075 mtx_lock(&pv_chunks_mutex); 2076 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 2077 mtx_unlock(&pv_chunks_mutex); 2078 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 2079 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 2080 PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 2081 /* entire chunk is free, return it */ 2082 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 2083 dump_drop_page(m->phys_addr); 2084 vm_page_unwire_noq(m); 2085 vm_page_free(m); 2086 } 2087 2088 /* 2089 * Returns a new PV entry, allocating a new PV chunk from the system when 2090 * needed. If this PV chunk allocation fails and a PV list lock pointer was 2091 * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 2092 * returned. 2093 * 2094 * The given PV list lock may be released. 
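 *
 * PV entries are carved out of per-pmap chunks of _NPCPV (168) entries.
 * The three 64-bit words of pc_map track the free slots, which is why
 * PC_FREE2 has only its low 40 bits set (168 - 2 * 64 = 40); for example,
 * slot 100 corresponds to pc_map[1] bit 36.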
2095 */ 2096 static pv_entry_t 2097 get_pv_entry(pmap_t pmap, struct rwlock **lockp) 2098 { 2099 int bit, field; 2100 pv_entry_t pv; 2101 struct pv_chunk *pc; 2102 vm_page_t m; 2103 2104 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2105 PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 2106 retry: 2107 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2108 if (pc != NULL) { 2109 for (field = 0; field < _NPCM; field++) { 2110 if (pc->pc_map[field]) { 2111 bit = ffsl(pc->pc_map[field]) - 1; 2112 break; 2113 } 2114 } 2115 if (field < _NPCM) { 2116 pv = &pc->pc_pventry[field * 64 + bit]; 2117 pc->pc_map[field] &= ~(1ul << bit); 2118 /* If this was the last item, move it to tail */ 2119 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 2120 pc->pc_map[2] == 0) { 2121 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2122 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 2123 pc_list); 2124 } 2125 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 2126 PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 2127 return (pv); 2128 } 2129 } 2130 /* No free items, allocate another chunk */ 2131 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 2132 VM_ALLOC_WIRED); 2133 if (m == NULL) { 2134 if (lockp == NULL) { 2135 PV_STAT(pc_chunk_tryfail++); 2136 return (NULL); 2137 } 2138 m = reclaim_pv_chunk(pmap, lockp); 2139 if (m == NULL) 2140 goto retry; 2141 } 2142 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 2143 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 2144 dump_add_page(m->phys_addr); 2145 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 2146 pc->pc_pmap = pmap; 2147 pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 2148 pc->pc_map[1] = PC_FREE1; 2149 pc->pc_map[2] = PC_FREE2; 2150 mtx_lock(&pv_chunks_mutex); 2151 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 2152 mtx_unlock(&pv_chunks_mutex); 2153 pv = &pc->pc_pventry[0]; 2154 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2155 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 2156 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 2157 return (pv); 2158 } 2159 2160 /* 2161 * Ensure that the number of spare PV entries in the specified pmap meets or 2162 * exceeds the given count, "needed". 2163 * 2164 * The given PV list lock may be released. 2165 */ 2166 static void 2167 reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) 2168 { 2169 struct pch new_tail; 2170 struct pv_chunk *pc; 2171 vm_page_t m; 2172 int avail, free; 2173 bool reclaimed; 2174 2175 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2176 KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); 2177 2178 /* 2179 * Newly allocated PV chunks must be stored in a private list until 2180 * the required number of PV chunks have been allocated. Otherwise, 2181 * reclaim_pv_chunk() could recycle one of these chunks. In 2182 * contrast, these chunks must be added to the pmap upon allocation. 
2183 */ 2184 TAILQ_INIT(&new_tail); 2185 retry: 2186 avail = 0; 2187 TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { 2188 bit_count((bitstr_t *)pc->pc_map, 0, 2189 sizeof(pc->pc_map) * NBBY, &free); 2190 if (free == 0) 2191 break; 2192 avail += free; 2193 if (avail >= needed) 2194 break; 2195 } 2196 for (reclaimed = false; avail < needed; avail += _NPCPV) { 2197 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 2198 VM_ALLOC_WIRED); 2199 if (m == NULL) { 2200 m = reclaim_pv_chunk(pmap, lockp); 2201 if (m == NULL) 2202 goto retry; 2203 reclaimed = true; 2204 } 2205 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 2206 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 2207 dump_add_page(m->phys_addr); 2208 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 2209 pc->pc_pmap = pmap; 2210 pc->pc_map[0] = PC_FREE0; 2211 pc->pc_map[1] = PC_FREE1; 2212 pc->pc_map[2] = PC_FREE2; 2213 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2214 TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 2215 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); 2216 2217 /* 2218 * The reclaim might have freed a chunk from the current pmap. 2219 * If that chunk contained available entries, we need to 2220 * re-count the number of available entries. 2221 */ 2222 if (reclaimed) 2223 goto retry; 2224 } 2225 if (!TAILQ_EMPTY(&new_tail)) { 2226 mtx_lock(&pv_chunks_mutex); 2227 TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); 2228 mtx_unlock(&pv_chunks_mutex); 2229 } 2230 } 2231 2232 /* 2233 * First find and then remove the pv entry for the specified pmap and virtual 2234 * address from the specified pv list. Returns the pv entry if found and NULL 2235 * otherwise. This operation can be performed on pv lists for either 4KB or 2236 * 2MB page mappings. 2237 */ 2238 static __inline pv_entry_t 2239 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2240 { 2241 pv_entry_t pv; 2242 2243 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 2244 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2245 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 2246 pvh->pv_gen++; 2247 break; 2248 } 2249 } 2250 return (pv); 2251 } 2252 2253 /* 2254 * After demotion from a 2MB page mapping to 512 4KB page mappings, 2255 * destroy the pv entry for the 2MB page mapping and reinstantiate the pv 2256 * entries for each of the 4KB page mappings. 2257 */ 2258 static void 2259 pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, 2260 struct rwlock **lockp) 2261 { 2262 struct md_page *pvh; 2263 struct pv_chunk *pc; 2264 pv_entry_t pv; 2265 vm_offset_t va_last; 2266 vm_page_t m; 2267 int bit, field; 2268 2269 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2270 KASSERT((pa & L2_OFFSET) == 0, 2271 ("pmap_pv_demote_l2: pa is not 2mpage aligned")); 2272 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); 2273 2274 /* 2275 * Transfer the 2mpage's pv entry for this mapping to the first 2276 * page's pv list. Once this transfer begins, the pv list lock 2277 * must not be released until the last pv entry is reinstantiated. 2278 */ 2279 pvh = pa_to_pvh(pa); 2280 va = va & ~L2_OFFSET; 2281 pv = pmap_pvh_remove(pvh, pmap, va); 2282 KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); 2283 m = PHYS_TO_VM_PAGE(pa); 2284 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2285 m->md.pv_gen++; 2286 /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. 
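 * The entries are expected to come from chunks already reserved by the
 * caller (cf. reserve_pv_entries()), so the loop below never sleeps or
 * reclaims; a chunk that runs dry is rotated to the tail of pm_pvchunk
 * and the scan continues with the next chunk.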
*/ 2287 PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1)); 2288 va_last = va + L2_SIZE - PAGE_SIZE; 2289 for (;;) { 2290 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2291 KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || 2292 pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); 2293 for (field = 0; field < _NPCM; field++) { 2294 while (pc->pc_map[field]) { 2295 bit = ffsl(pc->pc_map[field]) - 1; 2296 pc->pc_map[field] &= ~(1ul << bit); 2297 pv = &pc->pc_pventry[field * 64 + bit]; 2298 va += PAGE_SIZE; 2299 pv->pv_va = va; 2300 m++; 2301 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2302 ("pmap_pv_demote_l2: page %p is not managed", m)); 2303 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2304 m->md.pv_gen++; 2305 if (va == va_last) 2306 goto out; 2307 } 2308 } 2309 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2310 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2311 } 2312 out: 2313 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { 2314 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2315 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2316 } 2317 PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1)); 2318 PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1)); 2319 } 2320 2321 /* 2322 * First find and then destroy the pv entry for the specified pmap and virtual 2323 * address. This operation can be performed on pv lists for either 4KB or 2MB 2324 * page mappings. 2325 */ 2326 static void 2327 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2328 { 2329 pv_entry_t pv; 2330 2331 pv = pmap_pvh_remove(pvh, pmap, va); 2332 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2333 free_pv_entry(pmap, pv); 2334 } 2335 2336 /* 2337 * Conditionally create the PV entry for a 4KB page mapping if the required 2338 * memory can be allocated without resorting to reclamation. 2339 */ 2340 static boolean_t 2341 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 2342 struct rwlock **lockp) 2343 { 2344 pv_entry_t pv; 2345 2346 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2347 /* Pass NULL instead of the lock pointer to disable reclamation. */ 2348 if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 2349 pv->pv_va = va; 2350 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 2351 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2352 m->md.pv_gen++; 2353 return (TRUE); 2354 } else 2355 return (FALSE); 2356 } 2357 2358 /* 2359 * Create the PV entry for a 2MB page mapping. Always returns true unless the 2360 * flag PMAP_ENTER_NORECLAIM is specified. If that flag is specified, returns 2361 * false if the PV entry cannot be allocated without resorting to reclamation. 2362 */ 2363 static bool 2364 pmap_pv_insert_l2(pmap_t pmap, vm_offset_t va, pd_entry_t l2e, u_int flags, 2365 struct rwlock **lockp) 2366 { 2367 struct md_page *pvh; 2368 pv_entry_t pv; 2369 vm_paddr_t pa; 2370 2371 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2372 /* Pass NULL instead of the lock pointer to disable reclamation. */ 2373 if ((pv = get_pv_entry(pmap, (flags & PMAP_ENTER_NORECLAIM) != 0 ? 
2374 NULL : lockp)) == NULL) 2375 return (false); 2376 pv->pv_va = va; 2377 pa = l2e & ~ATTR_MASK; 2378 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); 2379 pvh = pa_to_pvh(pa); 2380 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 2381 pvh->pv_gen++; 2382 return (true); 2383 } 2384 2385 static void 2386 pmap_remove_kernel_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) 2387 { 2388 pt_entry_t newl2, oldl2; 2389 vm_page_t ml3; 2390 vm_paddr_t ml3pa; 2391 2392 KASSERT(!VIRT_IN_DMAP(va), ("removing direct mapping of %#lx", va)); 2393 KASSERT(pmap == kernel_pmap, ("pmap %p is not kernel_pmap", pmap)); 2394 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2395 2396 ml3 = pmap_remove_pt_page(pmap, va); 2397 if (ml3 == NULL) 2398 panic("pmap_remove_kernel_l2: Missing pt page"); 2399 2400 ml3pa = VM_PAGE_TO_PHYS(ml3); 2401 newl2 = ml3pa | L2_TABLE; 2402 2403 /* 2404 * Initialize the page table page. 2405 */ 2406 pagezero((void *)PHYS_TO_DMAP(ml3pa)); 2407 2408 /* 2409 * Demote the mapping. The caller must have already invalidated the 2410 * mapping (i.e., the "break" in break-before-make). 2411 */ 2412 oldl2 = pmap_load_store(l2, newl2); 2413 KASSERT(oldl2 == 0, ("%s: found existing mapping at %p: %#lx", 2414 __func__, l2, oldl2)); 2415 } 2416 2417 /* 2418 * pmap_remove_l2: Do the things to unmap a level 2 superpage. 2419 */ 2420 static int 2421 pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, 2422 pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) 2423 { 2424 struct md_page *pvh; 2425 pt_entry_t old_l2; 2426 vm_offset_t eva, va; 2427 vm_page_t m, ml3; 2428 2429 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2430 KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned")); 2431 old_l2 = pmap_load_clear(l2); 2432 KASSERT((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK, 2433 ("pmap_remove_l2: L2e %lx is not a block mapping", old_l2)); 2434 pmap_invalidate_range(pmap, sva, sva + L2_SIZE); 2435 if (old_l2 & ATTR_SW_WIRED) 2436 pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; 2437 pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); 2438 if (old_l2 & ATTR_SW_MANAGED) { 2439 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, old_l2 & ~ATTR_MASK); 2440 pvh = pa_to_pvh(old_l2 & ~ATTR_MASK); 2441 pmap_pvh_free(pvh, pmap, sva); 2442 eva = sva + L2_SIZE; 2443 for (va = sva, m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK); 2444 va < eva; va += PAGE_SIZE, m++) { 2445 if (pmap_page_dirty(old_l2)) 2446 vm_page_dirty(m); 2447 if (old_l2 & ATTR_AF) 2448 vm_page_aflag_set(m, PGA_REFERENCED); 2449 if (TAILQ_EMPTY(&m->md.pv_list) && 2450 TAILQ_EMPTY(&pvh->pv_list)) 2451 vm_page_aflag_clear(m, PGA_WRITEABLE); 2452 } 2453 } 2454 if (pmap == kernel_pmap) { 2455 pmap_remove_kernel_l2(pmap, l2, sva); 2456 } else { 2457 ml3 = pmap_remove_pt_page(pmap, sva); 2458 if (ml3 != NULL) { 2459 pmap_resident_count_dec(pmap, 1); 2460 KASSERT(ml3->wire_count == NL3PG, 2461 ("pmap_remove_l2: l3 page wire count error")); 2462 ml3->wire_count = 1; 2463 vm_page_unwire_noq(ml3); 2464 pmap_add_delayed_free_list(ml3, free, FALSE); 2465 } 2466 } 2467 return (pmap_unuse_pt(pmap, sva, l1e, free)); 2468 } 2469 2470 /* 2471 * pmap_remove_l3: do the things to unmap a page in a process 2472 */ 2473 static int 2474 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 2475 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 2476 { 2477 struct md_page *pvh; 2478 pt_entry_t old_l3; 2479 vm_page_t m; 2480 2481 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2482 old_l3 = pmap_load_clear(l3); 2483 pmap_invalidate_page(pmap, va); 2484 if (old_l3 & ATTR_SW_WIRED) 2485 
pmap->pm_stats.wired_count -= 1; 2486 pmap_resident_count_dec(pmap, 1); 2487 if (old_l3 & ATTR_SW_MANAGED) { 2488 m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); 2489 if (pmap_page_dirty(old_l3)) 2490 vm_page_dirty(m); 2491 if (old_l3 & ATTR_AF) 2492 vm_page_aflag_set(m, PGA_REFERENCED); 2493 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 2494 pmap_pvh_free(&m->md, pmap, va); 2495 if (TAILQ_EMPTY(&m->md.pv_list) && 2496 (m->flags & PG_FICTITIOUS) == 0) { 2497 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2498 if (TAILQ_EMPTY(&pvh->pv_list)) 2499 vm_page_aflag_clear(m, PGA_WRITEABLE); 2500 } 2501 } 2502 return (pmap_unuse_pt(pmap, va, l2e, free)); 2503 } 2504 2505 /* 2506 * Remove the given range of addresses from the specified map. 2507 * 2508 * It is assumed that the start and end are properly 2509 * rounded to the page size. 2510 */ 2511 void 2512 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2513 { 2514 struct rwlock *lock; 2515 vm_offset_t va, va_next; 2516 pd_entry_t *l0, *l1, *l2; 2517 pt_entry_t l3_paddr, *l3; 2518 struct spglist free; 2519 2520 /* 2521 * Perform an unsynchronized read. This is, however, safe. 2522 */ 2523 if (pmap->pm_stats.resident_count == 0) 2524 return; 2525 2526 SLIST_INIT(&free); 2527 2528 PMAP_LOCK(pmap); 2529 2530 lock = NULL; 2531 for (; sva < eva; sva = va_next) { 2532 2533 if (pmap->pm_stats.resident_count == 0) 2534 break; 2535 2536 l0 = pmap_l0(pmap, sva); 2537 if (pmap_load(l0) == 0) { 2538 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2539 if (va_next < sva) 2540 va_next = eva; 2541 continue; 2542 } 2543 2544 l1 = pmap_l0_to_l1(l0, sva); 2545 if (pmap_load(l1) == 0) { 2546 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2547 if (va_next < sva) 2548 va_next = eva; 2549 continue; 2550 } 2551 2552 /* 2553 * Calculate index for next page table. 2554 */ 2555 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2556 if (va_next < sva) 2557 va_next = eva; 2558 2559 l2 = pmap_l1_to_l2(l1, sva); 2560 if (l2 == NULL) 2561 continue; 2562 2563 l3_paddr = pmap_load(l2); 2564 2565 if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { 2566 if (sva + L2_SIZE == va_next && eva >= va_next) { 2567 pmap_remove_l2(pmap, l2, sva, pmap_load(l1), 2568 &free, &lock); 2569 continue; 2570 } else if (pmap_demote_l2_locked(pmap, l2, 2571 sva &~L2_OFFSET, &lock) == NULL) 2572 continue; 2573 l3_paddr = pmap_load(l2); 2574 } 2575 2576 /* 2577 * Weed out invalid mappings. 2578 */ 2579 if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) 2580 continue; 2581 2582 /* 2583 * Limit our scan to either the end of the va represented 2584 * by the current page table page, or to the end of the 2585 * range being removed. 2586 */ 2587 if (va_next > eva) 2588 va_next = eva; 2589 2590 va = va_next; 2591 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2592 sva += L3_SIZE) { 2593 if (l3 == NULL) 2594 panic("l3 == NULL"); 2595 if (pmap_load(l3) == 0) { 2596 if (va != va_next) { 2597 pmap_invalidate_range(pmap, va, sva); 2598 va = va_next; 2599 } 2600 continue; 2601 } 2602 if (va == va_next) 2603 va = sva; 2604 if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, 2605 &lock)) { 2606 sva += L3_SIZE; 2607 break; 2608 } 2609 } 2610 if (va != va_next) 2611 pmap_invalidate_range(pmap, va, sva); 2612 } 2613 if (lock != NULL) 2614 rw_wunlock(lock); 2615 PMAP_UNLOCK(pmap); 2616 vm_page_free_pages_toq(&free, false); 2617 } 2618 2619 /* 2620 * Routine: pmap_remove_all 2621 * Function: 2622 * Removes this physical page from 2623 * all physical maps in which it resides. 2624 * Reflects back modify bits to the pager. 
2625 * 2626 * Notes: 2627 * Original versions of this routine were very 2628 * inefficient because they iteratively called 2629 * pmap_remove (slow...) 2630 */ 2631 2632 void 2633 pmap_remove_all(vm_page_t m) 2634 { 2635 struct md_page *pvh; 2636 pv_entry_t pv; 2637 pmap_t pmap; 2638 struct rwlock *lock; 2639 pd_entry_t *pde, tpde; 2640 pt_entry_t *pte, tpte; 2641 vm_offset_t va; 2642 struct spglist free; 2643 int lvl, pvh_gen, md_gen; 2644 2645 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2646 ("pmap_remove_all: page %p is not managed", m)); 2647 SLIST_INIT(&free); 2648 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2649 pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : 2650 pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2651 retry: 2652 rw_wlock(lock); 2653 while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 2654 pmap = PV_PMAP(pv); 2655 if (!PMAP_TRYLOCK(pmap)) { 2656 pvh_gen = pvh->pv_gen; 2657 rw_wunlock(lock); 2658 PMAP_LOCK(pmap); 2659 rw_wlock(lock); 2660 if (pvh_gen != pvh->pv_gen) { 2661 rw_wunlock(lock); 2662 PMAP_UNLOCK(pmap); 2663 goto retry; 2664 } 2665 } 2666 va = pv->pv_va; 2667 pte = pmap_pte(pmap, va, &lvl); 2668 KASSERT(pte != NULL, 2669 ("pmap_remove_all: no page table entry found")); 2670 KASSERT(lvl == 2, 2671 ("pmap_remove_all: invalid pte level %d", lvl)); 2672 2673 pmap_demote_l2_locked(pmap, pte, va, &lock); 2674 PMAP_UNLOCK(pmap); 2675 } 2676 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2677 pmap = PV_PMAP(pv); 2678 if (!PMAP_TRYLOCK(pmap)) { 2679 pvh_gen = pvh->pv_gen; 2680 md_gen = m->md.pv_gen; 2681 rw_wunlock(lock); 2682 PMAP_LOCK(pmap); 2683 rw_wlock(lock); 2684 if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { 2685 rw_wunlock(lock); 2686 PMAP_UNLOCK(pmap); 2687 goto retry; 2688 } 2689 } 2690 pmap_resident_count_dec(pmap, 1); 2691 2692 pde = pmap_pde(pmap, pv->pv_va, &lvl); 2693 KASSERT(pde != NULL, 2694 ("pmap_remove_all: no page directory entry found")); 2695 KASSERT(lvl == 2, 2696 ("pmap_remove_all: invalid pde level %d", lvl)); 2697 tpde = pmap_load(pde); 2698 2699 pte = pmap_l2_to_l3(pde, pv->pv_va); 2700 tpte = pmap_load(pte); 2701 pmap_load_clear(pte); 2702 pmap_invalidate_page(pmap, pv->pv_va); 2703 if (tpte & ATTR_SW_WIRED) 2704 pmap->pm_stats.wired_count--; 2705 if ((tpte & ATTR_AF) != 0) 2706 vm_page_aflag_set(m, PGA_REFERENCED); 2707 2708 /* 2709 * Update the vm_page_t clean and reference bits. 2710 */ 2711 if (pmap_page_dirty(tpte)) 2712 vm_page_dirty(m); 2713 pmap_unuse_pt(pmap, pv->pv_va, tpde, &free); 2714 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2715 m->md.pv_gen++; 2716 free_pv_entry(pmap, pv); 2717 PMAP_UNLOCK(pmap); 2718 } 2719 vm_page_aflag_clear(m, PGA_WRITEABLE); 2720 rw_wunlock(lock); 2721 vm_page_free_pages_toq(&free, false); 2722 } 2723 2724 /* 2725 * Set the physical protection on the 2726 * specified range of this map as requested. 
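 *
 * Write permission is revoked by setting ATTR_AP(ATTR_AP_RO) and execute
 * permission by setting ATTR_XN.  Any 2MB block mapping within the range
 * is first demoted to 4KB page mappings.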
2727 */ 2728 void 2729 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2730 { 2731 vm_offset_t va, va_next; 2732 pd_entry_t *l0, *l1, *l2; 2733 pt_entry_t *l3p, l3, nbits; 2734 2735 KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); 2736 if (prot == VM_PROT_NONE) { 2737 pmap_remove(pmap, sva, eva); 2738 return; 2739 } 2740 2741 if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == 2742 (VM_PROT_WRITE | VM_PROT_EXECUTE)) 2743 return; 2744 2745 PMAP_LOCK(pmap); 2746 for (; sva < eva; sva = va_next) { 2747 2748 l0 = pmap_l0(pmap, sva); 2749 if (pmap_load(l0) == 0) { 2750 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2751 if (va_next < sva) 2752 va_next = eva; 2753 continue; 2754 } 2755 2756 l1 = pmap_l0_to_l1(l0, sva); 2757 if (pmap_load(l1) == 0) { 2758 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2759 if (va_next < sva) 2760 va_next = eva; 2761 continue; 2762 } 2763 2764 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2765 if (va_next < sva) 2766 va_next = eva; 2767 2768 l2 = pmap_l1_to_l2(l1, sva); 2769 if (pmap_load(l2) == 0) 2770 continue; 2771 2772 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { 2773 l3p = pmap_demote_l2(pmap, l2, sva); 2774 if (l3p == NULL) 2775 continue; 2776 } 2777 KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, 2778 ("pmap_protect: Invalid L2 entry after demotion")); 2779 2780 if (va_next > eva) 2781 va_next = eva; 2782 2783 va = va_next; 2784 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 2785 sva += L3_SIZE) { 2786 l3 = pmap_load(l3p); 2787 if (!pmap_l3_valid(l3)) 2788 continue; 2789 2790 nbits = 0; 2791 if ((prot & VM_PROT_WRITE) == 0) { 2792 if ((l3 & ATTR_SW_MANAGED) && 2793 pmap_page_dirty(l3)) { 2794 vm_page_dirty(PHYS_TO_VM_PAGE(l3 & 2795 ~ATTR_MASK)); 2796 } 2797 nbits |= ATTR_AP(ATTR_AP_RO); 2798 } 2799 if ((prot & VM_PROT_EXECUTE) == 0) 2800 nbits |= ATTR_XN; 2801 2802 pmap_set(l3p, nbits); 2803 /* XXX: Use pmap_invalidate_range */ 2804 pmap_invalidate_page(pmap, sva); 2805 } 2806 } 2807 PMAP_UNLOCK(pmap); 2808 } 2809 2810 /* 2811 * Inserts the specified page table page into the specified pmap's collection 2812 * of idle page table pages. Each of a pmap's page table pages is responsible 2813 * for mapping a distinct range of virtual addresses. The pmap's collection is 2814 * ordered by this virtual address range. 2815 */ 2816 static __inline int 2817 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) 2818 { 2819 2820 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2821 return (vm_radix_insert(&pmap->pm_root, mpte)); 2822 } 2823 2824 /* 2825 * Removes the page table page mapping the specified virtual address from the 2826 * specified pmap's collection of idle page table pages, and returns it. 2827 * Otherwise, returns NULL if there is no page table page corresponding to the 2828 * specified virtual address. 2829 */ 2830 static __inline vm_page_t 2831 pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) 2832 { 2833 2834 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2835 return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va))); 2836 } 2837 2838 /* 2839 * Performs a break-before-make update of a pmap entry. This is needed when 2840 * either promoting or demoting pages to ensure the TLB doesn't get into an 2841 * inconsistent state. 2842 */ 2843 static void 2844 pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, 2845 vm_offset_t va, vm_size_t size) 2846 { 2847 register_t intr; 2848 2849 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2850 2851 /* 2852 * Ensure we don't get switched out with the page table in an 2853 * inconsistent state. 
We also need to ensure no interrupts fire 2854 * as they may make use of an address we are about to invalidate. 2855 */ 2856 intr = intr_disable(); 2857 critical_enter(); 2858 2859 /* Clear the old mapping */ 2860 pmap_load_clear(pte); 2861 pmap_invalidate_range_nopin(pmap, va, va + size); 2862 2863 /* Create the new mapping */ 2864 pmap_load_store(pte, newpte); 2865 dsb(ishst); 2866 2867 critical_exit(); 2868 intr_restore(intr); 2869 } 2870 2871 #if VM_NRESERVLEVEL > 0 2872 /* 2873 * After promotion from 512 4KB page mappings to a single 2MB page mapping, 2874 * replace the many pv entries for the 4KB page mappings by a single pv entry 2875 * for the 2MB page mapping. 2876 */ 2877 static void 2878 pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, 2879 struct rwlock **lockp) 2880 { 2881 struct md_page *pvh; 2882 pv_entry_t pv; 2883 vm_offset_t va_last; 2884 vm_page_t m; 2885 2886 KASSERT((pa & L2_OFFSET) == 0, 2887 ("pmap_pv_promote_l2: pa is not 2mpage aligned")); 2888 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); 2889 2890 /* 2891 * Transfer the first page's pv entry for this mapping to the 2mpage's 2892 * pv list.  Aside from avoiding the cost of a call to get_pv_entry(), 2893 * a transfer avoids the possibility that get_pv_entry() calls 2894 * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the 2895 * mappings that is being promoted. 2896 */ 2897 m = PHYS_TO_VM_PAGE(pa); 2898 va = va & ~L2_OFFSET; 2899 pv = pmap_pvh_remove(&m->md, pmap, va); 2900 KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found")); 2901 pvh = pa_to_pvh(pa); 2902 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 2903 pvh->pv_gen++; 2904 /* Free the remaining NPTEPG - 1 pv entries. */ 2905 va_last = va + L2_SIZE - PAGE_SIZE; 2906 do { 2907 m++; 2908 va += PAGE_SIZE; 2909 pmap_pvh_free(&m->md, pmap, va); 2910 } while (va < va_last); 2911 } 2912 2913 /* 2914 * Tries to promote the 512 contiguous 4KB page mappings that are within a 2915 * single level 2 table entry to a single 2MB page mapping.  For promotion 2916 * to occur, two conditions must be met: (1) the 4KB page mappings must map 2917 * aligned, contiguous physical memory and (2) the 4KB page mappings must have 2918 * identical characteristics. 2919 */ 2920 static void 2921 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, 2922 struct rwlock **lockp) 2923 { 2924 pt_entry_t *firstl3, *l3, newl2, oldl3, pa; 2925 vm_page_t mpte; 2926 vm_offset_t sva; 2927 2928 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2929 2930 sva = va & ~L2_OFFSET; 2931 firstl3 = pmap_l2_to_l3(l2, sva); 2932 newl2 = pmap_load(firstl3); 2933 2934 /* Check that the alignment is valid */ 2935 if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) { 2936 atomic_add_long(&pmap_l2_p_failures, 1); 2937 CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" 2938 " in pmap %p", va, pmap); 2939 return; 2940 } 2941 2942 pa = newl2 + L2_SIZE - PAGE_SIZE; 2943 for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) { 2944 oldl3 = pmap_load(l3); 2945 if (oldl3 != pa) { 2946 atomic_add_long(&pmap_l2_p_failures, 1); 2947 CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" 2948 " in pmap %p", va, pmap); 2949 return; 2950 } 2951 pa -= PAGE_SIZE; 2952 } 2953 2954 /* 2955 * Save the page table page in its current state until the L2 2956 * mapping the superpage is demoted by pmap_demote_l2() or 2957 * destroyed by pmap_remove_l3().
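 * The page is stashed in the pmap's radix trie of idle page table pages
 * (pmap_insert_pt_page()) so that a later demotion can reinstate it
 * without having to allocate a fresh L3 table.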
2958 */ 2959 mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); 2960 KASSERT(mpte >= vm_page_array && 2961 mpte < &vm_page_array[vm_page_array_size], 2962 ("pmap_promote_l2: page table page is out of range")); 2963 KASSERT(mpte->pindex == pmap_l2_pindex(va), 2964 ("pmap_promote_l2: page table page's pindex is wrong")); 2965 if (pmap_insert_pt_page(pmap, mpte)) { 2966 atomic_add_long(&pmap_l2_p_failures, 1); 2967 CTR2(KTR_PMAP, 2968 "pmap_promote_l2: failure for va %#lx in pmap %p", va, 2969 pmap); 2970 return; 2971 } 2972 2973 if ((newl2 & ATTR_SW_MANAGED) != 0) 2974 pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp); 2975 2976 newl2 &= ~ATTR_DESCR_MASK; 2977 newl2 |= L2_BLOCK; 2978 2979 pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE); 2980 2981 atomic_add_long(&pmap_l2_promotions, 1); 2982 CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, 2983 pmap); 2984 } 2985 #endif /* VM_NRESERVLEVEL > 0 */ 2986 2987 /* 2988 * Insert the given physical page (p) at 2989 * the specified virtual address (v) in the 2990 * target physical map with the protection requested. 2991 * 2992 * If specified, the page will be wired down, meaning 2993 * that the related pte can not be reclaimed. 2994 * 2995 * NB: This is the only routine which MAY NOT lazy-evaluate 2996 * or lose information. That is, this routine must actually 2997 * insert this page into the given map NOW. 2998 */ 2999 int 3000 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 3001 u_int flags, int8_t psind) 3002 { 3003 struct rwlock *lock; 3004 pd_entry_t *pde; 3005 pt_entry_t new_l3, orig_l3; 3006 pt_entry_t *l2, *l3; 3007 pv_entry_t pv; 3008 vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; 3009 vm_page_t mpte, om, l1_m, l2_m, l3_m; 3010 boolean_t nosleep; 3011 int lvl, rv; 3012 3013 va = trunc_page(va); 3014 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 3015 VM_OBJECT_ASSERT_LOCKED(m->object); 3016 pa = VM_PAGE_TO_PHYS(m); 3017 new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 3018 L3_PAGE); 3019 if ((prot & VM_PROT_WRITE) == 0) 3020 new_l3 |= ATTR_AP(ATTR_AP_RO); 3021 if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY) 3022 new_l3 |= ATTR_XN; 3023 if ((flags & PMAP_ENTER_WIRED) != 0) 3024 new_l3 |= ATTR_SW_WIRED; 3025 if (va < VM_MAXUSER_ADDRESS) 3026 new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN; 3027 if ((m->oflags & VPO_UNMANAGED) == 0) 3028 new_l3 |= ATTR_SW_MANAGED; 3029 3030 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 3031 3032 lock = NULL; 3033 mpte = NULL; 3034 PMAP_LOCK(pmap); 3035 if (psind == 1) { 3036 /* Assert the required virtual and physical alignment. */ 3037 KASSERT((va & L2_OFFSET) == 0, ("pmap_enter: va unaligned")); 3038 KASSERT(m->psind > 0, ("pmap_enter: m->psind < psind")); 3039 rv = pmap_enter_l2(pmap, va, (new_l3 & ~L3_PAGE) | L2_BLOCK, 3040 flags, m, &lock); 3041 goto out; 3042 } 3043 3044 pde = pmap_pde(pmap, va, &lvl); 3045 if (pde != NULL && lvl == 1) { 3046 l2 = pmap_l1_to_l2(pde, va); 3047 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && 3048 (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET, 3049 &lock)) != NULL) { 3050 l3 = &l3[pmap_l3_index(va)]; 3051 if (va < VM_MAXUSER_ADDRESS) { 3052 mpte = PHYS_TO_VM_PAGE( 3053 pmap_load(l2) & ~ATTR_MASK); 3054 mpte->wire_count++; 3055 } 3056 goto havel3; 3057 } 3058 } 3059 3060 if (va < VM_MAXUSER_ADDRESS) { 3061 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 3062 mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); 3063 if (mpte == NULL && nosleep) { 3064 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 3065 if (lock != NULL) 3066 rw_wunlock(lock); 3067 PMAP_UNLOCK(pmap); 3068 return (KERN_RESOURCE_SHORTAGE); 3069 } 3070 pde = pmap_pde(pmap, va, &lvl); 3071 KASSERT(pde != NULL, 3072 ("pmap_enter: Invalid page entry, va: 0x%lx", va)); 3073 KASSERT(lvl == 2, 3074 ("pmap_enter: Invalid level %d", lvl)); 3075 } else { 3076 /* 3077 * If we get a level 2 pde it must point to a level 3 entry 3078 * otherwise we will need to create the intermediate tables 3079 */ 3080 if (lvl < 2) { 3081 switch (lvl) { 3082 default: 3083 case -1: 3084 /* Get the l0 pde to update */ 3085 pde = pmap_l0(pmap, va); 3086 KASSERT(pde != NULL, ("...")); 3087 3088 l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 3089 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 3090 VM_ALLOC_ZERO); 3091 if (l1_m == NULL) 3092 panic("pmap_enter: l1 pte_m == NULL"); 3093 if ((l1_m->flags & PG_ZERO) == 0) 3094 pmap_zero_page(l1_m); 3095 3096 l1_pa = VM_PAGE_TO_PHYS(l1_m); 3097 pmap_load_store(pde, l1_pa | L0_TABLE); 3098 /* FALLTHROUGH */ 3099 case 0: 3100 /* Get the l1 pde to update */ 3101 pde = pmap_l1_to_l2(pde, va); 3102 KASSERT(pde != NULL, ("...")); 3103 3104 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 3105 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 3106 VM_ALLOC_ZERO); 3107 if (l2_m == NULL) 3108 panic("pmap_enter: l2 pte_m == NULL"); 3109 if ((l2_m->flags & PG_ZERO) == 0) 3110 pmap_zero_page(l2_m); 3111 3112 l2_pa = VM_PAGE_TO_PHYS(l2_m); 3113 pmap_load_store(pde, l2_pa | L1_TABLE); 3114 /* FALLTHROUGH */ 3115 case 1: 3116 /* Get the l2 pde to update */ 3117 pde = pmap_l1_to_l2(pde, va); 3118 KASSERT(pde != NULL, ("...")); 3119 3120 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 3121 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 3122 VM_ALLOC_ZERO); 3123 if (l3_m == NULL) 3124 panic("pmap_enter: l3 pte_m == NULL"); 3125 if ((l3_m->flags & PG_ZERO) == 0) 3126 pmap_zero_page(l3_m); 3127 3128 l3_pa = VM_PAGE_TO_PHYS(l3_m); 3129 pmap_load_store(pde, l3_pa | L2_TABLE); 3130 break; 3131 } 3132 } 3133 } 3134 l3 = pmap_l2_to_l3(pde, va); 3135 3136 havel3: 3137 orig_l3 = pmap_load(l3); 3138 opa = orig_l3 & ~ATTR_MASK; 3139 pv = NULL; 3140 3141 /* 3142 * Is the specified virtual address already mapped? 3143 */ 3144 if (pmap_l3_valid(orig_l3)) { 3145 /* 3146 * Wiring change, just update stats. We don't worry about 3147 * wiring PT pages as they remain resident as long as there 3148 * are valid mappings in them. Hence, if a user page is wired, 3149 * the PT page will be also. 3150 */ 3151 if ((flags & PMAP_ENTER_WIRED) != 0 && 3152 (orig_l3 & ATTR_SW_WIRED) == 0) 3153 pmap->pm_stats.wired_count++; 3154 else if ((flags & PMAP_ENTER_WIRED) == 0 && 3155 (orig_l3 & ATTR_SW_WIRED) != 0) 3156 pmap->pm_stats.wired_count--; 3157 3158 /* 3159 * Remove the extra PT page reference. 3160 */ 3161 if (mpte != NULL) { 3162 mpte->wire_count--; 3163 KASSERT(mpte->wire_count > 0, 3164 ("pmap_enter: missing reference to page table page," 3165 " va: 0x%lx", va)); 3166 } 3167 3168 /* 3169 * Has the physical page changed? 3170 */ 3171 if (opa == pa) { 3172 /* 3173 * No, might be a protection or wiring change. 3174 */ 3175 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 3176 if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == 3177 ATTR_AP(ATTR_AP_RW)) { 3178 vm_page_aflag_set(m, PGA_WRITEABLE); 3179 } 3180 } 3181 goto validate; 3182 } 3183 3184 /* 3185 * The physical page has changed. 
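 * Unmap the old page.  If the old mapping was managed, its dirty and
 * referenced bits are transferred to the old vm_page, and its pv entry
 * is either freed (when the new page is unmanaged) or reused below for
 * the new page.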
3186 */ 3187 (void)pmap_load_clear(l3); 3188 KASSERT((orig_l3 & ~ATTR_MASK) == opa, 3189 ("pmap_enter: unexpected pa update for %#lx", va)); 3190 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 3191 om = PHYS_TO_VM_PAGE(opa); 3192 3193 /* 3194 * The pmap lock is sufficient to synchronize with 3195 * concurrent calls to pmap_page_test_mappings() and 3196 * pmap_ts_referenced(). 3197 */ 3198 if (pmap_page_dirty(orig_l3)) 3199 vm_page_dirty(om); 3200 if ((orig_l3 & ATTR_AF) != 0) 3201 vm_page_aflag_set(om, PGA_REFERENCED); 3202 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 3203 pv = pmap_pvh_remove(&om->md, pmap, va); 3204 if ((m->oflags & VPO_UNMANAGED) != 0) 3205 free_pv_entry(pmap, pv); 3206 if ((om->aflags & PGA_WRITEABLE) != 0 && 3207 TAILQ_EMPTY(&om->md.pv_list) && 3208 ((om->flags & PG_FICTITIOUS) != 0 || 3209 TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) 3210 vm_page_aflag_clear(om, PGA_WRITEABLE); 3211 } 3212 pmap_invalidate_page(pmap, va); 3213 orig_l3 = 0; 3214 } else { 3215 /* 3216 * Increment the counters. 3217 */ 3218 if ((new_l3 & ATTR_SW_WIRED) != 0) 3219 pmap->pm_stats.wired_count++; 3220 pmap_resident_count_inc(pmap, 1); 3221 } 3222 /* 3223 * Enter on the PV list if part of our managed memory. 3224 */ 3225 if ((m->oflags & VPO_UNMANAGED) == 0) { 3226 if (pv == NULL) { 3227 pv = get_pv_entry(pmap, &lock); 3228 pv->pv_va = va; 3229 } 3230 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 3231 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3232 m->md.pv_gen++; 3233 if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 3234 vm_page_aflag_set(m, PGA_WRITEABLE); 3235 } 3236 3237 validate: 3238 /* 3239 * Sync the icache if the mapping has exec permission and the attribute 3240 * VM_MEMATTR_WRITE_BACK is set.  Do it now, before the mapping is stored 3241 * and made valid for hardware table walks.  If done later, other CPUs 3242 * could access this page before the caches are properly synced. 3243 * Don't do it for kernel memory, which is mapped with exec 3244 * permission even if the memory isn't going to hold executable 3245 * code.  The only time an icache sync is needed there is after a 3246 * kernel module is loaded and the relocation info is processed, 3247 * and that is done in elf_cpu_load_file(). 3248 */ 3249 if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap && 3250 m->md.pv_memattr == VM_MEMATTR_WRITE_BACK && 3251 (opa != pa || (orig_l3 & ATTR_XN))) 3252 cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE); 3253 3254 /* 3255 * Update the L3 entry 3256 */ 3257 if (pmap_l3_valid(orig_l3)) { 3258 KASSERT(opa == pa, ("pmap_enter: invalid update")); 3259 if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) { 3260 /* same PA, different attributes */ 3261 pmap_load_store(l3, new_l3); 3262 pmap_invalidate_page(pmap, va); 3263 if (pmap_page_dirty(orig_l3) && 3264 (orig_l3 & ATTR_SW_MANAGED) != 0) 3265 vm_page_dirty(m); 3266 } else { 3267 /* 3268 * orig_l3 == new_l3 3269 * This can happen if multiple threads simultaneously 3270 * access a not yet mapped page.  This is bad for performance 3271 * since it can cause a full demotion-NOP-promotion 3272 * cycle. 3273 * Other possible reasons are: 3274 * - the VM and pmap memory layouts have diverged 3275 * - a TLB flush is missing somewhere and the CPU doesn't see 3276 * the actual mapping.
3277 */ 3278 CTR4(KTR_PMAP, "%s: already mapped page - " 3279 "pmap %p va 0x%#lx pte 0x%lx", 3280 __func__, pmap, va, new_l3); 3281 } 3282 } else { 3283 /* New mapping */ 3284 pmap_load_store(l3, new_l3); 3285 dsb(ishst); 3286 } 3287 3288 #if VM_NRESERVLEVEL > 0 3289 if (pmap != pmap_kernel() && 3290 (mpte == NULL || mpte->wire_count == NL3PG) && 3291 pmap_ps_enabled(pmap) && 3292 (m->flags & PG_FICTITIOUS) == 0 && 3293 vm_reserv_level_iffullpop(m) == 0) { 3294 pmap_promote_l2(pmap, pde, va, &lock); 3295 } 3296 #endif 3297 3298 rv = KERN_SUCCESS; 3299 out: 3300 if (lock != NULL) 3301 rw_wunlock(lock); 3302 PMAP_UNLOCK(pmap); 3303 return (rv); 3304 } 3305 3306 /* 3307 * Tries to create a read- and/or execute-only 2MB page mapping.  Returns true 3308 * if successful.  Returns false if (1) a page table page cannot be allocated 3309 * without sleeping, (2) a mapping already exists at the specified virtual 3310 * address, or (3) a PV entry cannot be allocated without reclaiming another 3311 * PV entry. 3312 */ 3313 static bool 3314 pmap_enter_2mpage(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 3315 struct rwlock **lockp) 3316 { 3317 pd_entry_t new_l2; 3318 3319 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3320 3321 new_l2 = (pd_entry_t)(VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | 3322 ATTR_IDX(m->md.pv_memattr) | ATTR_AP(ATTR_AP_RO) | L2_BLOCK); 3323 if ((m->oflags & VPO_UNMANAGED) == 0) 3324 new_l2 |= ATTR_SW_MANAGED; 3325 if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY) 3326 new_l2 |= ATTR_XN; 3327 if (va < VM_MAXUSER_ADDRESS) 3328 new_l2 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN; 3329 return (pmap_enter_l2(pmap, va, new_l2, PMAP_ENTER_NOSLEEP | 3330 PMAP_ENTER_NOREPLACE | PMAP_ENTER_NORECLAIM, NULL, lockp) == 3331 KERN_SUCCESS); 3332 } 3333 3334 /* 3335 * Tries to create the specified 2MB page mapping.  Returns KERN_SUCCESS if 3336 * the mapping was created, and either KERN_FAILURE or KERN_RESOURCE_SHORTAGE 3337 * otherwise.  Returns KERN_FAILURE if PMAP_ENTER_NOREPLACE was specified and 3338 * a mapping already exists at the specified virtual address.  Returns 3339 * KERN_RESOURCE_SHORTAGE if PMAP_ENTER_NOSLEEP was specified and a page table 3340 * page allocation failed.  Returns KERN_RESOURCE_SHORTAGE if 3341 * PMAP_ENTER_NORECLAIM was specified and a PV entry allocation failed. 3342 * 3343 * The parameter "m" is only used when creating a managed, writeable mapping. 3344 */ 3345 static int 3346 pmap_enter_l2(pmap_t pmap, vm_offset_t va, pd_entry_t new_l2, u_int flags, 3347 vm_page_t m, struct rwlock **lockp) 3348 { 3349 struct spglist free; 3350 pd_entry_t *l2, *l3, old_l2; 3351 vm_offset_t sva; 3352 vm_page_t l2pg, mt; 3353 3354 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3355 3356 if ((l2pg = pmap_alloc_l2(pmap, va, (flags & PMAP_ENTER_NOSLEEP) != 0 ?
3357 NULL : lockp)) == NULL) { 3358 CTR2(KTR_PMAP, "pmap_enter_l2: failure for va %#lx in pmap %p", 3359 va, pmap); 3360 return (KERN_RESOURCE_SHORTAGE); 3361 } 3362 3363 l2 = (pd_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(l2pg)); 3364 l2 = &l2[pmap_l2_index(va)]; 3365 if ((old_l2 = pmap_load(l2)) != 0) { 3366 KASSERT(l2pg->wire_count > 1, 3367 ("pmap_enter_l2: l2pg's wire count is too low")); 3368 if ((flags & PMAP_ENTER_NOREPLACE) != 0) { 3369 l2pg->wire_count--; 3370 CTR2(KTR_PMAP, 3371 "pmap_enter_l2: failure for va %#lx in pmap %p", 3372 va, pmap); 3373 return (KERN_FAILURE); 3374 } 3375 SLIST_INIT(&free); 3376 if ((old_l2 & ATTR_DESCR_MASK) == L2_BLOCK) 3377 (void)pmap_remove_l2(pmap, l2, va, 3378 pmap_load(pmap_l1(pmap, va)), &free, lockp); 3379 else 3380 for (sva = va; sva < va + L2_SIZE; sva += PAGE_SIZE) { 3381 l3 = pmap_l2_to_l3(l2, sva); 3382 if (pmap_l3_valid(pmap_load(l3)) && 3383 pmap_remove_l3(pmap, l3, sva, old_l2, &free, 3384 lockp) != 0) 3385 break; 3386 } 3387 vm_page_free_pages_toq(&free, true); 3388 if (va >= VM_MAXUSER_ADDRESS) { 3389 mt = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); 3390 if (pmap_insert_pt_page(pmap, mt)) { 3391 /* 3392 * XXX Currently, this can't happen because 3393 * we do not perform pmap_enter(psind == 1) 3394 * on the kernel pmap. 3395 */ 3396 panic("pmap_enter_l2: trie insert failed"); 3397 } 3398 } else 3399 KASSERT(pmap_load(l2) == 0, 3400 ("pmap_enter_l2: non-zero L2 entry %p", l2)); 3401 } 3402 3403 if ((new_l2 & ATTR_SW_MANAGED) != 0) { 3404 /* 3405 * Abort this mapping if its PV entry could not be created. 3406 */ 3407 if (!pmap_pv_insert_l2(pmap, va, new_l2, flags, lockp)) { 3408 SLIST_INIT(&free); 3409 if (pmap_unwire_l3(pmap, va, l2pg, &free)) { 3410 /* 3411 * Although "va" is not mapped, paging-structure 3412 * caches could nonetheless have entries that 3413 * refer to the freed page table pages. 3414 * Invalidate those entries. 3415 */ 3416 pmap_invalidate_page(pmap, va); 3417 vm_page_free_pages_toq(&free, true); 3418 } 3419 CTR2(KTR_PMAP, 3420 "pmap_enter_l2: failure for va %#lx in pmap %p", 3421 va, pmap); 3422 return (KERN_RESOURCE_SHORTAGE); 3423 } 3424 if ((new_l2 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 3425 for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) 3426 vm_page_aflag_set(mt, PGA_WRITEABLE); 3427 } 3428 3429 /* 3430 * Increment counters. 3431 */ 3432 if ((new_l2 & ATTR_SW_WIRED) != 0) 3433 pmap->pm_stats.wired_count += L2_SIZE / PAGE_SIZE; 3434 pmap->pm_stats.resident_count += L2_SIZE / PAGE_SIZE; 3435 3436 /* 3437 * Map the superpage. 3438 */ 3439 (void)pmap_load_store(l2, new_l2); 3440 dsb(ishst); 3441 3442 atomic_add_long(&pmap_l2_mappings, 1); 3443 CTR2(KTR_PMAP, "pmap_enter_l2: success for va %#lx in pmap %p", 3444 va, pmap); 3445 3446 return (KERN_SUCCESS); 3447 } 3448 3449 /* 3450 * Maps a sequence of resident pages belonging to the same object. 3451 * The sequence begins with the given page m_start.  This page is 3452 * mapped at the given virtual address start.  Each subsequent page is 3453 * mapped at a virtual address that is offset from start by the same 3454 * amount as the page is offset from m_start within the object.  The 3455 * last page in the sequence is the page with the largest offset from 3456 * m_start that can be mapped at a virtual address less than the given 3457 * virtual address end.  Not every virtual page between start and end 3458 * is mapped; only those for which a resident page exists with the 3459 * corresponding offset from m_start are mapped.
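 *
 * 2MB-aligned runs that are backed by a fully populated superpage
 * reservation are entered with pmap_enter_2mpage(); all other pages are
 * entered with pmap_enter_quick_locked(), which creates read-only 4KB
 * mappings.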
3460 */ 3461 void 3462 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3463 vm_page_t m_start, vm_prot_t prot) 3464 { 3465 struct rwlock *lock; 3466 vm_offset_t va; 3467 vm_page_t m, mpte; 3468 vm_pindex_t diff, psize; 3469 3470 VM_OBJECT_ASSERT_LOCKED(m_start->object); 3471 3472 psize = atop(end - start); 3473 mpte = NULL; 3474 m = m_start; 3475 lock = NULL; 3476 PMAP_LOCK(pmap); 3477 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3478 va = start + ptoa(diff); 3479 if ((va & L2_OFFSET) == 0 && va + L2_SIZE <= end && 3480 m->psind == 1 && pmap_ps_enabled(pmap) && 3481 pmap_enter_2mpage(pmap, va, m, prot, &lock)) 3482 m = &m[L2_SIZE / PAGE_SIZE - 1]; 3483 else 3484 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, 3485 &lock); 3486 m = TAILQ_NEXT(m, listq); 3487 } 3488 if (lock != NULL) 3489 rw_wunlock(lock); 3490 PMAP_UNLOCK(pmap); 3491 } 3492 3493 /* 3494 * this code makes some *MAJOR* assumptions: 3495 * 1. Current pmap & pmap exists. 3496 * 2. Not wired. 3497 * 3. Read access. 3498 * 4. No page table pages. 3499 * but is *MUCH* faster than pmap_enter... 3500 */ 3501 3502 void 3503 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3504 { 3505 struct rwlock *lock; 3506 3507 lock = NULL; 3508 PMAP_LOCK(pmap); 3509 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 3510 if (lock != NULL) 3511 rw_wunlock(lock); 3512 PMAP_UNLOCK(pmap); 3513 } 3514 3515 static vm_page_t 3516 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 3517 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 3518 { 3519 struct spglist free; 3520 pd_entry_t *pde; 3521 pt_entry_t *l2, *l3, l3_val; 3522 vm_paddr_t pa; 3523 int lvl; 3524 3525 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 3526 (m->oflags & VPO_UNMANAGED) != 0, 3527 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 3528 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3529 3530 CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); 3531 /* 3532 * In the case that a page table page is not 3533 * resident, we are creating it here. 3534 */ 3535 if (va < VM_MAXUSER_ADDRESS) { 3536 vm_pindex_t l2pindex; 3537 3538 /* 3539 * Calculate pagetable page index 3540 */ 3541 l2pindex = pmap_l2_pindex(va); 3542 if (mpte && (mpte->pindex == l2pindex)) { 3543 mpte->wire_count++; 3544 } else { 3545 /* 3546 * Get the l2 entry 3547 */ 3548 pde = pmap_pde(pmap, va, &lvl); 3549 3550 /* 3551 * If the page table page is mapped, we just increment 3552 * the hold count, and activate it. Otherwise, we 3553 * attempt to allocate a page table page. If this 3554 * attempt fails, we don't retry. Instead, we give up. 3555 */ 3556 if (lvl == 1) { 3557 l2 = pmap_l1_to_l2(pde, va); 3558 if ((pmap_load(l2) & ATTR_DESCR_MASK) == 3559 L2_BLOCK) 3560 return (NULL); 3561 } 3562 if (lvl == 2 && pmap_load(pde) != 0) { 3563 mpte = 3564 PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); 3565 mpte->wire_count++; 3566 } else { 3567 /* 3568 * Pass NULL instead of the PV list lock 3569 * pointer, because we don't intend to sleep. 
3570 */ 3571 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 3572 if (mpte == NULL) 3573 return (mpte); 3574 } 3575 } 3576 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 3577 l3 = &l3[pmap_l3_index(va)]; 3578 } else { 3579 mpte = NULL; 3580 pde = pmap_pde(kernel_pmap, va, &lvl); 3581 KASSERT(pde != NULL, 3582 ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", 3583 va)); 3584 KASSERT(lvl == 2, 3585 ("pmap_enter_quick_locked: Invalid level %d", lvl)); 3586 l3 = pmap_l2_to_l3(pde, va); 3587 } 3588 3589 if (pmap_load(l3) != 0) { 3590 if (mpte != NULL) { 3591 mpte->wire_count--; 3592 mpte = NULL; 3593 } 3594 return (mpte); 3595 } 3596 3597 /* 3598 * Enter on the PV list if part of our managed memory. 3599 */ 3600 if ((m->oflags & VPO_UNMANAGED) == 0 && 3601 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 3602 if (mpte != NULL) { 3603 SLIST_INIT(&free); 3604 if (pmap_unwire_l3(pmap, va, mpte, &free)) { 3605 pmap_invalidate_page(pmap, va); 3606 vm_page_free_pages_toq(&free, false); 3607 } 3608 mpte = NULL; 3609 } 3610 return (mpte); 3611 } 3612 3613 /* 3614 * Increment counters 3615 */ 3616 pmap_resident_count_inc(pmap, 1); 3617 3618 pa = VM_PAGE_TO_PHYS(m); 3619 l3_val = pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 3620 ATTR_AP(ATTR_AP_RO) | L3_PAGE; 3621 if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY) 3622 l3_val |= ATTR_XN; 3623 else if (va < VM_MAXUSER_ADDRESS) 3624 l3_val |= ATTR_PXN; 3625 3626 /* 3627 * Now validate mapping with RO protection 3628 */ 3629 if ((m->oflags & VPO_UNMANAGED) == 0) 3630 l3_val |= ATTR_SW_MANAGED; 3631 3632 /* Sync icache before the mapping is stored to PTE */ 3633 if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap && 3634 m->md.pv_memattr == VM_MEMATTR_WRITE_BACK) 3635 cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE); 3636 3637 pmap_load_store(l3, l3_val); 3638 pmap_invalidate_page(pmap, va); 3639 return (mpte); 3640 } 3641 3642 /* 3643 * This code maps large physical mmap regions into the 3644 * processor address space. Note that some shortcuts 3645 * are taken, but the code works. 3646 */ 3647 void 3648 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 3649 vm_pindex_t pindex, vm_size_t size) 3650 { 3651 3652 VM_OBJECT_ASSERT_WLOCKED(object); 3653 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3654 ("pmap_object_init_pt: non-device object")); 3655 } 3656 3657 /* 3658 * Clear the wired attribute from the mappings for the specified range of 3659 * addresses in the given pmap. Every valid mapping within that range 3660 * must have the wired attribute set. In contrast, invalid mappings 3661 * cannot have the wired attribute set, so they are ignored. 3662 * 3663 * The wired attribute of the page table entry is not a hardware feature, 3664 * so there is no need to invalidate any TLB entries. 
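 *
 * A 2MB block mapping that lies within the range is first demoted, after
 * which ATTR_SW_WIRED is cleared from each constituent 4KB mapping.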
3665 */ 3666 void 3667 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3668 { 3669 vm_offset_t va_next; 3670 pd_entry_t *l0, *l1, *l2; 3671 pt_entry_t *l3; 3672 3673 PMAP_LOCK(pmap); 3674 for (; sva < eva; sva = va_next) { 3675 l0 = pmap_l0(pmap, sva); 3676 if (pmap_load(l0) == 0) { 3677 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 3678 if (va_next < sva) 3679 va_next = eva; 3680 continue; 3681 } 3682 3683 l1 = pmap_l0_to_l1(l0, sva); 3684 if (pmap_load(l1) == 0) { 3685 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 3686 if (va_next < sva) 3687 va_next = eva; 3688 continue; 3689 } 3690 3691 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 3692 if (va_next < sva) 3693 va_next = eva; 3694 3695 l2 = pmap_l1_to_l2(l1, sva); 3696 if (pmap_load(l2) == 0) 3697 continue; 3698 3699 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { 3700 l3 = pmap_demote_l2(pmap, l2, sva); 3701 if (l3 == NULL) 3702 continue; 3703 } 3704 KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, 3705 ("pmap_unwire: Invalid l2 entry after demotion")); 3706 3707 if (va_next > eva) 3708 va_next = eva; 3709 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 3710 sva += L3_SIZE) { 3711 if (pmap_load(l3) == 0) 3712 continue; 3713 if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) 3714 panic("pmap_unwire: l3 %#jx is missing " 3715 "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); 3716 3717 /* 3718 * PG_W must be cleared atomically. Although the pmap 3719 * lock synchronizes access to PG_W, another processor 3720 * could be setting PG_M and/or PG_A concurrently. 3721 */ 3722 atomic_clear_long(l3, ATTR_SW_WIRED); 3723 pmap->pm_stats.wired_count--; 3724 } 3725 } 3726 PMAP_UNLOCK(pmap); 3727 } 3728 3729 /* 3730 * Copy the range specified by src_addr/len 3731 * from the source map to the range dst_addr/len 3732 * in the destination map. 3733 * 3734 * This routine is only advisory and need not do anything. 3735 */ 3736 3737 void 3738 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3739 vm_offset_t src_addr) 3740 { 3741 } 3742 3743 /* 3744 * pmap_zero_page zeros the specified hardware page by mapping 3745 * the page into KVM and using bzero to clear its contents. 3746 */ 3747 void 3748 pmap_zero_page(vm_page_t m) 3749 { 3750 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3751 3752 pagezero((void *)va); 3753 } 3754 3755 /* 3756 * pmap_zero_page_area zeros the specified hardware page by mapping 3757 * the page into KVM and using bzero to clear its contents. 3758 * 3759 * off and size may not cover an area beyond a single hardware page. 3760 */ 3761 void 3762 pmap_zero_page_area(vm_page_t m, int off, int size) 3763 { 3764 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3765 3766 if (off == 0 && size == PAGE_SIZE) 3767 pagezero((void *)va); 3768 else 3769 bzero((char *)va + off, size); 3770 } 3771 3772 /* 3773 * pmap_copy_page copies the specified (machine independent) 3774 * page by mapping the page into virtual memory and using 3775 * bcopy to copy the page, one machine dependent page at a 3776 * time. 
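 *
 * On arm64 both pages are accessed through the direct map, so no
 * temporary kernel mappings are required.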
3777 */ 3778 void 3779 pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 3780 { 3781 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 3782 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 3783 3784 pagecopy((void *)src, (void *)dst); 3785 } 3786 3787 int unmapped_buf_allowed = 1; 3788 3789 void 3790 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 3791 vm_offset_t b_offset, int xfersize) 3792 { 3793 void *a_cp, *b_cp; 3794 vm_page_t m_a, m_b; 3795 vm_paddr_t p_a, p_b; 3796 vm_offset_t a_pg_offset, b_pg_offset; 3797 int cnt; 3798 3799 while (xfersize > 0) { 3800 a_pg_offset = a_offset & PAGE_MASK; 3801 m_a = ma[a_offset >> PAGE_SHIFT]; 3802 p_a = m_a->phys_addr; 3803 b_pg_offset = b_offset & PAGE_MASK; 3804 m_b = mb[b_offset >> PAGE_SHIFT]; 3805 p_b = m_b->phys_addr; 3806 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 3807 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 3808 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 3809 panic("!DMAP a %lx", p_a); 3810 } else { 3811 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 3812 } 3813 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 3814 panic("!DMAP b %lx", p_b); 3815 } else { 3816 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 3817 } 3818 bcopy(a_cp, b_cp, cnt); 3819 a_offset += cnt; 3820 b_offset += cnt; 3821 xfersize -= cnt; 3822 } 3823 } 3824 3825 vm_offset_t 3826 pmap_quick_enter_page(vm_page_t m) 3827 { 3828 3829 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 3830 } 3831 3832 void 3833 pmap_quick_remove_page(vm_offset_t addr) 3834 { 3835 } 3836 3837 /* 3838 * Returns true if the pmap's pv is one of the first 3839 * 16 pvs linked to from this page. This count may 3840 * be changed upwards or downwards in the future; it 3841 * is only necessary that true be returned for a small 3842 * subset of pmaps for proper page aging. 3843 */ 3844 boolean_t 3845 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3846 { 3847 struct md_page *pvh; 3848 struct rwlock *lock; 3849 pv_entry_t pv; 3850 int loops = 0; 3851 boolean_t rv; 3852 3853 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3854 ("pmap_page_exists_quick: page %p is not managed", m)); 3855 rv = FALSE; 3856 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3857 rw_rlock(lock); 3858 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3859 if (PV_PMAP(pv) == pmap) { 3860 rv = TRUE; 3861 break; 3862 } 3863 loops++; 3864 if (loops >= 16) 3865 break; 3866 } 3867 if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 3868 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3869 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 3870 if (PV_PMAP(pv) == pmap) { 3871 rv = TRUE; 3872 break; 3873 } 3874 loops++; 3875 if (loops >= 16) 3876 break; 3877 } 3878 } 3879 rw_runlock(lock); 3880 return (rv); 3881 } 3882 3883 /* 3884 * pmap_page_wired_mappings: 3885 * 3886 * Return the number of managed mappings to the given physical page 3887 * that are wired. 
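 *
 * Both the 4KB mappings on the page's own pv list and the 2MB mappings on
 * the corresponding pv header are counted.  If a pmap lock cannot be
 * acquired without blocking, the pv list lock is dropped and the scan is
 * restarted whenever a generation count shows the list has changed.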
3888 */ 3889 int 3890 pmap_page_wired_mappings(vm_page_t m) 3891 { 3892 struct rwlock *lock; 3893 struct md_page *pvh; 3894 pmap_t pmap; 3895 pt_entry_t *pte; 3896 pv_entry_t pv; 3897 int count, lvl, md_gen, pvh_gen; 3898 3899 if ((m->oflags & VPO_UNMANAGED) != 0) 3900 return (0); 3901 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3902 rw_rlock(lock); 3903 restart: 3904 count = 0; 3905 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3906 pmap = PV_PMAP(pv); 3907 if (!PMAP_TRYLOCK(pmap)) { 3908 md_gen = m->md.pv_gen; 3909 rw_runlock(lock); 3910 PMAP_LOCK(pmap); 3911 rw_rlock(lock); 3912 if (md_gen != m->md.pv_gen) { 3913 PMAP_UNLOCK(pmap); 3914 goto restart; 3915 } 3916 } 3917 pte = pmap_pte(pmap, pv->pv_va, &lvl); 3918 if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) 3919 count++; 3920 PMAP_UNLOCK(pmap); 3921 } 3922 if ((m->flags & PG_FICTITIOUS) == 0) { 3923 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3924 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 3925 pmap = PV_PMAP(pv); 3926 if (!PMAP_TRYLOCK(pmap)) { 3927 md_gen = m->md.pv_gen; 3928 pvh_gen = pvh->pv_gen; 3929 rw_runlock(lock); 3930 PMAP_LOCK(pmap); 3931 rw_rlock(lock); 3932 if (md_gen != m->md.pv_gen || 3933 pvh_gen != pvh->pv_gen) { 3934 PMAP_UNLOCK(pmap); 3935 goto restart; 3936 } 3937 } 3938 pte = pmap_pte(pmap, pv->pv_va, &lvl); 3939 if (pte != NULL && 3940 (pmap_load(pte) & ATTR_SW_WIRED) != 0) 3941 count++; 3942 PMAP_UNLOCK(pmap); 3943 } 3944 } 3945 rw_runlock(lock); 3946 return (count); 3947 } 3948 3949 /* 3950 * Destroy all managed, non-wired mappings in the given user-space 3951 * pmap. This pmap cannot be active on any processor besides the 3952 * caller. 3953 * 3954 * This function cannot be applied to the kernel pmap. Moreover, it 3955 * is not intended for general use. It is only to be used during 3956 * process termination. Consequently, it can be implemented in ways 3957 * that make it faster than pmap_remove(). First, it can more quickly 3958 * destroy mappings by iterating over the pmap's collection of PV 3959 * entries, rather than searching the page table. Second, it doesn't 3960 * have to test and clear the page table entries atomically, because 3961 * no processor is currently accessing the user address space. In 3962 * particular, a page table entry's dirty bit won't change state once 3963 * this function starts. 
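 * (In this pmap there is no hardware-managed dirty bit; a mapping is
 * treated as modified when it is writable, which is what the
 * ATTR_AP_RW_BIT test below relies on.)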
3964 */ 3965 void 3966 pmap_remove_pages(pmap_t pmap) 3967 { 3968 pd_entry_t *pde; 3969 pt_entry_t *pte, tpte; 3970 struct spglist free; 3971 vm_page_t m, ml3, mt; 3972 pv_entry_t pv; 3973 struct md_page *pvh; 3974 struct pv_chunk *pc, *npc; 3975 struct rwlock *lock; 3976 int64_t bit; 3977 uint64_t inuse, bitmask; 3978 int allfree, field, freed, idx, lvl; 3979 vm_paddr_t pa; 3980 3981 lock = NULL; 3982 3983 SLIST_INIT(&free); 3984 PMAP_LOCK(pmap); 3985 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 3986 allfree = 1; 3987 freed = 0; 3988 for (field = 0; field < _NPCM; field++) { 3989 inuse = ~pc->pc_map[field] & pc_freemask[field]; 3990 while (inuse != 0) { 3991 bit = ffsl(inuse) - 1; 3992 bitmask = 1UL << bit; 3993 idx = field * 64 + bit; 3994 pv = &pc->pc_pventry[idx]; 3995 inuse &= ~bitmask; 3996 3997 pde = pmap_pde(pmap, pv->pv_va, &lvl); 3998 KASSERT(pde != NULL, 3999 ("Attempting to remove an unmapped page")); 4000 4001 switch(lvl) { 4002 case 1: 4003 pte = pmap_l1_to_l2(pde, pv->pv_va); 4004 tpte = pmap_load(pte); 4005 KASSERT((tpte & ATTR_DESCR_MASK) == 4006 L2_BLOCK, 4007 ("Attempting to remove an invalid " 4008 "block: %lx", tpte)); 4009 tpte = pmap_load(pte); 4010 break; 4011 case 2: 4012 pte = pmap_l2_to_l3(pde, pv->pv_va); 4013 tpte = pmap_load(pte); 4014 KASSERT((tpte & ATTR_DESCR_MASK) == 4015 L3_PAGE, 4016 ("Attempting to remove an invalid " 4017 "page: %lx", tpte)); 4018 break; 4019 default: 4020 panic( 4021 "Invalid page directory level: %d", 4022 lvl); 4023 } 4024 4025 /* 4026 * We cannot remove wired pages from a process' mapping at this time 4027 */ 4028 if (tpte & ATTR_SW_WIRED) { 4029 allfree = 0; 4030 continue; 4031 } 4032 4033 pa = tpte & ~ATTR_MASK; 4034 4035 m = PHYS_TO_VM_PAGE(pa); 4036 KASSERT(m->phys_addr == pa, 4037 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 4038 m, (uintmax_t)m->phys_addr, 4039 (uintmax_t)tpte)); 4040 4041 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 4042 m < &vm_page_array[vm_page_array_size], 4043 ("pmap_remove_pages: bad pte %#jx", 4044 (uintmax_t)tpte)); 4045 4046 pmap_load_clear(pte); 4047 4048 /* 4049 * Update the vm_page_t clean/reference bits. 
4050 */ 4051 if ((tpte & ATTR_AP_RW_BIT) == 4052 ATTR_AP(ATTR_AP_RW)) { 4053 switch (lvl) { 4054 case 1: 4055 for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) 4056 vm_page_dirty(mt); 4057 break; 4058 case 2: 4059 vm_page_dirty(m); 4060 break; 4061 } 4062 } 4063 4064 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 4065 4066 /* Mark free */ 4067 pc->pc_map[field] |= bitmask; 4068 switch (lvl) { 4069 case 1: 4070 pmap_resident_count_dec(pmap, 4071 L2_SIZE / PAGE_SIZE); 4072 pvh = pa_to_pvh(tpte & ~ATTR_MASK); 4073 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 4074 pvh->pv_gen++; 4075 if (TAILQ_EMPTY(&pvh->pv_list)) { 4076 for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) 4077 if ((mt->aflags & PGA_WRITEABLE) != 0 && 4078 TAILQ_EMPTY(&mt->md.pv_list)) 4079 vm_page_aflag_clear(mt, PGA_WRITEABLE); 4080 } 4081 ml3 = pmap_remove_pt_page(pmap, 4082 pv->pv_va); 4083 if (ml3 != NULL) { 4084 pmap_resident_count_dec(pmap, 1); 4085 KASSERT(ml3->wire_count == NL3PG, 4086 ("pmap_remove_pages: l3 page wire count error")); 4087 ml3->wire_count = 1; 4088 vm_page_unwire_noq(ml3); 4089 pmap_add_delayed_free_list(ml3, 4090 &free, FALSE); 4091 } 4092 break; 4093 case 2: 4094 pmap_resident_count_dec(pmap, 1); 4095 TAILQ_REMOVE(&m->md.pv_list, pv, 4096 pv_next); 4097 m->md.pv_gen++; 4098 if ((m->aflags & PGA_WRITEABLE) != 0 && 4099 TAILQ_EMPTY(&m->md.pv_list) && 4100 (m->flags & PG_FICTITIOUS) == 0) { 4101 pvh = pa_to_pvh( 4102 VM_PAGE_TO_PHYS(m)); 4103 if (TAILQ_EMPTY(&pvh->pv_list)) 4104 vm_page_aflag_clear(m, 4105 PGA_WRITEABLE); 4106 } 4107 break; 4108 } 4109 pmap_unuse_pt(pmap, pv->pv_va, pmap_load(pde), 4110 &free); 4111 freed++; 4112 } 4113 } 4114 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 4115 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 4116 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 4117 if (allfree) { 4118 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 4119 free_pv_chunk(pc); 4120 } 4121 } 4122 pmap_invalidate_all(pmap); 4123 if (lock != NULL) 4124 rw_wunlock(lock); 4125 PMAP_UNLOCK(pmap); 4126 vm_page_free_pages_toq(&free, false); 4127 } 4128 4129 /* 4130 * This is used to check if a page has been accessed or modified. Since 4131 * there is no hardware-managed dirty bit, a mapping is assumed to have been 4132 * modified whenever it is mapped read/write.
4133 */ 4134 static boolean_t 4135 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 4136 { 4137 struct rwlock *lock; 4138 pv_entry_t pv; 4139 struct md_page *pvh; 4140 pt_entry_t *pte, mask, value; 4141 pmap_t pmap; 4142 int lvl, md_gen, pvh_gen; 4143 boolean_t rv; 4144 4145 rv = FALSE; 4146 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 4147 rw_rlock(lock); 4148 restart: 4149 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 4150 pmap = PV_PMAP(pv); 4151 if (!PMAP_TRYLOCK(pmap)) { 4152 md_gen = m->md.pv_gen; 4153 rw_runlock(lock); 4154 PMAP_LOCK(pmap); 4155 rw_rlock(lock); 4156 if (md_gen != m->md.pv_gen) { 4157 PMAP_UNLOCK(pmap); 4158 goto restart; 4159 } 4160 } 4161 pte = pmap_pte(pmap, pv->pv_va, &lvl); 4162 KASSERT(lvl == 3, 4163 ("pmap_page_test_mappings: Invalid level %d", lvl)); 4164 mask = 0; 4165 value = 0; 4166 if (modified) { 4167 mask |= ATTR_AP_RW_BIT; 4168 value |= ATTR_AP(ATTR_AP_RW); 4169 } 4170 if (accessed) { 4171 mask |= ATTR_AF | ATTR_DESCR_MASK; 4172 value |= ATTR_AF | L3_PAGE; 4173 } 4174 rv = (pmap_load(pte) & mask) == value; 4175 PMAP_UNLOCK(pmap); 4176 if (rv) 4177 goto out; 4178 } 4179 if ((m->flags & PG_FICTITIOUS) == 0) { 4180 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4181 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 4182 pmap = PV_PMAP(pv); 4183 if (!PMAP_TRYLOCK(pmap)) { 4184 md_gen = m->md.pv_gen; 4185 pvh_gen = pvh->pv_gen; 4186 rw_runlock(lock); 4187 PMAP_LOCK(pmap); 4188 rw_rlock(lock); 4189 if (md_gen != m->md.pv_gen || 4190 pvh_gen != pvh->pv_gen) { 4191 PMAP_UNLOCK(pmap); 4192 goto restart; 4193 } 4194 } 4195 pte = pmap_pte(pmap, pv->pv_va, &lvl); 4196 KASSERT(lvl == 2, 4197 ("pmap_page_test_mappings: Invalid level %d", lvl)); 4198 mask = 0; 4199 value = 0; 4200 if (modified) { 4201 mask |= ATTR_AP_RW_BIT; 4202 value |= ATTR_AP(ATTR_AP_RW); 4203 } 4204 if (accessed) { 4205 mask |= ATTR_AF | ATTR_DESCR_MASK; 4206 value |= ATTR_AF | L2_BLOCK; 4207 } 4208 rv = (pmap_load(pte) & mask) == value; 4209 PMAP_UNLOCK(pmap); 4210 if (rv) 4211 goto out; 4212 } 4213 } 4214 out: 4215 rw_runlock(lock); 4216 return (rv); 4217 } 4218 4219 /* 4220 * pmap_is_modified: 4221 * 4222 * Return whether or not the specified physical page was modified 4223 * in any physical maps. 4224 */ 4225 boolean_t 4226 pmap_is_modified(vm_page_t m) 4227 { 4228 4229 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4230 ("pmap_is_modified: page %p is not managed", m)); 4231 4232 /* 4233 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 4234 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 4235 * is clear, no PTEs can have PG_M set. 4236 */ 4237 VM_OBJECT_ASSERT_WLOCKED(m->object); 4238 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 4239 return (FALSE); 4240 return (pmap_page_test_mappings(m, FALSE, TRUE)); 4241 } 4242 4243 /* 4244 * pmap_is_prefaultable: 4245 * 4246 * Return whether or not the specified virtual address is eligible 4247 * for prefault. 4248 */ 4249 boolean_t 4250 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 4251 { 4252 pt_entry_t *pte; 4253 boolean_t rv; 4254 int lvl; 4255 4256 rv = FALSE; 4257 PMAP_LOCK(pmap); 4258 pte = pmap_pte(pmap, addr, &lvl); 4259 if (pte != NULL && pmap_load(pte) != 0) { 4260 rv = TRUE; 4261 } 4262 PMAP_UNLOCK(pmap); 4263 return (rv); 4264 } 4265 4266 /* 4267 * pmap_is_referenced: 4268 * 4269 * Return whether or not the specified physical page was referenced 4270 * in any physical maps. 
4271 */ 4272 boolean_t 4273 pmap_is_referenced(vm_page_t m) 4274 { 4275 4276 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4277 ("pmap_is_referenced: page %p is not managed", m)); 4278 return (pmap_page_test_mappings(m, TRUE, FALSE)); 4279 } 4280 4281 /* 4282 * Clear the write and modified bits in each of the given page's mappings. 4283 */ 4284 void 4285 pmap_remove_write(vm_page_t m) 4286 { 4287 struct md_page *pvh; 4288 pmap_t pmap; 4289 struct rwlock *lock; 4290 pv_entry_t next_pv, pv; 4291 pt_entry_t oldpte, *pte; 4292 vm_offset_t va; 4293 int lvl, md_gen, pvh_gen; 4294 4295 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4296 ("pmap_remove_write: page %p is not managed", m)); 4297 4298 /* 4299 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 4300 * set by another thread while the object is locked. Thus, 4301 * if PGA_WRITEABLE is clear, no page table entries need updating. 4302 */ 4303 VM_OBJECT_ASSERT_WLOCKED(m->object); 4304 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 4305 return; 4306 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 4307 pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : 4308 pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4309 retry_pv_loop: 4310 rw_wlock(lock); 4311 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { 4312 pmap = PV_PMAP(pv); 4313 if (!PMAP_TRYLOCK(pmap)) { 4314 pvh_gen = pvh->pv_gen; 4315 rw_wunlock(lock); 4316 PMAP_LOCK(pmap); 4317 rw_wlock(lock); 4318 if (pvh_gen != pvh->pv_gen) { 4319 PMAP_UNLOCK(pmap); 4320 rw_wunlock(lock); 4321 goto retry_pv_loop; 4322 } 4323 } 4324 va = pv->pv_va; 4325 pte = pmap_pte(pmap, pv->pv_va, &lvl); 4326 if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 4327 pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET, 4328 &lock); 4329 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4330 ("inconsistent pv lock %p %p for page %p", 4331 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4332 PMAP_UNLOCK(pmap); 4333 } 4334 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 4335 pmap = PV_PMAP(pv); 4336 if (!PMAP_TRYLOCK(pmap)) { 4337 pvh_gen = pvh->pv_gen; 4338 md_gen = m->md.pv_gen; 4339 rw_wunlock(lock); 4340 PMAP_LOCK(pmap); 4341 rw_wlock(lock); 4342 if (pvh_gen != pvh->pv_gen || 4343 md_gen != m->md.pv_gen) { 4344 PMAP_UNLOCK(pmap); 4345 rw_wunlock(lock); 4346 goto retry_pv_loop; 4347 } 4348 } 4349 pte = pmap_pte(pmap, pv->pv_va, &lvl); 4350 retry: 4351 oldpte = pmap_load(pte); 4352 if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { 4353 if (!atomic_cmpset_long(pte, oldpte, 4354 oldpte | ATTR_AP(ATTR_AP_RO))) 4355 goto retry; 4356 if ((oldpte & ATTR_AF) != 0) 4357 vm_page_dirty(m); 4358 pmap_invalidate_page(pmap, pv->pv_va); 4359 } 4360 PMAP_UNLOCK(pmap); 4361 } 4362 rw_wunlock(lock); 4363 vm_page_aflag_clear(m, PGA_WRITEABLE); 4364 } 4365 4366 static __inline boolean_t 4367 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 4368 { 4369 4370 return (FALSE); 4371 } 4372 4373 /* 4374 * pmap_ts_referenced: 4375 * 4376 * Return a count of reference bits for a page, clearing those bits. 4377 * It is not necessary for every reference bit to be cleared, but it 4378 * is necessary that 0 only be returned when there are truly no 4379 * reference bits set. 4380 * 4381 * As an optimization, update the page's dirty field if a modified bit is 4382 * found while counting reference bits. This opportunistic update can be 4383 * performed at low cost and can eliminate the need for some future calls 4384 * to pmap_is_modified(). 
However, since this function stops after 4385 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 4386 * dirty pages. Those dirty pages will only be detected by a future call 4387 * to pmap_is_modified(). 4388 */ 4389 int 4390 pmap_ts_referenced(vm_page_t m) 4391 { 4392 struct md_page *pvh; 4393 pv_entry_t pv, pvf; 4394 pmap_t pmap; 4395 struct rwlock *lock; 4396 pd_entry_t *pde, tpde; 4397 pt_entry_t *pte, tpte; 4398 pt_entry_t *l3; 4399 vm_offset_t va; 4400 vm_paddr_t pa; 4401 int cleared, md_gen, not_cleared, lvl, pvh_gen; 4402 struct spglist free; 4403 bool demoted; 4404 4405 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4406 ("pmap_ts_referenced: page %p is not managed", m)); 4407 SLIST_INIT(&free); 4408 cleared = 0; 4409 pa = VM_PAGE_TO_PHYS(m); 4410 lock = PHYS_TO_PV_LIST_LOCK(pa); 4411 pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); 4412 rw_wlock(lock); 4413 retry: 4414 not_cleared = 0; 4415 if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) 4416 goto small_mappings; 4417 pv = pvf; 4418 do { 4419 if (pvf == NULL) 4420 pvf = pv; 4421 pmap = PV_PMAP(pv); 4422 if (!PMAP_TRYLOCK(pmap)) { 4423 pvh_gen = pvh->pv_gen; 4424 rw_wunlock(lock); 4425 PMAP_LOCK(pmap); 4426 rw_wlock(lock); 4427 if (pvh_gen != pvh->pv_gen) { 4428 PMAP_UNLOCK(pmap); 4429 goto retry; 4430 } 4431 } 4432 va = pv->pv_va; 4433 pde = pmap_pde(pmap, pv->pv_va, &lvl); 4434 KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found")); 4435 KASSERT(lvl == 1, 4436 ("pmap_ts_referenced: invalid pde level %d", lvl)); 4437 tpde = pmap_load(pde); 4438 KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE, 4439 ("pmap_ts_referenced: found an invalid l1 table")); 4440 pte = pmap_l1_to_l2(pde, pv->pv_va); 4441 tpte = pmap_load(pte); 4442 if (pmap_page_dirty(tpte)) { 4443 /* 4444 * Although "tpte" is mapping a 2MB page, because 4445 * this function is called at a 4KB page granularity, 4446 * we only update the 4KB page under test. 4447 */ 4448 vm_page_dirty(m); 4449 } 4450 if ((tpte & ATTR_AF) != 0) { 4451 /* 4452 * Since this reference bit is shared by 512 4KB 4453 * pages, it should not be cleared every time it is 4454 * tested. Apply a simple "hash" function on the 4455 * physical page number, the virtual superpage number, 4456 * and the pmap address to select one 4KB page out of 4457 * the 512 on which testing the reference bit will 4458 * result in clearing that reference bit. This 4459 * function is designed to avoid the selection of the 4460 * same 4KB page for every 2MB page mapping. 4461 * 4462 * On demotion, a mapping that hasn't been referenced 4463 * is simply destroyed. To avoid the possibility of a 4464 * subsequent page fault on a demoted wired mapping, 4465 * always leave its reference bit set. Moreover, 4466 * since the superpage is wired, the current state of 4467 * its reference bit won't affect page replacement. 4468 */ 4469 if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ 4470 (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && 4471 (tpte & ATTR_SW_WIRED) == 0) { 4472 if (safe_to_clear_referenced(pmap, tpte)) { 4473 /* 4474 * TODO: We don't handle the access 4475 * flag at all. We need to be able 4476 * to set it in the exception handler. 
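 * Clearing ATTR_AF here would require the data/instruction abort
 * handler to set it again on the next access, which is not implemented,
 * so safe_to_clear_referenced() always returns FALSE and the demotion
 * path below is taken instead.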
4477 */ 4478 panic("ARM64TODO: " 4479 "safe_to_clear_referenced\n"); 4480 } else if (pmap_demote_l2_locked(pmap, pte, 4481 pv->pv_va, &lock) != NULL) { 4482 demoted = true; 4483 va += VM_PAGE_TO_PHYS(m) - 4484 (tpte & ~ATTR_MASK); 4485 l3 = pmap_l2_to_l3(pte, va); 4486 pmap_remove_l3(pmap, l3, va, 4487 pmap_load(pte), NULL, &lock); 4488 } else 4489 demoted = true; 4490 4491 if (demoted) { 4492 /* 4493 * The superpage mapping was removed 4494 * entirely and therefore 'pv' is no 4495 * longer valid. 4496 */ 4497 if (pvf == pv) 4498 pvf = NULL; 4499 pv = NULL; 4500 } 4501 cleared++; 4502 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4503 ("inconsistent pv lock %p %p for page %p", 4504 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4505 } else 4506 not_cleared++; 4507 } 4508 PMAP_UNLOCK(pmap); 4509 /* Rotate the PV list if it has more than one entry. */ 4510 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 4511 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 4512 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 4513 pvh->pv_gen++; 4514 } 4515 if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) 4516 goto out; 4517 } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); 4518 small_mappings: 4519 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 4520 goto out; 4521 pv = pvf; 4522 do { 4523 if (pvf == NULL) 4524 pvf = pv; 4525 pmap = PV_PMAP(pv); 4526 if (!PMAP_TRYLOCK(pmap)) { 4527 pvh_gen = pvh->pv_gen; 4528 md_gen = m->md.pv_gen; 4529 rw_wunlock(lock); 4530 PMAP_LOCK(pmap); 4531 rw_wlock(lock); 4532 if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { 4533 PMAP_UNLOCK(pmap); 4534 goto retry; 4535 } 4536 } 4537 pde = pmap_pde(pmap, pv->pv_va, &lvl); 4538 KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); 4539 KASSERT(lvl == 2, 4540 ("pmap_ts_referenced: invalid pde level %d", lvl)); 4541 tpde = pmap_load(pde); 4542 KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, 4543 ("pmap_ts_referenced: found an invalid l2 table")); 4544 pte = pmap_l2_to_l3(pde, pv->pv_va); 4545 tpte = pmap_load(pte); 4546 if (pmap_page_dirty(tpte)) 4547 vm_page_dirty(m); 4548 if ((tpte & ATTR_AF) != 0) { 4549 if (safe_to_clear_referenced(pmap, tpte)) { 4550 /* 4551 * TODO: We don't handle the access flag 4552 * at all. We need to be able to set it in 4553 * the exception handler. 4554 */ 4555 panic("ARM64TODO: safe_to_clear_referenced\n"); 4556 } else if ((tpte & ATTR_SW_WIRED) == 0) { 4557 /* 4558 * Wired pages cannot be paged out so 4559 * doing accessed bit emulation for 4560 * them is wasted effort. We do the 4561 * hard work for unwired pages only. 4562 */ 4563 pmap_remove_l3(pmap, pte, pv->pv_va, tpde, 4564 &free, &lock); 4565 pmap_invalidate_page(pmap, pv->pv_va); 4566 cleared++; 4567 if (pvf == pv) 4568 pvf = NULL; 4569 pv = NULL; 4570 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4571 ("inconsistent pv lock %p %p for page %p", 4572 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4573 } else 4574 not_cleared++; 4575 } 4576 PMAP_UNLOCK(pmap); 4577 /* Rotate the PV list if it has more than one entry. */ 4578 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 4579 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 4580 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 4581 m->md.pv_gen++; 4582 } 4583 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 4584 not_cleared < PMAP_TS_REFERENCED_MAX); 4585 out: 4586 rw_wunlock(lock); 4587 vm_page_free_pages_toq(&free, false); 4588 return (cleared + not_cleared); 4589 } 4590 4591 /* 4592 * Apply the given advice to the specified range of addresses within the 4593 * given pmap. 
Depending on the advice, clear the referenced and/or 4594 * modified flags in each mapping and set the mapped page's dirty field. 4595 */ 4596 void 4597 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 4598 { 4599 } 4600 4601 /* 4602 * Clear the modify bits on the specified physical page. 4603 */ 4604 void 4605 pmap_clear_modify(vm_page_t m) 4606 { 4607 4608 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4609 ("pmap_clear_modify: page %p is not managed", m)); 4610 VM_OBJECT_ASSERT_WLOCKED(m->object); 4611 KASSERT(!vm_page_xbusied(m), 4612 ("pmap_clear_modify: page %p is exclusive busied", m)); 4613 4614 /* 4615 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 4616 * If the object containing the page is locked and the page is not 4617 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 4618 */ 4619 if ((m->aflags & PGA_WRITEABLE) == 0) 4620 return; 4621 4622 /* ARM64TODO: We lack support for tracking if a page is modified */ 4623 } 4624 4625 void * 4626 pmap_mapbios(vm_paddr_t pa, vm_size_t size) 4627 { 4628 struct pmap_preinit_mapping *ppim; 4629 vm_offset_t va, offset; 4630 pd_entry_t *pde; 4631 pt_entry_t *l2; 4632 int i, lvl, l2_blocks, free_l2_count, start_idx; 4633 4634 if (!vm_initialized) { 4635 /* 4636 * No L3 ptables so map entire L2 blocks where start VA is: 4637 * preinit_map_va + start_idx * L2_SIZE 4638 * There may be duplicate mappings (multiple VA -> same PA) but 4639 * ARM64 dcache is always PIPT so that's acceptable. 4640 */ 4641 if (size == 0) 4642 return (NULL); 4643 4644 /* Calculate how many L2 blocks are needed for the mapping */ 4645 l2_blocks = (roundup2(pa + size, L2_SIZE) - 4646 rounddown2(pa, L2_SIZE)) >> L2_SHIFT; 4647 4648 offset = pa & L2_OFFSET; 4649 4650 if (preinit_map_va == 0) 4651 return (NULL); 4652 4653 /* Map 2MiB L2 blocks from reserved VA space */ 4654 4655 free_l2_count = 0; 4656 start_idx = -1; 4657 /* Find enough free contiguous VA space */ 4658 for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { 4659 ppim = pmap_preinit_mapping + i; 4660 if (free_l2_count > 0 && ppim->pa != 0) { 4661 /* Not enough space here */ 4662 free_l2_count = 0; 4663 start_idx = -1; 4664 continue; 4665 } 4666 4667 if (ppim->pa == 0) { 4668 /* Free L2 block */ 4669 if (start_idx == -1) 4670 start_idx = i; 4671 free_l2_count++; 4672 if (free_l2_count == l2_blocks) 4673 break; 4674 } 4675 } 4676 if (free_l2_count != l2_blocks) 4677 panic("%s: too many preinit mappings", __func__); 4678 4679 va = preinit_map_va + (start_idx * L2_SIZE); 4680 for (i = start_idx; i < start_idx + l2_blocks; i++) { 4681 /* Mark entries as allocated */ 4682 ppim = pmap_preinit_mapping + i; 4683 ppim->pa = pa; 4684 ppim->va = va + offset; 4685 ppim->size = size; 4686 } 4687 4688 /* Map L2 blocks */ 4689 pa = rounddown2(pa, L2_SIZE); 4690 for (i = 0; i < l2_blocks; i++) { 4691 pde = pmap_pde(kernel_pmap, va, &lvl); 4692 KASSERT(pde != NULL, 4693 ("pmap_mapbios: Invalid page entry, va: 0x%lx", 4694 va)); 4695 KASSERT(lvl == 1, 4696 ("pmap_mapbios: Invalid level %d", lvl)); 4697 4698 /* Insert L2_BLOCK */ 4699 l2 = pmap_l1_to_l2(pde, va); 4700 pmap_load_store(l2, 4701 pa | ATTR_DEFAULT | ATTR_XN | 4702 ATTR_IDX(CACHED_MEMORY) | L2_BLOCK); 4703 4704 va += L2_SIZE; 4705 pa += L2_SIZE; 4706 } 4707 pmap_invalidate_all(kernel_pmap); 4708 4709 va = preinit_map_va + (start_idx * L2_SIZE); 4710 4711 } else { 4712 /* kva_alloc may be used to map the pages */ 4713 offset = pa & PAGE_MASK; 4714 size = round_page(offset + size); 4715 4716 va = kva_alloc(size); 4717 
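/*
 * kva_alloc() returns 0 when no KVA is available; the mapping
 * itself is created by pmap_kenter() below.
 */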
if (va == 0) 4718 panic("%s: Couldn't allocate KVA", __func__); 4719 4720 pde = pmap_pde(kernel_pmap, va, &lvl); 4721 KASSERT(lvl == 2, ("pmap_mapbios: Invalid level %d", lvl)); 4722 4723 /* L3 table is linked */ 4724 va = trunc_page(va); 4725 pa = trunc_page(pa); 4726 pmap_kenter(va, size, pa, CACHED_MEMORY); 4727 } 4728 4729 return ((void *)(va + offset)); 4730 } 4731 4732 void 4733 pmap_unmapbios(vm_offset_t va, vm_size_t size) 4734 { 4735 struct pmap_preinit_mapping *ppim; 4736 vm_offset_t offset, tmpsize, va_trunc; 4737 pd_entry_t *pde; 4738 pt_entry_t *l2; 4739 int i, lvl, l2_blocks, block; 4740 bool preinit_map; 4741 4742 l2_blocks = 4743 (roundup2(va + size, L2_SIZE) - rounddown2(va, L2_SIZE)) >> L2_SHIFT; 4744 KASSERT(l2_blocks > 0, ("pmap_unmapbios: invalid size %lx", size)); 4745 4746 /* Remove preinit mapping */ 4747 preinit_map = false; 4748 block = 0; 4749 for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { 4750 ppim = pmap_preinit_mapping + i; 4751 if (ppim->va == va) { 4752 KASSERT(ppim->size == size, 4753 ("pmap_unmapbios: size mismatch")); 4754 ppim->va = 0; 4755 ppim->pa = 0; 4756 ppim->size = 0; 4757 preinit_map = true; 4758 offset = block * L2_SIZE; 4759 va_trunc = rounddown2(va, L2_SIZE) + offset; 4760 4761 /* Remove L2_BLOCK */ 4762 pde = pmap_pde(kernel_pmap, va_trunc, &lvl); 4763 KASSERT(pde != NULL, 4764 ("pmap_unmapbios: Invalid page entry, va: 0x%lx", 4765 va_trunc)); 4766 l2 = pmap_l1_to_l2(pde, va_trunc); 4767 pmap_load_clear(l2); 4768 4769 if (block == (l2_blocks - 1)) 4770 break; 4771 block++; 4772 } 4773 } 4774 if (preinit_map) { 4775 pmap_invalidate_all(kernel_pmap); 4776 return; 4777 } 4778 4779 /* Unmap the pages reserved with kva_alloc. */ 4780 if (vm_initialized) { 4781 offset = va & PAGE_MASK; 4782 size = round_page(offset + size); 4783 va = trunc_page(va); 4784 4785 pde = pmap_pde(kernel_pmap, va, &lvl); 4786 KASSERT(pde != NULL, 4787 ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va)); 4788 KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl)); 4789 4790 /* Unmap and invalidate the pages */ 4791 for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) 4792 pmap_kremove(va + tmpsize); 4793 4794 kva_free(va, size); 4795 } 4796 } 4797 4798 /* 4799 * Sets the memory attribute for the specified page. 4800 */ 4801 void 4802 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 4803 { 4804 4805 m->md.pv_memattr = ma; 4806 4807 /* 4808 * If "m" is a normal page, update its direct mapping. This update 4809 * can be relied upon to perform any cache operations that are 4810 * required for data coherence. 4811 */ 4812 if ((m->flags & PG_FICTITIOUS) == 0 && 4813 pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, 4814 m->md.pv_memattr) != 0) 4815 panic("memory attribute change on the direct map failed"); 4816 } 4817 4818 /* 4819 * Changes the specified virtual address range's memory type to that given by 4820 * the parameter "mode". The specified virtual address range must be 4821 * completely contained within either the direct map or the kernel map. If 4822 * the virtual address range is contained within the kernel map, then the 4823 * memory type for each of the corresponding ranges of the direct map is also 4824 * changed. (The corresponding ranges of the direct map are those ranges that 4825 * map the same physical pages as the specified virtual address range.) 
These 4826 * changes to the direct map are necessary because the ARM architecture 4827 * does not guarantee coherent behavior if two or more mappings to the 4828 * same physical page have mismatched memory attributes. 4829 * 4830 * Returns zero if the change completed successfully, and either EINVAL or 4831 * ENOMEM if the change failed. Specifically, EINVAL is returned if some part 4832 * of the virtual address range was not mapped, and ENOMEM is returned if 4833 * there was insufficient memory available to complete the change. In the 4834 * latter case, the memory type may have been changed on some part of the 4835 * virtual address range or the direct map. 4836 */ 4837 static int 4838 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode) 4839 { 4840 int error; 4841 4842 PMAP_LOCK(kernel_pmap); 4843 error = pmap_change_attr_locked(va, size, mode); 4844 PMAP_UNLOCK(kernel_pmap); 4845 return (error); 4846 } 4847 4848 static int 4849 pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode) 4850 { 4851 vm_offset_t base, offset, tmpva; 4852 pt_entry_t l3, *pte, *newpte; 4853 int lvl; 4854 4855 PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED); 4856 base = trunc_page(va); 4857 offset = va & PAGE_MASK; 4858 size = round_page(offset + size); 4859 4860 if (!VIRT_IN_DMAP(base)) 4861 return (EINVAL); 4862 4863 for (tmpva = base; tmpva < base + size; ) { 4864 pte = pmap_pte(kernel_pmap, tmpva, &lvl); 4865 if (pte == NULL) 4866 return (EINVAL); 4867 4868 if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) { 4869 /* 4870 * We already have the correct attribute, 4871 * ignore this entry. 4872 */ 4873 switch (lvl) { 4874 default: 4875 panic("Invalid DMAP table level: %d\n", lvl); 4876 case 1: 4877 tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE; 4878 break; 4879 case 2: 4880 tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE; 4881 break; 4882 case 3: 4883 tmpva += PAGE_SIZE; 4884 break; 4885 } 4886 } else { 4887 /* 4888 * Split the entry into a level 3 table, then 4889 * set the new attribute. 4890 */ 4891 switch (lvl) { 4892 default: 4893 panic("Invalid DMAP table level: %d\n", lvl); 4894 case 1: 4895 newpte = pmap_demote_l1(kernel_pmap, pte, 4896 tmpva & ~L1_OFFSET); 4897 if (newpte == NULL) 4898 return (EINVAL); 4899 pte = pmap_l1_to_l2(pte, tmpva); 4900 case 2: 4901 newpte = pmap_demote_l2(kernel_pmap, pte, 4902 tmpva & ~L2_OFFSET); 4903 if (newpte == NULL) 4904 return (EINVAL); 4905 pte = pmap_l2_to_l3(pte, tmpva); 4906 case 3: 4907 /* Update the entry */ 4908 l3 = pmap_load(pte); 4909 l3 &= ~ATTR_IDX_MASK; 4910 l3 |= ATTR_IDX(mode); 4911 if (mode == DEVICE_MEMORY) 4912 l3 |= ATTR_XN; 4913 4914 pmap_update_entry(kernel_pmap, pte, l3, tmpva, 4915 PAGE_SIZE); 4916 4917 /* 4918 * If moving to a non-cacheable entry flush 4919 * the cache. 4920 */ 4921 if (mode == VM_MEMATTR_UNCACHEABLE) 4922 cpu_dcache_wbinv_range(tmpva, L3_SIZE); 4923 4924 break; 4925 } 4926 tmpva += PAGE_SIZE; 4927 } 4928 } 4929 4930 return (0); 4931 } 4932 4933 /* 4934 * Create an L2 table to map all addresses within an L1 mapping.
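 * If the L1 entry being demoted maps the region containing the L1
 * table itself, a temporary mapping (tmpl1) is installed first so the
 * table remains accessible while the entry is replaced.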
4935 */ 4936 static pt_entry_t * 4937 pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va) 4938 { 4939 pt_entry_t *l2, newl2, oldl1; 4940 vm_offset_t tmpl1; 4941 vm_paddr_t l2phys, phys; 4942 vm_page_t ml2; 4943 int i; 4944 4945 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 4946 oldl1 = pmap_load(l1); 4947 KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK, 4948 ("pmap_demote_l1: Demoting a non-block entry")); 4949 KASSERT((va & L1_OFFSET) == 0, 4950 ("pmap_demote_l1: Invalid virtual address %#lx", va)); 4951 KASSERT((oldl1 & ATTR_SW_MANAGED) == 0, 4952 ("pmap_demote_l1: Level 1 table shouldn't be managed")); 4953 4954 tmpl1 = 0; 4955 if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) { 4956 tmpl1 = kva_alloc(PAGE_SIZE); 4957 if (tmpl1 == 0) 4958 return (NULL); 4959 } 4960 4961 if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT | 4962 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) { 4963 CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx" 4964 " in pmap %p", va, pmap); 4965 return (NULL); 4966 } 4967 4968 l2phys = VM_PAGE_TO_PHYS(ml2); 4969 l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys); 4970 4971 /* Address the range points at */ 4972 phys = oldl1 & ~ATTR_MASK; 4973 /* The attributed from the old l1 table to be copied */ 4974 newl2 = oldl1 & ATTR_MASK; 4975 4976 /* Create the new entries */ 4977 for (i = 0; i < Ln_ENTRIES; i++) { 4978 l2[i] = newl2 | phys; 4979 phys += L2_SIZE; 4980 } 4981 KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK), 4982 ("Invalid l2 page (%lx != %lx)", l2[0], 4983 (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK)); 4984 4985 if (tmpl1 != 0) { 4986 pmap_kenter(tmpl1, PAGE_SIZE, 4987 DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY); 4988 l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK)); 4989 } 4990 4991 pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE); 4992 4993 if (tmpl1 != 0) { 4994 pmap_kremove(tmpl1); 4995 kva_free(tmpl1, PAGE_SIZE); 4996 } 4997 4998 return (l2); 4999 } 5000 5001 /* 5002 * Create an L3 table to map all addresses within an L2 mapping. 5003 */ 5004 static pt_entry_t * 5005 pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va, 5006 struct rwlock **lockp) 5007 { 5008 pt_entry_t *l3, newl3, oldl2; 5009 vm_offset_t tmpl2; 5010 vm_paddr_t l3phys, phys; 5011 vm_page_t ml3; 5012 int i; 5013 5014 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 5015 l3 = NULL; 5016 oldl2 = pmap_load(l2); 5017 KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK, 5018 ("pmap_demote_l2: Demoting a non-block entry")); 5019 KASSERT((va & L2_OFFSET) == 0, 5020 ("pmap_demote_l2: Invalid virtual address %#lx", va)); 5021 5022 tmpl2 = 0; 5023 if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) { 5024 tmpl2 = kva_alloc(PAGE_SIZE); 5025 if (tmpl2 == 0) 5026 return (NULL); 5027 } 5028 5029 if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) { 5030 ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va), 5031 (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) | 5032 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED); 5033 if (ml3 == NULL) { 5034 CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx" 5035 " in pmap %p", va, pmap); 5036 goto fail; 5037 } 5038 if (va < VM_MAXUSER_ADDRESS) 5039 pmap_resident_count_inc(pmap, 1); 5040 } 5041 5042 l3phys = VM_PAGE_TO_PHYS(ml3); 5043 l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys); 5044 5045 /* Address the range points at */ 5046 phys = oldl2 & ~ATTR_MASK; 5047 /* The attributed from the old l2 table to be copied */ 5048 newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE; 5049 5050 /* 5051 * If the page table page is new, initialize it. 
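 * (A page table page returned by pmap_remove_pt_page() above still holds
 * the 4KB entries that existed before the 2MB mapping was created, so it
 * is reused as is; the KASSERT below checks that its first entry is
 * consistent with the block being demoted.)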
5052 */ 5053 if (ml3->wire_count == 1) { 5054 ml3->wire_count = NL3PG; 5055 for (i = 0; i < Ln_ENTRIES; i++) { 5056 l3[i] = newl3 | phys; 5057 phys += L3_SIZE; 5058 } 5059 } 5060 KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE), 5061 ("Invalid l3 page (%lx != %lx)", l3[0], 5062 (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE)); 5063 5064 /* 5065 * Map the temporary page so we don't lose access to the l2 table. 5066 */ 5067 if (tmpl2 != 0) { 5068 pmap_kenter(tmpl2, PAGE_SIZE, 5069 DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY); 5070 l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK)); 5071 } 5072 5073 /* 5074 * The spare PV entries must be reserved prior to demoting the 5075 * mapping, that is, prior to changing the PDE. Otherwise, the state 5076 * of the L2 and the PV lists will be inconsistent, which can result 5077 * in reclaim_pv_chunk() attempting to remove a PV entry from the 5078 * wrong PV list and pmap_pv_demote_l2() failing to find the expected 5079 * PV entry for the 2MB page mapping that is being demoted. 5080 */ 5081 if ((oldl2 & ATTR_SW_MANAGED) != 0) 5082 reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); 5083 5084 pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE); 5085 5086 /* 5087 * Demote the PV entry. 5088 */ 5089 if ((oldl2 & ATTR_SW_MANAGED) != 0) 5090 pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp); 5091 5092 atomic_add_long(&pmap_l2_demotions, 1); 5093 CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx" 5094 " in pmap %p %lx", va, pmap, l3[0]); 5095 5096 fail: 5097 if (tmpl2 != 0) { 5098 pmap_kremove(tmpl2); 5099 kva_free(tmpl2, PAGE_SIZE); 5100 } 5101 5102 return (l3); 5103 5104 } 5105 5106 static pt_entry_t * 5107 pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) 5108 { 5109 struct rwlock *lock; 5110 pt_entry_t *l3; 5111 5112 lock = NULL; 5113 l3 = pmap_demote_l2_locked(pmap, l2, va, &lock); 5114 if (lock != NULL) 5115 rw_wunlock(lock); 5116 return (l3); 5117 } 5118 5119 /* 5120 * perform the pmap work for mincore 5121 */ 5122 int 5123 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 5124 { 5125 pt_entry_t *pte, tpte; 5126 vm_paddr_t mask, pa; 5127 int lvl, val; 5128 bool managed; 5129 5130 PMAP_LOCK(pmap); 5131 retry: 5132 val = 0; 5133 pte = pmap_pte(pmap, addr, &lvl); 5134 if (pte != NULL) { 5135 tpte = pmap_load(pte); 5136 5137 switch (lvl) { 5138 case 3: 5139 mask = L3_OFFSET; 5140 break; 5141 case 2: 5142 mask = L2_OFFSET; 5143 break; 5144 case 1: 5145 mask = L1_OFFSET; 5146 break; 5147 default: 5148 panic("pmap_mincore: invalid level %d", lvl); 5149 } 5150 5151 val = MINCORE_INCORE; 5152 if (lvl != 3) 5153 val |= MINCORE_SUPER; 5154 if (pmap_page_dirty(tpte)) 5155 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 5156 if ((tpte & ATTR_AF) == ATTR_AF) 5157 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 5158 5159 managed = (tpte & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 5160 pa = (tpte & ~ATTR_MASK) | (addr & mask); 5161 } else 5162 managed = false; 5163 5164 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 5165 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 5166 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. 
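 * vm_page_pa_tryrelock() may have to drop the pmap lock to acquire the
 * page lock; when that happens the page table entry is re-examined via
 * the retry label above.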
*/ 5167 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 5168 goto retry; 5169 } else 5170 PA_UNLOCK_COND(*locked_pa); 5171 PMAP_UNLOCK(pmap); 5172 5173 return (val); 5174 } 5175 5176 void 5177 pmap_activate(struct thread *td) 5178 { 5179 pmap_t pmap; 5180 5181 critical_enter(); 5182 pmap = vmspace_pmap(td->td_proc->p_vmspace); 5183 td->td_proc->p_md.md_l0addr = vtophys(pmap->pm_l0); 5184 __asm __volatile("msr ttbr0_el1, %0" : : 5185 "r"(td->td_proc->p_md.md_l0addr)); 5186 pmap_invalidate_all(pmap); 5187 critical_exit(); 5188 } 5189 5190 struct pcb * 5191 pmap_switch(struct thread *old, struct thread *new) 5192 { 5193 pcpu_bp_harden bp_harden; 5194 struct pcb *pcb; 5195 5196 /* Store the new curthread */ 5197 PCPU_SET(curthread, new); 5198 5199 /* And the new pcb */ 5200 pcb = new->td_pcb; 5201 PCPU_SET(curpcb, pcb); 5202 5203 /* 5204 * TODO: We may need to flush the cache here if switching 5205 * to a user process. 5206 */ 5207 5208 if (old == NULL || 5209 old->td_proc->p_md.md_l0addr != new->td_proc->p_md.md_l0addr) { 5210 __asm __volatile( 5211 /* Switch to the new pmap */ 5212 "msr ttbr0_el1, %0 \n" 5213 "isb \n" 5214 5215 /* Invalidate the TLB */ 5216 "dsb ishst \n" 5217 "tlbi vmalle1is \n" 5218 "dsb ish \n" 5219 "isb \n" 5220 : : "r"(new->td_proc->p_md.md_l0addr)); 5221 5222 /* 5223 * Stop userspace from training the branch predictor against 5224 * other processes. This will call into a CPU specific 5225 * function that clears the branch predictor state. 5226 */ 5227 bp_harden = PCPU_GET(bp_harden); 5228 if (bp_harden != NULL) 5229 bp_harden(); 5230 } 5231 5232 return (pcb); 5233 } 5234 5235 void 5236 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 5237 { 5238 5239 if (va >= VM_MIN_KERNEL_ADDRESS) { 5240 cpu_icache_sync_range(va, sz); 5241 } else { 5242 u_int len, offset; 5243 vm_paddr_t pa; 5244 5245 /* Find the length of data in this page to flush */ 5246 offset = va & PAGE_MASK; 5247 len = imin(PAGE_SIZE - offset, sz); 5248 5249 while (sz != 0) { 5250 /* Extract the physical address & find it in the DMAP */ 5251 pa = pmap_extract(pmap, va); 5252 if (pa != 0) 5253 cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); 5254 5255 /* Move to the next page */ 5256 sz -= len; 5257 va += len; 5258 /* Set the length for the next iteration */ 5259 len = imin(PAGE_SIZE, sz); 5260 } 5261 } 5262 } 5263 5264 int 5265 pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far) 5266 { 5267 #ifdef SMP 5268 register_t intr; 5269 uint64_t par; 5270 5271 switch (ESR_ELx_EXCEPTION(esr)) { 5272 case EXCP_INSN_ABORT_L: 5273 case EXCP_INSN_ABORT: 5274 case EXCP_DATA_ABORT_L: 5275 case EXCP_DATA_ABORT: 5276 break; 5277 default: 5278 return (KERN_FAILURE); 5279 } 5280 5281 /* Data and insn aborts use same encoding for FCS field. */ 5282 switch (esr & ISS_DATA_DFSC_MASK) { 5283 case ISS_DATA_DFSC_TF_L0: 5284 case ISS_DATA_DFSC_TF_L1: 5285 case ISS_DATA_DFSC_TF_L2: 5286 case ISS_DATA_DFSC_TF_L3: 5287 PMAP_LOCK(pmap); 5288 /* Ask the MMU to check the address */ 5289 intr = intr_disable(); 5290 if (pmap == kernel_pmap) 5291 par = arm64_address_translate_s1e1r(far); 5292 else 5293 par = arm64_address_translate_s1e0r(far); 5294 intr_restore(intr); 5295 PMAP_UNLOCK(pmap); 5296 5297 /* 5298 * If the translation was successful the address was invalid 5299 * due to a break-before-make sequence. We can unlock and 5300 * return success to the trap handler. 
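 * If the lookup fails as well, KERN_FAILURE is returned below so that
 * the caller can handle the fault normally.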
5301 */ 5302 if (PAR_SUCCESS(par)) 5303 return (KERN_SUCCESS); 5304 break; 5305 default: 5306 break; 5307 } 5308 #endif 5309 5310 return (KERN_FAILURE); 5311 } 5312 5313 /* 5314 * Increase the starting virtual address of the given mapping if a 5315 * different alignment might result in more superpage mappings. 5316 */ 5317 void 5318 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 5319 vm_offset_t *addr, vm_size_t size) 5320 { 5321 vm_offset_t superpage_offset; 5322 5323 if (size < L2_SIZE) 5324 return; 5325 if (object != NULL && (object->flags & OBJ_COLORED) != 0) 5326 offset += ptoa(object->pg_color); 5327 superpage_offset = offset & L2_OFFSET; 5328 if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE || 5329 (*addr & L2_OFFSET) == superpage_offset) 5330 return; 5331 if ((*addr & L2_OFFSET) < superpage_offset) 5332 *addr = (*addr & ~L2_OFFSET) + superpage_offset; 5333 else 5334 *addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset; 5335 } 5336 5337 /** 5338 * Get the kernel virtual address of a set of physical pages. If there are 5339 * physical addresses not covered by the DMAP perform a transient mapping 5340 * that will be removed when calling pmap_unmap_io_transient. 5341 * 5342 * \param page The pages the caller wishes to obtain the virtual 5343 * address on the kernel memory map. 5344 * \param vaddr On return contains the kernel virtual memory address 5345 * of the pages passed in the page parameter. 5346 * \param count Number of pages passed in. 5347 * \param can_fault TRUE if the thread using the mapped pages can take 5348 * page faults, FALSE otherwise. 5349 * 5350 * \returns TRUE if the caller must call pmap_unmap_io_transient when 5351 * finished or FALSE otherwise. 5352 * 5353 */ 5354 boolean_t 5355 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 5356 boolean_t can_fault) 5357 { 5358 vm_paddr_t paddr; 5359 boolean_t needs_mapping; 5360 int error, i; 5361 5362 /* 5363 * Allocate any KVA space that we need, this is done in a separate 5364 * loop to prevent calling vmem_alloc while pinned. 5365 */ 5366 needs_mapping = FALSE; 5367 for (i = 0; i < count; i++) { 5368 paddr = VM_PAGE_TO_PHYS(page[i]); 5369 if (__predict_false(!PHYS_IN_DMAP(paddr))) { 5370 error = vmem_alloc(kernel_arena, PAGE_SIZE, 5371 M_BESTFIT | M_WAITOK, &vaddr[i]); 5372 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 5373 needs_mapping = TRUE; 5374 } else { 5375 vaddr[i] = PHYS_TO_DMAP(paddr); 5376 } 5377 } 5378 5379 /* Exit early if everything is covered by the DMAP */ 5380 if (!needs_mapping) 5381 return (FALSE); 5382 5383 if (!can_fault) 5384 sched_pin(); 5385 for (i = 0; i < count; i++) { 5386 paddr = VM_PAGE_TO_PHYS(page[i]); 5387 if (!PHYS_IN_DMAP(paddr)) { 5388 panic( 5389 "pmap_map_io_transient: TODO: Map out of DMAP data"); 5390 } 5391 } 5392 5393 return (needs_mapping); 5394 } 5395 5396 void 5397 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 5398 boolean_t can_fault) 5399 { 5400 vm_paddr_t paddr; 5401 int i; 5402 5403 if (!can_fault) 5404 sched_unpin(); 5405 for (i = 0; i < count; i++) { 5406 paddr = VM_PAGE_TO_PHYS(page[i]); 5407 if (!PHYS_IN_DMAP(paddr)) { 5408 panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); 5409 } 5410 } 5411 } 5412 5413 boolean_t 5414 pmap_is_valid_memattr(pmap_t pmap __unused, vm_memattr_t mode) 5415 { 5416 5417 return (mode >= VM_MEMATTR_DEVICE && mode <= VM_MEMATTR_WRITE_THROUGH); 5418 } 5419
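/*
 * Illustrative sketch of how the transient I/O mapping interface above
 * might be used for a single page "m" that may or may not be covered by
 * the DMAP (not a required calling pattern):
 *
 *	vm_offset_t va[1];
 *	boolean_t mapped;
 *
 *	mapped = pmap_map_io_transient(&m, va, 1, FALSE);
 *	... access the page through (void *)va[0] ...
 *	if (mapped)
 *		pmap_unmap_io_transient(&m, va, 1, FALSE);
 *
 * When can_fault is FALSE and a transient mapping is required, the thread
 * is pinned by pmap_map_io_transient() and unpinned again by
 * pmap_unmap_io_transient().
 */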