/*-
 * Copyright (c) 1991 Regents of the University of California.
 * All rights reserved.
 * Copyright (c) 1994 John S. Dyson
 * All rights reserved.
 * Copyright (c) 1994 David Greenman
 * All rights reserved.
 * Copyright (c) 2003 Peter Wemm
 * All rights reserved.
 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu>
 * All rights reserved.
 * Copyright (c) 2014 Andrew Turner
 * All rights reserved.
 * Copyright (c) 2014-2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This code is derived from software contributed to Berkeley by
 * the Systems Programming Group of the University of Utah Computer
 * Science Department and William Jolitz of UUNET Technologies Inc.
 *
 * This software was developed by Andrew Turner under sponsorship from
 * the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from:	@(#)pmap.c	7.7 (Berkeley)	5/12/91
 */
/*-
 * Copyright (c) 2003 Networks Associates Technology, Inc.
 * All rights reserved.
 *
 * This software was developed for the FreeBSD Project by Jake Burkholder,
 * Safeport Network Services, and Network Associates Laboratories, the
 * Security Research Division of Network Associates, Inc. under
 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA
 * CHATS research program.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

/*
 * Manages physical address maps.
 *
 * Since the information managed by this module is
 * also stored by the logical address mapping module,
 * this module may throw away valid virtual-to-physical
 * mappings at almost any time.  However, invalidations
 * of virtual-to-physical mappings must be done as
 * requested.
 *
 * In order to cope with hardware architectures which
 * make virtual-to-physical map invalidates expensive,
 * this module may delay invalidate or reduce protection
 * operations until such time as they are actually
 * necessary.  This module is given full information as
 * to which processors are currently using which maps,
 * and to when physical maps must be made correct.
 */

#include "opt_vm.h"

#include <sys/param.h>
#include <sys/bitstring.h>
#include <sys/bus.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/sx.h>
#include <sys/vmem.h>
#include <sys/vmmeter.h>
#include <sys/sched.h>
#include <sys/sysctl.h>
#include <sys/_unrhdr.h>
#include <sys/smp.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
#include <vm/vm_phys.h>
#include <vm/vm_radix.h>
#include <vm/vm_reserv.h>
#include <vm/uma.h>

#include <machine/machdep.h>
#include <machine/md_var.h>
#include <machine/pcb.h>

#include <arm/include/physmem.h>

#define	NL0PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL1PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL2PG		(PAGE_SIZE/(sizeof (pd_entry_t)))
#define	NL3PG		(PAGE_SIZE/(sizeof (pt_entry_t)))

#define	NUL0E		L0_ENTRIES
#define	NUL1E		(NUL0E * NL1PG)
#define	NUL2E		(NUL1E * NL2PG)

#if !defined(DIAGNOSTIC)
#ifdef __GNUC_GNU_INLINE__
#define	PMAP_INLINE	__attribute__((__gnu_inline__)) inline
#else
#define	PMAP_INLINE	extern inline
#endif
#else
#define	PMAP_INLINE
#endif

/*
 * These are configured by the mair_el1 register. This is set up in locore.S.
 */
#define	DEVICE_MEMORY	0
#define	UNCACHED_MEMORY	1
#define	CACHED_MEMORY	2

#ifdef PV_STATS
#define	PV_STAT(x)	do { x ; } while (0)
#else
#define	PV_STAT(x)	do { } while (0)
#endif

#define	pmap_l2_pindex(v)	((v) >> L2_SHIFT)
#define	pa_to_pvh(pa)		(&pv_table[pmap_l2_pindex(pa)])

#define	NPV_LIST_LOCKS	MAXCPU

#define	PHYS_TO_PV_LIST_LOCK(pa)	\
			(&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS])

#define	CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa)	do {	\
	struct rwlock **_lockp = (lockp);		\
	struct rwlock *_new_lock;			\
							\
	_new_lock = PHYS_TO_PV_LIST_LOCK(pa);		\
	if (_new_lock != *_lockp) {			\
		if (*_lockp != NULL)			\
			rw_wunlock(*_lockp);		\
		*_lockp = _new_lock;			\
		rw_wlock(*_lockp);			\
	}						\
} while (0)

#define	CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m)	\
			CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m))

#define	RELEASE_PV_LIST_LOCK(lockp)		do {	\
	struct rwlock **_lockp = (lockp);		\
							\
	if (*_lockp != NULL) {				\
		rw_wunlock(*_lockp);			\
		*_lockp = NULL;				\
	}						\
} while (0)

#define	VM_PAGE_TO_PV_LIST_LOCK(m)	\
			PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m))

struct pmap kernel_pmap_store;

/* Used for mapping ACPI memory before VM is initialized */
#define	PMAP_PREINIT_MAPPING_COUNT	32
#define	PMAP_PREINIT_MAPPING_SIZE	(PMAP_PREINIT_MAPPING_COUNT * L2_SIZE)
static vm_offset_t preinit_map_va;	/* Start VA of pre-init mapping space */
static int vm_initialized = 0;	/* No need to use pre-init maps when set */

/*
 * Reserve a few L2 blocks starting from 'preinit_map_va' pointer.
 * Always map entire L2 block for simplicity.
 * VA of L2 block = preinit_map_va + i * L2_SIZE
 */
static struct pmap_preinit_mapping {
	vm_paddr_t	pa;
	vm_offset_t	va;
	vm_size_t	size;
} pmap_preinit_mapping[PMAP_PREINIT_MAPPING_COUNT];
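/*
 * Illustrative arithmetic (added commentary, not build code): assuming the
 * usual 4 KB granule where L2_SIZE is 2 MB, the pre-init window above covers
 * 32 * 2 MB = 64 MB of KVA, and slot i maps the 2 MB region starting at
 * preinit_map_va + i * L2_SIZE.  For example, slot 3 spans
 * [preinit_map_va + 6 MB, preinit_map_va + 8 MB).
 */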
vm_offset_t virtual_avail;	/* VA of first avail page (after kernel bss) */
vm_offset_t virtual_end;	/* VA of last avail page (end of kernel AS) */
vm_offset_t kernel_vm_end = 0;

/*
 * Data for the pv entry allocation mechanism.
 * Updates to pv_invl_gen are protected by the pv_list_locks[]
 * elements, but reads are not.
 */
static struct md_page *pv_table;
static struct md_page pv_dummy;

vm_paddr_t dmap_phys_base;	/* The start of the dmap region */
vm_paddr_t dmap_phys_max;	/* The limit of the dmap region */
vm_offset_t dmap_max_addr;	/* The virtual address limit of the dmap */

/* This code assumes all L1 DMAP entries will be used */
CTASSERT((DMAP_MIN_ADDRESS & ~L0_OFFSET) == DMAP_MIN_ADDRESS);
CTASSERT((DMAP_MAX_ADDRESS & ~L0_OFFSET) == DMAP_MAX_ADDRESS);

#define	DMAP_TABLES	((DMAP_MAX_ADDRESS - DMAP_MIN_ADDRESS) >> L0_SHIFT)
extern pt_entry_t pagetable_dmap[];

#define	PHYSMAP_SIZE	(2 * (VM_PHYSSEG_MAX - 1))
static vm_paddr_t physmap[PHYSMAP_SIZE];
static u_int physmap_idx;

static SYSCTL_NODE(_vm, OID_AUTO, pmap, CTLFLAG_RD, 0, "VM/pmap parameters");

static int superpages_enabled = 1;
SYSCTL_INT(_vm_pmap, OID_AUTO, superpages_enabled,
    CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &superpages_enabled, 0,
    "Are large page mappings enabled?");

/*
 * Data for the pv entry allocation mechanism
 */
static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks);
static struct mtx pv_chunks_mutex;
static struct rwlock pv_list_locks[NPV_LIST_LOCKS];

static void	free_pv_chunk(struct pv_chunk *pc);
static void	free_pv_entry(pmap_t pmap, pv_entry_t pv);
static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp);
static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp);
static void	pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va);
static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap,
		    vm_offset_t va);

static int pmap_change_attr(vm_offset_t va, vm_size_t size, int mode);
static int pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode);
static pt_entry_t *pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va);
static pt_entry_t *pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2,
    vm_offset_t va, struct rwlock **lockp);
static pt_entry_t *pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va);
static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va,
    vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp);
static int pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva,
    pd_entry_t l1e, struct spglist *free, struct rwlock **lockp);
static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva,
    pd_entry_t l2e, struct spglist *free, struct rwlock **lockp);
static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va,
    vm_page_t m, struct rwlock **lockp);

static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex,
		struct rwlock **lockp);

static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m,
    struct spglist *free);
static int pmap_unuse_pt(pmap_t, vm_offset_t, pd_entry_t, struct spglist *);
static __inline vm_page_t pmap_remove_pt_page(pmap_t pmap, vm_offset_t va);

/*
 * These load the old table data and store the new value.
 * They need to be atomic as the System MMU may write to the table at
 * the same time as the CPU.
 */
#define	pmap_load_store(table, entry)	atomic_swap_64(table, entry)
#define	pmap_set(table, mask)		atomic_set_64(table, mask)
#define	pmap_load_clear(table)		atomic_swap_64(table, 0)
#define	pmap_load(table)		(*table)
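/*
 * Minimal usage sketch (added commentary, mirroring pmap_kremove() below):
 * table updates go through the atomic accessors above and are followed by
 * a TLB invalidation, since another CPU or the System MMU may be walking
 * the tables concurrently.
 *
 *	pt_entry_t *pte = ...;                  // points into a live table
 *	pmap_load_clear(pte);                   // atomically zero the entry
 *	pmap_invalidate_page(kernel_pmap, va);  // flush stale translations
 */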
/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

	memcpy(d, s, PAGE_SIZE);
}

static __inline pd_entry_t *
pmap_l0(pmap_t pmap, vm_offset_t va)
{

	return (&pmap->pm_l0[pmap_l0_index(va)]);
}

static __inline pd_entry_t *
pmap_l0_to_l1(pd_entry_t *l0, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
	return (&l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l0;

	l0 = pmap_l0(pmap, va);
	if ((pmap_load(l0) & ATTR_DESCR_MASK) != L0_TABLE)
		return (NULL);

	return (pmap_l0_to_l1(l0, va));
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
	pd_entry_t *l2;

	l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
	return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
	pd_entry_t *l1;

	l1 = pmap_l1(pmap, va);
	if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
		return (NULL);

	return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
	pt_entry_t *l3;

	l3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
	return (&l3[pmap_l3_index(va)]);
}

/*
 * Returns the lowest valid pde for a given virtual address.
 * The next level may or may not point to a valid page or block.
 */
static __inline pd_entry_t *
pmap_pde(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l0, *l1, *l2, desc;

	l0 = pmap_l0(pmap, va);
	desc = pmap_load(l0) & ATTR_DESCR_MASK;
	if (desc != L0_TABLE) {
		*level = -1;
		return (NULL);
	}

	l1 = pmap_l0_to_l1(l0, va);
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc != L1_TABLE) {
		*level = 0;
		return (l0);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc != L2_TABLE) {
		*level = 1;
		return (l1);
	}

	*level = 2;
	return (l2);
}
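/*
 * Commentary sketch (added, not build code): pmap_pde() above returns the
 * lowest valid page directory entry, while pmap_pte() below returns the
 * lowest valid block or page entry; in both cases *level reports how far
 * the walk got.  A typical caller pairs the two results, e.g.:
 *
 *	int lvl;
 *	pt_entry_t *pte = pmap_pte(pmap, va, &lvl);
 *	if (pte != NULL && lvl == 2)
 *		;	// va is covered by a 2 MB L2 block entry
 *	else if (pte != NULL && lvl == 3)
 *		;	// va is mapped by a 4 KB L3 page entry
 */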
/*
 * Returns the lowest valid pte block or table entry for a given virtual
 * address. If there are no valid entries return NULL and set the level to
 * the first invalid level.
 */
static __inline pt_entry_t *
pmap_pte(pmap_t pmap, vm_offset_t va, int *level)
{
	pd_entry_t *l1, *l2, desc;
	pt_entry_t *l3;

	l1 = pmap_l1(pmap, va);
	if (l1 == NULL) {
		*level = 0;
		return (NULL);
	}
	desc = pmap_load(l1) & ATTR_DESCR_MASK;
	if (desc == L1_BLOCK) {
		*level = 1;
		return (l1);
	}

	if (desc != L1_TABLE) {
		*level = 1;
		return (NULL);
	}

	l2 = pmap_l1_to_l2(l1, va);
	desc = pmap_load(l2) & ATTR_DESCR_MASK;
	if (desc == L2_BLOCK) {
		*level = 2;
		return (l2);
	}

	if (desc != L2_TABLE) {
		*level = 2;
		return (NULL);
	}

	*level = 3;
	l3 = pmap_l2_to_l3(l2, va);
	if ((pmap_load(l3) & ATTR_DESCR_MASK) != L3_PAGE)
		return (NULL);

	return (l3);
}

static inline bool
pmap_superpages_enabled(void)
{

	return (superpages_enabled != 0);
}

bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l0, pd_entry_t **l1,
    pd_entry_t **l2, pt_entry_t **l3)
{
	pd_entry_t *l0p, *l1p, *l2p;

	if (pmap->pm_l0 == NULL)
		return (false);

	l0p = pmap_l0(pmap, va);
	*l0 = l0p;

	if ((pmap_load(l0p) & ATTR_DESCR_MASK) != L0_TABLE)
		return (false);

	l1p = pmap_l0_to_l1(l0p, va);
	*l1 = l1p;

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
		*l2 = NULL;
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
		return (false);

	l2p = pmap_l1_to_l2(l1p, va);
	*l2 = l2p;

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
		*l3 = NULL;
		return (true);
	}

	if ((pmap_load(l2p) & ATTR_DESCR_MASK) != L2_TABLE)
		return (false);

	*l3 = pmap_l2_to_l3(l2p, va);

	return (true);
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

	return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

CTASSERT(L1_BLOCK == L2_BLOCK);
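/*
 * Usage sketch for pmap_get_tables() (added commentary): the function fills
 * in a pointer for every level it walks and NULLs the rest, so a caller can
 * tell a block mapping from a page mapping by which pointers are set:
 *
 *	pd_entry_t *l0, *l1, *l2;
 *	pt_entry_t *l3;
 *	if (pmap_get_tables(pmap, va, &l0, &l1, &l2, &l3)) {
 *		if (l3 != NULL)
 *			;	// reached an L3 (4 KB page) entry
 *		else if (l2 != NULL)
 *			;	// 2 MB L2 block mapping
 *		else
 *			;	// 1 GB L1 block mapping
 *	}
 */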
/*
 * Checks if the page is dirty. We currently lack proper tracking of this on
 * arm64, so for now assume that if a page is mapped read/write and has been
 * accessed, it is dirty.
 */
static inline int
pmap_page_dirty(pt_entry_t pte)
{

	return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) ==
	    (ATTR_AF | ATTR_AP(ATTR_AP_RW)));
}

static __inline void
pmap_resident_count_inc(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	pmap->pm_stats.resident_count += count;
}

static __inline void
pmap_resident_count_dec(pmap_t pmap, int count)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	KASSERT(pmap->pm_stats.resident_count >= count,
	    ("pmap %p resident count underflow %ld %d", pmap,
	    pmap->pm_stats.resident_count, count));
	pmap->pm_stats.resident_count -= count;
}

static pt_entry_t *
pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot,
    u_int *l2_slot)
{
	pt_entry_t *l2;
	pd_entry_t *l1;

	l1 = (pd_entry_t *)l1pt;
	*l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK;

	/* Check locore has used a table L1 map */
	KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE,
	    ("Invalid bootstrap L1 table"));
	/* Find the address of the L2 table */
	l2 = (pt_entry_t *)init_pt_va;
	*l2_slot = pmap_l2_index(va);

	return (l2);
}

static vm_paddr_t
pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va)
{
	u_int l1_slot, l2_slot;
	pt_entry_t *l2;

	l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot);

	return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET));
}

static vm_offset_t
pmap_bootstrap_dmap(vm_offset_t kern_l1, vm_paddr_t min_pa,
    vm_offset_t freemempos)
{
	pt_entry_t *l2;
	vm_offset_t va;
	vm_paddr_t l2_pa, pa;
	u_int l1_slot, l2_slot, prev_l1_slot;
	int i;

	dmap_phys_base = min_pa & ~L1_OFFSET;
	dmap_phys_max = 0;
	dmap_max_addr = 0;
	l2 = NULL;
	prev_l1_slot = -1;

	memset(pagetable_dmap, 0, PAGE_SIZE * DMAP_TABLES);

	for (i = 0; i < (physmap_idx * 2); i += 2) {
		pa = physmap[i] & ~L2_OFFSET;
		va = pa - dmap_phys_base + DMAP_MIN_ADDRESS;

		/* Create L2 mappings at the start of the region */
		if ((pa & L1_OFFSET) != 0) {
			l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
			if (l1_slot != prev_l1_slot) {
				prev_l1_slot = l1_slot;
				l2 = (pt_entry_t *)freemempos;
				l2_pa = pmap_early_vtophys(kern_l1,
				    (vm_offset_t)l2);
				freemempos += PAGE_SIZE;

				pmap_load_store(&pagetable_dmap[l1_slot],
				    (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE);

				memset(l2, 0, PAGE_SIZE);
			}
			KASSERT(l2 != NULL,
			    ("pmap_bootstrap_dmap: NULL l2 map"));
			for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1];
			    pa += L2_SIZE, va += L2_SIZE) {
				/*
				 * We are on a boundary, stop to
				 * create a level 1 block
				 */
				if ((pa & L1_OFFSET) == 0)
					break;

				l2_slot = pmap_l2_index(va);
				KASSERT(l2_slot != 0, ("..."));
				pmap_load_store(&l2[l2_slot],
				    (pa & ~L2_OFFSET) | ATTR_DEFAULT | ATTR_XN |
				    ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
			}
			KASSERT(va == (pa - dmap_phys_base + DMAP_MIN_ADDRESS),
			    ("..."));
		}

		for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1] &&
		    (physmap[i + 1] - pa) >= L1_SIZE;
		    pa += L1_SIZE, va += L1_SIZE) {
			l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
			pmap_load_store(&pagetable_dmap[l1_slot],
			    (pa & ~L1_OFFSET) | ATTR_DEFAULT | ATTR_XN |
			    ATTR_IDX(CACHED_MEMORY) | L1_BLOCK);
		}
		/* Create L2 mappings at the end of the region */
		if (pa < physmap[i + 1]) {
			l1_slot = ((va - DMAP_MIN_ADDRESS) >> L1_SHIFT);
			if (l1_slot != prev_l1_slot) {
				prev_l1_slot = l1_slot;
				l2 = (pt_entry_t *)freemempos;
				l2_pa = pmap_early_vtophys(kern_l1,
				    (vm_offset_t)l2);
				freemempos += PAGE_SIZE;

				pmap_load_store(&pagetable_dmap[l1_slot],
				    (l2_pa & ~Ln_TABLE_MASK) | L1_TABLE);

				memset(l2, 0, PAGE_SIZE);
			}
			KASSERT(l2 != NULL,
			    ("pmap_bootstrap_dmap: NULL l2 map"));
			for (; va < DMAP_MAX_ADDRESS && pa < physmap[i + 1];
			    pa += L2_SIZE, va += L2_SIZE) {
				l2_slot = pmap_l2_index(va);
				pmap_load_store(&l2[l2_slot],
				    (pa & ~L2_OFFSET) | ATTR_DEFAULT | ATTR_XN |
				    ATTR_IDX(CACHED_MEMORY) | L2_BLOCK);
			}
		}

		if (pa > dmap_phys_max) {
			dmap_phys_max = pa;
			dmap_max_addr = va;
		}
	}

	cpu_tlb_flushID();

	return (freemempos);
}

static vm_offset_t
pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start)
{
	vm_offset_t l2pt;
	vm_paddr_t pa;
	pd_entry_t *l1;
	u_int l1_slot;

	KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address"));

	l1 = (pd_entry_t *)l1pt;
	l1_slot = pmap_l1_index(va);
	l2pt = l2_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) {
		KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index"));

		pa = pmap_early_vtophys(l1pt, l2pt);
		pmap_load_store(&l1[l1_slot],
		    (pa & ~Ln_TABLE_MASK) | L1_TABLE);
		l2pt += PAGE_SIZE;
	}

	/* Clean the L2 page table */
	memset((void *)l2_start, 0, l2pt - l2_start);

	return (l2pt);
}

static vm_offset_t
pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start)
{
	vm_offset_t l3pt;
	vm_paddr_t pa;
	pd_entry_t *l2;
	u_int l2_slot;

	KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address"));

	l2 = pmap_l2(kernel_pmap, va);
	l2 = (pd_entry_t *)rounddown2((uintptr_t)l2, PAGE_SIZE);
	l2_slot = pmap_l2_index(va);
	l3pt = l3_start;

	for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += L2_SIZE) {
		KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

		pa = pmap_early_vtophys(l1pt, l3pt);
		pmap_load_store(&l2[l2_slot],
		    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
		l3pt += PAGE_SIZE;
	}

	/* Clean the L3 page table */
	memset((void *)l3_start, 0, l3pt - l3_start);

	return (l3pt);
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l0pt, vm_offset_t l1pt, vm_paddr_t kernstart,
    vm_size_t kernlen)
{
	u_int l1_slot, l2_slot;
	uint64_t kern_delta;
	pt_entry_t *l2;
	vm_offset_t va, freemempos;
	vm_offset_t dpcpu, msgbufpv;
	vm_paddr_t start_pa, pa, min_pa;
	int i;

	kern_delta = KERNBASE - kernstart;

	printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
	printf("%lx\n", l1pt);
	printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

	/* Set this early so we can use the pagetable walking functions */
	kernel_pmap_store.pm_l0 = (pd_entry_t *)l0pt;
	PMAP_LOCK_INIT(kernel_pmap);

	/* Assume the address we were loaded to is a valid physical address */
	min_pa = KERNBASE - kern_delta;

	physmap_idx = arm_physmem_avail(physmap, nitems(physmap));
	physmap_idx /= 2;
	/*
	 * Find the minimum physical address. physmap is sorted,
	 * but may contain empty ranges.
	 */
	for (i = 0; i < (physmap_idx * 2); i += 2) {
		if (physmap[i] == physmap[i + 1])
			continue;
		if (physmap[i] <= min_pa)
			min_pa = physmap[i];
	}

	freemempos = KERNBASE + kernlen;
	freemempos = roundup2(freemempos, PAGE_SIZE);

	/* Create a direct map region early so we can use it for pa -> va */
	freemempos = pmap_bootstrap_dmap(l1pt, min_pa, freemempos);

	va = KERNBASE;
	start_pa = pa = KERNBASE - kern_delta;

	/*
	 * Read the page table to find out what is already mapped.
	 * This assumes we have mapped a block of memory from KERNBASE
	 * using a single L1 entry.
	 */
	l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot);

	/* Sanity check the index, KERNBASE should be the first VA */
	KASSERT(l2_slot == 0, ("The L2 index is non-zero"));

	/* Find how many pages we have mapped */
	for (; l2_slot < Ln_ENTRIES; l2_slot++) {
		if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0)
			break;

		/* Check locore used L2 blocks */
		KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK,
		    ("Invalid bootstrap L2 table"));
		KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa,
		    ("Incorrect PA in L2 table"));

		va += L2_SIZE;
		pa += L2_SIZE;
	}

	va = roundup2(va, L1_SIZE);

	/* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */
	freemempos = pmap_bootstrap_l2(l1pt, va, freemempos);
	/* And the l3 tables for the early devmap */
	freemempos = pmap_bootstrap_l3(l1pt,
	    VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos);

	cpu_tlb_flushID();

#define alloc_pages(var, np)						\
	(var) = freemempos;						\
	freemempos += (np * PAGE_SIZE);					\
	memset((char *)(var), 0, ((np) * PAGE_SIZE));

	/* Allocate dynamic per-cpu area. */
	alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE);
	dpcpu_init((void *)dpcpu, 0);

	/* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */
	alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE);
	msgbufp = (void *)msgbufpv;

	/* Reserve some VA space for early BIOS/ACPI mapping */
	preinit_map_va = roundup2(freemempos, L2_SIZE);

	virtual_avail = preinit_map_va + PMAP_PREINIT_MAPPING_SIZE;
	virtual_avail = roundup2(virtual_avail, L1_SIZE);
	virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE;
	kernel_vm_end = virtual_avail;

	pa = pmap_early_vtophys(l1pt, freemempos);

	arm_physmem_exclude_region(start_pa, pa - start_pa, EXFLAG_NOALLOC);

	cpu_tlb_flushID();
}

/*
 * Initialize a vm_page's machine-dependent fields.
 */
void
pmap_page_init(vm_page_t m)
{

	TAILQ_INIT(&m->md.pv_list);
	m->md.pv_memattr = VM_MEMATTR_WRITE_BACK;
}
/*
 * Initialize the pmap module.
 * Called by vm_init, to initialize any structures that the pmap
 * system needs to map virtual memory.
 */
void
pmap_init(void)
{
	vm_size_t s;
	int i, pv_npg;

	/*
	 * Are large page mappings enabled?
	 */
	TUNABLE_INT_FETCH("vm.pmap.superpages_enabled", &superpages_enabled);

	/*
	 * Initialize the pv chunk list mutex.
	 */
	mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF);

	/*
	 * Initialize the pool of pv list locks.
	 */
	for (i = 0; i < NPV_LIST_LOCKS; i++)
		rw_init(&pv_list_locks[i], "pmap pv list");

	/*
	 * Calculate the size of the pv head table for superpages.
	 */
	pv_npg = howmany(vm_phys_segs[vm_phys_nsegs - 1].end, L2_SIZE);

	/*
	 * Allocate memory for the pv head table for superpages.
	 */
	s = (vm_size_t)(pv_npg * sizeof(struct md_page));
	s = round_page(s);
	pv_table = (struct md_page *)kmem_malloc(kernel_arena, s,
	    M_WAITOK | M_ZERO);
	for (i = 0; i < pv_npg; i++)
		TAILQ_INIT(&pv_table[i].pv_list);
	TAILQ_INIT(&pv_dummy.pv_list);

	vm_initialized = 1;
}

static SYSCTL_NODE(_vm_pmap, OID_AUTO, l2, CTLFLAG_RD, 0,
    "2MB page mapping counters");

static u_long pmap_l2_demotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, demotions, CTLFLAG_RD,
    &pmap_l2_demotions, 0, "2MB page demotions");

static u_long pmap_l2_p_failures;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, p_failures, CTLFLAG_RD,
    &pmap_l2_p_failures, 0, "2MB page promotion failures");

static u_long pmap_l2_promotions;
SYSCTL_ULONG(_vm_pmap_l2, OID_AUTO, promotions, CTLFLAG_RD,
    &pmap_l2_promotions, 0, "2MB page promotions");

/*
 * Invalidate a single TLB entry.
 */
static __inline void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vaae1is, %0	\n"
	    "dsb  ish		\n"
	    "isb		\n"
	    : : "r"(va >> PAGE_SHIFT));
	sched_unpin();
}

static __inline void
pmap_invalidate_range_nopin(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
	vm_offset_t addr;

	dsb(ishst);
	for (addr = sva; addr < eva; addr += PAGE_SIZE) {
		__asm __volatile(
		    "tlbi vaae1is, %0" : : "r"(addr >> PAGE_SHIFT));
	}
	__asm __volatile(
	    "dsb  ish	\n"
	    "isb	\n");
}

static __inline void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{

	sched_pin();
	pmap_invalidate_range_nopin(pmap, sva, eva);
	sched_unpin();
}

static __inline void
pmap_invalidate_all(pmap_t pmap)
{

	sched_pin();
	__asm __volatile(
	    "dsb  ishst		\n"
	    "tlbi vmalle1is	\n"
	    "dsb  ish		\n"
	    "isb		\n");
	sched_unpin();
}
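/*
 * Commentary on the sequences above (added note, standard ARMv8 ordering
 * rules): "dsb ishst" makes the preceding page table store visible to the
 * table walker before the invalidate is issued, "tlbi ...is" broadcasts the
 * invalidate to the inner shareable domain, "dsb ish" waits for it to
 * complete on all CPUs, and "isb" discards any stale translations already
 * fetched by this CPU's pipeline.  The tlbi operand is the VA shifted right
 * by PAGE_SHIFT because the instruction takes a page number, not a byte
 * address.
 */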
/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	pa = 0;
	PMAP_LOCK(pmap);
	/*
	 * Find the block or page map for this virtual address. pmap_pte
	 * will return either a valid block/page entry, or NULL.
	 */
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);
		pa = tpte & ~ATTR_MASK;
		switch(lvl) {
		case 1:
			KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
			    ("pmap_extract: Invalid L1 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L1_OFFSET);
			break;
		case 2:
			KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
			    ("pmap_extract: Invalid L2 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L2_OFFSET);
			break;
		case 3:
			KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
			    ("pmap_extract: Invalid L3 pte found: %lx",
			    tpte & ATTR_DESCR_MASK));
			pa |= (va & L3_OFFSET);
			break;
		}
	}
	PMAP_UNLOCK(pmap);
	return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
	pt_entry_t *pte, tpte;
	vm_offset_t off;
	vm_paddr_t pa;
	vm_page_t m;
	int lvl;

	pa = 0;
	m = NULL;
	PMAP_LOCK(pmap);
retry:
	pte = pmap_pte(pmap, va, &lvl);
	if (pte != NULL) {
		tpte = pmap_load(pte);

		KASSERT(lvl > 0 && lvl <= 3,
		    ("pmap_extract_and_hold: Invalid level %d", lvl));
		CTASSERT(L1_BLOCK == L2_BLOCK);
		KASSERT((lvl == 3 && (tpte & ATTR_DESCR_MASK) == L3_PAGE) ||
		    (lvl < 3 && (tpte & ATTR_DESCR_MASK) == L1_BLOCK),
		    ("pmap_extract_and_hold: Invalid pte at L%d: %lx", lvl,
		    tpte & ATTR_DESCR_MASK));
		if (((tpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
		    ((prot & VM_PROT_WRITE) == 0)) {
			switch(lvl) {
			case 1:
				off = va & L1_OFFSET;
				break;
			case 2:
				off = va & L2_OFFSET;
				break;
			case 3:
			default:
				off = 0;
			}
			if (vm_page_pa_tryrelock(pmap,
			    (tpte & ~ATTR_MASK) | off, &pa))
				goto retry;
			m = PHYS_TO_VM_PAGE((tpte & ~ATTR_MASK) | off);
			vm_page_hold(m);
		}
	}
	PA_UNLOCK_COND(pa);
	PMAP_UNLOCK(pmap);
	return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
	pt_entry_t *pte, tpte;
	vm_paddr_t pa;
	int lvl;

	if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
		pa = DMAP_TO_PHYS(va);
	} else {
		pa = 0;
		pte = pmap_pte(kernel_pmap, va, &lvl);
		if (pte != NULL) {
			tpte = pmap_load(pte);
			pa = tpte & ~ATTR_MASK;
			switch(lvl) {
			case 1:
				KASSERT((tpte & ATTR_DESCR_MASK) == L1_BLOCK,
				    ("pmap_kextract: Invalid L1 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L1_OFFSET);
				break;
			case 2:
				KASSERT((tpte & ATTR_DESCR_MASK) == L2_BLOCK,
				    ("pmap_kextract: Invalid L2 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L2_OFFSET);
				break;
			case 3:
				KASSERT((tpte & ATTR_DESCR_MASK) == L3_PAGE,
				    ("pmap_kextract: Invalid L3 pte found: %lx",
				    tpte & ATTR_DESCR_MASK));
				pa |= (va & L3_OFFSET);
				break;
			}
		}
	}
	return (pa);
}
/***************************************************
 * Low level mapping routines.....
 ***************************************************/

void
pmap_kenter(vm_offset_t sva, vm_size_t size, vm_paddr_t pa, int mode)
{
	pd_entry_t *pde;
	pt_entry_t *pte, attr;
	vm_offset_t va;
	int lvl;

	KASSERT((pa & L3_OFFSET) == 0,
	    ("pmap_kenter: Invalid physical address"));
	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kenter: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kenter: Mapping is not page-sized"));

	attr = ATTR_DEFAULT | ATTR_IDX(mode) | L3_PAGE;
	if (mode == DEVICE_MEMORY)
		attr |= ATTR_XN;

	va = sva;
	while (size != 0) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_kenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2, ("pmap_kenter: Invalid level %d", lvl));

		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, (pa & ~L3_OFFSET) | attr);

		va += PAGE_SIZE;
		pa += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

void
pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa)
{

	pmap_kenter(sva, size, pa, DEVICE_MEMORY);
}

/*
 * Remove a page from the kernel pagetables.
 */
PMAP_INLINE void
pmap_kremove(vm_offset_t va)
{
	pt_entry_t *pte;
	int lvl;

	pte = pmap_pte(kernel_pmap, va, &lvl);
	KASSERT(pte != NULL, ("pmap_kremove: Invalid address"));
	KASSERT(lvl == 3, ("pmap_kremove: Invalid pte level %d", lvl));

	pmap_load_clear(pte);
	pmap_invalidate_page(kernel_pmap, va);
}

void
pmap_kremove_device(vm_offset_t sva, vm_size_t size)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT((sva & L3_OFFSET) == 0,
	    ("pmap_kremove_device: Invalid virtual address"));
	KASSERT((size & PAGE_MASK) == 0,
	    ("pmap_kremove_device: Mapping is not page-sized"));

	va = sva;
	while (size != 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(pte != NULL, ("Invalid page table, va: 0x%lx", va));
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		pmap_load_clear(pte);

		va += PAGE_SIZE;
		size -= PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * Used to map a range of physical addresses into kernel
 * virtual address space.
 *
 * The value passed in '*virt' is a suggested virtual address for
 * the mapping. Architectures which can support a direct-mapped
 * physical to virtual region can return the appropriate address
 * within that region, leaving '*virt' unchanged. Other
 * architectures should map the pages starting at '*virt' and
 * update '*virt' with the first usable address after the mapped
 * region.
 */
vm_offset_t
pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot)
{
	return PHYS_TO_DMAP(start);
}
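/*
 * Usage sketch (added commentary, not build code): a driver-style mapping
 * of a page-aligned device register window through the routines above
 * might look like
 *
 *	vm_offset_t va = kva_alloc(size);	// page-aligned KVA
 *	pmap_kenter_device(va, size, pa);	// map as DEVICE_MEMORY
 *	...
 *	pmap_kremove_device(va, size);		// unmap and invalidate
 *	kva_free(va, size);
 *
 * kva_alloc()/kva_free() are the generic VM KVA allocators; both 'va' and
 * 'size' must be page aligned, as the KASSERTs above enforce.
 */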
/*
 * Add a list of wired pages to the kva.  This routine is only used for
 * temporary kernel mappings that do not need to have page modification
 * or references recorded.  Note that old mappings are simply written
 * over.  The page *must* be wired.
 * Note: SMP coherent.  Uses a ranged shootdown IPI.
 */
void
pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count)
{
	pd_entry_t *pde;
	pt_entry_t *pte, pa;
	vm_offset_t va;
	vm_page_t m;
	int i, lvl;

	va = sva;
	for (i = 0; i < count; i++) {
		pde = pmap_pde(kernel_pmap, va, &lvl);
		KASSERT(pde != NULL,
		    ("pmap_qenter: Invalid page entry, va: 0x%lx", va));
		KASSERT(lvl == 2,
		    ("pmap_qenter: Invalid level %d", lvl));

		m = ma[i];
		pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) |
		    ATTR_IDX(m->md.pv_memattr) | L3_PAGE;
		if (m->md.pv_memattr == DEVICE_MEMORY)
			pa |= ATTR_XN;
		pte = pmap_l2_to_l3(pde, va);
		pmap_load_store(pte, pa);

		va += L3_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/*
 * This routine tears out page mappings from the
 * kernel -- it is meant only for temporary mappings.
 */
void
pmap_qremove(vm_offset_t sva, int count)
{
	pt_entry_t *pte;
	vm_offset_t va;
	int lvl;

	KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva));

	va = sva;
	while (count-- > 0) {
		pte = pmap_pte(kernel_pmap, va, &lvl);
		KASSERT(lvl == 3,
		    ("Invalid device pagetable level: %d != 3", lvl));
		if (pte != NULL) {
			pmap_load_clear(pte);
		}

		va += PAGE_SIZE;
	}
	pmap_invalidate_range(kernel_pmap, sva, va);
}

/***************************************************
 * Page table page management routines.....
 ***************************************************/
/*
 * Schedule the specified unused page table page to be freed.  Specifically,
 * add the page to the specified list of pages that will be released to the
 * physical memory manager after the TLB has been updated.
 */
static __inline void
pmap_add_delayed_free_list(vm_page_t m, struct spglist *free,
    boolean_t set_PG_ZERO)
{

	if (set_PG_ZERO)
		m->flags |= PG_ZERO;
	else
		m->flags &= ~PG_ZERO;
	SLIST_INSERT_HEAD(free, m, plinks.s.ss);
}
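/*
 * Pattern sketch (added commentary, not build code): callers collect page
 * table pages on an spglist while the pmap lock is held and the TLB may
 * still reference them, then free the whole batch only after invalidation:
 *
 *	struct spglist free;
 *	SLIST_INIT(&free);
 *	... pmap_unuse_pt(pmap, va, pde, &free); ...	// may queue pages
 *	pmap_invalidate_page(pmap, va);			// flush stale entries
 *	vm_page_free_pages_toq(&free, false);		// now safe to release
 *
 * reclaim_pv_chunk() below follows exactly this ordering.
 */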
/*
 * Decrements a page table page's wire count, which is used to record the
 * number of valid page table entries within the page.  If the wire count
 * drops to zero, then the page table page is unmapped.  Returns TRUE if the
 * page table page was unmapped and FALSE otherwise.
 */
static inline boolean_t
pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	--m->wire_count;
	if (m->wire_count == 0) {
		_pmap_unwire_l3(pmap, va, m, free);
		return (TRUE);
	} else
		return (FALSE);
}

static void
_pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free)
{

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
	/*
	 * unmap the page table page
	 */
	if (m->pindex >= (NUL2E + NUL1E)) {
		/* l1 page */
		pd_entry_t *l0;

		l0 = pmap_l0(pmap, va);
		pmap_load_clear(l0);
	} else if (m->pindex >= NUL2E) {
		/* l2 page */
		pd_entry_t *l1;

		l1 = pmap_l1(pmap, va);
		pmap_load_clear(l1);
	} else {
		/* l3 page */
		pd_entry_t *l2;

		l2 = pmap_l2(pmap, va);
		pmap_load_clear(l2);
	}
	pmap_resident_count_dec(pmap, 1);
	if (m->pindex < NUL2E) {
		/* We just released an l3, unhold the matching l2 */
		pd_entry_t *l1, tl1;
		vm_page_t l2pg;

		l1 = pmap_l1(pmap, va);
		tl1 = pmap_load(l1);
		l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l2pg, free);
	} else if (m->pindex < (NUL2E + NUL1E)) {
		/* We just released an l2, unhold the matching l1 */
		pd_entry_t *l0, tl0;
		vm_page_t l1pg;

		l0 = pmap_l0(pmap, va);
		tl0 = pmap_load(l0);
		l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
		pmap_unwire_l3(pmap, va, l1pg, free);
	}
	pmap_invalidate_page(pmap, va);

	vm_wire_sub(1);

	/*
	 * Put page on a list so that it is released after
	 * *ALL* TLB shootdown is done
	 */
	pmap_add_delayed_free_list(m, free, TRUE);
}

/*
 * After removing a page table entry, this routine is used to
 * conditionally free the page, and manage the hold/wire counts.
 */
static int
pmap_unuse_pt(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde,
    struct spglist *free)
{
	vm_page_t mpte;

	if (va >= VM_MAXUSER_ADDRESS)
		return (0);
	KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0"));
	mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK);
	return (pmap_unwire_l3(pmap, va, mpte, free));
}

void
pmap_pinit0(pmap_t pmap)
{

	PMAP_LOCK_INIT(pmap);
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
	pmap->pm_l0 = kernel_pmap->pm_l0;
	pmap->pm_root.rt_root = 0;
}

int
pmap_pinit(pmap_t pmap)
{
	vm_paddr_t l0phys;
	vm_page_t l0pt;

	/*
	 * allocate the l0 page
	 */
	while ((l0pt = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL |
	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL)
		vm_wait(NULL);

	l0phys = VM_PAGE_TO_PHYS(l0pt);
	pmap->pm_l0 = (pd_entry_t *)PHYS_TO_DMAP(l0phys);

	if ((l0pt->flags & PG_ZERO) == 0)
		pagezero(pmap->pm_l0);

	pmap->pm_root.rt_root = 0;
	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));

	return (1);
}
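/*
 * Worked example of the page table page index space used below (added
 * commentary): _pmap_alloc_l3() places L3 table pages at pindex
 * [0, NUL2E), L2 table pages at [NUL2E, NUL2E + NUL1E), and L1 table pages
 * at [NUL2E + NUL1E, NUL2E + NUL1E + NUL0E).  With 4 KB pages and
 * 512-entry tables, the L3 page covering a given va has
 * pindex = pmap_l2_pindex(va) = va >> L2_SHIFT, its parent L2 page sits at
 * NUL2E + (va >> L1_SHIFT), and the grandparent L1 page at
 * NUL2E + NUL1E + (va >> L0_SHIFT); this is how the recursion in
 * _pmap_alloc_l3() converts a child's pindex into its parent's pindex.
 */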
/*
 * This routine is called if the desired page table page does not exist.
 *
 * If page table page allocation fails, this routine may sleep before
 * returning NULL.  It sleeps only if a lock pointer was given.
 *
 * Note: If a page allocation fails at page table level two or three,
 * one or two pages may be held during the wait, only to be released
 * afterwards.  This conservative approach is easily argued to avoid
 * race conditions.
 */
static vm_page_t
_pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp)
{
	vm_page_t m, l1pg, l2pg;

	PMAP_LOCK_ASSERT(pmap, MA_OWNED);

	/*
	 * Allocate a page table page.
	 */
	if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ |
	    VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) {
		if (lockp != NULL) {
			RELEASE_PV_LIST_LOCK(lockp);
			PMAP_UNLOCK(pmap);
			vm_wait(NULL);
			PMAP_LOCK(pmap);
		}

		/*
		 * Indicate the need to retry.  While waiting, the page table
		 * page may have been allocated.
		 */
		return (NULL);
	}
	if ((m->flags & PG_ZERO) == 0)
		pmap_zero_page(m);

	/*
	 * Map the pagetable page into the process address space, if
	 * it isn't already there.
	 */

	if (ptepindex >= (NUL2E + NUL1E)) {
		pd_entry_t *l0;
		vm_pindex_t l0index;

		l0index = ptepindex - (NUL2E + NUL1E);
		l0 = &pmap->pm_l0[l0index];
		pmap_load_store(l0, VM_PAGE_TO_PHYS(m) | L0_TABLE);
	} else if (ptepindex >= NUL2E) {
		vm_pindex_t l0index, l1index;
		pd_entry_t *l0, *l1;
		pd_entry_t tl0;

		l1index = ptepindex - NUL2E;
		l0index = l1index >> L0_ENTRIES_SHIFT;

		l0 = &pmap->pm_l0[l0index];
		tl0 = pmap_load(l0);
		if (tl0 == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUL2E + NUL1E + l0index,
			    lockp) == NULL) {
				vm_page_unwire_noq(m);
				vm_page_free_zero(m);
				return (NULL);
			}
		} else {
			l1pg = PHYS_TO_VM_PAGE(tl0 & ~ATTR_MASK);
			l1pg->wire_count++;
		}

		l1 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l0) & ~ATTR_MASK);
		l1 = &l1[ptepindex & Ln_ADDR_MASK];
		pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE);
	} else {
		vm_pindex_t l0index, l1index;
		pd_entry_t *l0, *l1, *l2;
		pd_entry_t tl0, tl1;

		l1index = ptepindex >> Ln_ENTRIES_SHIFT;
		l0index = l1index >> L0_ENTRIES_SHIFT;

		l0 = &pmap->pm_l0[l0index];
		tl0 = pmap_load(l0);
		if (tl0 == 0) {
			/* recurse for allocating page dir */
			if (_pmap_alloc_l3(pmap, NUL2E + l1index,
			    lockp) == NULL) {
				vm_page_unwire_noq(m);
				vm_page_free_zero(m);
				return (NULL);
			}
			tl0 = pmap_load(l0);
			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
			l1 = &l1[l1index & Ln_ADDR_MASK];
		} else {
			l1 = (pd_entry_t *)PHYS_TO_DMAP(tl0 & ~ATTR_MASK);
			l1 = &l1[l1index & Ln_ADDR_MASK];
			tl1 = pmap_load(l1);
			if (tl1 == 0) {
				/* recurse for allocating page dir */
				if (_pmap_alloc_l3(pmap, NUL2E + l1index,
				    lockp) == NULL) {
					vm_page_unwire_noq(m);
					vm_page_free_zero(m);
					return (NULL);
				}
			} else {
				l2pg = PHYS_TO_VM_PAGE(tl1 & ~ATTR_MASK);
				l2pg->wire_count++;
			}
		}

		l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
		l2 = &l2[ptepindex & Ln_ADDR_MASK];
		pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE);
	}

	pmap_resident_count_inc(pmap, 1);

	return (m);
}

static vm_page_t
pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp)
{
	vm_pindex_t ptepindex;
	pd_entry_t *pde, tpde;
#ifdef INVARIANTS
	pt_entry_t *pte;
#endif
	vm_page_t m;
	int lvl;

	/*
	 * Calculate pagetable page index
	 */
	ptepindex = pmap_l2_pindex(va);
retry:
	/*
	 * Get the page directory entry
	 */
	pde = pmap_pde(pmap, va, &lvl);
	/*
	 * If the page table page is mapped, we just increment the hold count,
	 * and activate it. If we get a level 2 pde it will point to a level 3
	 * table.
	 */
	switch (lvl) {
	case -1:
		break;
	case 0:
#ifdef INVARIANTS
		pte = pmap_l0_to_l1(pde, va);
		KASSERT(pmap_load(pte) == 0,
		    ("pmap_alloc_l3: TODO: l0 superpages"));
#endif
		break;
	case 1:
#ifdef INVARIANTS
		pte = pmap_l1_to_l2(pde, va);
		KASSERT(pmap_load(pte) == 0,
		    ("pmap_alloc_l3: TODO: l1 superpages"));
#endif
		break;
	case 2:
		tpde = pmap_load(pde);
		if (tpde != 0) {
			m = PHYS_TO_VM_PAGE(tpde & ~ATTR_MASK);
			m->wire_count++;
			return (m);
		}
		break;
	default:
		panic("pmap_alloc_l3: Invalid level %d", lvl);
	}

	/*
	 * Here if the pte page isn't mapped, or if it has been deallocated.
	 */
	m = _pmap_alloc_l3(pmap, ptepindex, lockp);
	if (m == NULL && lockp != NULL)
		goto retry;

	return (m);
}

/***************************************************
 * Pmap allocation/deallocation routines.
 ***************************************************/

/*
 * Release any resources held by the given physical map.
 * Called when a pmap initialized by pmap_pinit is being released.
 * Should only be called if the map contains no valid mappings.
 */
void
pmap_release(pmap_t pmap)
{
	vm_page_t m;

	KASSERT(pmap->pm_stats.resident_count == 0,
	    ("pmap_release: pmap resident count %ld != 0",
	    pmap->pm_stats.resident_count));
	KASSERT(vm_radix_is_empty(&pmap->pm_root),
	    ("pmap_release: pmap has reserved page table page(s)"));

	m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l0));

	vm_page_unwire_noq(m);
	vm_page_free_zero(m);
}

static int
kvm_size(SYSCTL_HANDLER_ARGS)
{
	unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS;

	return sysctl_handle_long(oidp, &ksize, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG | CTLFLAG_RD,
    0, 0, kvm_size, "LU", "Size of KVM");

static int
kvm_free(SYSCTL_HANDLER_ARGS)
{
	unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end;

	return sysctl_handle_long(oidp, &kfree, 0, req);
}
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG | CTLFLAG_RD,
    0, 0, kvm_free, "LU", "Amount of KVM free");
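/*
 * Usage note (added commentary): the two handlers above surface as
 * read-only sysctls, so the current KVA size and headroom can be inspected
 * from userland with, e.g.:
 *
 *	$ sysctl vm.kvm_size vm.kvm_free
 */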
/*
 * grow the number of kernel page table entries, if needed
 */
void
pmap_growkernel(vm_offset_t addr)
{
	vm_paddr_t paddr;
	vm_page_t nkpg;
	pd_entry_t *l0, *l1, *l2;

	mtx_assert(&kernel_map->system_mtx, MA_OWNED);

	addr = roundup2(addr, L2_SIZE);
	if (addr - 1 >= kernel_map->max_offset)
		addr = kernel_map->max_offset;
	while (kernel_vm_end < addr) {
		l0 = pmap_l0(kernel_pmap, kernel_vm_end);
		KASSERT(pmap_load(l0) != 0,
		    ("pmap_growkernel: No level 0 kernel entry"));

		l1 = pmap_l0_to_l1(l0, kernel_vm_end);
		if (pmap_load(l1) == 0) {
			/* We need a new PDP entry */
			nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT,
			    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ |
			    VM_ALLOC_WIRED | VM_ALLOC_ZERO);
			if (nkpg == NULL)
				panic("pmap_growkernel: no memory to grow kernel");
			if ((nkpg->flags & PG_ZERO) == 0)
				pmap_zero_page(nkpg);
			paddr = VM_PAGE_TO_PHYS(nkpg);
			pmap_load_store(l1, paddr | L1_TABLE);
			continue; /* try again */
		}
		l2 = pmap_l1_to_l2(l1, kernel_vm_end);
		if ((pmap_load(l2) & ATTR_AF) != 0) {
			kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
			if (kernel_vm_end - 1 >= kernel_map->max_offset) {
				kernel_vm_end = kernel_map->max_offset;
				break;
			}
			continue;
		}

		nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT,
		    VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED |
		    VM_ALLOC_ZERO);
		if (nkpg == NULL)
			panic("pmap_growkernel: no memory to grow kernel");
		if ((nkpg->flags & PG_ZERO) == 0)
			pmap_zero_page(nkpg);
		paddr = VM_PAGE_TO_PHYS(nkpg);
		pmap_load_store(l2, paddr | L2_TABLE);
		pmap_invalidate_page(kernel_pmap, kernel_vm_end);

		kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET;
		if (kernel_vm_end - 1 >= kernel_map->max_offset) {
			kernel_vm_end = kernel_map->max_offset;
			break;
		}
	}
}

/***************************************************
 * page management routines.
 ***************************************************/

CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE);
CTASSERT(_NPCM == 3);
CTASSERT(_NPCPV == 168);

static __inline struct pv_chunk *
pv_to_chunk(pv_entry_t pv)
{

	return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK));
}

#define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap)

#define	PC_FREE0	0xfffffffffffffffful
#define	PC_FREE1	0xfffffffffffffffful
#define	PC_FREE2	0x000000fffffffffful

static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 };

#if 0
#ifdef PV_STATS
static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail;

SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0,
	"Current number of pv entry chunks");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0,
	"Current number of pv entry chunks allocated");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0,
	"Current number of pv entry chunks frees");
SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0,
	"Number of times tried to get a chunk page but failed.");

static long pv_entry_frees, pv_entry_allocs, pv_entry_count;
static int pv_entry_spare;

SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0,
	"Current number of pv entry frees");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0,
	"Current number of pv entry allocs");
SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0,
	"Current number of pv entries");
SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0,
	"Current number of spare pv entries");
#endif
#endif /* 0 */
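/*
 * Worked example of the free masks above (added commentary): a pv_chunk
 * holds _NPCPV == 168 pv entries tracked by _NPCM == 3 64-bit bitmap words,
 * giving 3 * 64 = 192 bits for 168 entries.  PC_FREE0 and PC_FREE1 mark all
 * 128 entries of the first two words free; PC_FREE2 == 0x000000fffffffffful
 * sets only the low 168 - 128 = 40 bits, so the 24 unused bit positions can
 * never be handed out.
 */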
/*
 * We are in a serious low memory condition.  Resort to
 * drastic measures to free some pages so we can allocate
 * another pv entry chunk.
 *
 * Returns NULL if PV entries were reclaimed from the specified pmap.
 *
 * We do not, however, unmap 2mpages because subsequent accesses will
 * allocate per-page pv entries until repromotion occurs, thereby
 * exacerbating the shortage of free pv entries.
 */
static vm_page_t
reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp)
{
	struct pch new_tail;
	struct pv_chunk *pc;
	struct md_page *pvh;
	pd_entry_t *pde;
	pmap_t pmap;
	pt_entry_t *pte, tpte;
	pv_entry_t pv;
	vm_offset_t va;
	vm_page_t m, m_pc;
	struct spglist free;
	uint64_t inuse;
	int bit, field, freed, lvl;

	PMAP_LOCK_ASSERT(locked_pmap, MA_OWNED);
	KASSERT(lockp != NULL, ("reclaim_pv_chunk: lockp is NULL"));
	pmap = NULL;
	m_pc = NULL;
	SLIST_INIT(&free);
	TAILQ_INIT(&new_tail);
	mtx_lock(&pv_chunks_mutex);
	while ((pc = TAILQ_FIRST(&pv_chunks)) != NULL && SLIST_EMPTY(&free)) {
		TAILQ_REMOVE(&pv_chunks, pc, pc_lru);
		mtx_unlock(&pv_chunks_mutex);
		if (pmap != pc->pc_pmap) {
			if (pmap != NULL && pmap != locked_pmap)
				PMAP_UNLOCK(pmap);
			pmap = pc->pc_pmap;
			/* Avoid deadlock and lock recursion. */
			if (pmap > locked_pmap) {
				RELEASE_PV_LIST_LOCK(lockp);
				PMAP_LOCK(pmap);
			} else if (pmap != locked_pmap &&
			    !PMAP_TRYLOCK(pmap)) {
				pmap = NULL;
				TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
				mtx_lock(&pv_chunks_mutex);
				continue;
			}
		}

		/*
		 * Destroy every non-wired, 4 KB page mapping in the chunk.
		 */
		freed = 0;
		for (field = 0; field < _NPCM; field++) {
			for (inuse = ~pc->pc_map[field] & pc_freemask[field];
			    inuse != 0; inuse &= ~(1UL << bit)) {
				bit = ffsl(inuse) - 1;
				pv = &pc->pc_pventry[field * 64 + bit];
				va = pv->pv_va;
				pde = pmap_pde(pmap, va, &lvl);
				if (lvl != 2)
					continue;
				pte = pmap_l2_to_l3(pde, va);
				tpte = pmap_load(pte);
				if ((tpte & ATTR_SW_WIRED) != 0)
					continue;
				tpte = pmap_load_clear(pte);
				pmap_invalidate_page(pmap, va);
				m = PHYS_TO_VM_PAGE(tpte & ~ATTR_MASK);
				if (pmap_page_dirty(tpte))
					vm_page_dirty(m);
				if ((tpte & ATTR_AF) != 0)
					vm_page_aflag_set(m, PGA_REFERENCED);
				CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m);
				TAILQ_REMOVE(&m->md.pv_list, pv, pv_next);
				m->md.pv_gen++;
				if (TAILQ_EMPTY(&m->md.pv_list) &&
				    (m->flags & PG_FICTITIOUS) == 0) {
					pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m));
					if (TAILQ_EMPTY(&pvh->pv_list)) {
						vm_page_aflag_clear(m,
						    PGA_WRITEABLE);
					}
				}
				pc->pc_map[field] |= 1UL << bit;
				pmap_unuse_pt(pmap, va, pmap_load(pde), &free);
				freed++;
			}
		}
		if (freed == 0) {
			TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru);
			mtx_lock(&pv_chunks_mutex);
			continue;
		}
		/* Every freed mapping is for a 4 KB page. */
		pmap_resident_count_dec(pmap, freed);
		PV_STAT(atomic_add_long(&pv_entry_frees, freed));
		PV_STAT(atomic_add_int(&pv_entry_spare, freed));
		PV_STAT(atomic_subtract_long(&pv_entry_count, freed));
		TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list);
		if (pc->pc_map[0] == PC_FREE0 && pc->pc_map[1] == PC_FREE1 &&
		    pc->pc_map[2] == PC_FREE2) {
			PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV));
			PV_STAT(atomic_subtract_int(&pc_chunk_count, 1));
			PV_STAT(atomic_add_int(&pc_chunk_frees, 1));
			/* Entire chunk is free; return it. */
*/ 1927 m_pc = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1928 dump_drop_page(m_pc->phys_addr); 1929 mtx_lock(&pv_chunks_mutex); 1930 break; 1931 } 1932 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1933 TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 1934 mtx_lock(&pv_chunks_mutex); 1935 /* One freed pv entry in locked_pmap is sufficient. */ 1936 if (pmap == locked_pmap) 1937 break; 1938 } 1939 TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); 1940 mtx_unlock(&pv_chunks_mutex); 1941 if (pmap != NULL && pmap != locked_pmap) 1942 PMAP_UNLOCK(pmap); 1943 if (m_pc == NULL && !SLIST_EMPTY(&free)) { 1944 m_pc = SLIST_FIRST(&free); 1945 SLIST_REMOVE_HEAD(&free, plinks.s.ss); 1946 /* Recycle a freed page table page. */ 1947 m_pc->wire_count = 1; 1948 vm_wire_add(1); 1949 } 1950 vm_page_free_pages_toq(&free, false); 1951 return (m_pc); 1952 } 1953 1954 /* 1955 * free the pv_entry back to the free list 1956 */ 1957 static void 1958 free_pv_entry(pmap_t pmap, pv_entry_t pv) 1959 { 1960 struct pv_chunk *pc; 1961 int idx, field, bit; 1962 1963 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1964 PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1965 PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1966 PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1967 pc = pv_to_chunk(pv); 1968 idx = pv - &pc->pc_pventry[0]; 1969 field = idx / 64; 1970 bit = idx % 64; 1971 pc->pc_map[field] |= 1ul << bit; 1972 if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1973 pc->pc_map[2] != PC_FREE2) { 1974 /* 98% of the time, pc is already at the head of the list. */ 1975 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 1976 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1977 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1978 } 1979 return; 1980 } 1981 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1982 free_pv_chunk(pc); 1983 } 1984 1985 static void 1986 free_pv_chunk(struct pv_chunk *pc) 1987 { 1988 vm_page_t m; 1989 1990 mtx_lock(&pv_chunks_mutex); 1991 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1992 mtx_unlock(&pv_chunks_mutex); 1993 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1994 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1995 PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1996 /* entire chunk is free, return it */ 1997 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1998 dump_drop_page(m->phys_addr); 1999 vm_page_unwire_noq(m); 2000 vm_page_free(m); 2001 } 2002 2003 /* 2004 * Returns a new PV entry, allocating a new PV chunk from the system when 2005 * needed. If this PV chunk allocation fails and a PV list lock pointer was 2006 * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 2007 * returned. 2008 * 2009 * The given PV list lock may be released. 
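 *
 * Callers select the failure policy through "lockp"; roughly:
 *
 *	pv = get_pv_entry(pmap, &lock);	(may reclaim; lock may be dropped)
 *	pv = get_pv_entry(pmap, NULL);	(no reclamation; may return NULL)
 *
 * pmap_try_insert_pv_entry() below uses the latter form.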
2010 */ 2011 static pv_entry_t 2012 get_pv_entry(pmap_t pmap, struct rwlock **lockp) 2013 { 2014 int bit, field; 2015 pv_entry_t pv; 2016 struct pv_chunk *pc; 2017 vm_page_t m; 2018 2019 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2020 PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 2021 retry: 2022 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2023 if (pc != NULL) { 2024 for (field = 0; field < _NPCM; field++) { 2025 if (pc->pc_map[field]) { 2026 bit = ffsl(pc->pc_map[field]) - 1; 2027 break; 2028 } 2029 } 2030 if (field < _NPCM) { 2031 pv = &pc->pc_pventry[field * 64 + bit]; 2032 pc->pc_map[field] &= ~(1ul << bit); 2033 /* If this was the last item, move it to tail */ 2034 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 2035 pc->pc_map[2] == 0) { 2036 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2037 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 2038 pc_list); 2039 } 2040 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 2041 PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 2042 return (pv); 2043 } 2044 } 2045 /* No free items, allocate another chunk */ 2046 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 2047 VM_ALLOC_WIRED); 2048 if (m == NULL) { 2049 if (lockp == NULL) { 2050 PV_STAT(pc_chunk_tryfail++); 2051 return (NULL); 2052 } 2053 m = reclaim_pv_chunk(pmap, lockp); 2054 if (m == NULL) 2055 goto retry; 2056 } 2057 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 2058 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 2059 dump_add_page(m->phys_addr); 2060 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 2061 pc->pc_pmap = pmap; 2062 pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 2063 pc->pc_map[1] = PC_FREE1; 2064 pc->pc_map[2] = PC_FREE2; 2065 mtx_lock(&pv_chunks_mutex); 2066 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 2067 mtx_unlock(&pv_chunks_mutex); 2068 pv = &pc->pc_pventry[0]; 2069 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2070 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 2071 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 2072 return (pv); 2073 } 2074 2075 /* 2076 * Ensure that the number of spare PV entries in the specified pmap meets or 2077 * exceeds the given count, "needed". 2078 * 2079 * The given PV list lock may be released. 2080 */ 2081 static void 2082 reserve_pv_entries(pmap_t pmap, int needed, struct rwlock **lockp) 2083 { 2084 struct pch new_tail; 2085 struct pv_chunk *pc; 2086 vm_page_t m; 2087 int avail, free; 2088 bool reclaimed; 2089 2090 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2091 KASSERT(lockp != NULL, ("reserve_pv_entries: lockp is NULL")); 2092 2093 /* 2094 * Newly allocated PV chunks must be stored in a private list until 2095 * the required number of PV chunks have been allocated. Otherwise, 2096 * reclaim_pv_chunk() could recycle one of these chunks. In 2097 * contrast, these chunks must be added to the pmap upon allocation. 
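 *
 * That is, each new chunk is put on the pmap's own pm_pvchunk list
 * immediately, but is kept off the global pv_chunks LRU list (the
 * list that reclaim_pv_chunk() scans) on a private "new_tail" list
 * until the whole reservation has been satisfied.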
2098 */ 2099 TAILQ_INIT(&new_tail); 2100 retry: 2101 avail = 0; 2102 TAILQ_FOREACH(pc, &pmap->pm_pvchunk, pc_list) { 2103 bit_count((bitstr_t *)pc->pc_map, 0, 2104 sizeof(pc->pc_map) * NBBY, &free); 2105 if (free == 0) 2106 break; 2107 avail += free; 2108 if (avail >= needed) 2109 break; 2110 } 2111 for (reclaimed = false; avail < needed; avail += _NPCPV) { 2112 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 2113 VM_ALLOC_WIRED); 2114 if (m == NULL) { 2115 m = reclaim_pv_chunk(pmap, lockp); 2116 if (m == NULL) 2117 goto retry; 2118 reclaimed = true; 2119 } 2120 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 2121 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 2122 dump_add_page(m->phys_addr); 2123 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 2124 pc->pc_pmap = pmap; 2125 pc->pc_map[0] = PC_FREE0; 2126 pc->pc_map[1] = PC_FREE1; 2127 pc->pc_map[2] = PC_FREE2; 2128 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 2129 TAILQ_INSERT_TAIL(&new_tail, pc, pc_lru); 2130 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV)); 2131 2132 /* 2133 * The reclaim might have freed a chunk from the current pmap. 2134 * If that chunk contained available entries, we need to 2135 * re-count the number of available entries. 2136 */ 2137 if (reclaimed) 2138 goto retry; 2139 } 2140 if (!TAILQ_EMPTY(&new_tail)) { 2141 mtx_lock(&pv_chunks_mutex); 2142 TAILQ_CONCAT(&pv_chunks, &new_tail, pc_lru); 2143 mtx_unlock(&pv_chunks_mutex); 2144 } 2145 } 2146 2147 /* 2148 * First find and then remove the pv entry for the specified pmap and virtual 2149 * address from the specified pv list. Returns the pv entry if found and NULL 2150 * otherwise. This operation can be performed on pv lists for either 4KB or 2151 * 2MB page mappings. 2152 */ 2153 static __inline pv_entry_t 2154 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2155 { 2156 pv_entry_t pv; 2157 2158 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 2159 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 2160 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 2161 pvh->pv_gen++; 2162 break; 2163 } 2164 } 2165 return (pv); 2166 } 2167 2168 /* 2169 * After demotion from a 2MB page mapping to 512 4KB page mappings, 2170 * destroy the pv entry for the 2MB page mapping and reinstantiate the pv 2171 * entries for each of the 4KB page mappings. 2172 */ 2173 static void 2174 pmap_pv_demote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, 2175 struct rwlock **lockp) 2176 { 2177 struct md_page *pvh; 2178 struct pv_chunk *pc; 2179 pv_entry_t pv; 2180 vm_offset_t va_last; 2181 vm_page_t m; 2182 int bit, field; 2183 2184 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2185 KASSERT((pa & L2_OFFSET) == 0, 2186 ("pmap_pv_demote_l2: pa is not 2mpage aligned")); 2187 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); 2188 2189 /* 2190 * Transfer the 2mpage's pv entry for this mapping to the first 2191 * page's pv list. Once this transfer begins, the pv list lock 2192 * must not be released until the last pv entry is reinstantiated. 2193 */ 2194 pvh = pa_to_pvh(pa); 2195 va = va & ~L2_OFFSET; 2196 pv = pmap_pvh_remove(pvh, pmap, va); 2197 KASSERT(pv != NULL, ("pmap_pv_demote_l2: pv not found")); 2198 m = PHYS_TO_VM_PAGE(pa); 2199 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2200 m->md.pv_gen++; 2201 /* Instantiate the remaining Ln_ENTRIES - 1 pv entries. 
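 * (A 2MB block covers L2_SIZE / PAGE_SIZE == Ln_ENTRIES == 512 4KB
 * pages; one pv entry was transferred above, so 511 more are consumed
 * here, out of spares the caller is expected to have set aside
 * beforehand, e.g. via reserve_pv_entries(); hence the "missing
 * spare" assertion below.)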
*/ 2202 PV_STAT(atomic_add_long(&pv_entry_allocs, Ln_ENTRIES - 1)); 2203 va_last = va + L2_SIZE - PAGE_SIZE; 2204 for (;;) { 2205 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 2206 KASSERT(pc->pc_map[0] != 0 || pc->pc_map[1] != 0 || 2207 pc->pc_map[2] != 0, ("pmap_pv_demote_l2: missing spare")); 2208 for (field = 0; field < _NPCM; field++) { 2209 while (pc->pc_map[field]) { 2210 bit = ffsl(pc->pc_map[field]) - 1; 2211 pc->pc_map[field] &= ~(1ul << bit); 2212 pv = &pc->pc_pventry[field * 64 + bit]; 2213 va += PAGE_SIZE; 2214 pv->pv_va = va; 2215 m++; 2216 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2217 ("pmap_pv_demote_l2: page %p is not managed", m)); 2218 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2219 m->md.pv_gen++; 2220 if (va == va_last) 2221 goto out; 2222 } 2223 } 2224 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2225 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2226 } 2227 out: 2228 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && pc->pc_map[2] == 0) { 2229 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2230 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, pc_list); 2231 } 2232 PV_STAT(atomic_add_long(&pv_entry_count, Ln_ENTRIES - 1)); 2233 PV_STAT(atomic_subtract_int(&pv_entry_spare, Ln_ENTRIES - 1)); 2234 } 2235 2236 /* 2237 * First find and then destroy the pv entry for the specified pmap and virtual 2238 * address. This operation can be performed on pv lists for either 4KB or 2MB 2239 * page mappings. 2240 */ 2241 static void 2242 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 2243 { 2244 pv_entry_t pv; 2245 2246 pv = pmap_pvh_remove(pvh, pmap, va); 2247 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 2248 free_pv_entry(pmap, pv); 2249 } 2250 2251 /* 2252 * Conditionally create the PV entry for a 4KB page mapping if the required 2253 * memory can be allocated without resorting to reclamation. 2254 */ 2255 static boolean_t 2256 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 2257 struct rwlock **lockp) 2258 { 2259 pv_entry_t pv; 2260 2261 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2262 /* Pass NULL instead of the lock pointer to disable reclamation. 
*/ 2263 if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 2264 pv->pv_va = va; 2265 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 2266 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2267 m->md.pv_gen++; 2268 return (TRUE); 2269 } else 2270 return (FALSE); 2271 } 2272 2273 /* 2274 * pmap_remove_l2: Unmap a level 2 superpage in a process. 2275 */ 2276 static int 2277 pmap_remove_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t sva, 2278 pd_entry_t l1e, struct spglist *free, struct rwlock **lockp) 2279 { 2280 struct md_page *pvh; 2281 pt_entry_t old_l2; 2282 vm_offset_t eva, va; 2283 vm_page_t m, ml3; 2284 2285 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2286 KASSERT((sva & L2_OFFSET) == 0, ("pmap_remove_l2: sva is not aligned")); 2287 old_l2 = pmap_load_clear(l2); 2288 pmap_invalidate_range(pmap, sva, sva + L2_SIZE); 2289 if (old_l2 & ATTR_SW_WIRED) 2290 pmap->pm_stats.wired_count -= L2_SIZE / PAGE_SIZE; 2291 pmap_resident_count_dec(pmap, L2_SIZE / PAGE_SIZE); 2292 if (old_l2 & ATTR_SW_MANAGED) { 2293 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, old_l2 & ~ATTR_MASK); 2294 pvh = pa_to_pvh(old_l2 & ~ATTR_MASK); 2295 pmap_pvh_free(pvh, pmap, sva); 2296 eva = sva + L2_SIZE; 2297 for (va = sva, m = PHYS_TO_VM_PAGE(old_l2 & ~ATTR_MASK); 2298 va < eva; va += PAGE_SIZE, m++) { 2299 if (pmap_page_dirty(old_l2)) 2300 vm_page_dirty(m); 2301 if (old_l2 & ATTR_AF) 2302 vm_page_aflag_set(m, PGA_REFERENCED); 2303 if (TAILQ_EMPTY(&m->md.pv_list) && 2304 TAILQ_EMPTY(&pvh->pv_list)) 2305 vm_page_aflag_clear(m, PGA_WRITEABLE); 2306 } 2307 } 2308 KASSERT(pmap != kernel_pmap, 2309 ("Attempting to remove an l2 kernel page")); 2310 ml3 = pmap_remove_pt_page(pmap, sva); 2311 if (ml3 != NULL) { 2312 pmap_resident_count_dec(pmap, 1); 2313 KASSERT(ml3->wire_count == NL3PG, 2314 ("pmap_remove_l2: l3 page wire count error")); 2315 ml3->wire_count = 1; 2316 vm_page_unwire_noq(ml3); 2317 pmap_add_delayed_free_list(ml3, free, FALSE); 2318 } 2319 return (pmap_unuse_pt(pmap, sva, l1e, free)); 2320 } 2321 2322 /* 2323 * pmap_remove_l3: Unmap a single 4KB page in a process. 2324 */ 2325 static int 2326 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 2327 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 2328 { 2329 struct md_page *pvh; 2330 pt_entry_t old_l3; 2331 vm_page_t m; 2332 2333 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2334 old_l3 = pmap_load_clear(l3); 2335 pmap_invalidate_page(pmap, va); 2336 if (old_l3 & ATTR_SW_WIRED) 2337 pmap->pm_stats.wired_count -= 1; 2338 pmap_resident_count_dec(pmap, 1); 2339 if (old_l3 & ATTR_SW_MANAGED) { 2340 m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); 2341 if (pmap_page_dirty(old_l3)) 2342 vm_page_dirty(m); 2343 if (old_l3 & ATTR_AF) 2344 vm_page_aflag_set(m, PGA_REFERENCED); 2345 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 2346 pmap_pvh_free(&m->md, pmap, va); 2347 if (TAILQ_EMPTY(&m->md.pv_list) && 2348 (m->flags & PG_FICTITIOUS) == 0) { 2349 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2350 if (TAILQ_EMPTY(&pvh->pv_list)) 2351 vm_page_aflag_clear(m, PGA_WRITEABLE); 2352 } 2353 } 2354 return (pmap_unuse_pt(pmap, va, l2e, free)); 2355 } 2356 2357 /* 2358 * Remove the given range of addresses from the specified map. 2359 * 2360 * It is assumed that the start and end are properly 2361 * rounded to the page size. 2362 */ 2363 void 2364 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2365 { 2366 struct rwlock *lock; 2367 vm_offset_t va, va_next; 2368 pd_entry_t *l0, *l1, *l2; 2369 pt_entry_t l3_paddr, *l3; 2370 struct spglist free; 2371 2372 /* 2373 * Perform an unsynchronized read.
This is, however, safe. 2374 */ 2375 if (pmap->pm_stats.resident_count == 0) 2376 return; 2377 2378 SLIST_INIT(&free); 2379 2380 PMAP_LOCK(pmap); 2381 2382 lock = NULL; 2383 for (; sva < eva; sva = va_next) { 2384 2385 if (pmap->pm_stats.resident_count == 0) 2386 break; 2387 2388 l0 = pmap_l0(pmap, sva); 2389 if (pmap_load(l0) == 0) { 2390 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2391 if (va_next < sva) 2392 va_next = eva; 2393 continue; 2394 } 2395 2396 l1 = pmap_l0_to_l1(l0, sva); 2397 if (pmap_load(l1) == 0) { 2398 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2399 if (va_next < sva) 2400 va_next = eva; 2401 continue; 2402 } 2403 2404 /* 2405 * Calculate index for next page table. 2406 */ 2407 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2408 if (va_next < sva) 2409 va_next = eva; 2410 2411 l2 = pmap_l1_to_l2(l1, sva); 2412 if (l2 == NULL) 2413 continue; 2414 2415 l3_paddr = pmap_load(l2); 2416 2417 if ((l3_paddr & ATTR_DESCR_MASK) == L2_BLOCK) { 2418 if (sva + L2_SIZE == va_next && eva >= va_next) { 2419 pmap_remove_l2(pmap, l2, sva, pmap_load(l1), 2420 &free, &lock); 2421 continue; 2422 } else if (pmap_demote_l2_locked(pmap, l2, 2423 sva &~L2_OFFSET, &lock) == NULL) 2424 continue; 2425 l3_paddr = pmap_load(l2); 2426 } 2427 2428 /* 2429 * Weed out invalid mappings. 2430 */ 2431 if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) 2432 continue; 2433 2434 /* 2435 * Limit our scan to either the end of the va represented 2436 * by the current page table page, or to the end of the 2437 * range being removed. 2438 */ 2439 if (va_next > eva) 2440 va_next = eva; 2441 2442 va = va_next; 2443 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2444 sva += L3_SIZE) { 2445 if (l3 == NULL) 2446 panic("l3 == NULL"); 2447 if (pmap_load(l3) == 0) { 2448 if (va != va_next) { 2449 pmap_invalidate_range(pmap, va, sva); 2450 va = va_next; 2451 } 2452 continue; 2453 } 2454 if (va == va_next) 2455 va = sva; 2456 if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, 2457 &lock)) { 2458 sva += L3_SIZE; 2459 break; 2460 } 2461 } 2462 if (va != va_next) 2463 pmap_invalidate_range(pmap, va, sva); 2464 } 2465 if (lock != NULL) 2466 rw_wunlock(lock); 2467 PMAP_UNLOCK(pmap); 2468 vm_page_free_pages_toq(&free, false); 2469 } 2470 2471 /* 2472 * Routine: pmap_remove_all 2473 * Function: 2474 * Removes this physical page from 2475 * all physical maps in which it resides. 2476 * Reflects back modify bits to the pager. 2477 * 2478 * Notes: 2479 * Original versions of this routine were very 2480 * inefficient because they iteratively called 2481 * pmap_remove (slow...) 2482 */ 2483 2484 void 2485 pmap_remove_all(vm_page_t m) 2486 { 2487 struct md_page *pvh; 2488 pv_entry_t pv; 2489 pmap_t pmap; 2490 struct rwlock *lock; 2491 pd_entry_t *pde, tpde; 2492 pt_entry_t *pte, tpte; 2493 vm_offset_t va; 2494 struct spglist free; 2495 int lvl, pvh_gen, md_gen; 2496 2497 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2498 ("pmap_remove_all: page %p is not managed", m)); 2499 SLIST_INIT(&free); 2500 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2501 pvh = (m->flags & PG_FICTITIOUS) != 0 ? 
&pv_dummy : 2502 pa_to_pvh(VM_PAGE_TO_PHYS(m)); 2503 retry: 2504 rw_wlock(lock); 2505 while ((pv = TAILQ_FIRST(&pvh->pv_list)) != NULL) { 2506 pmap = PV_PMAP(pv); 2507 if (!PMAP_TRYLOCK(pmap)) { 2508 pvh_gen = pvh->pv_gen; 2509 rw_wunlock(lock); 2510 PMAP_LOCK(pmap); 2511 rw_wlock(lock); 2512 if (pvh_gen != pvh->pv_gen) { 2513 rw_wunlock(lock); 2514 PMAP_UNLOCK(pmap); 2515 goto retry; 2516 } 2517 } 2518 va = pv->pv_va; 2519 pte = pmap_pte(pmap, va, &lvl); 2520 KASSERT(pte != NULL, 2521 ("pmap_remove_all: no page table entry found")); 2522 KASSERT(lvl == 2, 2523 ("pmap_remove_all: invalid pte level %d", lvl)); 2524 2525 pmap_demote_l2_locked(pmap, pte, va, &lock); 2526 PMAP_UNLOCK(pmap); 2527 } 2528 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 2529 pmap = PV_PMAP(pv); 2530 if (!PMAP_TRYLOCK(pmap)) { 2531 pvh_gen = pvh->pv_gen; 2532 md_gen = m->md.pv_gen; 2533 rw_wunlock(lock); 2534 PMAP_LOCK(pmap); 2535 rw_wlock(lock); 2536 if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { 2537 rw_wunlock(lock); 2538 PMAP_UNLOCK(pmap); 2539 goto retry; 2540 } 2541 } 2542 pmap_resident_count_dec(pmap, 1); 2543 2544 pde = pmap_pde(pmap, pv->pv_va, &lvl); 2545 KASSERT(pde != NULL, 2546 ("pmap_remove_all: no page directory entry found")); 2547 KASSERT(lvl == 2, 2548 ("pmap_remove_all: invalid pde level %d", lvl)); 2549 tpde = pmap_load(pde); 2550 2551 pte = pmap_l2_to_l3(pde, pv->pv_va); 2552 tpte = pmap_load(pte); 2553 pmap_load_clear(pte); 2554 pmap_invalidate_page(pmap, pv->pv_va); 2555 if (tpte & ATTR_SW_WIRED) 2556 pmap->pm_stats.wired_count--; 2557 if ((tpte & ATTR_AF) != 0) 2558 vm_page_aflag_set(m, PGA_REFERENCED); 2559 2560 /* 2561 * Update the vm_page_t clean and reference bits. 2562 */ 2563 if (pmap_page_dirty(tpte)) 2564 vm_page_dirty(m); 2565 pmap_unuse_pt(pmap, pv->pv_va, tpde, &free); 2566 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2567 m->md.pv_gen++; 2568 free_pv_entry(pmap, pv); 2569 PMAP_UNLOCK(pmap); 2570 } 2571 vm_page_aflag_clear(m, PGA_WRITEABLE); 2572 rw_wunlock(lock); 2573 vm_page_free_pages_toq(&free, false); 2574 } 2575 2576 /* 2577 * Set the physical protection on the 2578 * specified range of this map as requested. 
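 *
 * For example, a hypothetical caller revoking write access while
 * leaving execute permission intact:
 *
 *	pmap_protect(pmap, sva, eva, VM_PROT_READ | VM_PROT_EXECUTE);
 *
 * sets ATTR_AP(ATTR_AP_RO) on every valid L3 entry in the range,
 * first demoting any 2MB block mappings that the range overlaps.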
2579 */ 2580 void 2581 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 2582 { 2583 vm_offset_t va, va_next; 2584 pd_entry_t *l0, *l1, *l2; 2585 pt_entry_t *l3p, l3, nbits; 2586 2587 KASSERT((prot & ~VM_PROT_ALL) == 0, ("invalid prot %x", prot)); 2588 if (prot == VM_PROT_NONE) { 2589 pmap_remove(pmap, sva, eva); 2590 return; 2591 } 2592 2593 if ((prot & (VM_PROT_WRITE | VM_PROT_EXECUTE)) == 2594 (VM_PROT_WRITE | VM_PROT_EXECUTE)) 2595 return; 2596 2597 PMAP_LOCK(pmap); 2598 for (; sva < eva; sva = va_next) { 2599 2600 l0 = pmap_l0(pmap, sva); 2601 if (pmap_load(l0) == 0) { 2602 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 2603 if (va_next < sva) 2604 va_next = eva; 2605 continue; 2606 } 2607 2608 l1 = pmap_l0_to_l1(l0, sva); 2609 if (pmap_load(l1) == 0) { 2610 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2611 if (va_next < sva) 2612 va_next = eva; 2613 continue; 2614 } 2615 2616 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2617 if (va_next < sva) 2618 va_next = eva; 2619 2620 l2 = pmap_l1_to_l2(l1, sva); 2621 if (pmap_load(l2) == 0) 2622 continue; 2623 2624 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { 2625 l3p = pmap_demote_l2(pmap, l2, sva); 2626 if (l3p == NULL) 2627 continue; 2628 } 2629 KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, 2630 ("pmap_protect: Invalid L2 entry after demotion")); 2631 2632 if (va_next > eva) 2633 va_next = eva; 2634 2635 va = va_next; 2636 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 2637 sva += L3_SIZE) { 2638 l3 = pmap_load(l3p); 2639 if (!pmap_l3_valid(l3)) 2640 continue; 2641 2642 nbits = 0; 2643 if ((prot & VM_PROT_WRITE) == 0) { 2644 if ((l3 & ATTR_SW_MANAGED) && 2645 pmap_page_dirty(l3)) { 2646 vm_page_dirty(PHYS_TO_VM_PAGE(l3 & 2647 ~ATTR_MASK)); 2648 } 2649 nbits |= ATTR_AP(ATTR_AP_RO); 2650 } 2651 if ((prot & VM_PROT_EXECUTE) == 0) 2652 nbits |= ATTR_XN; 2653 2654 pmap_set(l3p, nbits); 2655 /* XXX: Use pmap_invalidate_range */ 2656 pmap_invalidate_page(pmap, sva); 2657 } 2658 } 2659 PMAP_UNLOCK(pmap); 2660 } 2661 2662 /* 2663 * Inserts the specified page table page into the specified pmap's collection 2664 * of idle page table pages. Each of a pmap's page table pages is responsible 2665 * for mapping a distinct range of virtual addresses. The pmap's collection is 2666 * ordered by this virtual address range. 2667 */ 2668 static __inline int 2669 pmap_insert_pt_page(pmap_t pmap, vm_page_t mpte) 2670 { 2671 2672 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2673 return (vm_radix_insert(&pmap->pm_root, mpte)); 2674 } 2675 2676 /* 2677 * Removes the page table page mapping the specified virtual address from the 2678 * specified pmap's collection of idle page table pages, and returns it. 2679 * Otherwise, returns NULL if there is no page table page corresponding to the 2680 * specified virtual address. 2681 */ 2682 static __inline vm_page_t 2683 pmap_remove_pt_page(pmap_t pmap, vm_offset_t va) 2684 { 2685 2686 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2687 return (vm_radix_remove(&pmap->pm_root, pmap_l2_pindex(va))); 2688 } 2689 2690 /* 2691 * Performs a break-before-make update of a pmap entry. This is needed when 2692 * either promoting or demoting pages to ensure the TLB doesn't get into an 2693 * inconsistent state. 2694 */ 2695 static void 2696 pmap_update_entry(pmap_t pmap, pd_entry_t *pte, pd_entry_t newpte, 2697 vm_offset_t va, vm_size_t size) 2698 { 2699 register_t intr; 2700 2701 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2702 2703 /* 2704 * Ensure we don't get switched out with the page table in an 2705 * inconsistent state. 
We also need to ensure no interrupts fire 2706 * as they may make use of an address we are about to invalidate. 2707 */ 2708 intr = intr_disable(); 2709 critical_enter(); 2710 2711 /* Clear the old mapping */ 2712 pmap_load_clear(pte); 2713 pmap_invalidate_range_nopin(pmap, va, va + size); 2714 2715 /* Create the new mapping */ 2716 pmap_load_store(pte, newpte); 2717 2718 critical_exit(); 2719 intr_restore(intr); 2720 } 2721 2722 #if VM_NRESERVLEVEL > 0 2723 /* 2724 * After promotion from 512 4KB page mappings to a single 2MB page mapping, 2725 * replace the many pv entries for the 4KB page mappings by a single pv entry 2726 * for the 2MB page mapping. 2727 */ 2728 static void 2729 pmap_pv_promote_l2(pmap_t pmap, vm_offset_t va, vm_paddr_t pa, 2730 struct rwlock **lockp) 2731 { 2732 struct md_page *pvh; 2733 pv_entry_t pv; 2734 vm_offset_t va_last; 2735 vm_page_t m; 2736 2737 KASSERT((pa & L2_OFFSET) == 0, 2738 ("pmap_pv_promote_l2: pa is not 2mpage aligned")); 2739 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa); 2740 2741 /* 2742 * Transfer the first page's pv entry for this mapping to the 2mpage's 2743 * pv list. Aside from avoiding the cost of a call to get_pv_entry(), 2744 * a transfer avoids the possibility that get_pv_entry() calls 2745 * reclaim_pv_chunk() and that reclaim_pv_chunk() removes one of the 2746 * mappings that is being promoted. 2747 */ 2748 m = PHYS_TO_VM_PAGE(pa); 2749 va = va & ~L2_OFFSET; 2750 pv = pmap_pvh_remove(&m->md, pmap, va); 2751 KASSERT(pv != NULL, ("pmap_pv_promote_l2: pv not found")); 2752 pvh = pa_to_pvh(pa); 2753 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 2754 pvh->pv_gen++; 2755 /* Free the remaining Ln_ENTRIES - 1 pv entries. */ 2756 va_last = va + L2_SIZE - PAGE_SIZE; 2757 do { 2758 m++; 2759 va += PAGE_SIZE; 2760 pmap_pvh_free(&m->md, pmap, va); 2761 } while (va < va_last); 2762 } 2763 2764 /* 2765 * Tries to promote the 512 contiguous 4KB page mappings that are within a 2766 * single level 2 table entry to a single 2MB page mapping. For promotion 2767 * to occur, two conditions must be met: (1) the 4KB page mappings must map 2768 * aligned, contiguous physical memory and (2) the 4KB page mappings must have 2769 * identical characteristics. 2770 */ 2771 static void 2772 pmap_promote_l2(pmap_t pmap, pd_entry_t *l2, vm_offset_t va, 2773 struct rwlock **lockp) 2774 { 2775 pt_entry_t *firstl3, *l3, newl2, oldl3, pa; 2776 vm_page_t mpte; 2777 vm_offset_t sva; 2778 2779 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2780 2781 sva = va & ~L2_OFFSET; 2782 firstl3 = pmap_l2_to_l3(l2, sva); 2783 newl2 = pmap_load(firstl3); 2784 2785 /* Check that the alignment is valid */ 2786 if (((newl2 & ~ATTR_MASK) & L2_OFFSET) != 0) { 2787 atomic_add_long(&pmap_l2_p_failures, 1); 2788 CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" 2789 " in pmap %p", va, pmap); 2790 return; 2791 } 2792 2793 pa = newl2 + L2_SIZE - PAGE_SIZE; 2794 for (l3 = firstl3 + NL3PG - 1; l3 > firstl3; l3--) { 2795 oldl3 = pmap_load(l3); 2796 if (oldl3 != pa) { 2797 atomic_add_long(&pmap_l2_p_failures, 1); 2798 CTR2(KTR_PMAP, "pmap_promote_l2: failure for va %#lx" 2799 " in pmap %p", va, pmap); 2800 return; 2801 } 2802 pa -= PAGE_SIZE; 2803 } 2804 2805 /* 2806 * Save the page table page in its current state until the L2 2807 * mapping the superpage is demoted by pmap_demote_l2() or 2808 * destroyed by pmap_remove_l3().
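 *
 * The L2 entry itself is then rewritten with the break-before-make
 * helper pmap_update_entry() above; in outline (a sketch, not the
 * literal code):
 *
 *	newl2 = (newl2 & ~ATTR_DESCR_MASK) | L2_BLOCK;
 *	intr = intr_disable();
 *	critical_enter();
 *	pmap_load_clear(l2);				(break)
 *	pmap_invalidate_range_nopin(pmap, sva, sva + L2_SIZE);
 *	pmap_load_store(l2, newl2);			(make)
 *	critical_exit();
 *	intr_restore(intr);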
2809 */ 2810 mpte = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); 2811 KASSERT(mpte >= vm_page_array && 2812 mpte < &vm_page_array[vm_page_array_size], 2813 ("pmap_promote_l2: page table page is out of range")); 2814 KASSERT(mpte->pindex == pmap_l2_pindex(va), 2815 ("pmap_promote_l2: page table page's pindex is wrong")); 2816 if (pmap_insert_pt_page(pmap, mpte)) { 2817 atomic_add_long(&pmap_l2_p_failures, 1); 2818 CTR2(KTR_PMAP, 2819 "pmap_promote_l2: failure for va %#lx in pmap %p", va, 2820 pmap); 2821 return; 2822 } 2823 2824 if ((newl2 & ATTR_SW_MANAGED) != 0) 2825 pmap_pv_promote_l2(pmap, va, newl2 & ~ATTR_MASK, lockp); 2826 2827 newl2 &= ~ATTR_DESCR_MASK; 2828 newl2 |= L2_BLOCK; 2829 2830 pmap_update_entry(pmap, l2, newl2, sva, L2_SIZE); 2831 2832 atomic_add_long(&pmap_l2_promotions, 1); 2833 CTR2(KTR_PMAP, "pmap_promote_l2: success for va %#lx in pmap %p", va, 2834 pmap); 2835 } 2836 #endif /* VM_NRESERVLEVEL > 0 */ 2837 2838 /* 2839 * Insert the given physical page (p) at 2840 * the specified virtual address (v) in the 2841 * target physical map with the protection requested. 2842 * 2843 * If specified, the page will be wired down, meaning 2844 * that the related pte can not be reclaimed. 2845 * 2846 * NB: This is the only routine which MAY NOT lazy-evaluate 2847 * or lose information. That is, this routine must actually 2848 * insert this page into the given map NOW. 2849 */ 2850 int 2851 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 2852 u_int flags, int8_t psind __unused) 2853 { 2854 struct rwlock *lock; 2855 pd_entry_t *pde; 2856 pt_entry_t new_l3, orig_l3; 2857 pt_entry_t *l2, *l3; 2858 pv_entry_t pv; 2859 vm_paddr_t opa, pa, l1_pa, l2_pa, l3_pa; 2860 vm_page_t mpte, om, l1_m, l2_m, l3_m; 2861 boolean_t nosleep; 2862 int lvl; 2863 2864 va = trunc_page(va); 2865 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 2866 VM_OBJECT_ASSERT_LOCKED(m->object); 2867 pa = VM_PAGE_TO_PHYS(m); 2868 new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 2869 L3_PAGE); 2870 if ((prot & VM_PROT_WRITE) == 0) 2871 new_l3 |= ATTR_AP(ATTR_AP_RO); 2872 if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY) 2873 new_l3 |= ATTR_XN; 2874 if ((flags & PMAP_ENTER_WIRED) != 0) 2875 new_l3 |= ATTR_SW_WIRED; 2876 if (va < VM_MAXUSER_ADDRESS) 2877 new_l3 |= ATTR_AP(ATTR_AP_USER) | ATTR_PXN; 2878 if ((m->oflags & VPO_UNMANAGED) == 0) 2879 new_l3 |= ATTR_SW_MANAGED; 2880 2881 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 2882 2883 mpte = NULL; 2884 2885 lock = NULL; 2886 PMAP_LOCK(pmap); 2887 2888 pde = pmap_pde(pmap, va, &lvl); 2889 if (pde != NULL && lvl == 1) { 2890 l2 = pmap_l1_to_l2(pde, va); 2891 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK && 2892 (l3 = pmap_demote_l2_locked(pmap, l2, va & ~L2_OFFSET, 2893 &lock)) != NULL) { 2894 l3 = &l3[pmap_l3_index(va)]; 2895 if (va < VM_MAXUSER_ADDRESS) { 2896 mpte = PHYS_TO_VM_PAGE( 2897 pmap_load(l2) & ~ATTR_MASK); 2898 mpte->wire_count++; 2899 } 2900 goto havel3; 2901 } 2902 } 2903 2904 if (va < VM_MAXUSER_ADDRESS) { 2905 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 2906 mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); 2907 if (mpte == NULL && nosleep) { 2908 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 2909 if (lock != NULL) 2910 rw_wunlock(lock); 2911 PMAP_UNLOCK(pmap); 2912 return (KERN_RESOURCE_SHORTAGE); 2913 } 2914 pde = pmap_pde(pmap, va, &lvl); 2915 KASSERT(pde != NULL, 2916 ("pmap_enter: Invalid page entry, va: 0x%lx", va)); 2917 KASSERT(lvl == 2, 2918 ("pmap_enter: Invalid level %d", lvl)); 2919 } else { 2920 /* 2921 * If we get a level 2 pde it must point to a level 3 entry, 2922 * otherwise we will need to create the intermediate tables. 2923 */ 2924 if (lvl < 2) { 2925 switch (lvl) { 2926 default: 2927 case -1: 2928 /* Get the l0 pde to update */ 2929 pde = pmap_l0(pmap, va); 2930 KASSERT(pde != NULL, ("...")); 2931 2932 l1_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2933 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2934 VM_ALLOC_ZERO); 2935 if (l1_m == NULL) 2936 panic("pmap_enter: l1 pte_m == NULL"); 2937 if ((l1_m->flags & PG_ZERO) == 0) 2938 pmap_zero_page(l1_m); 2939 2940 l1_pa = VM_PAGE_TO_PHYS(l1_m); 2941 pmap_load_store(pde, l1_pa | L0_TABLE); 2942 /* FALLTHROUGH */ 2943 case 0: 2944 /* Get the l1 pde to update */ 2945 pde = pmap_l0_to_l1(pde, va); 2946 KASSERT(pde != NULL, ("...")); 2947 2948 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2949 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2950 VM_ALLOC_ZERO); 2951 if (l2_m == NULL) 2952 panic("pmap_enter: l2 pte_m == NULL"); 2953 if ((l2_m->flags & PG_ZERO) == 0) 2954 pmap_zero_page(l2_m); 2955 2956 l2_pa = VM_PAGE_TO_PHYS(l2_m); 2957 pmap_load_store(pde, l2_pa | L1_TABLE); 2958 /* FALLTHROUGH */ 2959 case 1: 2960 /* Get the l2 pde to update */ 2961 pde = pmap_l1_to_l2(pde, va); 2962 KASSERT(pde != NULL, ("...")); 2963 2964 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2965 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2966 VM_ALLOC_ZERO); 2967 if (l3_m == NULL) 2968 panic("pmap_enter: l3 pte_m == NULL"); 2969 if ((l3_m->flags & PG_ZERO) == 0) 2970 pmap_zero_page(l3_m); 2971 2972 l3_pa = VM_PAGE_TO_PHYS(l3_m); 2973 pmap_load_store(pde, l3_pa | L2_TABLE); 2974 break; 2975 } 2976 } 2977 } 2978 l3 = pmap_l2_to_l3(pde, va); 2979 2980 havel3: 2981 orig_l3 = pmap_load(l3); 2982 opa = orig_l3 & ~ATTR_MASK; 2983 pv = NULL; 2984 2985 /* 2986 * Is the specified virtual address already mapped? 2987 */ 2988 if (pmap_l3_valid(orig_l3)) { 2989 /* 2990 * Wiring change, just update stats. We don't worry about 2991 * wiring PT pages as they remain resident as long as there 2992 * are valid mappings in them. Hence, if a user page is wired, 2993 * the PT page will be also. 2994 */ 2995 if ((flags & PMAP_ENTER_WIRED) != 0 && 2996 (orig_l3 & ATTR_SW_WIRED) == 0) 2997 pmap->pm_stats.wired_count++; 2998 else if ((flags & PMAP_ENTER_WIRED) == 0 && 2999 (orig_l3 & ATTR_SW_WIRED) != 0) 3000 pmap->pm_stats.wired_count--; 3001 3002 /* 3003 * Remove the extra PT page reference. 3004 */ 3005 if (mpte != NULL) { 3006 mpte->wire_count--; 3007 KASSERT(mpte->wire_count > 0, 3008 ("pmap_enter: missing reference to page table page," 3009 " va: 0x%lx", va)); 3010 } 3011 3012 /* 3013 * Has the physical page changed? 3014 */ 3015 if (opa == pa) { 3016 /* 3017 * No, might be a protection or wiring change. 3018 */ 3019 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 3020 if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == 3021 ATTR_AP(ATTR_AP_RW)) { 3022 vm_page_aflag_set(m, PGA_WRITEABLE); 3023 } 3024 } 3025 goto validate; 3026 } 3027 3028 /* 3029 * The physical page has changed.
3030 */ 3031 (void)pmap_load_clear(l3); 3032 KASSERT((orig_l3 & ~ATTR_MASK) == opa, 3033 ("pmap_enter: unexpected pa update for %#lx", va)); 3034 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 3035 om = PHYS_TO_VM_PAGE(opa); 3036 3037 /* 3038 * The pmap lock is sufficient to synchronize with 3039 * concurrent calls to pmap_page_test_mappings() and 3040 * pmap_ts_referenced(). 3041 */ 3042 if (pmap_page_dirty(orig_l3)) 3043 vm_page_dirty(om); 3044 if ((orig_l3 & ATTR_AF) != 0) 3045 vm_page_aflag_set(om, PGA_REFERENCED); 3046 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 3047 pv = pmap_pvh_remove(&om->md, pmap, va); 3048 if ((m->oflags & VPO_UNMANAGED) != 0) 3049 free_pv_entry(pmap, pv); 3050 if ((om->aflags & PGA_WRITEABLE) != 0 && 3051 TAILQ_EMPTY(&om->md.pv_list) && 3052 ((om->flags & PG_FICTITIOUS) != 0 || 3053 TAILQ_EMPTY(&pa_to_pvh(opa)->pv_list))) 3054 vm_page_aflag_clear(om, PGA_WRITEABLE); 3055 } 3056 pmap_invalidate_page(pmap, va); 3057 orig_l3 = 0; 3058 } else { 3059 /* 3060 * Increment the counters. 3061 */ 3062 if ((new_l3 & ATTR_SW_WIRED) != 0) 3063 pmap->pm_stats.wired_count++; 3064 pmap_resident_count_inc(pmap, 1); 3065 } 3066 /* 3067 * Enter on the PV list if part of our managed memory. 3068 */ 3069 if ((m->oflags & VPO_UNMANAGED) == 0) { 3070 if (pv == NULL) { 3071 pv = get_pv_entry(pmap, &lock); 3072 pv->pv_va = va; 3073 } 3074 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 3075 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 3076 m->md.pv_gen++; 3077 if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 3078 vm_page_aflag_set(m, PGA_WRITEABLE); 3079 } 3080 3081 validate: 3082 /* 3083 * Sync the icache if exec permission is requested and attribute 3084 * VM_MEMATTR_WRITE_BACK is set. Do it now, before the mapping is 3085 * stored and made valid for hardware table walks. If done later, 3086 * other CPUs could access this page before the caches are properly 3087 * synced. Don't do it for kernel memory, which is mapped with exec 3088 * permission even if the memory isn't going to hold executable 3089 * code. The only time an icache sync is needed is after a kernel 3090 * module is loaded and the relocation info is processed, and that 3091 * is done in elf_cpu_load_file(). 3092 */ 3093 if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap && 3094 m->md.pv_memattr == VM_MEMATTR_WRITE_BACK && 3095 (opa != pa || (orig_l3 & ATTR_XN))) 3096 cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE); 3097 3098 /* 3099 * Update the L3 entry 3100 */ 3101 if (pmap_l3_valid(orig_l3)) { 3102 KASSERT(opa == pa, ("pmap_enter: invalid update")); 3103 if ((orig_l3 & ~ATTR_AF) != (new_l3 & ~ATTR_AF)) { 3104 /* same PA, different attributes */ 3105 pmap_load_store(l3, new_l3); 3106 pmap_invalidate_page(pmap, va); 3107 if (pmap_page_dirty(orig_l3) && 3108 (orig_l3 & ATTR_SW_MANAGED) != 0) 3109 vm_page_dirty(m); 3110 } else { 3111 /* 3112 * orig_l3 == new_l3 3113 * This can happen if multiple threads simultaneously 3114 * access a page that is not yet mapped. This is bad 3115 * for performance since it can cause a full 3116 * demotion-NOP-promotion cycle. 3117 * Other possible reasons are: 3118 * - the VM and pmap memory layouts have diverged 3119 * - a TLB flush is missing somewhere and the CPU 3120 * doesn't see the actual mapping.
3121 */ 3122 CTR4(KTR_PMAP, "%s: already mapped page - " 3123 "pmap %p va %#lx pte 0x%lx", 3124 __func__, pmap, va, new_l3); 3125 } 3126 } else { 3127 /* New mapping */ 3128 pmap_load_store(l3, new_l3); 3129 } 3130 3131 #if VM_NRESERVLEVEL > 0 3132 if (pmap != pmap_kernel() && 3133 (mpte == NULL || mpte->wire_count == NL3PG) && 3134 pmap_superpages_enabled() && 3135 (m->flags & PG_FICTITIOUS) == 0 && 3136 vm_reserv_level_iffullpop(m) == 0) { 3137 pmap_promote_l2(pmap, pde, va, &lock); 3138 } 3139 #endif 3140 3141 if (lock != NULL) 3142 rw_wunlock(lock); 3143 PMAP_UNLOCK(pmap); 3144 return (KERN_SUCCESS); 3145 } 3146 3147 /* 3148 * Maps a sequence of resident pages belonging to the same object. 3149 * The sequence begins with the given page m_start. This page is 3150 * mapped at the given virtual address start. Each subsequent page is 3151 * mapped at a virtual address that is offset from start by the same 3152 * amount as the page is offset from m_start within the object. The 3153 * last page in the sequence is the page with the largest offset from 3154 * m_start that can be mapped at a virtual address less than the given 3155 * virtual address end. Not every virtual page between start and end 3156 * is mapped; only those for which a resident page exists with the 3157 * corresponding offset from m_start are mapped. 3158 */ 3159 void 3160 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 3161 vm_page_t m_start, vm_prot_t prot) 3162 { 3163 struct rwlock *lock; 3164 vm_offset_t va; 3165 vm_page_t m, mpte; 3166 vm_pindex_t diff, psize; 3167 3168 VM_OBJECT_ASSERT_LOCKED(m_start->object); 3169 3170 psize = atop(end - start); 3171 mpte = NULL; 3172 m = m_start; 3173 lock = NULL; 3174 PMAP_LOCK(pmap); 3175 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 3176 va = start + ptoa(diff); 3177 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); 3178 m = TAILQ_NEXT(m, listq); 3179 } 3180 if (lock != NULL) 3181 rw_wunlock(lock); 3182 PMAP_UNLOCK(pmap); 3183 } 3184 3185 /* 3186 * This code makes some *MAJOR* assumptions: 3187 * 1. The current pmap and the given pmap exist. 3188 * 2. The mapping is not wired. 3189 * 3. Read access only. 3190 * 4. No page table pages need to be allocated. 3191 * ...but it is *MUCH* faster than pmap_enter. 3192 */ 3193 3194 void 3195 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 3196 { 3197 struct rwlock *lock; 3198 3199 lock = NULL; 3200 PMAP_LOCK(pmap); 3201 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 3202 if (lock != NULL) 3203 rw_wunlock(lock); 3204 PMAP_UNLOCK(pmap); 3205 } 3206 3207 static vm_page_t 3208 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 3209 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 3210 { 3211 struct spglist free; 3212 pd_entry_t *pde; 3213 pt_entry_t *l2, *l3, l3_val; 3214 vm_paddr_t pa; 3215 int lvl; 3216 3217 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 3218 (m->oflags & VPO_UNMANAGED) != 0, 3219 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 3220 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 3221 3222 CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); 3223 /* 3224 * In the case that a page table page is not 3225 * resident, we are creating it here.
3226 */ 3227 if (va < VM_MAXUSER_ADDRESS) { 3228 vm_pindex_t l2pindex; 3229 3230 /* 3231 * Calculate pagetable page index 3232 */ 3233 l2pindex = pmap_l2_pindex(va); 3234 if (mpte && (mpte->pindex == l2pindex)) { 3235 mpte->wire_count++; 3236 } else { 3237 /* 3238 * Get the l2 entry 3239 */ 3240 pde = pmap_pde(pmap, va, &lvl); 3241 3242 /* 3243 * If the page table page is mapped, we just increment 3244 * the hold count, and activate it. Otherwise, we 3245 * attempt to allocate a page table page. If this 3246 * attempt fails, we don't retry. Instead, we give up. 3247 */ 3248 if (lvl == 1) { 3249 l2 = pmap_l1_to_l2(pde, va); 3250 if ((pmap_load(l2) & ATTR_DESCR_MASK) == 3251 L2_BLOCK) 3252 return (NULL); 3253 } 3254 if (lvl == 2 && pmap_load(pde) != 0) { 3255 mpte = 3256 PHYS_TO_VM_PAGE(pmap_load(pde) & ~ATTR_MASK); 3257 mpte->wire_count++; 3258 } else { 3259 /* 3260 * Pass NULL instead of the PV list lock 3261 * pointer, because we don't intend to sleep. 3262 */ 3263 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 3264 if (mpte == NULL) 3265 return (mpte); 3266 } 3267 } 3268 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 3269 l3 = &l3[pmap_l3_index(va)]; 3270 } else { 3271 mpte = NULL; 3272 pde = pmap_pde(kernel_pmap, va, &lvl); 3273 KASSERT(pde != NULL, 3274 ("pmap_enter_quick_locked: Invalid page entry, va: 0x%lx", 3275 va)); 3276 KASSERT(lvl == 2, 3277 ("pmap_enter_quick_locked: Invalid level %d", lvl)); 3278 l3 = pmap_l2_to_l3(pde, va); 3279 } 3280 3281 if (pmap_load(l3) != 0) { 3282 if (mpte != NULL) { 3283 mpte->wire_count--; 3284 mpte = NULL; 3285 } 3286 return (mpte); 3287 } 3288 3289 /* 3290 * Enter on the PV list if part of our managed memory. 3291 */ 3292 if ((m->oflags & VPO_UNMANAGED) == 0 && 3293 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 3294 if (mpte != NULL) { 3295 SLIST_INIT(&free); 3296 if (pmap_unwire_l3(pmap, va, mpte, &free)) { 3297 pmap_invalidate_page(pmap, va); 3298 vm_page_free_pages_toq(&free, false); 3299 } 3300 mpte = NULL; 3301 } 3302 return (mpte); 3303 } 3304 3305 /* 3306 * Increment counters 3307 */ 3308 pmap_resident_count_inc(pmap, 1); 3309 3310 pa = VM_PAGE_TO_PHYS(m); 3311 l3_val = pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 3312 ATTR_AP(ATTR_AP_RO) | L3_PAGE; 3313 if ((prot & VM_PROT_EXECUTE) == 0 || m->md.pv_memattr == DEVICE_MEMORY) 3314 l3_val |= ATTR_XN; 3315 else if (va < VM_MAXUSER_ADDRESS) 3316 l3_val |= ATTR_PXN; 3317 3318 /* 3319 * Now validate mapping with RO protection 3320 */ 3321 if ((m->oflags & VPO_UNMANAGED) == 0) 3322 l3_val |= ATTR_SW_MANAGED; 3323 3324 /* Sync icache before the mapping is stored to PTE */ 3325 if ((prot & VM_PROT_EXECUTE) && pmap != kernel_pmap && 3326 m->md.pv_memattr == VM_MEMATTR_WRITE_BACK) 3327 cpu_icache_sync_range(PHYS_TO_DMAP(pa), PAGE_SIZE); 3328 3329 pmap_load_store(l3, l3_val); 3330 pmap_invalidate_page(pmap, va); 3331 return (mpte); 3332 } 3333 3334 /* 3335 * This code maps large physical mmap regions into the 3336 * processor address space. Note that some shortcuts 3337 * are taken, but the code works. 3338 */ 3339 void 3340 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 3341 vm_pindex_t pindex, vm_size_t size) 3342 { 3343 3344 VM_OBJECT_ASSERT_WLOCKED(object); 3345 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 3346 ("pmap_object_init_pt: non-device object")); 3347 } 3348 3349 /* 3350 * Clear the wired attribute from the mappings for the specified range of 3351 * addresses in the given pmap. 
Every valid mapping within that range 3352 * must have the wired attribute set. In contrast, invalid mappings 3353 * cannot have the wired attribute set, so they are ignored. 3354 * 3355 * The wired attribute of the page table entry is not a hardware feature, 3356 * so there is no need to invalidate any TLB entries. 3357 */ 3358 void 3359 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 3360 { 3361 vm_offset_t va_next; 3362 pd_entry_t *l0, *l1, *l2; 3363 pt_entry_t *l3; 3364 3365 PMAP_LOCK(pmap); 3366 for (; sva < eva; sva = va_next) { 3367 l0 = pmap_l0(pmap, sva); 3368 if (pmap_load(l0) == 0) { 3369 va_next = (sva + L0_SIZE) & ~L0_OFFSET; 3370 if (va_next < sva) 3371 va_next = eva; 3372 continue; 3373 } 3374 3375 l1 = pmap_l0_to_l1(l0, sva); 3376 if (pmap_load(l1) == 0) { 3377 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 3378 if (va_next < sva) 3379 va_next = eva; 3380 continue; 3381 } 3382 3383 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 3384 if (va_next < sva) 3385 va_next = eva; 3386 3387 l2 = pmap_l1_to_l2(l1, sva); 3388 if (pmap_load(l2) == 0) 3389 continue; 3390 3391 if ((pmap_load(l2) & ATTR_DESCR_MASK) == L2_BLOCK) { 3392 l3 = pmap_demote_l2(pmap, l2, sva); 3393 if (l3 == NULL) 3394 continue; 3395 } 3396 KASSERT((pmap_load(l2) & ATTR_DESCR_MASK) == L2_TABLE, 3397 ("pmap_unwire: Invalid l2 entry after demotion")); 3398 3399 if (va_next > eva) 3400 va_next = eva; 3401 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 3402 sva += L3_SIZE) { 3403 if (pmap_load(l3) == 0) 3404 continue; 3405 if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) 3406 panic("pmap_unwire: l3 %#jx is missing " 3407 "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); 3408 3409 /* 3410 * PG_W must be cleared atomically. Although the pmap 3411 * lock synchronizes access to PG_W, another processor 3412 * could be setting PG_M and/or PG_A concurrently. 3413 */ 3414 atomic_clear_long(l3, ATTR_SW_WIRED); 3415 pmap->pm_stats.wired_count--; 3416 } 3417 } 3418 PMAP_UNLOCK(pmap); 3419 } 3420 3421 /* 3422 * Copy the range specified by src_addr/len 3423 * from the source map to the range dst_addr/len 3424 * in the destination map. 3425 * 3426 * This routine is only advisory and need not do anything. 3427 */ 3428 3429 void 3430 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 3431 vm_offset_t src_addr) 3432 { 3433 } 3434 3435 /* 3436 * pmap_zero_page zeros the specified hardware page by mapping 3437 * the page into KVM and using bzero to clear its contents. 3438 */ 3439 void 3440 pmap_zero_page(vm_page_t m) 3441 { 3442 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3443 3444 pagezero((void *)va); 3445 } 3446 3447 /* 3448 * pmap_zero_page_area zeros the specified hardware page by mapping 3449 * the page into KVM and using bzero to clear its contents. 3450 * 3451 * off and size may not cover an area beyond a single hardware page. 3452 */ 3453 void 3454 pmap_zero_page_area(vm_page_t m, int off, int size) 3455 { 3456 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 3457 3458 if (off == 0 && size == PAGE_SIZE) 3459 pagezero((void *)va); 3460 else 3461 bzero((char *)va + off, size); 3462 } 3463 3464 /* 3465 * pmap_copy_page copies the specified (machine independent) 3466 * page by mapping the page into virtual memory and using 3467 * bcopy to copy the page, one machine dependent page at a 3468 * time. 
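 *
 * On arm64 no transient mapping is actually needed: both pages are
 * reachable through the direct map, so the copy reduces to
 *
 *	pagecopy((void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)),
 *	    (void *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)));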
3469 */ 3470 void 3471 pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 3472 { 3473 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 3474 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 3475 3476 pagecopy((void *)src, (void *)dst); 3477 } 3478 3479 int unmapped_buf_allowed = 1; 3480 3481 void 3482 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 3483 vm_offset_t b_offset, int xfersize) 3484 { 3485 void *a_cp, *b_cp; 3486 vm_page_t m_a, m_b; 3487 vm_paddr_t p_a, p_b; 3488 vm_offset_t a_pg_offset, b_pg_offset; 3489 int cnt; 3490 3491 while (xfersize > 0) { 3492 a_pg_offset = a_offset & PAGE_MASK; 3493 m_a = ma[a_offset >> PAGE_SHIFT]; 3494 p_a = m_a->phys_addr; 3495 b_pg_offset = b_offset & PAGE_MASK; 3496 m_b = mb[b_offset >> PAGE_SHIFT]; 3497 p_b = m_b->phys_addr; 3498 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 3499 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 3500 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 3501 panic("!DMAP a %lx", p_a); 3502 } else { 3503 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 3504 } 3505 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 3506 panic("!DMAP b %lx", p_b); 3507 } else { 3508 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 3509 } 3510 bcopy(a_cp, b_cp, cnt); 3511 a_offset += cnt; 3512 b_offset += cnt; 3513 xfersize -= cnt; 3514 } 3515 } 3516 3517 vm_offset_t 3518 pmap_quick_enter_page(vm_page_t m) 3519 { 3520 3521 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 3522 } 3523 3524 void 3525 pmap_quick_remove_page(vm_offset_t addr) 3526 { 3527 } 3528 3529 /* 3530 * Returns true if the pmap's pv is one of the first 3531 * 16 pvs linked to from this page. This count may 3532 * be changed upwards or downwards in the future; it 3533 * is only necessary that true be returned for a small 3534 * subset of pmaps for proper page aging. 3535 */ 3536 boolean_t 3537 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 3538 { 3539 struct md_page *pvh; 3540 struct rwlock *lock; 3541 pv_entry_t pv; 3542 int loops = 0; 3543 boolean_t rv; 3544 3545 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3546 ("pmap_page_exists_quick: page %p is not managed", m)); 3547 rv = FALSE; 3548 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3549 rw_rlock(lock); 3550 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3551 if (PV_PMAP(pv) == pmap) { 3552 rv = TRUE; 3553 break; 3554 } 3555 loops++; 3556 if (loops >= 16) 3557 break; 3558 } 3559 if (!rv && loops < 16 && (m->flags & PG_FICTITIOUS) == 0) { 3560 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3561 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 3562 if (PV_PMAP(pv) == pmap) { 3563 rv = TRUE; 3564 break; 3565 } 3566 loops++; 3567 if (loops >= 16) 3568 break; 3569 } 3570 } 3571 rw_runlock(lock); 3572 return (rv); 3573 } 3574 3575 /* 3576 * pmap_page_wired_mappings: 3577 * 3578 * Return the number of managed mappings to the given physical page 3579 * that are wired. 
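 *
 * Like the other pv-list walkers, this relies on the trylock and
 * generation-count pattern to take pmap locks in a safe order;
 * roughly:
 *
 *	if (!PMAP_TRYLOCK(pmap)) {
 *		md_gen = m->md.pv_gen;		(snapshot)
 *		rw_runlock(lock);
 *		PMAP_LOCK(pmap);		(may block)
 *		rw_rlock(lock);
 *		if (md_gen != m->md.pv_gen) {
 *			PMAP_UNLOCK(pmap);
 *			goto restart;		(pv list changed; rescan)
 *		}
 *	}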
3580 */ 3581 int 3582 pmap_page_wired_mappings(vm_page_t m) 3583 { 3584 struct rwlock *lock; 3585 struct md_page *pvh; 3586 pmap_t pmap; 3587 pt_entry_t *pte; 3588 pv_entry_t pv; 3589 int count, lvl, md_gen, pvh_gen; 3590 3591 if ((m->oflags & VPO_UNMANAGED) != 0) 3592 return (0); 3593 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3594 rw_rlock(lock); 3595 restart: 3596 count = 0; 3597 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3598 pmap = PV_PMAP(pv); 3599 if (!PMAP_TRYLOCK(pmap)) { 3600 md_gen = m->md.pv_gen; 3601 rw_runlock(lock); 3602 PMAP_LOCK(pmap); 3603 rw_rlock(lock); 3604 if (md_gen != m->md.pv_gen) { 3605 PMAP_UNLOCK(pmap); 3606 goto restart; 3607 } 3608 } 3609 pte = pmap_pte(pmap, pv->pv_va, &lvl); 3610 if (pte != NULL && (pmap_load(pte) & ATTR_SW_WIRED) != 0) 3611 count++; 3612 PMAP_UNLOCK(pmap); 3613 } 3614 if ((m->flags & PG_FICTITIOUS) == 0) { 3615 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3616 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 3617 pmap = PV_PMAP(pv); 3618 if (!PMAP_TRYLOCK(pmap)) { 3619 md_gen = m->md.pv_gen; 3620 pvh_gen = pvh->pv_gen; 3621 rw_runlock(lock); 3622 PMAP_LOCK(pmap); 3623 rw_rlock(lock); 3624 if (md_gen != m->md.pv_gen || 3625 pvh_gen != pvh->pv_gen) { 3626 PMAP_UNLOCK(pmap); 3627 goto restart; 3628 } 3629 } 3630 pte = pmap_pte(pmap, pv->pv_va, &lvl); 3631 if (pte != NULL && 3632 (pmap_load(pte) & ATTR_SW_WIRED) != 0) 3633 count++; 3634 PMAP_UNLOCK(pmap); 3635 } 3636 } 3637 rw_runlock(lock); 3638 return (count); 3639 } 3640 3641 /* 3642 * Destroy all managed, non-wired mappings in the given user-space 3643 * pmap. This pmap cannot be active on any processor besides the 3644 * caller. 3645 * 3646 * This function cannot be applied to the kernel pmap. Moreover, it 3647 * is not intended for general use. It is only to be used during 3648 * process termination. Consequently, it can be implemented in ways 3649 * that make it faster than pmap_remove(). First, it can more quickly 3650 * destroy mappings by iterating over the pmap's collection of PV 3651 * entries, rather than searching the page table. Second, it doesn't 3652 * have to test and clear the page table entries atomically, because 3653 * no processor is currently accessing the user address space. In 3654 * particular, a page table entry's dirty bit won't change state once 3655 * this function starts. 
3656 */ 3657 void 3658 pmap_remove_pages(pmap_t pmap) 3659 { 3660 pd_entry_t *pde; 3661 pt_entry_t *pte, tpte; 3662 struct spglist free; 3663 vm_page_t m, ml3, mt; 3664 pv_entry_t pv; 3665 struct md_page *pvh; 3666 struct pv_chunk *pc, *npc; 3667 struct rwlock *lock; 3668 int64_t bit; 3669 uint64_t inuse, bitmask; 3670 int allfree, field, freed, idx, lvl; 3671 vm_paddr_t pa; 3672 3673 lock = NULL; 3674 3675 SLIST_INIT(&free); 3676 PMAP_LOCK(pmap); 3677 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 3678 allfree = 1; 3679 freed = 0; 3680 for (field = 0; field < _NPCM; field++) { 3681 inuse = ~pc->pc_map[field] & pc_freemask[field]; 3682 while (inuse != 0) { 3683 bit = ffsl(inuse) - 1; 3684 bitmask = 1UL << bit; 3685 idx = field * 64 + bit; 3686 pv = &pc->pc_pventry[idx]; 3687 inuse &= ~bitmask; 3688 3689 pde = pmap_pde(pmap, pv->pv_va, &lvl); 3690 KASSERT(pde != NULL, 3691 ("Attempting to remove an unmapped page")); 3692 3693 switch(lvl) { 3694 case 1: 3695 pte = pmap_l1_to_l2(pde, pv->pv_va); 3696 tpte = pmap_load(pte); 3697 KASSERT((tpte & ATTR_DESCR_MASK) == 3698 L2_BLOCK, 3699 ("Attempting to remove an invalid " 3700 "block: %lx", tpte)); 3701 tpte = pmap_load(pte); 3702 break; 3703 case 2: 3704 pte = pmap_l2_to_l3(pde, pv->pv_va); 3705 tpte = pmap_load(pte); 3706 KASSERT((tpte & ATTR_DESCR_MASK) == 3707 L3_PAGE, 3708 ("Attempting to remove an invalid " 3709 "page: %lx", tpte)); 3710 break; 3711 default: 3712 panic( 3713 "Invalid page directory level: %d", 3714 lvl); 3715 } 3716 3717 /* 3718 * We cannot remove wired pages from a process' mapping at this time 3719 */ 3720 if (tpte & ATTR_SW_WIRED) { 3721 allfree = 0; 3722 continue; 3723 } 3724 3725 pa = tpte & ~ATTR_MASK; 3726 3727 m = PHYS_TO_VM_PAGE(pa); 3728 KASSERT(m->phys_addr == pa, 3729 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 3730 m, (uintmax_t)m->phys_addr, 3731 (uintmax_t)tpte)); 3732 3733 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 3734 m < &vm_page_array[vm_page_array_size], 3735 ("pmap_remove_pages: bad pte %#jx", 3736 (uintmax_t)tpte)); 3737 3738 pmap_load_clear(pte); 3739 3740 /* 3741 * Update the vm_page_t clean/reference bits. 
*/ 3743 if ((tpte & ATTR_AP_RW_BIT) == 3744 ATTR_AP(ATTR_AP_RW)) { 3745 switch (lvl) { 3746 case 1: 3747 for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) 3748 vm_page_dirty(mt); 3749 break; 3750 case 2: 3751 vm_page_dirty(m); 3752 break; 3753 } 3754 } 3755 3756 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 3757 3758 /* Mark free */ 3759 pc->pc_map[field] |= bitmask; 3760 switch (lvl) { 3761 case 1: 3762 pmap_resident_count_dec(pmap, 3763 L2_SIZE / PAGE_SIZE); 3764 pvh = pa_to_pvh(tpte & ~ATTR_MASK); 3765 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 3766 pvh->pv_gen++; 3767 if (TAILQ_EMPTY(&pvh->pv_list)) { 3768 for (mt = m; mt < &m[L2_SIZE / PAGE_SIZE]; mt++) 3769 if ((mt->aflags & PGA_WRITEABLE) != 0 && 3770 TAILQ_EMPTY(&mt->md.pv_list)) 3771 vm_page_aflag_clear(mt, PGA_WRITEABLE); 3772 } 3773 ml3 = pmap_remove_pt_page(pmap, 3774 pv->pv_va); 3775 if (ml3 != NULL) { 3776 pmap_resident_count_dec(pmap, 1); 3777 KASSERT(ml3->wire_count == NL3PG, 3778 ("pmap_remove_pages: l3 page wire count error")); 3779 ml3->wire_count = 1; 3780 vm_page_unwire_noq(ml3); 3781 pmap_add_delayed_free_list(ml3, 3782 &free, FALSE); 3783 } 3784 break; 3785 case 2: 3786 pmap_resident_count_dec(pmap, 1); 3787 TAILQ_REMOVE(&m->md.pv_list, pv, 3788 pv_next); 3789 m->md.pv_gen++; 3790 if ((m->aflags & PGA_WRITEABLE) != 0 && 3791 TAILQ_EMPTY(&m->md.pv_list) && 3792 (m->flags & PG_FICTITIOUS) == 0) { 3793 pvh = pa_to_pvh( 3794 VM_PAGE_TO_PHYS(m)); 3795 if (TAILQ_EMPTY(&pvh->pv_list)) 3796 vm_page_aflag_clear(m, 3797 PGA_WRITEABLE); 3798 } 3799 break; 3800 } 3801 pmap_unuse_pt(pmap, pv->pv_va, pmap_load(pde), 3802 &free); 3803 freed++; 3804 } 3805 } 3806 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 3807 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 3808 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 3809 if (allfree) { 3810 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 3811 free_pv_chunk(pc); 3812 } 3813 } 3814 pmap_invalidate_all(pmap); 3815 if (lock != NULL) 3816 rw_wunlock(lock); 3817 PMAP_UNLOCK(pmap); 3818 vm_page_free_pages_toq(&free, false); 3819 } 3820 3821 /* 3822 * This is used to check if a page has been accessed or modified. As we 3823 * don't have a hardware bit that tells us whether it has been modified, 3824 * we have to assume it has been if the page is read/write.
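 *
 * Concretely, a mask/value pair is built per mapping and compared
 * against the PTE; for a 4KB mapping:
 *
 *	mask = value = 0;
 *	if (modified) {
 *		mask |= ATTR_AP_RW_BIT;
 *		value |= ATTR_AP(ATTR_AP_RW);	(writable implies dirty)
 *	}
 *	if (accessed) {
 *		mask |= ATTR_AF | ATTR_DESCR_MASK;
 *		value |= ATTR_AF | L3_PAGE;	(valid page with AF set)
 *	}
 *	rv = (pmap_load(pte) & mask) == value;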
3825 */ 3826 static boolean_t 3827 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 3828 { 3829 struct rwlock *lock; 3830 pv_entry_t pv; 3831 struct md_page *pvh; 3832 pt_entry_t *pte, mask, value; 3833 pmap_t pmap; 3834 int lvl, md_gen, pvh_gen; 3835 boolean_t rv; 3836 3837 rv = FALSE; 3838 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3839 rw_rlock(lock); 3840 restart: 3841 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 3842 pmap = PV_PMAP(pv); 3843 if (!PMAP_TRYLOCK(pmap)) { 3844 md_gen = m->md.pv_gen; 3845 rw_runlock(lock); 3846 PMAP_LOCK(pmap); 3847 rw_rlock(lock); 3848 if (md_gen != m->md.pv_gen) { 3849 PMAP_UNLOCK(pmap); 3850 goto restart; 3851 } 3852 } 3853 pte = pmap_pte(pmap, pv->pv_va, &lvl); 3854 KASSERT(lvl == 3, 3855 ("pmap_page_test_mappings: Invalid level %d", lvl)); 3856 mask = 0; 3857 value = 0; 3858 if (modified) { 3859 mask |= ATTR_AP_RW_BIT; 3860 value |= ATTR_AP(ATTR_AP_RW); 3861 } 3862 if (accessed) { 3863 mask |= ATTR_AF | ATTR_DESCR_MASK; 3864 value |= ATTR_AF | L3_PAGE; 3865 } 3866 rv = (pmap_load(pte) & mask) == value; 3867 PMAP_UNLOCK(pmap); 3868 if (rv) 3869 goto out; 3870 } 3871 if ((m->flags & PG_FICTITIOUS) == 0) { 3872 pvh = pa_to_pvh(VM_PAGE_TO_PHYS(m)); 3873 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 3874 pmap = PV_PMAP(pv); 3875 if (!PMAP_TRYLOCK(pmap)) { 3876 md_gen = m->md.pv_gen; 3877 pvh_gen = pvh->pv_gen; 3878 rw_runlock(lock); 3879 PMAP_LOCK(pmap); 3880 rw_rlock(lock); 3881 if (md_gen != m->md.pv_gen || 3882 pvh_gen != pvh->pv_gen) { 3883 PMAP_UNLOCK(pmap); 3884 goto restart; 3885 } 3886 } 3887 pte = pmap_pte(pmap, pv->pv_va, &lvl); 3888 KASSERT(lvl == 2, 3889 ("pmap_page_test_mappings: Invalid level %d", lvl)); 3890 mask = 0; 3891 value = 0; 3892 if (modified) { 3893 mask |= ATTR_AP_RW_BIT; 3894 value |= ATTR_AP(ATTR_AP_RW); 3895 } 3896 if (accessed) { 3897 mask |= ATTR_AF | ATTR_DESCR_MASK; 3898 value |= ATTR_AF | L2_BLOCK; 3899 } 3900 rv = (pmap_load(pte) & mask) == value; 3901 PMAP_UNLOCK(pmap); 3902 if (rv) 3903 goto out; 3904 } 3905 } 3906 out: 3907 rw_runlock(lock); 3908 return (rv); 3909 } 3910 3911 /* 3912 * pmap_is_modified: 3913 * 3914 * Return whether or not the specified physical page was modified 3915 * in any physical maps. 3916 */ 3917 boolean_t 3918 pmap_is_modified(vm_page_t m) 3919 { 3920 3921 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3922 ("pmap_is_modified: page %p is not managed", m)); 3923 3924 /* 3925 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3926 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 3927 * is clear, no PTEs can have PG_M set. 3928 */ 3929 VM_OBJECT_ASSERT_WLOCKED(m->object); 3930 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3931 return (FALSE); 3932 return (pmap_page_test_mappings(m, FALSE, TRUE)); 3933 } 3934 3935 /* 3936 * pmap_is_prefaultable: 3937 * 3938 * Return whether or not the specified virtual address is eligible 3939 * for prefault. 3940 */ 3941 boolean_t 3942 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 3943 { 3944 pt_entry_t *pte; 3945 boolean_t rv; 3946 int lvl; 3947 3948 rv = FALSE; 3949 PMAP_LOCK(pmap); 3950 pte = pmap_pte(pmap, addr, &lvl); 3951 if (pte != NULL && pmap_load(pte) != 0) { 3952 rv = TRUE; 3953 } 3954 PMAP_UNLOCK(pmap); 3955 return (rv); 3956 } 3957 3958 /* 3959 * pmap_is_referenced: 3960 * 3961 * Return whether or not the specified physical page was referenced 3962 * in any physical maps. 
3963 */ 3964 boolean_t 3965 pmap_is_referenced(vm_page_t m) 3966 { 3967 3968 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3969 ("pmap_is_referenced: page %p is not managed", m)); 3970 return (pmap_page_test_mappings(m, TRUE, FALSE)); 3971 } 3972 3973 /* 3974 * Clear the write and modified bits in each of the given page's mappings. 3975 */ 3976 void 3977 pmap_remove_write(vm_page_t m) 3978 { 3979 struct md_page *pvh; 3980 pmap_t pmap; 3981 struct rwlock *lock; 3982 pv_entry_t next_pv, pv; 3983 pt_entry_t oldpte, *pte; 3984 vm_offset_t va; 3985 int lvl, md_gen, pvh_gen; 3986 3987 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3988 ("pmap_remove_write: page %p is not managed", m)); 3989 3990 /* 3991 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 3992 * set by another thread while the object is locked. Thus, 3993 * if PGA_WRITEABLE is clear, no page table entries need updating. 3994 */ 3995 VM_OBJECT_ASSERT_WLOCKED(m->object); 3996 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 3997 return; 3998 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 3999 pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : 4000 pa_to_pvh(VM_PAGE_TO_PHYS(m)); 4001 retry_pv_loop: 4002 rw_wlock(lock); 4003 TAILQ_FOREACH_SAFE(pv, &pvh->pv_list, pv_next, next_pv) { 4004 pmap = PV_PMAP(pv); 4005 if (!PMAP_TRYLOCK(pmap)) { 4006 pvh_gen = pvh->pv_gen; 4007 rw_wunlock(lock); 4008 PMAP_LOCK(pmap); 4009 rw_wlock(lock); 4010 if (pvh_gen != pvh->pv_gen) { 4011 PMAP_UNLOCK(pmap); 4012 rw_wunlock(lock); 4013 goto retry_pv_loop; 4014 } 4015 } 4016 va = pv->pv_va; 4017 pte = pmap_pte(pmap, pv->pv_va, &lvl); 4018 if ((pmap_load(pte) & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 4019 pmap_demote_l2_locked(pmap, pte, va & ~L2_OFFSET, 4020 &lock); 4021 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4022 ("inconsistent pv lock %p %p for page %p", 4023 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4024 PMAP_UNLOCK(pmap); 4025 } 4026 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 4027 pmap = PV_PMAP(pv); 4028 if (!PMAP_TRYLOCK(pmap)) { 4029 pvh_gen = pvh->pv_gen; 4030 md_gen = m->md.pv_gen; 4031 rw_wunlock(lock); 4032 PMAP_LOCK(pmap); 4033 rw_wlock(lock); 4034 if (pvh_gen != pvh->pv_gen || 4035 md_gen != m->md.pv_gen) { 4036 PMAP_UNLOCK(pmap); 4037 rw_wunlock(lock); 4038 goto retry_pv_loop; 4039 } 4040 } 4041 pte = pmap_pte(pmap, pv->pv_va, &lvl); 4042 retry: 4043 oldpte = pmap_load(pte); 4044 if ((oldpte & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { 4045 if (!atomic_cmpset_long(pte, oldpte, 4046 oldpte | ATTR_AP(ATTR_AP_RO))) 4047 goto retry; 4048 if ((oldpte & ATTR_AF) != 0) 4049 vm_page_dirty(m); 4050 pmap_invalidate_page(pmap, pv->pv_va); 4051 } 4052 PMAP_UNLOCK(pmap); 4053 } 4054 rw_wunlock(lock); 4055 vm_page_aflag_clear(m, PGA_WRITEABLE); 4056 } 4057 4058 static __inline boolean_t 4059 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 4060 { 4061 4062 return (FALSE); 4063 } 4064 4065 /* 4066 * pmap_ts_referenced: 4067 * 4068 * Return a count of reference bits for a page, clearing those bits. 4069 * It is not necessary for every reference bit to be cleared, but it 4070 * is necessary that 0 only be returned when there are truly no 4071 * reference bits set. 4072 * 4073 * As an optimization, update the page's dirty field if a modified bit is 4074 * found while counting reference bits. This opportunistic update can be 4075 * performed at low cost and can eliminate the need for some future calls 4076 * to pmap_is_modified(). 
However, since this function stops after 4077 * finding PMAP_TS_REFERENCED_MAX reference bits, it may not detect some 4078 * dirty pages. Those dirty pages will only be detected by a future call 4079 * to pmap_is_modified(). 4080 */ 4081 int 4082 pmap_ts_referenced(vm_page_t m) 4083 { 4084 struct md_page *pvh; 4085 pv_entry_t pv, pvf; 4086 pmap_t pmap; 4087 struct rwlock *lock; 4088 pd_entry_t *pde, tpde; 4089 pt_entry_t *pte, tpte; 4090 pt_entry_t *l3; 4091 vm_offset_t va; 4092 vm_paddr_t pa; 4093 int cleared, md_gen, not_cleared, lvl, pvh_gen; 4094 struct spglist free; 4095 bool demoted; 4096 4097 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4098 ("pmap_ts_referenced: page %p is not managed", m)); 4099 SLIST_INIT(&free); 4100 cleared = 0; 4101 pa = VM_PAGE_TO_PHYS(m); 4102 lock = PHYS_TO_PV_LIST_LOCK(pa); 4103 pvh = (m->flags & PG_FICTITIOUS) != 0 ? &pv_dummy : pa_to_pvh(pa); 4104 rw_wlock(lock); 4105 retry: 4106 not_cleared = 0; 4107 if ((pvf = TAILQ_FIRST(&pvh->pv_list)) == NULL) 4108 goto small_mappings; 4109 pv = pvf; 4110 do { 4111 if (pvf == NULL) 4112 pvf = pv; 4113 pmap = PV_PMAP(pv); 4114 if (!PMAP_TRYLOCK(pmap)) { 4115 pvh_gen = pvh->pv_gen; 4116 rw_wunlock(lock); 4117 PMAP_LOCK(pmap); 4118 rw_wlock(lock); 4119 if (pvh_gen != pvh->pv_gen) { 4120 PMAP_UNLOCK(pmap); 4121 goto retry; 4122 } 4123 } 4124 va = pv->pv_va; 4125 pde = pmap_pde(pmap, pv->pv_va, &lvl); 4126 KASSERT(pde != NULL, ("pmap_ts_referenced: no l1 table found")); 4127 KASSERT(lvl == 1, 4128 ("pmap_ts_referenced: invalid pde level %d", lvl)); 4129 tpde = pmap_load(pde); 4130 KASSERT((tpde & ATTR_DESCR_MASK) == L1_TABLE, 4131 ("pmap_ts_referenced: found an invalid l1 table")); 4132 pte = pmap_l1_to_l2(pde, pv->pv_va); 4133 tpte = pmap_load(pte); 4134 if (pmap_page_dirty(tpte)) { 4135 /* 4136 * Although "tpte" is mapping a 2MB page, because 4137 * this function is called at a 4KB page granularity, 4138 * we only update the 4KB page under test. 4139 */ 4140 vm_page_dirty(m); 4141 } 4142 if ((tpte & ATTR_AF) != 0) { 4143 /* 4144 * Since this reference bit is shared by 512 4KB 4145 * pages, it should not be cleared every time it is 4146 * tested. Apply a simple "hash" function on the 4147 * physical page number, the virtual superpage number, 4148 * and the pmap address to select one 4KB page out of 4149 * the 512 on which testing the reference bit will 4150 * result in clearing that reference bit. This 4151 * function is designed to avoid the selection of the 4152 * same 4KB page for every 2MB page mapping. 4153 * 4154 * On demotion, a mapping that hasn't been referenced 4155 * is simply destroyed. To avoid the possibility of a 4156 * subsequent page fault on a demoted wired mapping, 4157 * always leave its reference bit set. Moreover, 4158 * since the superpage is wired, the current state of 4159 * its reference bit won't affect page replacement. 4160 */ 4161 if ((((pa >> PAGE_SHIFT) ^ (pv->pv_va >> L2_SHIFT) ^ 4162 (uintptr_t)pmap) & (Ln_ENTRIES - 1)) == 0 && 4163 (tpte & ATTR_SW_WIRED) == 0) { 4164 if (safe_to_clear_referenced(pmap, tpte)) { 4165 /* 4166 * TODO: We don't handle the access 4167 * flag at all. We need to be able 4168 * to set it in the exception handler. 
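				 *
				 * If the kernel ever grows that support,
				 * the clear would be a sketch along the
				 * lines of (hypothetical; this branch
				 * currently panics instead):
				 *
				 *	atomic_clear_long(pte, ATTR_AF);
				 *	pmap_invalidate_page(pmap, pv->pv_va);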
4169 */ 4170 panic("ARM64TODO: " 4171 "safe_to_clear_referenced\n"); 4172 } else if (pmap_demote_l2_locked(pmap, pte, 4173 pv->pv_va, &lock) != NULL) { 4174 demoted = true; 4175 va += VM_PAGE_TO_PHYS(m) - 4176 (tpte & ~ATTR_MASK); 4177 l3 = pmap_l2_to_l3(pte, va); 4178 pmap_remove_l3(pmap, l3, va, 4179 pmap_load(pte), NULL, &lock); 4180 } else 4181 demoted = true; 4182 4183 if (demoted) { 4184 /* 4185 * The superpage mapping was removed 4186 * entirely and therefore 'pv' is no 4187 * longer valid. 4188 */ 4189 if (pvf == pv) 4190 pvf = NULL; 4191 pv = NULL; 4192 } 4193 cleared++; 4194 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4195 ("inconsistent pv lock %p %p for page %p", 4196 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4197 } else 4198 not_cleared++; 4199 } 4200 PMAP_UNLOCK(pmap); 4201 /* Rotate the PV list if it has more than one entry. */ 4202 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 4203 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 4204 TAILQ_INSERT_TAIL(&pvh->pv_list, pv, pv_next); 4205 pvh->pv_gen++; 4206 } 4207 if (cleared + not_cleared >= PMAP_TS_REFERENCED_MAX) 4208 goto out; 4209 } while ((pv = TAILQ_FIRST(&pvh->pv_list)) != pvf); 4210 small_mappings: 4211 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 4212 goto out; 4213 pv = pvf; 4214 do { 4215 if (pvf == NULL) 4216 pvf = pv; 4217 pmap = PV_PMAP(pv); 4218 if (!PMAP_TRYLOCK(pmap)) { 4219 pvh_gen = pvh->pv_gen; 4220 md_gen = m->md.pv_gen; 4221 rw_wunlock(lock); 4222 PMAP_LOCK(pmap); 4223 rw_wlock(lock); 4224 if (pvh_gen != pvh->pv_gen || md_gen != m->md.pv_gen) { 4225 PMAP_UNLOCK(pmap); 4226 goto retry; 4227 } 4228 } 4229 pde = pmap_pde(pmap, pv->pv_va, &lvl); 4230 KASSERT(pde != NULL, ("pmap_ts_referenced: no l2 table found")); 4231 KASSERT(lvl == 2, 4232 ("pmap_ts_referenced: invalid pde level %d", lvl)); 4233 tpde = pmap_load(pde); 4234 KASSERT((tpde & ATTR_DESCR_MASK) == L2_TABLE, 4235 ("pmap_ts_referenced: found an invalid l2 table")); 4236 pte = pmap_l2_to_l3(pde, pv->pv_va); 4237 tpte = pmap_load(pte); 4238 if (pmap_page_dirty(tpte)) 4239 vm_page_dirty(m); 4240 if ((tpte & ATTR_AF) != 0) { 4241 if (safe_to_clear_referenced(pmap, tpte)) { 4242 /* 4243 * TODO: We don't handle the access flag 4244 * at all. We need to be able to set it in 4245 * the exception handler. 4246 */ 4247 panic("ARM64TODO: safe_to_clear_referenced\n"); 4248 } else if ((tpte & ATTR_SW_WIRED) == 0) { 4249 /* 4250 * Wired pages cannot be paged out so 4251 * doing accessed bit emulation for 4252 * them is wasted effort. We do the 4253 * hard work for unwired pages only. 4254 */ 4255 pmap_remove_l3(pmap, pte, pv->pv_va, tpde, 4256 &free, &lock); 4257 pmap_invalidate_page(pmap, pv->pv_va); 4258 cleared++; 4259 if (pvf == pv) 4260 pvf = NULL; 4261 pv = NULL; 4262 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 4263 ("inconsistent pv lock %p %p for page %p", 4264 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 4265 } else 4266 not_cleared++; 4267 } 4268 PMAP_UNLOCK(pmap); 4269 /* Rotate the PV list if it has more than one entry. */ 4270 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 4271 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 4272 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 4273 m->md.pv_gen++; 4274 } 4275 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 4276 not_cleared < PMAP_TS_REFERENCED_MAX); 4277 out: 4278 rw_wunlock(lock); 4279 vm_page_free_pages_toq(&free, false); 4280 return (cleared + not_cleared); 4281 } 4282 4283 /* 4284 * Apply the given advice to the specified range of addresses within the 4285 * given pmap. 
Depending on the advice, clear the referenced and/or 4286 * modified flags in each mapping and set the mapped page's dirty field. 4287 */ 4288 void 4289 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 4290 { 4291 } 4292 4293 /* 4294 * Clear the modify bits on the specified physical page. 4295 */ 4296 void 4297 pmap_clear_modify(vm_page_t m) 4298 { 4299 4300 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 4301 ("pmap_clear_modify: page %p is not managed", m)); 4302 VM_OBJECT_ASSERT_WLOCKED(m->object); 4303 KASSERT(!vm_page_xbusied(m), 4304 ("pmap_clear_modify: page %p is exclusive busied", m)); 4305 4306 /* 4307 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 4308 * If the object containing the page is locked and the page is not 4309 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 4310 */ 4311 if ((m->aflags & PGA_WRITEABLE) == 0) 4312 return; 4313 4314 /* ARM64TODO: We lack support for tracking if a page is modified */ 4315 } 4316 4317 void * 4318 pmap_mapbios(vm_paddr_t pa, vm_size_t size) 4319 { 4320 struct pmap_preinit_mapping *ppim; 4321 vm_offset_t va, offset; 4322 pd_entry_t *pde; 4323 pt_entry_t *l2; 4324 int i, lvl, l2_blocks, free_l2_count, start_idx; 4325 4326 if (!vm_initialized) { 4327 /* 4328 * No L3 ptables so map entire L2 blocks where start VA is: 4329 * preinit_map_va + start_idx * L2_SIZE 4330 * There may be duplicate mappings (multiple VA -> same PA) but 4331 * ARM64 dcache is always PIPT so that's acceptable. 4332 */ 4333 if (size == 0) 4334 return (NULL); 4335 4336 /* Calculate how many full L2 blocks are needed for the mapping */ 4337 l2_blocks = (roundup2(pa + size, L2_SIZE) - rounddown2(pa, L2_SIZE)) >> L2_SHIFT; 4338 4339 offset = pa & L2_OFFSET; 4340 4341 if (preinit_map_va == 0) 4342 return (NULL); 4343 4344 /* Map 2MiB L2 blocks from reserved VA space */ 4345 4346 free_l2_count = 0; 4347 start_idx = -1; 4348 /* Find enough free contiguous VA space */ 4349 for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { 4350 ppim = pmap_preinit_mapping + i; 4351 if (free_l2_count > 0 && ppim->pa != 0) { 4352 /* Not enough space here */ 4353 free_l2_count = 0; 4354 start_idx = -1; 4355 continue; 4356 } 4357 4358 if (ppim->pa == 0) { 4359 /* Free L2 block */ 4360 if (start_idx == -1) 4361 start_idx = i; 4362 free_l2_count++; 4363 if (free_l2_count == l2_blocks) 4364 break; 4365 } 4366 } 4367 if (free_l2_count != l2_blocks) 4368 panic("%s: too many preinit mappings", __func__); 4369 4370 va = preinit_map_va + (start_idx * L2_SIZE); 4371 for (i = start_idx; i < start_idx + l2_blocks; i++) { 4372 /* Mark entries as allocated */ 4373 ppim = pmap_preinit_mapping + i; 4374 ppim->pa = pa; 4375 ppim->va = va + offset; 4376 ppim->size = size; 4377 } 4378 4379 /* Map L2 blocks */ 4380 pa = rounddown2(pa, L2_SIZE); 4381 for (i = 0; i < l2_blocks; i++) { 4382 pde = pmap_pde(kernel_pmap, va, &lvl); 4383 KASSERT(pde != NULL, 4384 ("pmap_mapbios: Invalid page entry, va: 0x%lx", va)); 4385 KASSERT(lvl == 1, ("pmap_mapbios: Invalid level %d", lvl)); 4386 4387 /* Insert L2_BLOCK */ 4388 l2 = pmap_l1_to_l2(pde, va); 4389 pmap_load_store(l2, 4390 pa | ATTR_DEFAULT | ATTR_XN | 4391 ATTR_IDX(CACHED_MEMORY) | L2_BLOCK); 4392 pmap_invalidate_range(kernel_pmap, va, va + L2_SIZE); 4393 4394 va += L2_SIZE; 4395 pa += L2_SIZE; 4396 } 4397 4398 va = preinit_map_va + (start_idx * L2_SIZE); 4399 4400 } else { 4401 /* kva_alloc may be used to map the pages */ 4402 offset = pa & PAGE_MASK; 4403 size = round_page(offset + size); 4404 4405 va = 
kva_alloc(size); 4406 if (va == 0) 4407 panic("%s: Couldn't allocate KVA", __func__); 4408 4409 pde = pmap_pde(kernel_pmap, va, &lvl); 4410 KASSERT(lvl == 2, ("pmap_mapbios: Invalid level %d", lvl)); 4411 4412 /* L3 table is linked */ 4413 va = trunc_page(va); 4414 pa = trunc_page(pa); 4415 pmap_kenter(va, size, pa, CACHED_MEMORY); 4416 } 4417 4418 return ((void *)(va + offset)); 4419 } 4420 4421 void 4422 pmap_unmapbios(vm_offset_t va, vm_size_t size) 4423 { 4424 struct pmap_preinit_mapping *ppim; 4425 vm_offset_t offset, tmpsize, va_trunc; 4426 pd_entry_t *pde; 4427 pt_entry_t *l2; 4428 int i, lvl, l2_blocks, block; 4429 4430 l2_blocks = (roundup2(va + size, L2_SIZE) - rounddown2(va, L2_SIZE)) >> L2_SHIFT; 4431 KASSERT(l2_blocks > 0, ("pmap_unmapbios: invalid size %lx", size)); 4432 4433 /* Remove preinit mapping */ 4434 block = 0; 4435 for (i = 0; i < PMAP_PREINIT_MAPPING_COUNT; i++) { 4436 ppim = pmap_preinit_mapping + i; 4437 if (ppim->va == va) { 4438 KASSERT(ppim->size == size, ("pmap_unmapbios: size mismatch")); 4439 ppim->va = 0; 4440 ppim->pa = 0; 4441 ppim->size = 0; 4442 offset = block * L2_SIZE; 4443 va_trunc = rounddown2(va, L2_SIZE) + offset; 4444 4445 /* Remove L2_BLOCK */ 4446 pde = pmap_pde(kernel_pmap, va_trunc, &lvl); 4447 KASSERT(pde != NULL, 4448 ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va_trunc)); 4449 l2 = pmap_l1_to_l2(pde, va_trunc); 4450 pmap_load_clear(l2); 4451 pmap_invalidate_range(kernel_pmap, va_trunc, va_trunc + L2_SIZE); 4452 4453 if (block == (l2_blocks - 1)) 4454 return; 4455 block++; 4456 } 4457 } 4458 4459 /* Unmap the pages reserved with kva_alloc. */ 4460 if (vm_initialized) { 4461 offset = va & PAGE_MASK; 4462 size = round_page(offset + size); 4463 va = trunc_page(va); 4464 4465 pde = pmap_pde(kernel_pmap, va, &lvl); 4466 KASSERT(pde != NULL, 4467 ("pmap_unmapbios: Invalid page entry, va: 0x%lx", va)); 4468 KASSERT(lvl == 2, ("pmap_unmapbios: Invalid level %d", lvl)); 4469 4470 /* Unmap and invalidate the pages */ 4471 for (tmpsize = 0; tmpsize < size; tmpsize += PAGE_SIZE) 4472 pmap_kremove(va + tmpsize); 4473 4474 kva_free(va, size); 4475 } 4476 } 4477 4478 /* 4479 * Sets the memory attribute for the specified page. 4480 */ 4481 void 4482 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 4483 { 4484 4485 m->md.pv_memattr = ma; 4486 4487 /* 4488 * If "m" is a normal page, update its direct mapping. This update 4489 * can be relied upon to perform any cache operations that are 4490 * required for data coherence. 4491 */ 4492 if ((m->flags & PG_FICTITIOUS) == 0 && 4493 pmap_change_attr(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)), PAGE_SIZE, 4494 m->md.pv_memattr) != 0) 4495 panic("memory attribute change on the direct map failed"); 4496 } 4497 4498 /* 4499 * Changes the specified virtual address range's memory type to that given by 4500 * the parameter "mode". The specified virtual address range must be 4501 * completely contained within either the direct map or the kernel map. If 4502 * the virtual address range is contained within the kernel map, then the 4503 * memory type for each of the corresponding ranges of the direct map is also 4504 * changed. (The corresponding ranges of the direct map are those ranges that 4505 * map the same physical pages as the specified virtual address range.) These 4506 * changes to the direct map are necessary because Intel describes the 4507 * behavior of their processors as "undefined" if two or more mappings to the 4508 * same physical page have different memory types. 
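 *
 * For example, making an already direct-mapped buffer uncacheable from
 * within this file would look like the following sketch, where "pa" and
 * "len" are hypothetical:
 *
 *	error = pmap_change_attr(PHYS_TO_DMAP(pa), len,
 *	    VM_MEMATTR_UNCACHEABLE);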
4509  *
4510  * Returns zero if the change completed successfully, and either EINVAL or
4511  * ENOMEM if the change failed.  Specifically, EINVAL is returned if some part
4512  * of the virtual address range was not mapped, and ENOMEM is returned if
4513  * there was insufficient memory available to complete the change.  In the
4514  * latter case, the memory type may have been changed on some part of the
4515  * virtual address range or the direct map.
4516  */
4517 static int
4518 pmap_change_attr(vm_offset_t va, vm_size_t size, int mode)
4519 {
4520	int error;
4521
4522	PMAP_LOCK(kernel_pmap);
4523	error = pmap_change_attr_locked(va, size, mode);
4524	PMAP_UNLOCK(kernel_pmap);
4525	return (error);
4526 }
4527
4528 static int
4529 pmap_change_attr_locked(vm_offset_t va, vm_size_t size, int mode)
4530 {
4531	vm_offset_t base, offset, tmpva;
4532	pt_entry_t l3, *pte, *newpte;
4533	int lvl;
4534
4535	PMAP_LOCK_ASSERT(kernel_pmap, MA_OWNED);
4536	base = trunc_page(va);
4537	offset = va & PAGE_MASK;
4538	size = round_page(offset + size);
4539
4540	if (!VIRT_IN_DMAP(base))
4541		return (EINVAL);
4542
4543	for (tmpva = base; tmpva < base + size; ) {
4544		pte = pmap_pte(kernel_pmap, tmpva, &lvl);
4545		if (pte == NULL)
4546			return (EINVAL);
4547
4548		if ((pmap_load(pte) & ATTR_IDX_MASK) == ATTR_IDX(mode)) {
4549			/*
4550			 * We already have the correct attribute,
4551			 * ignore this entry.
4552			 */
4553			switch (lvl) {
4554			default:
4555				panic("Invalid DMAP table level: %d\n", lvl);
4556			case 1:
4557				tmpva = (tmpva & ~L1_OFFSET) + L1_SIZE;
4558				break;
4559			case 2:
4560				tmpva = (tmpva & ~L2_OFFSET) + L2_SIZE;
4561				break;
4562			case 3:
4563				tmpva += PAGE_SIZE;
4564				break;
4565			}
4566		} else {
4567			/*
4568			 * Split the entry to a level 3 table, then
4569			 * set the new attribute.
4570			 */
4571			switch (lvl) {
4572			default:
4573				panic("Invalid DMAP table level: %d\n", lvl);
4574			case 1:
4575				newpte = pmap_demote_l1(kernel_pmap, pte,
4576				    tmpva & ~L1_OFFSET);
4577				if (newpte == NULL)
4578					return (EINVAL);
4579				pte = pmap_l1_to_l2(pte, tmpva);	/* FALLTHROUGH */
4580			case 2:
4581				newpte = pmap_demote_l2(kernel_pmap, pte,
4582				    tmpva & ~L2_OFFSET);
4583				if (newpte == NULL)
4584					return (EINVAL);
4585				pte = pmap_l2_to_l3(pte, tmpva);	/* FALLTHROUGH */
4586			case 3:
4587				/* Update the entry */
4588				l3 = pmap_load(pte);
4589				l3 &= ~ATTR_IDX_MASK;
4590				l3 |= ATTR_IDX(mode);
4591				if (mode == DEVICE_MEMORY)
4592					l3 |= ATTR_XN;
4593
4594				pmap_update_entry(kernel_pmap, pte, l3, tmpva,
4595				    PAGE_SIZE);
4596
4597				/*
4598				 * If moving to a non-cacheable entry flush
4599				 * the cache.
4600				 */
4601				if (mode == VM_MEMATTR_UNCACHEABLE)
4602					cpu_dcache_wbinv_range(tmpva, L3_SIZE);
4603
4604				break;
4605			}
4606			tmpva += PAGE_SIZE;
4607		}
4608	}
4609
4610	return (0);
4611 }
4612
4613 /*
4614  * Create an L2 table to map all addresses within an L1 mapping.
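 *
 * Demotion replaces a single 1GiB L1_BLOCK entry with a full table of
 * Ln_ENTRIES L2_BLOCK entries covering the same range, i.e. as a sketch:
 *
 *	l2[i] = (oldl1 & ATTR_MASK) | ((oldl1 & ~ATTR_MASK) + i * L2_SIZE);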
4615  */
4616 static pt_entry_t *
4617 pmap_demote_l1(pmap_t pmap, pt_entry_t *l1, vm_offset_t va)
4618 {
4619	pt_entry_t *l2, newl2, oldl1;
4620	vm_offset_t tmpl1;
4621	vm_paddr_t l2phys, phys;
4622	vm_page_t ml2;
4623	int i;
4624
4625	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4626	oldl1 = pmap_load(l1);
4627	KASSERT((oldl1 & ATTR_DESCR_MASK) == L1_BLOCK,
4628	    ("pmap_demote_l1: Demoting a non-block entry"));
4629	KASSERT((va & L1_OFFSET) == 0,
4630	    ("pmap_demote_l1: Invalid virtual address %#lx", va));
4631	KASSERT((oldl1 & ATTR_SW_MANAGED) == 0,
4632	    ("pmap_demote_l1: Level 1 table shouldn't be managed"));
4633
4634	tmpl1 = 0;
4635	if (va <= (vm_offset_t)l1 && va + L1_SIZE > (vm_offset_t)l1) {
4636		tmpl1 = kva_alloc(PAGE_SIZE);
4637		if (tmpl1 == 0)
4638			return (NULL);
4639	}
4640
4641	if ((ml2 = vm_page_alloc(NULL, 0, VM_ALLOC_INTERRUPT |
4642	    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED)) == NULL) {
4643		CTR2(KTR_PMAP, "pmap_demote_l1: failure for va %#lx"
4644		    " in pmap %p", va, pmap);
4645		return (NULL);
4646	}
4647
4648	l2phys = VM_PAGE_TO_PHYS(ml2);
4649	l2 = (pt_entry_t *)PHYS_TO_DMAP(l2phys);
4650
4651	/* Address the range points at */
4652	phys = oldl1 & ~ATTR_MASK;
4653	/* The attributes from the old l1 table to be copied */
4654	newl2 = oldl1 & ATTR_MASK;
4655
4656	/* Create the new entries */
4657	for (i = 0; i < Ln_ENTRIES; i++) {
4658		l2[i] = newl2 | phys;
4659		phys += L2_SIZE;
4660	}
4661	KASSERT(l2[0] == ((oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK),
4662	    ("Invalid l2 page (%lx != %lx)", l2[0],
4663	    (oldl1 & ~ATTR_DESCR_MASK) | L2_BLOCK));
4664
4665	if (tmpl1 != 0) {
4666		pmap_kenter(tmpl1, PAGE_SIZE,
4667		    DMAP_TO_PHYS((vm_offset_t)l1) & ~L3_OFFSET, CACHED_MEMORY);
4668		l1 = (pt_entry_t *)(tmpl1 + ((vm_offset_t)l1 & PAGE_MASK));
4669	}
4670
4671	pmap_update_entry(pmap, l1, l2phys | L1_TABLE, va, PAGE_SIZE);
4672
4673	if (tmpl1 != 0) {
4674		pmap_kremove(tmpl1);
4675		kva_free(tmpl1, PAGE_SIZE);
4676	}
4677
4678	return (l2);
4679 }
4680
4681 /*
4682  * Create an L3 table to map all addresses within an L2 mapping.
4683  */
4684 static pt_entry_t *
4685 pmap_demote_l2_locked(pmap_t pmap, pt_entry_t *l2, vm_offset_t va,
4686     struct rwlock **lockp)
4687 {
4688	pt_entry_t *l3, newl3, oldl2;
4689	vm_offset_t tmpl2;
4690	vm_paddr_t l3phys, phys;
4691	vm_page_t ml3;
4692	int i;
4693
4694	PMAP_LOCK_ASSERT(pmap, MA_OWNED);
4695	l3 = NULL;
4696	oldl2 = pmap_load(l2);
4697	KASSERT((oldl2 & ATTR_DESCR_MASK) == L2_BLOCK,
4698	    ("pmap_demote_l2: Demoting a non-block entry"));
4699	KASSERT((va & L2_OFFSET) == 0,
4700	    ("pmap_demote_l2: Invalid virtual address %#lx", va));
4701
4702	tmpl2 = 0;
4703	if (va <= (vm_offset_t)l2 && va + L2_SIZE > (vm_offset_t)l2) {
4704		tmpl2 = kva_alloc(PAGE_SIZE);
4705		if (tmpl2 == 0)
4706			return (NULL);
4707	}
4708
4709	if ((ml3 = pmap_remove_pt_page(pmap, va)) == NULL) {
4710		ml3 = vm_page_alloc(NULL, pmap_l2_pindex(va),
4711		    (VIRT_IN_DMAP(va) ? VM_ALLOC_INTERRUPT : VM_ALLOC_NORMAL) |
4712		    VM_ALLOC_NOOBJ | VM_ALLOC_WIRED);
4713		if (ml3 == NULL) {
4714			CTR2(KTR_PMAP, "pmap_demote_l2: failure for va %#lx"
4715			    " in pmap %p", va, pmap);
4716			goto fail;
4717		}
4718		if (va < VM_MAXUSER_ADDRESS)
4719			pmap_resident_count_inc(pmap, 1);
4720	}
4721
4722	l3phys = VM_PAGE_TO_PHYS(ml3);
4723	l3 = (pt_entry_t *)PHYS_TO_DMAP(l3phys);
4724
4725	/* Address the range points at */
4726	phys = oldl2 & ~ATTR_MASK;
4727	/* The attributes from the old l2 table to be copied */
4728	newl3 = (oldl2 & (ATTR_MASK & ~ATTR_DESCR_MASK)) | L3_PAGE;
4729
4730 /*
4731  * If the page table page is new, initialize it.
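 * A page table page returned by pmap_remove_pt_page() (wire_count != 1),
 * presumably preserved by an earlier promotion, still holds valid 4KB
 * entries and is left untouched; only a freshly allocated page is filled,
 * as a sketch:
 *
 *	l3[i] = newl3 | ((oldl2 & ~ATTR_MASK) + i * L3_SIZE);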
4732 */ 4733 if (ml3->wire_count == 1) { 4734 for (i = 0; i < Ln_ENTRIES; i++) { 4735 l3[i] = newl3 | phys; 4736 phys += L3_SIZE; 4737 } 4738 } 4739 KASSERT(l3[0] == ((oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE), 4740 ("Invalid l3 page (%lx != %lx)", l3[0], 4741 (oldl2 & ~ATTR_DESCR_MASK) | L3_PAGE)); 4742 4743 /* 4744 * Map the temporary page so we don't lose access to the l2 table. 4745 */ 4746 if (tmpl2 != 0) { 4747 pmap_kenter(tmpl2, PAGE_SIZE, 4748 DMAP_TO_PHYS((vm_offset_t)l2) & ~L3_OFFSET, CACHED_MEMORY); 4749 l2 = (pt_entry_t *)(tmpl2 + ((vm_offset_t)l2 & PAGE_MASK)); 4750 } 4751 4752 /* 4753 * The spare PV entries must be reserved prior to demoting the 4754 * mapping, that is, prior to changing the PDE. Otherwise, the state 4755 * of the L2 and the PV lists will be inconsistent, which can result 4756 * in reclaim_pv_chunk() attempting to remove a PV entry from the 4757 * wrong PV list and pmap_pv_demote_l2() failing to find the expected 4758 * PV entry for the 2MB page mapping that is being demoted. 4759 */ 4760 if ((oldl2 & ATTR_SW_MANAGED) != 0) 4761 reserve_pv_entries(pmap, Ln_ENTRIES - 1, lockp); 4762 4763 pmap_update_entry(pmap, l2, l3phys | L2_TABLE, va, PAGE_SIZE); 4764 4765 /* 4766 * Demote the PV entry. 4767 */ 4768 if ((oldl2 & ATTR_SW_MANAGED) != 0) 4769 pmap_pv_demote_l2(pmap, va, oldl2 & ~ATTR_MASK, lockp); 4770 4771 atomic_add_long(&pmap_l2_demotions, 1); 4772 CTR3(KTR_PMAP, "pmap_demote_l2: success for va %#lx" 4773 " in pmap %p %lx", va, pmap, l3[0]); 4774 4775 fail: 4776 if (tmpl2 != 0) { 4777 pmap_kremove(tmpl2); 4778 kva_free(tmpl2, PAGE_SIZE); 4779 } 4780 4781 return (l3); 4782 4783 } 4784 4785 static pt_entry_t * 4786 pmap_demote_l2(pmap_t pmap, pt_entry_t *l2, vm_offset_t va) 4787 { 4788 struct rwlock *lock; 4789 pt_entry_t *l3; 4790 4791 lock = NULL; 4792 l3 = pmap_demote_l2_locked(pmap, l2, va, &lock); 4793 if (lock != NULL) 4794 rw_wunlock(lock); 4795 return (l3); 4796 } 4797 4798 /* 4799 * perform the pmap work for mincore 4800 */ 4801 int 4802 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 4803 { 4804 pd_entry_t *l1p, l1; 4805 pd_entry_t *l2p, l2; 4806 pt_entry_t *l3p, l3; 4807 vm_paddr_t pa; 4808 bool managed; 4809 int val; 4810 4811 PMAP_LOCK(pmap); 4812 retry: 4813 pa = 0; 4814 val = 0; 4815 managed = false; 4816 4817 l1p = pmap_l1(pmap, addr); 4818 if (l1p == NULL) /* No l1 */ 4819 goto done; 4820 4821 l1 = pmap_load(l1p); 4822 if ((l1 & ATTR_DESCR_MASK) == L1_INVAL) 4823 goto done; 4824 4825 if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { 4826 pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); 4827 managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 4828 val = MINCORE_SUPER | MINCORE_INCORE; 4829 if (pmap_page_dirty(l1)) 4830 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 4831 if ((l1 & ATTR_AF) == ATTR_AF) 4832 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 4833 goto done; 4834 } 4835 4836 l2p = pmap_l1_to_l2(l1p, addr); 4837 if (l2p == NULL) /* No l2 */ 4838 goto done; 4839 4840 l2 = pmap_load(l2p); 4841 if ((l2 & ATTR_DESCR_MASK) == L2_INVAL) 4842 goto done; 4843 4844 if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { 4845 pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET); 4846 managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 4847 val = MINCORE_SUPER | MINCORE_INCORE; 4848 if (pmap_page_dirty(l2)) 4849 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 4850 if ((l2 & ATTR_AF) == ATTR_AF) 4851 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 4852 goto done; 4853 } 4854 4855 l3p = pmap_l2_to_l3(l2p, addr); 4856 if (l3p == NULL) /* No l3 */ 
4857		goto done;
4858
4859	l3 = pmap_load(l3p);
4860	if ((l3 & ATTR_DESCR_MASK) == L3_INVAL)
4861		goto done;
4862
4863	if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) {
4864		pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET);
4865		managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED;
4866		val = MINCORE_INCORE;
4867		if (pmap_page_dirty(l3))
4868			val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER;
4869		if ((l3 & ATTR_AF) == ATTR_AF)
4870			val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER;
4871	}
4872
4873 done:
4874	if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) !=
4875	    (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) {
4876		/* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change. */
4877		if (vm_page_pa_tryrelock(pmap, pa, locked_pa))
4878			goto retry;
4879	} else
4880		PA_UNLOCK_COND(*locked_pa);
4881	PMAP_UNLOCK(pmap);
4882
4883	return (val);
4884 }
4885
4886 void
4887 pmap_activate(struct thread *td)
4888 {
4889	pmap_t pmap;
4890
4891	critical_enter();
4892	pmap = vmspace_pmap(td->td_proc->p_vmspace);
4893	td->td_proc->p_md.md_l0addr = vtophys(pmap->pm_l0);
4894	__asm __volatile("msr ttbr0_el1, %0" : :
4895	    "r"(td->td_proc->p_md.md_l0addr));
4896	pmap_invalidate_all(pmap);
4897	critical_exit();
4898 }
4899
4900 struct pcb *
4901 pmap_switch(struct thread *old, struct thread *new)
4902 {
4903	pcpu_bp_harden bp_harden;
4904	struct pcb *pcb;
4905
4906	/* Store the new curthread */
4907	PCPU_SET(curthread, new);
4908
4909	/* And the new pcb */
4910	pcb = new->td_pcb;
4911	PCPU_SET(curpcb, pcb);
4912
4913	/*
4914	 * TODO: We may need to flush the cache here if switching
4915	 * to a user process.
4916	 */
4917
4918	if (old == NULL ||
4919	    old->td_proc->p_md.md_l0addr != new->td_proc->p_md.md_l0addr) {
4920		__asm __volatile(
4921		    /* Switch to the new pmap */
4922		    "msr ttbr0_el1, %0	\n"
4923		    "isb		\n"
4924
4925		    /* Invalidate the TLB */
4926		    "dsb ishst		\n"
4927		    "tlbi vmalle1is	\n"
4928		    "dsb ish		\n"
4929		    "isb		\n"
4930		    : : "r"(new->td_proc->p_md.md_l0addr));
4931
4932		/*
4933		 * Stop userspace from training the branch predictor against
4934		 * other processes. This will call into a CPU specific
4935		 * function that clears the branch predictor state.
4936		 */
4937		bp_harden = PCPU_GET(bp_harden);
4938		if (bp_harden != NULL)
4939			bp_harden();
4940	}
4941
4942	return (pcb);
4943 }
4944
4945 void
4946 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz)
4947 {
4948
4949	if (va >= VM_MIN_KERNEL_ADDRESS) {
4950		cpu_icache_sync_range(va, sz);
4951	} else {
4952		u_int len, offset;
4953		vm_paddr_t pa;
4954
4955		/* Find the length of data in this page to flush */
4956		offset = va & PAGE_MASK;
4957		len = imin(PAGE_SIZE - offset, sz);
4958
4959		while (sz != 0) {
4960			/* Extract the physical address & find it in the DMAP */
4961			pa = pmap_extract(pmap, va);
4962			if (pa != 0)
4963				cpu_icache_sync_range(PHYS_TO_DMAP(pa), len);
4964
4965			/* Move to the next page */
4966			sz -= len;
4967			va += len;
4968			/* Set the length for the next iteration */
4969			len = imin(PAGE_SIZE, sz);
4970		}
4971	}
4972 }
4973
4974 int
4975 pmap_fault(pmap_t pmap, uint64_t esr, uint64_t far)
4976 {
4977 #ifdef SMP
4978	register_t intr;
4979	uint64_t par;
4980
4981	switch (ESR_ELx_EXCEPTION(esr)) {
4982	case EXCP_INSN_ABORT_L:
4983	case EXCP_INSN_ABORT:
4984	case EXCP_DATA_ABORT_L:
4985	case EXCP_DATA_ABORT:
4986		break;
4987	default:
4988		return (KERN_FAILURE);
4989	}
4990
4991	/* Data and insn aborts use the same encoding for the FSC field.
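	 * For example, a level-2 translation fault is reported in either
	 * case as
	 *
	 *	(esr & ISS_DATA_DFSC_MASK) == ISS_DATA_DFSC_TF_L2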
	 */
4992	switch (esr & ISS_DATA_DFSC_MASK) {
4993	case ISS_DATA_DFSC_TF_L0:
4994	case ISS_DATA_DFSC_TF_L1:
4995	case ISS_DATA_DFSC_TF_L2:
4996	case ISS_DATA_DFSC_TF_L3:
4997		PMAP_LOCK(pmap);
4998		/* Ask the MMU to check the address */
4999		intr = intr_disable();
5000		if (pmap == kernel_pmap)
5001			par = arm64_address_translate_s1e1r(far);
5002		else
5003			par = arm64_address_translate_s1e0r(far);
5004		intr_restore(intr);
5005		PMAP_UNLOCK(pmap);
5006
5007		/*
5008		 * If the translation was successful, the fault was caused by
5009		 * a break-before-make sequence that has since completed.  We
5010		 * can unlock and return success to the trap handler.
5011		 */
5012		if (PAR_SUCCESS(par))
5013			return (KERN_SUCCESS);
5014		break;
5015	default:
5016		break;
5017	}
5018 #endif
5019
5020	return (KERN_FAILURE);
5021 }
5022
5023 /*
5024  * Increase the starting virtual address of the given mapping if a
5025  * different alignment might result in more superpage mappings.
5026  */
5027 void
5028 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset,
5029     vm_offset_t *addr, vm_size_t size)
5030 {
5031	vm_offset_t superpage_offset;
5032
5033	if (size < L2_SIZE)
5034		return;
5035	if (object != NULL && (object->flags & OBJ_COLORED) != 0)
5036		offset += ptoa(object->pg_color);
5037	superpage_offset = offset & L2_OFFSET;
5038	if (size - ((L2_SIZE - superpage_offset) & L2_OFFSET) < L2_SIZE ||
5039	    (*addr & L2_OFFSET) == superpage_offset)
5040		return;
5041	if ((*addr & L2_OFFSET) < superpage_offset)
5042		*addr = (*addr & ~L2_OFFSET) + superpage_offset;
5043	else
5044		*addr = ((*addr + L2_OFFSET) & ~L2_OFFSET) + superpage_offset;
5045 }
5046
5047 /**
5048  * Get the kernel virtual address of a set of physical pages. If there are
5049  * physical addresses not covered by the DMAP, perform a transient mapping
5050  * that will be removed when calling pmap_unmap_io_transient.
5051  *
5052  * \param page The pages for which the caller wishes to obtain kernel
5053  *		virtual addresses.
5054  * \param vaddr On return contains the kernel virtual memory address
5055  *		of the pages passed in the page parameter.
5056  * \param count Number of pages passed in.
5057  * \param can_fault TRUE if the thread using the mapped pages can take
5058  *		page faults, FALSE otherwise.
5059  *
5060  * \returns TRUE if the caller must call pmap_unmap_io_transient when
5061  *		finished or FALSE otherwise.
5062  *
5063  */
5064 boolean_t
5065 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count,
5066     boolean_t can_fault)
5067 {
5068	vm_paddr_t paddr;
5069	boolean_t needs_mapping;
5070	int error, i;
5071
5072	/*
5073	 * Allocate any KVA space that we need; this is done in a separate
5074	 * loop to prevent calling vmem_alloc while pinned.
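	 * (Presumably because vmem_alloc() with M_WAITOK may sleep, which
	 * is undesirable once the thread has been pinned by sched_pin()
	 * below.)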
5075 */ 5076 needs_mapping = FALSE; 5077 for (i = 0; i < count; i++) { 5078 paddr = VM_PAGE_TO_PHYS(page[i]); 5079 if (__predict_false(!PHYS_IN_DMAP(paddr))) { 5080 error = vmem_alloc(kernel_arena, PAGE_SIZE, 5081 M_BESTFIT | M_WAITOK, &vaddr[i]); 5082 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 5083 needs_mapping = TRUE; 5084 } else { 5085 vaddr[i] = PHYS_TO_DMAP(paddr); 5086 } 5087 } 5088 5089 /* Exit early if everything is covered by the DMAP */ 5090 if (!needs_mapping) 5091 return (FALSE); 5092 5093 if (!can_fault) 5094 sched_pin(); 5095 for (i = 0; i < count; i++) { 5096 paddr = VM_PAGE_TO_PHYS(page[i]); 5097 if (!PHYS_IN_DMAP(paddr)) { 5098 panic( 5099 "pmap_map_io_transient: TODO: Map out of DMAP data"); 5100 } 5101 } 5102 5103 return (needs_mapping); 5104 } 5105 5106 void 5107 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 5108 boolean_t can_fault) 5109 { 5110 vm_paddr_t paddr; 5111 int i; 5112 5113 if (!can_fault) 5114 sched_unpin(); 5115 for (i = 0; i < count; i++) { 5116 paddr = VM_PAGE_TO_PHYS(page[i]); 5117 if (!PHYS_IN_DMAP(paddr)) { 5118 panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); 5119 } 5120 } 5121 } 5122
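
/*
 * Example usage of the two routines above (an illustrative sketch only;
 * "pages", "vaddrs" and "npages" are hypothetical caller state):
 *
 *	boolean_t mapped;
 *
 *	mapped = pmap_map_io_transient(pages, vaddrs, npages, FALSE);
 *	... access the pages through vaddrs[i] ...
 *	if (mapped)
 *		pmap_unmap_io_transient(pages, vaddrs, npages, FALSE);
 */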