1 /*- 2 * Copyright (c) 1991 Regents of the University of California. 3 * All rights reserved. 4 * Copyright (c) 1994 John S. Dyson 5 * All rights reserved. 6 * Copyright (c) 1994 David Greenman 7 * All rights reserved. 8 * Copyright (c) 2003 Peter Wemm 9 * All rights reserved. 10 * Copyright (c) 2005-2010 Alan L. Cox <alc@cs.rice.edu> 11 * All rights reserved. 12 * Copyright (c) 2014 Andrew Turner 13 * All rights reserved. 14 * Copyright (c) 2014 The FreeBSD Foundation 15 * All rights reserved. 16 * 17 * This code is derived from software contributed to Berkeley by 18 * the Systems Programming Group of the University of Utah Computer 19 * Science Department and William Jolitz of UUNET Technologies Inc. 20 * 21 * This software was developed by Andrew Turner under sponsorship from 22 * the FreeBSD Foundation. 23 * 24 * Redistribution and use in source and binary forms, with or without 25 * modification, are permitted provided that the following conditions 26 * are met: 27 * 1. Redistributions of source code must retain the above copyright 28 * notice, this list of conditions and the following disclaimer. 29 * 2. Redistributions in binary form must reproduce the above copyright 30 * notice, this list of conditions and the following disclaimer in the 31 * documentation and/or other materials provided with the distribution. 32 * 3. All advertising materials mentioning features or use of this software 33 * must display the following acknowledgement: 34 * This product includes software developed by the University of 35 * California, Berkeley and its contributors. 36 * 4. Neither the name of the University nor the names of its contributors 37 * may be used to endorse or promote products derived from this software 38 * without specific prior written permission. 39 * 40 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * from: @(#)pmap.c 7.7 (Berkeley) 5/12/91 53 */ 54 /*- 55 * Copyright (c) 2003 Networks Associates Technology, Inc. 56 * All rights reserved. 57 * 58 * This software was developed for the FreeBSD Project by Jake Burkholder, 59 * Safeport Network Services, and Network Associates Laboratories, the 60 * Security Research Division of Network Associates, Inc. under 61 * DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the DARPA 62 * CHATS research program. 63 * 64 * Redistribution and use in source and binary forms, with or without 65 * modification, are permitted provided that the following conditions 66 * are met: 67 * 1. Redistributions of source code must retain the above copyright 68 * notice, this list of conditions and the following disclaimer. 69 * 2. 
Redistributions in binary form must reproduce the above copyright 70 * notice, this list of conditions and the following disclaimer in the 71 * documentation and/or other materials provided with the distribution. 72 * 73 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 74 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 75 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 76 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 77 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 78 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 79 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 80 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 81 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 82 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 83 * SUCH DAMAGE. 84 */ 85 86 #include <sys/cdefs.h> 87 __FBSDID("$FreeBSD$"); 88 89 /* 90 * Manages physical address maps. 91 * 92 * Since the information managed by this module is 93 * also stored by the logical address mapping module, 94 * this module may throw away valid virtual-to-physical 95 * mappings at almost any time. However, invalidations 96 * of virtual-to-physical mappings must be done as 97 * requested. 98 * 99 * In order to cope with hardware architectures which 100 * make virtual-to-physical map invalidates expensive, 101 * this module may delay invalidate or reduced protection 102 * operations until such time as they are actually 103 * necessary. This module is given full information as 104 * to which processors are currently using which maps, 105 * and to when physical maps must be made correct. 106 */ 107 108 #include <sys/param.h> 109 #include <sys/bus.h> 110 #include <sys/systm.h> 111 #include <sys/kernel.h> 112 #include <sys/ktr.h> 113 #include <sys/lock.h> 114 #include <sys/malloc.h> 115 #include <sys/mman.h> 116 #include <sys/msgbuf.h> 117 #include <sys/mutex.h> 118 #include <sys/proc.h> 119 #include <sys/rwlock.h> 120 #include <sys/sx.h> 121 #include <sys/vmem.h> 122 #include <sys/vmmeter.h> 123 #include <sys/sched.h> 124 #include <sys/sysctl.h> 125 #include <sys/_unrhdr.h> 126 #include <sys/smp.h> 127 128 #include <vm/vm.h> 129 #include <vm/vm_param.h> 130 #include <vm/vm_kern.h> 131 #include <vm/vm_page.h> 132 #include <vm/vm_map.h> 133 #include <vm/vm_object.h> 134 #include <vm/vm_extern.h> 135 #include <vm/vm_pageout.h> 136 #include <vm/vm_pager.h> 137 #include <vm/vm_radix.h> 138 #include <vm/vm_reserv.h> 139 #include <vm/uma.h> 140 141 #include <machine/machdep.h> 142 #include <machine/md_var.h> 143 #include <machine/pcb.h> 144 145 #define NPDEPG (PAGE_SIZE/(sizeof (pd_entry_t))) 146 #define NUPDE (NPDEPG * NPDEPG) 147 #define NUSERPGTBLS (NUPDE + NPDEPG) 148 149 #if !defined(DIAGNOSTIC) 150 #ifdef __GNUC_GNU_INLINE__ 151 #define PMAP_INLINE __attribute__((__gnu_inline__)) inline 152 #else 153 #define PMAP_INLINE extern inline 154 #endif 155 #else 156 #define PMAP_INLINE 157 #endif 158 159 /* 160 * These are configured by the mair_el1 register. 
This is set up in locore.S 161 */ 162 #define DEVICE_MEMORY 0 163 #define UNCACHED_MEMORY 1 164 #define CACHED_MEMORY 2 165 166 167 #ifdef PV_STATS 168 #define PV_STAT(x) do { x ; } while (0) 169 #else 170 #define PV_STAT(x) do { } while (0) 171 #endif 172 173 #define pmap_l2_pindex(v) ((v) >> L2_SHIFT) 174 175 #define NPV_LIST_LOCKS MAXCPU 176 177 #define PHYS_TO_PV_LIST_LOCK(pa) \ 178 (&pv_list_locks[pa_index(pa) % NPV_LIST_LOCKS]) 179 180 #define CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, pa) do { \ 181 struct rwlock **_lockp = (lockp); \ 182 struct rwlock *_new_lock; \ 183 \ 184 _new_lock = PHYS_TO_PV_LIST_LOCK(pa); \ 185 if (_new_lock != *_lockp) { \ 186 if (*_lockp != NULL) \ 187 rw_wunlock(*_lockp); \ 188 *_lockp = _new_lock; \ 189 rw_wlock(*_lockp); \ 190 } \ 191 } while (0) 192 193 #define CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m) \ 194 CHANGE_PV_LIST_LOCK_TO_PHYS(lockp, VM_PAGE_TO_PHYS(m)) 195 196 #define RELEASE_PV_LIST_LOCK(lockp) do { \ 197 struct rwlock **_lockp = (lockp); \ 198 \ 199 if (*_lockp != NULL) { \ 200 rw_wunlock(*_lockp); \ 201 *_lockp = NULL; \ 202 } \ 203 } while (0) 204 205 #define VM_PAGE_TO_PV_LIST_LOCK(m) \ 206 PHYS_TO_PV_LIST_LOCK(VM_PAGE_TO_PHYS(m)) 207 208 struct pmap kernel_pmap_store; 209 210 vm_offset_t virtual_avail; /* VA of first avail page (after kernel bss) */ 211 vm_offset_t virtual_end; /* VA of last avail page (end of kernel AS) */ 212 vm_offset_t kernel_vm_end = 0; 213 214 struct msgbuf *msgbufp = NULL; 215 216 static struct rwlock_padalign pvh_global_lock; 217 218 vm_paddr_t dmap_phys_base; /* The start of the dmap region */ 219 220 /* 221 * Data for the pv entry allocation mechanism 222 */ 223 static TAILQ_HEAD(pch, pv_chunk) pv_chunks = TAILQ_HEAD_INITIALIZER(pv_chunks); 224 static struct mtx pv_chunks_mutex; 225 static struct rwlock pv_list_locks[NPV_LIST_LOCKS]; 226 227 static void free_pv_chunk(struct pv_chunk *pc); 228 static void free_pv_entry(pmap_t pmap, pv_entry_t pv); 229 static pv_entry_t get_pv_entry(pmap_t pmap, struct rwlock **lockp); 230 static vm_page_t reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp); 231 static void pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va); 232 static pv_entry_t pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, 233 vm_offset_t va); 234 static vm_page_t pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, 235 vm_page_t m, vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp); 236 static int pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t sva, 237 pd_entry_t ptepde, struct spglist *free, struct rwlock **lockp); 238 static boolean_t pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, 239 vm_page_t m, struct rwlock **lockp); 240 241 static vm_page_t _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, 242 struct rwlock **lockp); 243 244 static void _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, 245 struct spglist *free); 246 static int pmap_unuse_l3(pmap_t, vm_offset_t, pd_entry_t, struct spglist *); 247 248 /* 249 * These load the old table data and store the new value. 250 * They need to be atomic as the System MMU may write to the table at 251 * the same time as the CPU. 
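 *
 * A minimal usage sketch (illustrative only, not code from this file):
 *
 *	old = pmap_load_clear(l3);	-- atomically write 0, return old value
 *	if ((old & ATTR_DESCR_MASK) == L3_PAGE)
 *		... the entry was valid before we cleared it ...
 *
 * pmap_remove_l3() below follows this swap-then-inspect pattern so that the
 * access and dirty information held in the old entry is not lost.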
 */
#define pmap_load_store(table, entry) atomic_swap_64(table, entry)
#define pmap_set(table, mask) atomic_set_64(table, mask)
#define pmap_load_clear(table) atomic_swap_64(table, 0)
#define pmap_load(table) (*table)

/********************/
/* Inline functions */
/********************/

static __inline void
pagecopy(void *s, void *d)
{

        memcpy(d, s, PAGE_SIZE);
}

static __inline void
pagezero(void *p)
{

        bzero(p, PAGE_SIZE);
}

#define pmap_l1_index(va)	(((va) >> L1_SHIFT) & Ln_ADDR_MASK)
#define pmap_l2_index(va)	(((va) >> L2_SHIFT) & Ln_ADDR_MASK)
#define pmap_l3_index(va)	(((va) >> L3_SHIFT) & Ln_ADDR_MASK)

static __inline pd_entry_t *
pmap_l1(pmap_t pmap, vm_offset_t va)
{

        return (&pmap->pm_l1[pmap_l1_index(va)]);
}

static __inline pd_entry_t *
pmap_l1_to_l2(pd_entry_t *l1, vm_offset_t va)
{
        pd_entry_t *l2;

        l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK);
        return (&l2[pmap_l2_index(va)]);
}

static __inline pd_entry_t *
pmap_l2(pmap_t pmap, vm_offset_t va)
{
        pd_entry_t *l1;

        l1 = pmap_l1(pmap, va);
        if ((pmap_load(l1) & ATTR_DESCR_MASK) != L1_TABLE)
                return (NULL);

        return (pmap_l1_to_l2(l1, va));
}

static __inline pt_entry_t *
pmap_l2_to_l3(pd_entry_t *l2, vm_offset_t va)
{
        pt_entry_t *l3;

        l3 = (pt_entry_t *)PHYS_TO_DMAP(pmap_load(l2) & ~ATTR_MASK);
        return (&l3[pmap_l3_index(va)]);
}

static __inline pt_entry_t *
pmap_l3(pmap_t pmap, vm_offset_t va)
{
        pd_entry_t *l2;

        l2 = pmap_l2(pmap, va);
        if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE)
                return (NULL);

        return (pmap_l2_to_l3(l2, va));
}

bool
pmap_get_tables(pmap_t pmap, vm_offset_t va, pd_entry_t **l1, pd_entry_t **l2,
    pt_entry_t **l3)
{
        pd_entry_t *l1p, *l2p;

        if (pmap->pm_l1 == NULL)
                return (false);

        l1p = pmap_l1(pmap, va);
        *l1 = l1p;

        if ((pmap_load(l1p) & ATTR_DESCR_MASK) == L1_BLOCK) {
                *l2 = NULL;
                *l3 = NULL;
                return (true);
        }

        if ((pmap_load(l1p) & ATTR_DESCR_MASK) != L1_TABLE)
                return (false);

        l2p = pmap_l1_to_l2(l1p, va);
        *l2 = l2p;

        if ((pmap_load(l2p) & ATTR_DESCR_MASK) == L2_BLOCK) {
                *l3 = NULL;
                return (true);
        }

        *l3 = pmap_l2_to_l3(l2p, va);

        return (true);
}

static __inline int
pmap_is_current(pmap_t pmap)
{

        return ((pmap == pmap_kernel()) ||
            (pmap == curthread->td_proc->p_vmspace->vm_map.pmap));
}

static __inline int
pmap_l3_valid(pt_entry_t l3)
{

        return ((l3 & ATTR_DESCR_MASK) == L3_PAGE);
}

static __inline int
pmap_l3_valid_cacheable(pt_entry_t l3)
{

        return (((l3 & ATTR_DESCR_MASK) == L3_PAGE) &&
            ((l3 & ATTR_IDX_MASK) == ATTR_IDX(CACHED_MEMORY)));
}

#define PTE_SYNC(pte)	cpu_dcache_wb_range((vm_offset_t)pte, sizeof(*pte))

/*
 * Checks if the page is dirty.  We currently lack proper tracking of this on
 * arm64, so for now assume that a page mapped read/write that has been
 * accessed is dirty.
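 * For example, by that heuristic an entry carrying ATTR_AF together with
 * ATTR_AP(ATTR_AP_RW) is reported dirty, while a read-only mapping, or one
 * that has never been accessed, is not.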
391 */ 392 static inline int 393 pmap_page_dirty(pt_entry_t pte) 394 { 395 396 return ((pte & (ATTR_AF | ATTR_AP_RW_BIT)) == 397 (ATTR_AF | ATTR_AP(ATTR_AP_RW))); 398 } 399 400 static __inline void 401 pmap_resident_count_inc(pmap_t pmap, int count) 402 { 403 404 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 405 pmap->pm_stats.resident_count += count; 406 } 407 408 static __inline void 409 pmap_resident_count_dec(pmap_t pmap, int count) 410 { 411 412 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 413 KASSERT(pmap->pm_stats.resident_count >= count, 414 ("pmap %p resident count underflow %ld %d", pmap, 415 pmap->pm_stats.resident_count, count)); 416 pmap->pm_stats.resident_count -= count; 417 } 418 419 static pt_entry_t * 420 pmap_early_page_idx(vm_offset_t l1pt, vm_offset_t va, u_int *l1_slot, 421 u_int *l2_slot) 422 { 423 pt_entry_t *l2; 424 pd_entry_t *l1; 425 426 l1 = (pd_entry_t *)l1pt; 427 *l1_slot = (va >> L1_SHIFT) & Ln_ADDR_MASK; 428 429 /* Check locore has used a table L1 map */ 430 KASSERT((l1[*l1_slot] & ATTR_DESCR_MASK) == L1_TABLE, 431 ("Invalid bootstrap L1 table")); 432 /* Find the address of the L2 table */ 433 l2 = (pt_entry_t *)init_pt_va; 434 *l2_slot = pmap_l2_index(va); 435 436 return (l2); 437 } 438 439 static vm_paddr_t 440 pmap_early_vtophys(vm_offset_t l1pt, vm_offset_t va) 441 { 442 u_int l1_slot, l2_slot; 443 pt_entry_t *l2; 444 445 l2 = pmap_early_page_idx(l1pt, va, &l1_slot, &l2_slot); 446 447 return ((l2[l2_slot] & ~ATTR_MASK) + (va & L2_OFFSET)); 448 } 449 450 static void 451 pmap_bootstrap_dmap(vm_offset_t l1pt, vm_paddr_t kernstart) 452 { 453 vm_offset_t va; 454 vm_paddr_t pa; 455 pd_entry_t *l1; 456 u_int l1_slot; 457 458 pa = dmap_phys_base = kernstart & ~L1_OFFSET; 459 va = DMAP_MIN_ADDRESS; 460 l1 = (pd_entry_t *)l1pt; 461 l1_slot = pmap_l1_index(DMAP_MIN_ADDRESS); 462 463 for (; va < DMAP_MAX_ADDRESS; 464 pa += L1_SIZE, va += L1_SIZE, l1_slot++) { 465 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); 466 467 pmap_load_store(&l1[l1_slot], 468 (pa & ~L1_OFFSET) | ATTR_DEFAULT | 469 ATTR_IDX(CACHED_MEMORY) | L1_BLOCK); 470 } 471 472 cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); 473 cpu_tlb_flushID(); 474 } 475 476 static vm_offset_t 477 pmap_bootstrap_l2(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l2_start) 478 { 479 vm_offset_t l2pt; 480 vm_paddr_t pa; 481 pd_entry_t *l1; 482 u_int l1_slot; 483 484 KASSERT((va & L1_OFFSET) == 0, ("Invalid virtual address")); 485 486 l1 = (pd_entry_t *)l1pt; 487 l1_slot = pmap_l1_index(va); 488 l2pt = l2_start; 489 490 for (; va < VM_MAX_KERNEL_ADDRESS; l1_slot++, va += L1_SIZE) { 491 KASSERT(l1_slot < Ln_ENTRIES, ("Invalid L1 index")); 492 493 pa = pmap_early_vtophys(l1pt, l2pt); 494 pmap_load_store(&l1[l1_slot], 495 (pa & ~Ln_TABLE_MASK) | L1_TABLE); 496 l2pt += PAGE_SIZE; 497 } 498 499 /* Clean the L2 page table */ 500 memset((void *)l2_start, 0, l2pt - l2_start); 501 cpu_dcache_wb_range(l2_start, l2pt - l2_start); 502 503 /* Flush the l1 table to ram */ 504 cpu_dcache_wb_range((vm_offset_t)l1, PAGE_SIZE); 505 506 return l2pt; 507 } 508 509 static vm_offset_t 510 pmap_bootstrap_l3(vm_offset_t l1pt, vm_offset_t va, vm_offset_t l3_start) 511 { 512 vm_offset_t l2pt, l3pt; 513 vm_paddr_t pa; 514 pd_entry_t *l2; 515 u_int l2_slot; 516 517 KASSERT((va & L2_OFFSET) == 0, ("Invalid virtual address")); 518 519 l2 = pmap_l2(kernel_pmap, va); 520 l2 = (pd_entry_t *)((uintptr_t)l2 & ~(PAGE_SIZE - 1)); 521 l2pt = (vm_offset_t)l2; 522 l2_slot = pmap_l2_index(va); 523 l3pt = l3_start; 524 525 for (; va < VM_MAX_KERNEL_ADDRESS; l2_slot++, va += 
    L2_SIZE) {
                KASSERT(l2_slot < Ln_ENTRIES, ("Invalid L2 index"));

                pa = pmap_early_vtophys(l1pt, l3pt);
                pmap_load_store(&l2[l2_slot],
                    (pa & ~Ln_TABLE_MASK) | L2_TABLE);
                l3pt += PAGE_SIZE;
        }

        /* Clean the L3 page tables */
        memset((void *)l3_start, 0, l3pt - l3_start);
        cpu_dcache_wb_range(l3_start, l3pt - l3_start);

        cpu_dcache_wb_range((vm_offset_t)l2, PAGE_SIZE);

        return l3pt;
}

/*
 * Bootstrap the system enough to run with virtual memory.
 */
void
pmap_bootstrap(vm_offset_t l1pt, vm_paddr_t kernstart, vm_size_t kernlen)
{
        u_int l1_slot, l2_slot, avail_slot, map_slot, used_map_slot;
        uint64_t kern_delta;
        pt_entry_t *l2;
        vm_offset_t va, freemempos;
        vm_offset_t dpcpu, msgbufpv;
        vm_paddr_t pa, min_pa;
        int i;

        kern_delta = KERNBASE - kernstart;
        physmem = 0;

        printf("pmap_bootstrap %lx %lx %lx\n", l1pt, kernstart, kernlen);
        printf("%lx\n", l1pt);
        printf("%lx\n", (KERNBASE >> L1_SHIFT) & Ln_ADDR_MASK);

        /* Set this early so we can use the pagetable walking functions */
        kernel_pmap_store.pm_l1 = (pd_entry_t *)l1pt;
        PMAP_LOCK_INIT(kernel_pmap);

        /*
         * Initialize the global pv list lock.
         */
        rw_init(&pvh_global_lock, "pmap pv global");

        /* Assume the address we were loaded to is a valid physical address */
        min_pa = KERNBASE - kern_delta;

        /*
         * Find the minimum physical address.  physmap is sorted,
         * but may contain empty ranges.
         */
        for (i = 0; i < (physmap_idx * 2); i += 2) {
                if (physmap[i] == physmap[i + 1])
                        continue;
                if (physmap[i] <= min_pa)
                        min_pa = physmap[i];
                break;
        }

        /* Create a direct map region early so we can use it for pa -> va */
        pmap_bootstrap_dmap(l1pt, min_pa);

        va = KERNBASE;
        pa = KERNBASE - kern_delta;

        /*
         * Start to initialize phys_avail by copying from physmap
         * up to the physical address KERNBASE points at.
         */
        map_slot = avail_slot = 0;
        for (; map_slot < (physmap_idx * 2); map_slot += 2) {
                if (physmap[map_slot] == physmap[map_slot + 1])
                        continue;

                if (physmap[map_slot] <= pa &&
                    physmap[map_slot + 1] > pa)
                        break;

                phys_avail[avail_slot] = physmap[map_slot];
                phys_avail[avail_slot + 1] = physmap[map_slot + 1];
                physmem += (phys_avail[avail_slot + 1] -
                    phys_avail[avail_slot]) >> PAGE_SHIFT;
                avail_slot += 2;
        }

        /* Add the memory before the kernel */
        if (physmap[avail_slot] < pa) {
                phys_avail[avail_slot] = physmap[map_slot];
                phys_avail[avail_slot + 1] = pa;
                physmem += (phys_avail[avail_slot + 1] -
                    phys_avail[avail_slot]) >> PAGE_SHIFT;
                avail_slot += 2;
        }
        used_map_slot = map_slot;

        /*
         * Read the page table to find out what is already mapped.
         * This assumes we have mapped a block of memory from KERNBASE
         * using a single L1 entry.
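         *
         * (For scale, assuming 4K pages: an L2 block entry covers
         * L2_SIZE = 2MB and one L1 entry spans L1_SIZE = 1GB, i.e. 512 L2
         * entries, so a 16MB kernel image would occupy 8 consecutive L2
         * block entries here.)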
628 */ 629 l2 = pmap_early_page_idx(l1pt, KERNBASE, &l1_slot, &l2_slot); 630 631 /* Sanity check the index, KERNBASE should be the first VA */ 632 KASSERT(l2_slot == 0, ("The L2 index is non-zero")); 633 634 /* Find how many pages we have mapped */ 635 for (; l2_slot < Ln_ENTRIES; l2_slot++) { 636 if ((l2[l2_slot] & ATTR_DESCR_MASK) == 0) 637 break; 638 639 /* Check locore used L2 blocks */ 640 KASSERT((l2[l2_slot] & ATTR_DESCR_MASK) == L2_BLOCK, 641 ("Invalid bootstrap L2 table")); 642 KASSERT((l2[l2_slot] & ~ATTR_MASK) == pa, 643 ("Incorrect PA in L2 table")); 644 645 va += L2_SIZE; 646 pa += L2_SIZE; 647 } 648 649 va = roundup2(va, L1_SIZE); 650 651 freemempos = KERNBASE + kernlen; 652 freemempos = roundup2(freemempos, PAGE_SIZE); 653 /* Create the l2 tables up to VM_MAX_KERNEL_ADDRESS */ 654 freemempos = pmap_bootstrap_l2(l1pt, va, freemempos); 655 /* And the l3 tables for the early devmap */ 656 freemempos = pmap_bootstrap_l3(l1pt, 657 VM_MAX_KERNEL_ADDRESS - L2_SIZE, freemempos); 658 659 cpu_tlb_flushID(); 660 661 #define alloc_pages(var, np) \ 662 (var) = freemempos; \ 663 freemempos += (np * PAGE_SIZE); \ 664 memset((char *)(var), 0, ((np) * PAGE_SIZE)); 665 666 /* Allocate dynamic per-cpu area. */ 667 alloc_pages(dpcpu, DPCPU_SIZE / PAGE_SIZE); 668 dpcpu_init((void *)dpcpu, 0); 669 670 /* Allocate memory for the msgbuf, e.g. for /sbin/dmesg */ 671 alloc_pages(msgbufpv, round_page(msgbufsize) / PAGE_SIZE); 672 msgbufp = (void *)msgbufpv; 673 674 virtual_avail = roundup2(freemempos, L1_SIZE); 675 virtual_end = VM_MAX_KERNEL_ADDRESS - L2_SIZE; 676 kernel_vm_end = virtual_avail; 677 678 pa = pmap_early_vtophys(l1pt, freemempos); 679 680 /* Finish initialising physmap */ 681 map_slot = used_map_slot; 682 for (; avail_slot < (PHYS_AVAIL_SIZE - 2) && 683 map_slot < (physmap_idx * 2); map_slot += 2) { 684 if (physmap[map_slot] == physmap[map_slot + 1]) 685 continue; 686 687 /* Have we used the current range? */ 688 if (physmap[map_slot + 1] <= pa) 689 continue; 690 691 /* Do we need to split the entry? */ 692 if (physmap[map_slot] < pa) { 693 phys_avail[avail_slot] = pa; 694 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 695 } else { 696 phys_avail[avail_slot] = physmap[map_slot]; 697 phys_avail[avail_slot + 1] = physmap[map_slot + 1]; 698 } 699 physmem += (phys_avail[avail_slot + 1] - 700 phys_avail[avail_slot]) >> PAGE_SHIFT; 701 702 avail_slot += 2; 703 } 704 phys_avail[avail_slot] = 0; 705 phys_avail[avail_slot + 1] = 0; 706 707 /* 708 * Maxmem isn't the "maximum memory", it's one larger than the 709 * highest page of the physical address space. It should be 710 * called something like "Maxphyspage". 711 */ 712 Maxmem = atop(phys_avail[avail_slot - 1]); 713 714 cpu_tlb_flushID(); 715 } 716 717 /* 718 * Initialize a vm_page's machine-dependent fields. 719 */ 720 void 721 pmap_page_init(vm_page_t m) 722 { 723 724 TAILQ_INIT(&m->md.pv_list); 725 m->md.pv_memattr = VM_MEMATTR_WRITE_BACK; 726 } 727 728 /* 729 * Initialize the pmap module. 730 * Called by vm_init, to initialize any structures that the pmap 731 * system needs to map virtual memory. 732 */ 733 void 734 pmap_init(void) 735 { 736 int i; 737 738 /* 739 * Initialize the pv chunk list mutex. 740 */ 741 mtx_init(&pv_chunks_mutex, "pmap pv chunk list", NULL, MTX_DEF); 742 743 /* 744 * Initialize the pool of pv list locks. 745 */ 746 for (i = 0; i < NPV_LIST_LOCKS; i++) 747 rw_init(&pv_list_locks[i], "pmap pv list"); 748 } 749 750 /* 751 * Normal, non-SMP, invalidation functions. 752 * We inline these within pmap.c for speed. 
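 *
 * A note on the operand encoding (a sketch, not authoritative): "tlbi
 * vaae1is" takes the virtual address in page units in the low bits of its
 * register operand, which is why the callers below shift the VA right by
 * PAGE_SHIFT before handing it to the inline assembly.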
 */
PMAP_INLINE void
pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
{

        sched_pin();
        __asm __volatile(
            "dsb  sy\n"
            "tlbi vaae1is, %0\n"
            "dsb  sy\n"
            "isb\n"
            : : "r"(va >> PAGE_SHIFT));
        sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
{
        vm_offset_t addr;

        sched_pin();
        sva >>= PAGE_SHIFT;
        eva >>= PAGE_SHIFT;
        __asm __volatile("dsb sy");
        for (addr = sva; addr < eva; addr++) {
                __asm __volatile(
                    "tlbi vaae1is, %0" : : "r"(addr));
        }
        __asm __volatile(
            "dsb  sy\n"
            "isb\n");
        sched_unpin();
}

PMAP_INLINE void
pmap_invalidate_all(pmap_t pmap)
{

        sched_pin();
        __asm __volatile(
            "dsb  sy\n"
            "tlbi vmalle1is\n"
            "dsb  sy\n"
            "isb\n");
        sched_unpin();
}

/*
 * Routine:	pmap_extract
 * Function:
 *	Extract the physical page address associated
 *	with the given map/virtual_address pair.
 */
vm_paddr_t
pmap_extract(pmap_t pmap, vm_offset_t va)
{
        pd_entry_t *l2p, l2;
        pt_entry_t *l3p, l3;
        vm_paddr_t pa;

        pa = 0;
        PMAP_LOCK(pmap);
        /*
         * Start with the l2 table.  We are unable to allocate
         * pages in the l1 table.
         */
        l2p = pmap_l2(pmap, va);
        if (l2p != NULL) {
                l2 = pmap_load(l2p);
                if ((l2 & ATTR_DESCR_MASK) == L2_TABLE) {
                        l3p = pmap_l2_to_l3(l2p, va);
                        if (l3p != NULL) {
                                l3 = pmap_load(l3p);

                                if ((l3 & ATTR_DESCR_MASK) == L3_PAGE)
                                        pa = (l3 & ~ATTR_MASK) |
                                            (va & L3_OFFSET);
                        }
                } else if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
                        pa = (l2 & ~ATTR_MASK) | (va & L2_OFFSET);
        }
        PMAP_UNLOCK(pmap);
        return (pa);
}

/*
 * Routine:	pmap_extract_and_hold
 * Function:
 *	Atomically extract and hold the physical page
 *	with the given pmap and virtual address pair
 *	if that mapping permits the given protection.
 */
vm_page_t
pmap_extract_and_hold(pmap_t pmap, vm_offset_t va, vm_prot_t prot)
{
        pt_entry_t *l3p, l3;
        vm_paddr_t pa;
        vm_page_t m;

        pa = 0;
        m = NULL;
        PMAP_LOCK(pmap);
retry:
        l3p = pmap_l3(pmap, va);
        if (l3p != NULL && (l3 = pmap_load(l3p)) != 0) {
                if (((l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) ||
                    ((prot & VM_PROT_WRITE) == 0)) {
                        if (vm_page_pa_tryrelock(pmap, l3 & ~ATTR_MASK, &pa))
                                goto retry;
                        m = PHYS_TO_VM_PAGE(l3 & ~ATTR_MASK);
                        vm_page_hold(m);
                }
        }
        PA_UNLOCK_COND(pa);
        PMAP_UNLOCK(pmap);
        return (m);
}

vm_paddr_t
pmap_kextract(vm_offset_t va)
{
        pd_entry_t *l2p, l2;
        pt_entry_t *l3;
        vm_paddr_t pa;

        if (va >= DMAP_MIN_ADDRESS && va < DMAP_MAX_ADDRESS) {
                pa = DMAP_TO_PHYS(va);
        } else {
                l2p = pmap_l2(kernel_pmap, va);
                if (l2p == NULL)
                        panic("pmap_kextract: No l2");
                l2 = pmap_load(l2p);
                if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK)
                        return ((l2 & ~ATTR_MASK) |
                            (va & L2_OFFSET));

                l3 = pmap_l2_to_l3(l2p, va);
                if (l3 == NULL)
                        panic("pmap_kextract: No l3...");
                pa = (pmap_load(l3) & ~ATTR_MASK) | (va & PAGE_MASK);
        }
        return (pa);
}

/***************************************************
 * Low level mapping routines.....
899 ***************************************************/ 900 901 void 902 pmap_kenter_device(vm_offset_t sva, vm_size_t size, vm_paddr_t pa) 903 { 904 pt_entry_t *l3; 905 vm_offset_t va; 906 907 KASSERT((pa & L3_OFFSET) == 0, 908 ("pmap_kenter_device: Invalid physical address")); 909 KASSERT((sva & L3_OFFSET) == 0, 910 ("pmap_kenter_device: Invalid virtual address")); 911 KASSERT((size & PAGE_MASK) == 0, 912 ("pmap_kenter_device: Mapping is not page-sized")); 913 914 va = sva; 915 while (size != 0) { 916 l3 = pmap_l3(kernel_pmap, va); 917 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); 918 pmap_load_store(l3, (pa & ~L3_OFFSET) | ATTR_DEFAULT | 919 ATTR_IDX(DEVICE_MEMORY) | L3_PAGE); 920 PTE_SYNC(l3); 921 922 va += PAGE_SIZE; 923 pa += PAGE_SIZE; 924 size -= PAGE_SIZE; 925 } 926 pmap_invalidate_range(kernel_pmap, sva, va); 927 } 928 929 /* 930 * Remove a page from the kernel pagetables. 931 * Note: not SMP coherent. 932 */ 933 PMAP_INLINE void 934 pmap_kremove(vm_offset_t va) 935 { 936 pt_entry_t *l3; 937 938 l3 = pmap_l3(kernel_pmap, va); 939 KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); 940 941 if (pmap_l3_valid_cacheable(pmap_load(l3))) 942 cpu_dcache_wb_range(va, L3_SIZE); 943 pmap_load_clear(l3); 944 PTE_SYNC(l3); 945 pmap_invalidate_page(kernel_pmap, va); 946 } 947 948 void 949 pmap_kremove_device(vm_offset_t sva, vm_size_t size) 950 { 951 pt_entry_t *l3; 952 vm_offset_t va; 953 954 KASSERT((sva & L3_OFFSET) == 0, 955 ("pmap_kremove_device: Invalid virtual address")); 956 KASSERT((size & PAGE_MASK) == 0, 957 ("pmap_kremove_device: Mapping is not page-sized")); 958 959 va = sva; 960 while (size != 0) { 961 l3 = pmap_l3(kernel_pmap, va); 962 KASSERT(l3 != NULL, ("Invalid page table, va: 0x%lx", va)); 963 pmap_load_clear(l3); 964 PTE_SYNC(l3); 965 966 va += PAGE_SIZE; 967 size -= PAGE_SIZE; 968 } 969 pmap_invalidate_range(kernel_pmap, sva, va); 970 } 971 972 /* 973 * Used to map a range of physical addresses into kernel 974 * virtual address space. 975 * 976 * The value passed in '*virt' is a suggested virtual address for 977 * the mapping. Architectures which can support a direct-mapped 978 * physical to virtual region can return the appropriate address 979 * within that region, leaving '*virt' unchanged. Other 980 * architectures should map the pages starting at '*virt' and 981 * update '*virt' with the first usable address after the mapped 982 * region. 983 */ 984 vm_offset_t 985 pmap_map(vm_offset_t *virt, vm_paddr_t start, vm_paddr_t end, int prot) 986 { 987 return PHYS_TO_DMAP(start); 988 } 989 990 991 /* 992 * Add a list of wired pages to the kva 993 * this routine is only used for temporary 994 * kernel mappings that do not need to have 995 * page modification or references recorded. 996 * Note that old mappings are simply written 997 * over. The page *must* be wired. 998 * Note: SMP coherent. Uses a ranged shootdown IPI. 999 */ 1000 void 1001 pmap_qenter(vm_offset_t sva, vm_page_t *ma, int count) 1002 { 1003 pt_entry_t *l3, pa; 1004 vm_offset_t va; 1005 vm_page_t m; 1006 int i; 1007 1008 va = sva; 1009 for (i = 0; i < count; i++) { 1010 m = ma[i]; 1011 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_AP(ATTR_AP_RW) | 1012 ATTR_IDX(m->md.pv_memattr) | L3_PAGE; 1013 l3 = pmap_l3(kernel_pmap, va); 1014 pmap_load_store(l3, pa); 1015 PTE_SYNC(l3); 1016 1017 va += L3_SIZE; 1018 } 1019 pmap_invalidate_range(kernel_pmap, sva, va); 1020 } 1021 1022 /* 1023 * This routine tears out page mappings from the 1024 * kernel -- it is meant only for temporary mappings. 
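 *
 * A typical (illustrative) pairing with pmap_qenter() above:
 *
 *	pmap_qenter(va, ma, npages);	-- map npages wired pages at va
 *	... use the temporary mapping ...
 *	pmap_qremove(va, npages);	-- tear the mappings down again
 *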
1025 * Note: SMP coherent. Uses a ranged shootdown IPI. 1026 */ 1027 void 1028 pmap_qremove(vm_offset_t sva, int count) 1029 { 1030 pt_entry_t *l3; 1031 vm_offset_t va; 1032 1033 KASSERT(sva >= VM_MIN_KERNEL_ADDRESS, ("usermode va %lx", sva)); 1034 1035 va = sva; 1036 while (count-- > 0) { 1037 l3 = pmap_l3(kernel_pmap, va); 1038 KASSERT(l3 != NULL, ("pmap_kremove: Invalid address")); 1039 1040 if (pmap_l3_valid_cacheable(pmap_load(l3))) 1041 cpu_dcache_wb_range(va, L3_SIZE); 1042 pmap_load_clear(l3); 1043 PTE_SYNC(l3); 1044 1045 va += PAGE_SIZE; 1046 } 1047 pmap_invalidate_range(kernel_pmap, sva, va); 1048 } 1049 1050 /*************************************************** 1051 * Page table page management routines..... 1052 ***************************************************/ 1053 static __inline void 1054 pmap_free_zero_pages(struct spglist *free) 1055 { 1056 vm_page_t m; 1057 1058 while ((m = SLIST_FIRST(free)) != NULL) { 1059 SLIST_REMOVE_HEAD(free, plinks.s.ss); 1060 /* Preserve the page's PG_ZERO setting. */ 1061 vm_page_free_toq(m); 1062 } 1063 } 1064 1065 /* 1066 * Schedule the specified unused page table page to be freed. Specifically, 1067 * add the page to the specified list of pages that will be released to the 1068 * physical memory manager after the TLB has been updated. 1069 */ 1070 static __inline void 1071 pmap_add_delayed_free_list(vm_page_t m, struct spglist *free, 1072 boolean_t set_PG_ZERO) 1073 { 1074 1075 if (set_PG_ZERO) 1076 m->flags |= PG_ZERO; 1077 else 1078 m->flags &= ~PG_ZERO; 1079 SLIST_INSERT_HEAD(free, m, plinks.s.ss); 1080 } 1081 1082 /* 1083 * Decrements a page table page's wire count, which is used to record the 1084 * number of valid page table entries within the page. If the wire count 1085 * drops to zero, then the page table page is unmapped. Returns TRUE if the 1086 * page table page was unmapped and FALSE otherwise. 1087 */ 1088 static inline boolean_t 1089 pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1090 { 1091 1092 --m->wire_count; 1093 if (m->wire_count == 0) { 1094 _pmap_unwire_l3(pmap, va, m, free); 1095 return (TRUE); 1096 } else 1097 return (FALSE); 1098 } 1099 1100 static void 1101 _pmap_unwire_l3(pmap_t pmap, vm_offset_t va, vm_page_t m, struct spglist *free) 1102 { 1103 1104 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1105 /* 1106 * unmap the page table page 1107 */ 1108 if (m->pindex >= NUPDE) { 1109 /* PD page */ 1110 pd_entry_t *l1; 1111 l1 = pmap_l1(pmap, va); 1112 pmap_load_clear(l1); 1113 PTE_SYNC(l1); 1114 } else { 1115 /* PTE page */ 1116 pd_entry_t *l2; 1117 l2 = pmap_l2(pmap, va); 1118 pmap_load_clear(l2); 1119 PTE_SYNC(l2); 1120 } 1121 pmap_resident_count_dec(pmap, 1); 1122 if (m->pindex < NUPDE) { 1123 /* We just released a PT, unhold the matching PD */ 1124 vm_page_t pdpg; 1125 1126 pdpg = PHYS_TO_VM_PAGE(*pmap_l1(pmap, va) & ~ATTR_MASK); 1127 pmap_unwire_l3(pmap, va, pdpg, free); 1128 } 1129 pmap_invalidate_page(pmap, va); 1130 1131 /* 1132 * This is a release store so that the ordinary store unmapping 1133 * the page table page is globally performed before TLB shoot- 1134 * down is begun. 1135 */ 1136 atomic_subtract_rel_int(&vm_cnt.v_wire_count, 1); 1137 1138 /* 1139 * Put page on a list so that it is released after 1140 * *ALL* TLB shootdown is done 1141 */ 1142 pmap_add_delayed_free_list(m, free, TRUE); 1143 } 1144 1145 /* 1146 * After removing an l3 entry, this routine is used to 1147 * conditionally free the page, and manage the hold/wire counts. 
1148 */ 1149 static int 1150 pmap_unuse_l3(pmap_t pmap, vm_offset_t va, pd_entry_t ptepde, 1151 struct spglist *free) 1152 { 1153 vm_page_t mpte; 1154 1155 if (va >= VM_MAXUSER_ADDRESS) 1156 return (0); 1157 KASSERT(ptepde != 0, ("pmap_unuse_pt: ptepde != 0")); 1158 mpte = PHYS_TO_VM_PAGE(ptepde & ~ATTR_MASK); 1159 return (pmap_unwire_l3(pmap, va, mpte, free)); 1160 } 1161 1162 void 1163 pmap_pinit0(pmap_t pmap) 1164 { 1165 1166 PMAP_LOCK_INIT(pmap); 1167 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1168 pmap->pm_l1 = kernel_pmap->pm_l1; 1169 } 1170 1171 int 1172 pmap_pinit(pmap_t pmap) 1173 { 1174 vm_paddr_t l1phys; 1175 vm_page_t l1pt; 1176 1177 /* 1178 * allocate the l1 page 1179 */ 1180 while ((l1pt = vm_page_alloc(NULL, 0xdeadbeef, VM_ALLOC_NORMAL | 1181 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) 1182 VM_WAIT; 1183 1184 l1phys = VM_PAGE_TO_PHYS(l1pt); 1185 pmap->pm_l1 = (pd_entry_t *)PHYS_TO_DMAP(l1phys); 1186 1187 if ((l1pt->flags & PG_ZERO) == 0) 1188 pagezero(pmap->pm_l1); 1189 1190 bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); 1191 1192 return (1); 1193 } 1194 1195 /* 1196 * This routine is called if the desired page table page does not exist. 1197 * 1198 * If page table page allocation fails, this routine may sleep before 1199 * returning NULL. It sleeps only if a lock pointer was given. 1200 * 1201 * Note: If a page allocation fails at page table level two or three, 1202 * one or two pages may be held during the wait, only to be released 1203 * afterwards. This conservative approach is easily argued to avoid 1204 * race conditions. 1205 */ 1206 static vm_page_t 1207 _pmap_alloc_l3(pmap_t pmap, vm_pindex_t ptepindex, struct rwlock **lockp) 1208 { 1209 vm_page_t m, /*pdppg, */pdpg; 1210 1211 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1212 1213 /* 1214 * Allocate a page table page. 1215 */ 1216 if ((m = vm_page_alloc(NULL, ptepindex, VM_ALLOC_NOOBJ | 1217 VM_ALLOC_WIRED | VM_ALLOC_ZERO)) == NULL) { 1218 if (lockp != NULL) { 1219 RELEASE_PV_LIST_LOCK(lockp); 1220 PMAP_UNLOCK(pmap); 1221 rw_runlock(&pvh_global_lock); 1222 VM_WAIT; 1223 rw_rlock(&pvh_global_lock); 1224 PMAP_LOCK(pmap); 1225 } 1226 1227 /* 1228 * Indicate the need to retry. While waiting, the page table 1229 * page may have been allocated. 1230 */ 1231 return (NULL); 1232 } 1233 if ((m->flags & PG_ZERO) == 0) 1234 pmap_zero_page(m); 1235 1236 /* 1237 * Map the pagetable page into the process address space, if 1238 * it isn't already there. 
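         *
         * (Recap of the index scheme, for orientation: ptepindex values
         * below NUPDE name L3 page-table pages and equal va >> L2_SHIFT,
         * while values from NUPDE up name the L2 page-table pages, one per
         * L1 slot; the arithmetic below converts between the two.)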
1239 */ 1240 1241 if (ptepindex >= NUPDE) { 1242 pd_entry_t *l1; 1243 vm_pindex_t l1index; 1244 1245 l1index = ptepindex - NUPDE; 1246 l1 = &pmap->pm_l1[l1index]; 1247 pmap_load_store(l1, VM_PAGE_TO_PHYS(m) | L1_TABLE); 1248 PTE_SYNC(l1); 1249 1250 } else { 1251 vm_pindex_t l1index; 1252 pd_entry_t *l1, *l2; 1253 1254 l1index = ptepindex >> (L1_SHIFT - L2_SHIFT); 1255 l1 = &pmap->pm_l1[l1index]; 1256 if (pmap_load(l1) == 0) { 1257 /* recurse for allocating page dir */ 1258 if (_pmap_alloc_l3(pmap, NUPDE + l1index, 1259 lockp) == NULL) { 1260 --m->wire_count; 1261 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1262 vm_page_free_zero(m); 1263 return (NULL); 1264 } 1265 } else { 1266 pdpg = PHYS_TO_VM_PAGE(pmap_load(l1) & ~ATTR_MASK); 1267 pdpg->wire_count++; 1268 } 1269 1270 l2 = (pd_entry_t *)PHYS_TO_DMAP(pmap_load(l1) & ~ATTR_MASK); 1271 l2 = &l2[ptepindex & Ln_ADDR_MASK]; 1272 pmap_load_store(l2, VM_PAGE_TO_PHYS(m) | L2_TABLE); 1273 PTE_SYNC(l2); 1274 } 1275 1276 pmap_resident_count_inc(pmap, 1); 1277 1278 return (m); 1279 } 1280 1281 static vm_page_t 1282 pmap_alloc_l3(pmap_t pmap, vm_offset_t va, struct rwlock **lockp) 1283 { 1284 vm_pindex_t ptepindex; 1285 pd_entry_t *l2; 1286 vm_page_t m; 1287 1288 /* 1289 * Calculate pagetable page index 1290 */ 1291 ptepindex = pmap_l2_pindex(va); 1292 retry: 1293 /* 1294 * Get the page directory entry 1295 */ 1296 l2 = pmap_l2(pmap, va); 1297 1298 /* 1299 * If the page table page is mapped, we just increment the 1300 * hold count, and activate it. 1301 */ 1302 if (l2 != NULL && pmap_load(l2) != 0) { 1303 m = PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); 1304 m->wire_count++; 1305 } else { 1306 /* 1307 * Here if the pte page isn't mapped, or if it has been 1308 * deallocated. 1309 */ 1310 m = _pmap_alloc_l3(pmap, ptepindex, lockp); 1311 if (m == NULL && lockp != NULL) 1312 goto retry; 1313 } 1314 return (m); 1315 } 1316 1317 1318 /*************************************************** 1319 * Pmap allocation/deallocation routines. 1320 ***************************************************/ 1321 1322 /* 1323 * Release any resources held by the given physical map. 1324 * Called when a pmap initialized by pmap_pinit is being released. 1325 * Should only be called if the map contains no valid mappings. 
1326 */ 1327 void 1328 pmap_release(pmap_t pmap) 1329 { 1330 vm_page_t m; 1331 1332 KASSERT(pmap->pm_stats.resident_count == 0, 1333 ("pmap_release: pmap resident count %ld != 0", 1334 pmap->pm_stats.resident_count)); 1335 1336 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pmap->pm_l1)); 1337 1338 m->wire_count--; 1339 atomic_subtract_int(&vm_cnt.v_wire_count, 1); 1340 vm_page_free_zero(m); 1341 } 1342 1343 #if 0 1344 static int 1345 kvm_size(SYSCTL_HANDLER_ARGS) 1346 { 1347 unsigned long ksize = VM_MAX_KERNEL_ADDRESS - VM_MIN_KERNEL_ADDRESS; 1348 1349 return sysctl_handle_long(oidp, &ksize, 0, req); 1350 } 1351 SYSCTL_PROC(_vm, OID_AUTO, kvm_size, CTLTYPE_LONG|CTLFLAG_RD, 1352 0, 0, kvm_size, "LU", "Size of KVM"); 1353 1354 static int 1355 kvm_free(SYSCTL_HANDLER_ARGS) 1356 { 1357 unsigned long kfree = VM_MAX_KERNEL_ADDRESS - kernel_vm_end; 1358 1359 return sysctl_handle_long(oidp, &kfree, 0, req); 1360 } 1361 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD, 1362 0, 0, kvm_free, "LU", "Amount of KVM free"); 1363 #endif /* 0 */ 1364 1365 /* 1366 * grow the number of kernel page table entries, if needed 1367 */ 1368 void 1369 pmap_growkernel(vm_offset_t addr) 1370 { 1371 vm_paddr_t paddr; 1372 vm_page_t nkpg; 1373 pd_entry_t *l1, *l2; 1374 1375 mtx_assert(&kernel_map->system_mtx, MA_OWNED); 1376 1377 addr = roundup2(addr, L2_SIZE); 1378 if (addr - 1 >= kernel_map->max_offset) 1379 addr = kernel_map->max_offset; 1380 while (kernel_vm_end < addr) { 1381 l1 = pmap_l1(kernel_pmap, kernel_vm_end); 1382 if (pmap_load(l1) == 0) { 1383 /* We need a new PDP entry */ 1384 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L1_SHIFT, 1385 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | 1386 VM_ALLOC_WIRED | VM_ALLOC_ZERO); 1387 if (nkpg == NULL) 1388 panic("pmap_growkernel: no memory to grow kernel"); 1389 if ((nkpg->flags & PG_ZERO) == 0) 1390 pmap_zero_page(nkpg); 1391 paddr = VM_PAGE_TO_PHYS(nkpg); 1392 pmap_load_store(l1, paddr | L1_TABLE); 1393 PTE_SYNC(l1); 1394 continue; /* try again */ 1395 } 1396 l2 = pmap_l1_to_l2(l1, kernel_vm_end); 1397 if ((pmap_load(l2) & ATTR_AF) != 0) { 1398 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1399 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1400 kernel_vm_end = kernel_map->max_offset; 1401 break; 1402 } 1403 continue; 1404 } 1405 1406 nkpg = vm_page_alloc(NULL, kernel_vm_end >> L2_SHIFT, 1407 VM_ALLOC_INTERRUPT | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 1408 VM_ALLOC_ZERO); 1409 if (nkpg == NULL) 1410 panic("pmap_growkernel: no memory to grow kernel"); 1411 if ((nkpg->flags & PG_ZERO) == 0) 1412 pmap_zero_page(nkpg); 1413 paddr = VM_PAGE_TO_PHYS(nkpg); 1414 pmap_load_store(l2, paddr | L2_TABLE); 1415 PTE_SYNC(l2); 1416 pmap_invalidate_page(kernel_pmap, kernel_vm_end); 1417 1418 kernel_vm_end = (kernel_vm_end + L2_SIZE) & ~L2_OFFSET; 1419 if (kernel_vm_end - 1 >= kernel_map->max_offset) { 1420 kernel_vm_end = kernel_map->max_offset; 1421 break; 1422 } 1423 } 1424 } 1425 1426 1427 /*************************************************** 1428 * page management routines. 
1429 ***************************************************/ 1430 1431 CTASSERT(sizeof(struct pv_chunk) == PAGE_SIZE); 1432 CTASSERT(_NPCM == 3); 1433 CTASSERT(_NPCPV == 168); 1434 1435 static __inline struct pv_chunk * 1436 pv_to_chunk(pv_entry_t pv) 1437 { 1438 1439 return ((struct pv_chunk *)((uintptr_t)pv & ~(uintptr_t)PAGE_MASK)); 1440 } 1441 1442 #define PV_PMAP(pv) (pv_to_chunk(pv)->pc_pmap) 1443 1444 #define PC_FREE0 0xfffffffffffffffful 1445 #define PC_FREE1 0xfffffffffffffffful 1446 #define PC_FREE2 0x000000fffffffffful 1447 1448 static const uint64_t pc_freemask[_NPCM] = { PC_FREE0, PC_FREE1, PC_FREE2 }; 1449 1450 #if 0 1451 #ifdef PV_STATS 1452 static int pc_chunk_count, pc_chunk_allocs, pc_chunk_frees, pc_chunk_tryfail; 1453 1454 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_count, CTLFLAG_RD, &pc_chunk_count, 0, 1455 "Current number of pv entry chunks"); 1456 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_allocs, CTLFLAG_RD, &pc_chunk_allocs, 0, 1457 "Current number of pv entry chunks allocated"); 1458 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_frees, CTLFLAG_RD, &pc_chunk_frees, 0, 1459 "Current number of pv entry chunks frees"); 1460 SYSCTL_INT(_vm_pmap, OID_AUTO, pc_chunk_tryfail, CTLFLAG_RD, &pc_chunk_tryfail, 0, 1461 "Number of times tried to get a chunk page but failed."); 1462 1463 static long pv_entry_frees, pv_entry_allocs, pv_entry_count; 1464 static int pv_entry_spare; 1465 1466 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_frees, CTLFLAG_RD, &pv_entry_frees, 0, 1467 "Current number of pv entry frees"); 1468 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_allocs, CTLFLAG_RD, &pv_entry_allocs, 0, 1469 "Current number of pv entry allocs"); 1470 SYSCTL_LONG(_vm_pmap, OID_AUTO, pv_entry_count, CTLFLAG_RD, &pv_entry_count, 0, 1471 "Current number of pv entries"); 1472 SYSCTL_INT(_vm_pmap, OID_AUTO, pv_entry_spare, CTLFLAG_RD, &pv_entry_spare, 0, 1473 "Current number of spare pv entries"); 1474 #endif 1475 #endif /* 0 */ 1476 1477 /* 1478 * We are in a serious low memory condition. Resort to 1479 * drastic measures to free some pages so we can allocate 1480 * another pv entry chunk. 1481 * 1482 * Returns NULL if PV entries were reclaimed from the specified pmap. 1483 * 1484 * We do not, however, unmap 2mpages because subsequent accesses will 1485 * allocate per-page pv entries until repromotion occurs, thereby 1486 * exacerbating the shortage of free pv entries. 1487 */ 1488 static vm_page_t 1489 reclaim_pv_chunk(pmap_t locked_pmap, struct rwlock **lockp) 1490 { 1491 1492 panic("ARM64TODO: reclaim_pv_chunk"); 1493 } 1494 1495 /* 1496 * free the pv_entry back to the free list 1497 */ 1498 static void 1499 free_pv_entry(pmap_t pmap, pv_entry_t pv) 1500 { 1501 struct pv_chunk *pc; 1502 int idx, field, bit; 1503 1504 rw_assert(&pvh_global_lock, RA_LOCKED); 1505 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1506 PV_STAT(atomic_add_long(&pv_entry_frees, 1)); 1507 PV_STAT(atomic_add_int(&pv_entry_spare, 1)); 1508 PV_STAT(atomic_subtract_long(&pv_entry_count, 1)); 1509 pc = pv_to_chunk(pv); 1510 idx = pv - &pc->pc_pventry[0]; 1511 field = idx / 64; 1512 bit = idx % 64; 1513 pc->pc_map[field] |= 1ul << bit; 1514 if (pc->pc_map[0] != PC_FREE0 || pc->pc_map[1] != PC_FREE1 || 1515 pc->pc_map[2] != PC_FREE2) { 1516 /* 98% of the time, pc is already at the head of the list. 
*/ 1517 if (__predict_false(pc != TAILQ_FIRST(&pmap->pm_pvchunk))) { 1518 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1519 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1520 } 1521 return; 1522 } 1523 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1524 free_pv_chunk(pc); 1525 } 1526 1527 static void 1528 free_pv_chunk(struct pv_chunk *pc) 1529 { 1530 vm_page_t m; 1531 1532 mtx_lock(&pv_chunks_mutex); 1533 TAILQ_REMOVE(&pv_chunks, pc, pc_lru); 1534 mtx_unlock(&pv_chunks_mutex); 1535 PV_STAT(atomic_subtract_int(&pv_entry_spare, _NPCPV)); 1536 PV_STAT(atomic_subtract_int(&pc_chunk_count, 1)); 1537 PV_STAT(atomic_add_int(&pc_chunk_frees, 1)); 1538 /* entire chunk is free, return it */ 1539 m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)pc)); 1540 dump_drop_page(m->phys_addr); 1541 vm_page_unwire(m, PQ_NONE); 1542 vm_page_free(m); 1543 } 1544 1545 /* 1546 * Returns a new PV entry, allocating a new PV chunk from the system when 1547 * needed. If this PV chunk allocation fails and a PV list lock pointer was 1548 * given, a PV chunk is reclaimed from an arbitrary pmap. Otherwise, NULL is 1549 * returned. 1550 * 1551 * The given PV list lock may be released. 1552 */ 1553 static pv_entry_t 1554 get_pv_entry(pmap_t pmap, struct rwlock **lockp) 1555 { 1556 int bit, field; 1557 pv_entry_t pv; 1558 struct pv_chunk *pc; 1559 vm_page_t m; 1560 1561 rw_assert(&pvh_global_lock, RA_LOCKED); 1562 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1563 PV_STAT(atomic_add_long(&pv_entry_allocs, 1)); 1564 retry: 1565 pc = TAILQ_FIRST(&pmap->pm_pvchunk); 1566 if (pc != NULL) { 1567 for (field = 0; field < _NPCM; field++) { 1568 if (pc->pc_map[field]) { 1569 bit = ffsl(pc->pc_map[field]) - 1; 1570 break; 1571 } 1572 } 1573 if (field < _NPCM) { 1574 pv = &pc->pc_pventry[field * 64 + bit]; 1575 pc->pc_map[field] &= ~(1ul << bit); 1576 /* If this was the last item, move it to tail */ 1577 if (pc->pc_map[0] == 0 && pc->pc_map[1] == 0 && 1578 pc->pc_map[2] == 0) { 1579 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 1580 TAILQ_INSERT_TAIL(&pmap->pm_pvchunk, pc, 1581 pc_list); 1582 } 1583 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1584 PV_STAT(atomic_subtract_int(&pv_entry_spare, 1)); 1585 return (pv); 1586 } 1587 } 1588 /* No free items, allocate another chunk */ 1589 m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | 1590 VM_ALLOC_WIRED); 1591 if (m == NULL) { 1592 if (lockp == NULL) { 1593 PV_STAT(pc_chunk_tryfail++); 1594 return (NULL); 1595 } 1596 m = reclaim_pv_chunk(pmap, lockp); 1597 if (m == NULL) 1598 goto retry; 1599 } 1600 PV_STAT(atomic_add_int(&pc_chunk_count, 1)); 1601 PV_STAT(atomic_add_int(&pc_chunk_allocs, 1)); 1602 dump_add_page(m->phys_addr); 1603 pc = (void *)PHYS_TO_DMAP(m->phys_addr); 1604 pc->pc_pmap = pmap; 1605 pc->pc_map[0] = PC_FREE0 & ~1ul; /* preallocated bit 0 */ 1606 pc->pc_map[1] = PC_FREE1; 1607 pc->pc_map[2] = PC_FREE2; 1608 mtx_lock(&pv_chunks_mutex); 1609 TAILQ_INSERT_TAIL(&pv_chunks, pc, pc_lru); 1610 mtx_unlock(&pv_chunks_mutex); 1611 pv = &pc->pc_pventry[0]; 1612 TAILQ_INSERT_HEAD(&pmap->pm_pvchunk, pc, pc_list); 1613 PV_STAT(atomic_add_long(&pv_entry_count, 1)); 1614 PV_STAT(atomic_add_int(&pv_entry_spare, _NPCPV - 1)); 1615 return (pv); 1616 } 1617 1618 /* 1619 * First find and then remove the pv entry for the specified pmap and virtual 1620 * address from the specified pv list. Returns the pv entry if found and NULL 1621 * otherwise. This operation can be performed on pv lists for either 4KB or 1622 * 2MB page mappings. 
1623 */ 1624 static __inline pv_entry_t 1625 pmap_pvh_remove(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1626 { 1627 pv_entry_t pv; 1628 1629 rw_assert(&pvh_global_lock, RA_LOCKED); 1630 TAILQ_FOREACH(pv, &pvh->pv_list, pv_next) { 1631 if (pmap == PV_PMAP(pv) && va == pv->pv_va) { 1632 TAILQ_REMOVE(&pvh->pv_list, pv, pv_next); 1633 pvh->pv_gen++; 1634 break; 1635 } 1636 } 1637 return (pv); 1638 } 1639 1640 /* 1641 * First find and then destroy the pv entry for the specified pmap and virtual 1642 * address. This operation can be performed on pv lists for either 4KB or 2MB 1643 * page mappings. 1644 */ 1645 static void 1646 pmap_pvh_free(struct md_page *pvh, pmap_t pmap, vm_offset_t va) 1647 { 1648 pv_entry_t pv; 1649 1650 pv = pmap_pvh_remove(pvh, pmap, va); 1651 KASSERT(pv != NULL, ("pmap_pvh_free: pv not found")); 1652 free_pv_entry(pmap, pv); 1653 } 1654 1655 /* 1656 * Conditionally create the PV entry for a 4KB page mapping if the required 1657 * memory can be allocated without resorting to reclamation. 1658 */ 1659 static boolean_t 1660 pmap_try_insert_pv_entry(pmap_t pmap, vm_offset_t va, vm_page_t m, 1661 struct rwlock **lockp) 1662 { 1663 pv_entry_t pv; 1664 1665 rw_assert(&pvh_global_lock, RA_LOCKED); 1666 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1667 /* Pass NULL instead of the lock pointer to disable reclamation. */ 1668 if ((pv = get_pv_entry(pmap, NULL)) != NULL) { 1669 pv->pv_va = va; 1670 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1671 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 1672 m->md.pv_gen++; 1673 return (TRUE); 1674 } else 1675 return (FALSE); 1676 } 1677 1678 /* 1679 * pmap_remove_l3: do the things to unmap a page in a process 1680 */ 1681 static int 1682 pmap_remove_l3(pmap_t pmap, pt_entry_t *l3, vm_offset_t va, 1683 pd_entry_t l2e, struct spglist *free, struct rwlock **lockp) 1684 { 1685 pt_entry_t old_l3; 1686 vm_page_t m; 1687 1688 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 1689 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(pmap_load(l3))) 1690 cpu_dcache_wb_range(va, L3_SIZE); 1691 old_l3 = pmap_load_clear(l3); 1692 PTE_SYNC(l3); 1693 pmap_invalidate_page(pmap, va); 1694 if (old_l3 & ATTR_SW_WIRED) 1695 pmap->pm_stats.wired_count -= 1; 1696 pmap_resident_count_dec(pmap, 1); 1697 if (old_l3 & ATTR_SW_MANAGED) { 1698 m = PHYS_TO_VM_PAGE(old_l3 & ~ATTR_MASK); 1699 if (pmap_page_dirty(old_l3)) 1700 vm_page_dirty(m); 1701 if (old_l3 & ATTR_AF) 1702 vm_page_aflag_set(m, PGA_REFERENCED); 1703 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(lockp, m); 1704 pmap_pvh_free(&m->md, pmap, va); 1705 } 1706 return (pmap_unuse_l3(pmap, va, l2e, free)); 1707 } 1708 1709 /* 1710 * Remove the given range of addresses from the specified map. 1711 * 1712 * It is assumed that the start and end are properly 1713 * rounded to the page size. 1714 */ 1715 void 1716 pmap_remove(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 1717 { 1718 struct rwlock *lock; 1719 vm_offset_t va, va_next; 1720 pd_entry_t *l1, *l2; 1721 pt_entry_t l3_paddr, *l3; 1722 struct spglist free; 1723 int anyvalid; 1724 1725 /* 1726 * Perform an unsynchronized read. This is, however, safe. 
1727 */ 1728 if (pmap->pm_stats.resident_count == 0) 1729 return; 1730 1731 anyvalid = 0; 1732 SLIST_INIT(&free); 1733 1734 rw_rlock(&pvh_global_lock); 1735 PMAP_LOCK(pmap); 1736 1737 lock = NULL; 1738 for (; sva < eva; sva = va_next) { 1739 1740 if (pmap->pm_stats.resident_count == 0) 1741 break; 1742 1743 l1 = pmap_l1(pmap, sva); 1744 if (pmap_load(l1) == 0) { 1745 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1746 if (va_next < sva) 1747 va_next = eva; 1748 continue; 1749 } 1750 1751 /* 1752 * Calculate index for next page table. 1753 */ 1754 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1755 if (va_next < sva) 1756 va_next = eva; 1757 1758 l2 = pmap_l1_to_l2(l1, sva); 1759 if (l2 == NULL) 1760 continue; 1761 1762 l3_paddr = pmap_load(l2); 1763 1764 /* 1765 * Weed out invalid mappings. 1766 */ 1767 if ((l3_paddr & ATTR_DESCR_MASK) != L2_TABLE) 1768 continue; 1769 1770 /* 1771 * Limit our scan to either the end of the va represented 1772 * by the current page table page, or to the end of the 1773 * range being removed. 1774 */ 1775 if (va_next > eva) 1776 va_next = eva; 1777 1778 va = va_next; 1779 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 1780 sva += L3_SIZE) { 1781 if (l3 == NULL) 1782 panic("l3 == NULL"); 1783 if (pmap_load(l3) == 0) { 1784 if (va != va_next) { 1785 pmap_invalidate_range(pmap, va, sva); 1786 va = va_next; 1787 } 1788 continue; 1789 } 1790 if (va == va_next) 1791 va = sva; 1792 if (pmap_remove_l3(pmap, l3, sva, l3_paddr, &free, 1793 &lock)) { 1794 sva += L3_SIZE; 1795 break; 1796 } 1797 } 1798 if (va != va_next) 1799 pmap_invalidate_range(pmap, va, sva); 1800 } 1801 if (lock != NULL) 1802 rw_wunlock(lock); 1803 if (anyvalid) 1804 pmap_invalidate_all(pmap); 1805 rw_runlock(&pvh_global_lock); 1806 PMAP_UNLOCK(pmap); 1807 pmap_free_zero_pages(&free); 1808 } 1809 1810 /* 1811 * Routine: pmap_remove_all 1812 * Function: 1813 * Removes this physical page from 1814 * all physical maps in which it resides. 1815 * Reflects back modify bits to the pager. 1816 * 1817 * Notes: 1818 * Original versions of this routine were very 1819 * inefficient because they iteratively called 1820 * pmap_remove (slow...) 1821 */ 1822 1823 void 1824 pmap_remove_all(vm_page_t m) 1825 { 1826 pv_entry_t pv; 1827 pmap_t pmap; 1828 pt_entry_t *l3, tl3; 1829 pd_entry_t *l2, tl2; 1830 struct spglist free; 1831 1832 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 1833 ("pmap_remove_all: page %p is not managed", m)); 1834 SLIST_INIT(&free); 1835 rw_wlock(&pvh_global_lock); 1836 while ((pv = TAILQ_FIRST(&m->md.pv_list)) != NULL) { 1837 pmap = PV_PMAP(pv); 1838 PMAP_LOCK(pmap); 1839 pmap_resident_count_dec(pmap, 1); 1840 l2 = pmap_l2(pmap, pv->pv_va); 1841 KASSERT(l2 != NULL, ("pmap_remove_all: no l2 table found")); 1842 tl2 = pmap_load(l2); 1843 KASSERT((tl2 & ATTR_DESCR_MASK) == L2_TABLE, 1844 ("pmap_remove_all: found a table when expecting " 1845 "a block in %p's pv list", m)); 1846 l3 = pmap_l2_to_l3(l2, pv->pv_va); 1847 if (pmap_is_current(pmap) && 1848 pmap_l3_valid_cacheable(pmap_load(l3))) 1849 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 1850 tl3 = pmap_load_clear(l3); 1851 PTE_SYNC(l3); 1852 pmap_invalidate_page(pmap, pv->pv_va); 1853 if (tl3 & ATTR_SW_WIRED) 1854 pmap->pm_stats.wired_count--; 1855 if ((tl3 & ATTR_AF) != 0) 1856 vm_page_aflag_set(m, PGA_REFERENCED); 1857 1858 /* 1859 * Update the vm_page_t clean and reference bits. 
1860 */ 1861 if (pmap_page_dirty(tl3)) 1862 vm_page_dirty(m); 1863 pmap_unuse_l3(pmap, pv->pv_va, tl2, &free); 1864 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 1865 m->md.pv_gen++; 1866 free_pv_entry(pmap, pv); 1867 PMAP_UNLOCK(pmap); 1868 } 1869 vm_page_aflag_clear(m, PGA_WRITEABLE); 1870 rw_wunlock(&pvh_global_lock); 1871 pmap_free_zero_pages(&free); 1872 } 1873 1874 /* 1875 * Set the physical protection on the 1876 * specified range of this map as requested. 1877 */ 1878 void 1879 pmap_protect(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, vm_prot_t prot) 1880 { 1881 vm_offset_t va, va_next; 1882 pd_entry_t *l1, *l2; 1883 pt_entry_t *l3p, l3; 1884 1885 if ((prot & VM_PROT_READ) == VM_PROT_NONE) { 1886 pmap_remove(pmap, sva, eva); 1887 return; 1888 } 1889 1890 if ((prot & VM_PROT_WRITE) == VM_PROT_WRITE) 1891 return; 1892 1893 PMAP_LOCK(pmap); 1894 for (; sva < eva; sva = va_next) { 1895 1896 l1 = pmap_l1(pmap, sva); 1897 if (pmap_load(l1) == 0) { 1898 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 1899 if (va_next < sva) 1900 va_next = eva; 1901 continue; 1902 } 1903 1904 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 1905 if (va_next < sva) 1906 va_next = eva; 1907 1908 l2 = pmap_l1_to_l2(l1, sva); 1909 if (l2 == NULL || (pmap_load(l2) & ATTR_DESCR_MASK) != L2_TABLE) 1910 continue; 1911 1912 if (va_next > eva) 1913 va_next = eva; 1914 1915 va = va_next; 1916 for (l3p = pmap_l2_to_l3(l2, sva); sva != va_next; l3p++, 1917 sva += L3_SIZE) { 1918 l3 = pmap_load(l3p); 1919 if (pmap_l3_valid(l3)) { 1920 pmap_set(l3p, ATTR_AP(ATTR_AP_RO)); 1921 PTE_SYNC(l3p); 1922 /* XXX: Use pmap_invalidate_range */ 1923 pmap_invalidate_page(pmap, va); 1924 } 1925 } 1926 } 1927 PMAP_UNLOCK(pmap); 1928 1929 /* TODO: Only invalidate entries we are touching */ 1930 pmap_invalidate_all(pmap); 1931 } 1932 1933 /* 1934 * Insert the given physical page (p) at 1935 * the specified virtual address (v) in the 1936 * target physical map with the protection requested. 1937 * 1938 * If specified, the page will be wired down, meaning 1939 * that the related pte can not be reclaimed. 1940 * 1941 * NB: This is the only routine which MAY NOT lazy-evaluate 1942 * or lose information. That is, this routine must actually 1943 * insert this page into the given map NOW. 1944 */ 1945 int 1946 pmap_enter(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot, 1947 u_int flags, int8_t psind __unused) 1948 { 1949 struct rwlock *lock; 1950 pd_entry_t *l1, *l2; 1951 pt_entry_t new_l3, orig_l3; 1952 pt_entry_t *l3; 1953 pv_entry_t pv; 1954 vm_paddr_t opa, pa, l2_pa, l3_pa; 1955 vm_page_t mpte, om, l2_m, l3_m; 1956 boolean_t nosleep; 1957 1958 va = trunc_page(va); 1959 if ((m->oflags & VPO_UNMANAGED) == 0 && !vm_page_xbusied(m)) 1960 VM_OBJECT_ASSERT_LOCKED(m->object); 1961 pa = VM_PAGE_TO_PHYS(m); 1962 new_l3 = (pt_entry_t)(pa | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 1963 L3_PAGE); 1964 if ((prot & VM_PROT_WRITE) == 0) 1965 new_l3 |= ATTR_AP(ATTR_AP_RO); 1966 if ((flags & PMAP_ENTER_WIRED) != 0) 1967 new_l3 |= ATTR_SW_WIRED; 1968 if ((va >> 63) == 0) 1969 new_l3 |= ATTR_AP(ATTR_AP_USER); 1970 1971 CTR2(KTR_PMAP, "pmap_enter: %.16lx -> %.16lx", va, pa); 1972 1973 mpte = NULL; 1974 1975 lock = NULL; 1976 rw_rlock(&pvh_global_lock); 1977 PMAP_LOCK(pmap); 1978 1979 if (va < VM_MAXUSER_ADDRESS) { 1980 nosleep = (flags & PMAP_ENTER_NOSLEEP) != 0; 1981 mpte = pmap_alloc_l3(pmap, va, nosleep ? 
NULL : &lock); 1982 if (mpte == NULL && nosleep) { 1983 CTR0(KTR_PMAP, "pmap_enter: mpte == NULL"); 1984 if (lock != NULL) 1985 rw_wunlock(lock); 1986 rw_runlock(&pvh_global_lock); 1987 PMAP_UNLOCK(pmap); 1988 return (KERN_RESOURCE_SHORTAGE); 1989 } 1990 l3 = pmap_l3(pmap, va); 1991 } else { 1992 l3 = pmap_l3(pmap, va); 1993 /* TODO: This is not optimal, but should mostly work */ 1994 if (l3 == NULL) { 1995 l2 = pmap_l2(pmap, va); 1996 1997 if (l2 == NULL) { 1998 l2_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 1999 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | 2000 VM_ALLOC_ZERO); 2001 if (l2_m == NULL) 2002 panic("pmap_enter: l2 pte_m == NULL"); 2003 if ((l2_m->flags & PG_ZERO) == 0) 2004 pmap_zero_page(l2_m); 2005 2006 l2_pa = VM_PAGE_TO_PHYS(l2_m); 2007 l1 = pmap_l1(pmap, va); 2008 pmap_load_store(l1, l2_pa | L1_TABLE); 2009 PTE_SYNC(l1); 2010 l2 = pmap_l1_to_l2(l1, va); 2011 } 2012 2013 KASSERT(l2 != NULL, 2014 ("No l2 table after allocating one")); 2015 2016 l3_m = vm_page_alloc(NULL, 0, VM_ALLOC_NORMAL | 2017 VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_ZERO); 2018 if (l3_m == NULL) 2019 panic("pmap_enter: l3 pte_m == NULL"); 2020 if ((l3_m->flags & PG_ZERO) == 0) 2021 pmap_zero_page(l3_m); 2022 2023 l3_pa = VM_PAGE_TO_PHYS(l3_m); 2024 pmap_load_store(l2, l3_pa | L2_TABLE); 2025 PTE_SYNC(l2); 2026 l3 = pmap_l2_to_l3(l2, va); 2027 } 2028 pmap_invalidate_page(pmap, va); 2029 } 2030 2031 om = NULL; 2032 orig_l3 = pmap_load(l3); 2033 opa = orig_l3 & ~ATTR_MASK; 2034 2035 /* 2036 * Is the specified virtual address already mapped? 2037 */ 2038 if (pmap_l3_valid(orig_l3)) { 2039 /* 2040 * Wiring change, just update stats. We don't worry about 2041 * wiring PT pages as they remain resident as long as there 2042 * are valid mappings in them. Hence, if a user page is wired, 2043 * the PT page will be also. 2044 */ 2045 if ((flags & PMAP_ENTER_WIRED) != 0 && 2046 (orig_l3 & ATTR_SW_WIRED) == 0) 2047 pmap->pm_stats.wired_count++; 2048 else if ((flags & PMAP_ENTER_WIRED) == 0 && 2049 (orig_l3 & ATTR_SW_WIRED) != 0) 2050 pmap->pm_stats.wired_count--; 2051 2052 /* 2053 * Remove the extra PT page reference. 2054 */ 2055 if (mpte != NULL) { 2056 mpte->wire_count--; 2057 KASSERT(mpte->wire_count > 0, 2058 ("pmap_enter: missing reference to page table page," 2059 " va: 0x%lx", va)); 2060 } 2061 2062 /* 2063 * Has the physical page changed? 2064 */ 2065 if (opa == pa) { 2066 /* 2067 * No, might be a protection or wiring change. 2068 */ 2069 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2070 new_l3 |= ATTR_SW_MANAGED; 2071 if ((new_l3 & ATTR_AP(ATTR_AP_RW)) == 2072 ATTR_AP(ATTR_AP_RW)) { 2073 vm_page_aflag_set(m, PGA_WRITEABLE); 2074 } 2075 } 2076 goto validate; 2077 } 2078 2079 /* Flush the cache, there might be uncommitted data in it */ 2080 if (pmap_is_current(pmap) && pmap_l3_valid_cacheable(orig_l3)) 2081 cpu_dcache_wb_range(va, L3_SIZE); 2082 } else { 2083 /* 2084 * Increment the counters. 2085 */ 2086 if ((new_l3 & ATTR_SW_WIRED) != 0) 2087 pmap->pm_stats.wired_count++; 2088 pmap_resident_count_inc(pmap, 1); 2089 } 2090 /* 2091 * Enter on the PV list if part of our managed memory. 2092 */ 2093 if ((m->oflags & VPO_UNMANAGED) == 0) { 2094 new_l3 |= ATTR_SW_MANAGED; 2095 pv = get_pv_entry(pmap, &lock); 2096 pv->pv_va = va; 2097 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, pa); 2098 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2099 m->md.pv_gen++; 2100 if ((new_l3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) 2101 vm_page_aflag_set(m, PGA_WRITEABLE); 2102 } 2103 2104 /* 2105 * Update the L3 entry. 
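* If a valid entry is already present it is replaced below; when the physical page has changed, the old mapping's referenced and dirty state is reflected back to its vm_page and its PV entry is freed.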
2106 */ 2107 if (orig_l3 != 0) { 2108 validate: 2109 orig_l3 = pmap_load_store(l3, new_l3); 2110 PTE_SYNC(l3); 2111 opa = orig_l3 & ~ATTR_MASK; 2112 2113 if (opa != pa) { 2114 if ((orig_l3 & ATTR_SW_MANAGED) != 0) { 2115 om = PHYS_TO_VM_PAGE(opa); 2116 if (pmap_page_dirty(orig_l3)) 2117 vm_page_dirty(om); 2118 if ((orig_l3 & ATTR_AF) != 0) 2119 vm_page_aflag_set(om, PGA_REFERENCED); 2120 CHANGE_PV_LIST_LOCK_TO_PHYS(&lock, opa); 2121 pmap_pvh_free(&om->md, pmap, va); 2122 } 2123 } else if (pmap_page_dirty(orig_l3)) { 2124 if ((orig_l3 & ATTR_SW_MANAGED) != 0) 2125 vm_page_dirty(m); 2126 } 2127 } else { 2128 pmap_load_store(l3, new_l3); 2129 PTE_SYNC(l3); 2130 } 2131 pmap_invalidate_page(pmap, va); 2132 if ((pmap != pmap_kernel()) && (pmap == &curproc->p_vmspace->vm_pmap)) 2133 cpu_icache_sync_range(va, PAGE_SIZE); 2134 2135 if (lock != NULL) 2136 rw_wunlock(lock); 2137 rw_runlock(&pvh_global_lock); 2138 PMAP_UNLOCK(pmap); 2139 return (KERN_SUCCESS); 2140 } 2141 2142 /* 2143 * Maps a sequence of resident pages belonging to the same object. 2144 * The sequence begins with the given page m_start. This page is 2145 * mapped at the given virtual address start. Each subsequent page is 2146 * mapped at a virtual address that is offset from start by the same 2147 * amount as the page is offset from m_start within the object. The 2148 * last page in the sequence is the page with the largest offset from 2149 * m_start that can be mapped at a virtual address less than the given 2150 * virtual address end. Not every virtual page between start and end 2151 * is mapped; only those for which a resident page exists with the 2152 * corresponding offset from m_start are mapped. 2153 */ 2154 void 2155 pmap_enter_object(pmap_t pmap, vm_offset_t start, vm_offset_t end, 2156 vm_page_t m_start, vm_prot_t prot) 2157 { 2158 struct rwlock *lock; 2159 vm_offset_t va; 2160 vm_page_t m, mpte; 2161 vm_pindex_t diff, psize; 2162 2163 VM_OBJECT_ASSERT_LOCKED(m_start->object); 2164 2165 psize = atop(end - start); 2166 mpte = NULL; 2167 m = m_start; 2168 lock = NULL; 2169 rw_rlock(&pvh_global_lock); 2170 PMAP_LOCK(pmap); 2171 while (m != NULL && (diff = m->pindex - m_start->pindex) < psize) { 2172 va = start + ptoa(diff); 2173 mpte = pmap_enter_quick_locked(pmap, va, m, prot, mpte, &lock); 2174 m = TAILQ_NEXT(m, listq); 2175 } 2176 if (lock != NULL) 2177 rw_wunlock(lock); 2178 rw_runlock(&pvh_global_lock); 2179 PMAP_UNLOCK(pmap); 2180 } 2181 2182 /* 2183 * this code makes some *MAJOR* assumptions: 2184 * 1. Current pmap & pmap exists. 2185 * 2. Not wired. 2186 * 3. Read access. 2187 * 4. No page table pages. 2188 * but is *MUCH* faster than pmap_enter... 
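* Failure to enter a mapping here is tolerated; callers such as pmap_enter_object() simply skip the page when this path cannot map it.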
2189 */ 2190 2191 void 2192 pmap_enter_quick(pmap_t pmap, vm_offset_t va, vm_page_t m, vm_prot_t prot) 2193 { 2194 struct rwlock *lock; 2195 2196 lock = NULL; 2197 rw_rlock(&pvh_global_lock); 2198 PMAP_LOCK(pmap); 2199 (void)pmap_enter_quick_locked(pmap, va, m, prot, NULL, &lock); 2200 if (lock != NULL) 2201 rw_wunlock(lock); 2202 rw_runlock(&pvh_global_lock); 2203 PMAP_UNLOCK(pmap); 2204 } 2205 2206 static vm_page_t 2207 pmap_enter_quick_locked(pmap_t pmap, vm_offset_t va, vm_page_t m, 2208 vm_prot_t prot, vm_page_t mpte, struct rwlock **lockp) 2209 { 2210 struct spglist free; 2211 pd_entry_t *l2; 2212 pt_entry_t *l3; 2213 vm_paddr_t pa; 2214 2215 KASSERT(va < kmi.clean_sva || va >= kmi.clean_eva || 2216 (m->oflags & VPO_UNMANAGED) != 0, 2217 ("pmap_enter_quick_locked: managed mapping within the clean submap")); 2218 rw_assert(&pvh_global_lock, RA_LOCKED); 2219 PMAP_LOCK_ASSERT(pmap, MA_OWNED); 2220 2221 CTR2(KTR_PMAP, "pmap_enter_quick_locked: %p %lx", pmap, va); 2222 /* 2223 * In the case that a page table page is not 2224 * resident, we are creating it here. 2225 */ 2226 if (va < VM_MAXUSER_ADDRESS) { 2227 vm_pindex_t l2pindex; 2228 2229 /* 2230 * Calculate pagetable page index 2231 */ 2232 l2pindex = pmap_l2_pindex(va); 2233 if (mpte && (mpte->pindex == l2pindex)) { 2234 mpte->wire_count++; 2235 } else { 2236 /* 2237 * Get the l2 entry 2238 */ 2239 l2 = pmap_l2(pmap, va); 2240 2241 /* 2242 * If the page table page is mapped, we just increment 2243 * the hold count, and activate it. Otherwise, we 2244 * attempt to allocate a page table page. If this 2245 * attempt fails, we don't retry. Instead, we give up. 2246 */ 2247 if (l2 != NULL && pmap_load(l2) != 0) { 2248 mpte = 2249 PHYS_TO_VM_PAGE(pmap_load(l2) & ~ATTR_MASK); 2250 mpte->wire_count++; 2251 } else { 2252 /* 2253 * Pass NULL instead of the PV list lock 2254 * pointer, because we don't intend to sleep. 2255 */ 2256 mpte = _pmap_alloc_l3(pmap, l2pindex, NULL); 2257 if (mpte == NULL) 2258 return (mpte); 2259 } 2260 } 2261 l3 = (pt_entry_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mpte)); 2262 l3 = &l3[pmap_l3_index(va)]; 2263 } else { 2264 mpte = NULL; 2265 l3 = pmap_l3(kernel_pmap, va); 2266 } 2267 if (l3 == NULL) 2268 panic("pmap_enter_quick_locked: No l3"); 2269 if (pmap_load(l3) != 0) { 2270 if (mpte != NULL) { 2271 mpte->wire_count--; 2272 mpte = NULL; 2273 } 2274 return (mpte); 2275 } 2276 2277 /* 2278 * Enter on the PV list if part of our managed memory. 2279 */ 2280 if ((m->oflags & VPO_UNMANAGED) == 0 && 2281 !pmap_try_insert_pv_entry(pmap, va, m, lockp)) { 2282 if (mpte != NULL) { 2283 SLIST_INIT(&free); 2284 if (pmap_unwire_l3(pmap, va, mpte, &free)) { 2285 pmap_invalidate_page(pmap, va); 2286 pmap_free_zero_pages(&free); 2287 } 2288 mpte = NULL; 2289 } 2290 return (mpte); 2291 } 2292 2293 /* 2294 * Increment counters 2295 */ 2296 pmap_resident_count_inc(pmap, 1); 2297 2298 pa = VM_PAGE_TO_PHYS(m) | ATTR_DEFAULT | ATTR_IDX(m->md.pv_memattr) | 2299 ATTR_AP(ATTR_AP_RW) | L3_PAGE; 2300 2301 /* 2302 * Now validate mapping with RO protection 2303 */ 2304 if ((m->oflags & VPO_UNMANAGED) == 0) 2305 pa |= ATTR_SW_MANAGED; 2306 pmap_load_store(l3, pa); 2307 PTE_SYNC(l3); 2308 pmap_invalidate_page(pmap, va); 2309 return (mpte); 2310 } 2311 2312 /* 2313 * This code maps large physical mmap regions into the 2314 * processor address space. Note that some shortcuts 2315 * are taken, but the code works. 
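* On arm64 this is currently a no-op beyond asserting that the object is a device or scatter/gather pager object.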
2316 */ 2317 void 2318 pmap_object_init_pt(pmap_t pmap, vm_offset_t addr, vm_object_t object, 2319 vm_pindex_t pindex, vm_size_t size) 2320 { 2321 2322 VM_OBJECT_ASSERT_WLOCKED(object); 2323 KASSERT(object->type == OBJT_DEVICE || object->type == OBJT_SG, 2324 ("pmap_object_init_pt: non-device object")); 2325 } 2326 2327 /* 2328 * Clear the wired attribute from the mappings for the specified range of 2329 * addresses in the given pmap. Every valid mapping within that range 2330 * must have the wired attribute set. In contrast, invalid mappings 2331 * cannot have the wired attribute set, so they are ignored. 2332 * 2333 * The wired attribute of the page table entry is not a hardware feature, 2334 * so there is no need to invalidate any TLB entries. 2335 */ 2336 void 2337 pmap_unwire(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) 2338 { 2339 vm_offset_t va_next; 2340 pd_entry_t *l1, *l2; 2341 pt_entry_t *l3; 2342 boolean_t pv_lists_locked; 2343 2344 pv_lists_locked = FALSE; 2345 PMAP_LOCK(pmap); 2346 for (; sva < eva; sva = va_next) { 2347 l1 = pmap_l1(pmap, sva); 2348 if (pmap_load(l1) == 0) { 2349 va_next = (sva + L1_SIZE) & ~L1_OFFSET; 2350 if (va_next < sva) 2351 va_next = eva; 2352 continue; 2353 } 2354 2355 va_next = (sva + L2_SIZE) & ~L2_OFFSET; 2356 if (va_next < sva) 2357 va_next = eva; 2358 2359 l2 = pmap_l1_to_l2(l1, sva); 2360 if (pmap_load(l2) == 0) 2361 continue; 2362 2363 if (va_next > eva) 2364 va_next = eva; 2365 for (l3 = pmap_l2_to_l3(l2, sva); sva != va_next; l3++, 2366 sva += L3_SIZE) { 2367 if (pmap_load(l3) == 0) 2368 continue; 2369 if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) 2370 panic("pmap_unwire: l3 %#jx is missing " 2371 "ATTR_SW_WIRED", (uintmax_t)pmap_load(l3)); 2372 2373 /* 2374 * ATTR_SW_WIRED must be cleared atomically. Although the pmap 2375 * lock synchronizes access to it, the access and dirty state 2376 * of the entry could be updated concurrently. 2377 */ 2378 atomic_clear_long(l3, ATTR_SW_WIRED); 2379 pmap->pm_stats.wired_count--; 2380 } 2381 } 2382 if (pv_lists_locked) 2383 rw_runlock(&pvh_global_lock); 2384 PMAP_UNLOCK(pmap); 2385 } 2386 2387 /* 2388 * Copy the range specified by src_addr/len 2389 * from the source map to the range dst_addr/len 2390 * in the destination map. 2391 * 2392 * This routine is only advisory and need not do anything. 2393 */ 2394 2395 void 2396 pmap_copy(pmap_t dst_pmap, pmap_t src_pmap, vm_offset_t dst_addr, vm_size_t len, 2397 vm_offset_t src_addr) 2398 { 2399 } 2400 2401 /* 2402 * pmap_zero_page zeros the specified hardware page through the 2403 * direct map, using pagezero to clear its contents. 2404 */ 2405 void 2406 pmap_zero_page(vm_page_t m) 2407 { 2408 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2409 2410 pagezero((void *)va); 2411 } 2412 2413 /* 2414 * pmap_zero_page_area zeros the requested part of the specified 2415 * hardware page through the direct map. 2416 * 2417 * off and size may not cover an area beyond a single hardware page. 2418 */ 2419 void 2420 pmap_zero_page_area(vm_page_t m, int off, int size) 2421 { 2422 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2423 2424 if (off == 0 && size == PAGE_SIZE) 2425 pagezero((void *)va); 2426 else 2427 bzero((char *)va + off, size); 2428 } 2429 2430 /* 2431 * pmap_zero_page_idle zeros the specified hardware page through the 2432 * direct map. This 2433 * is intended to be called from the vm_pagezero process only and 2434 * outside of Giant.
2435 */ 2436 void 2437 pmap_zero_page_idle(vm_page_t m) 2438 { 2439 vm_offset_t va = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); 2440 2441 pagezero((void *)va); 2442 } 2443 2444 /* 2445 * pmap_copy_page copies the specified (machine independent) 2446 * page by mapping the page into virtual memory and using 2447 * bcopy to copy the page, one machine dependent page at a 2448 * time. 2449 */ 2450 void 2451 pmap_copy_page(vm_page_t msrc, vm_page_t mdst) 2452 { 2453 vm_offset_t src = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(msrc)); 2454 vm_offset_t dst = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(mdst)); 2455 2456 pagecopy((void *)src, (void *)dst); 2457 } 2458 2459 int unmapped_buf_allowed = 1; 2460 2461 void 2462 pmap_copy_pages(vm_page_t ma[], vm_offset_t a_offset, vm_page_t mb[], 2463 vm_offset_t b_offset, int xfersize) 2464 { 2465 void *a_cp, *b_cp; 2466 vm_page_t m_a, m_b; 2467 vm_paddr_t p_a, p_b; 2468 vm_offset_t a_pg_offset, b_pg_offset; 2469 int cnt; 2470 2471 while (xfersize > 0) { 2472 a_pg_offset = a_offset & PAGE_MASK; 2473 m_a = ma[a_offset >> PAGE_SHIFT]; 2474 p_a = m_a->phys_addr; 2475 b_pg_offset = b_offset & PAGE_MASK; 2476 m_b = mb[b_offset >> PAGE_SHIFT]; 2477 p_b = m_b->phys_addr; 2478 cnt = min(xfersize, PAGE_SIZE - a_pg_offset); 2479 cnt = min(cnt, PAGE_SIZE - b_pg_offset); 2480 if (__predict_false(!PHYS_IN_DMAP(p_a))) { 2481 panic("!DMAP a %lx", p_a); 2482 } else { 2483 a_cp = (char *)PHYS_TO_DMAP(p_a) + a_pg_offset; 2484 } 2485 if (__predict_false(!PHYS_IN_DMAP(p_b))) { 2486 panic("!DMAP b %lx", p_b); 2487 } else { 2488 b_cp = (char *)PHYS_TO_DMAP(p_b) + b_pg_offset; 2489 } 2490 bcopy(a_cp, b_cp, cnt); 2491 a_offset += cnt; 2492 b_offset += cnt; 2493 xfersize -= cnt; 2494 } 2495 } 2496 2497 vm_offset_t 2498 pmap_quick_enter_page(vm_page_t m) 2499 { 2500 2501 return (PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m))); 2502 } 2503 2504 void 2505 pmap_quick_remove_page(vm_offset_t addr) 2506 { 2507 } 2508 2509 /* 2510 * Returns true if the pmap's pv is one of the first 2511 * 16 pvs linked to from this page. This count may 2512 * be changed upwards or downwards in the future; it 2513 * is only necessary that true be returned for a small 2514 * subset of pmaps for proper page aging. 2515 */ 2516 boolean_t 2517 pmap_page_exists_quick(pmap_t pmap, vm_page_t m) 2518 { 2519 struct rwlock *lock; 2520 pv_entry_t pv; 2521 int loops = 0; 2522 boolean_t rv; 2523 2524 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2525 ("pmap_page_exists_quick: page %p is not managed", m)); 2526 rv = FALSE; 2527 rw_rlock(&pvh_global_lock); 2528 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2529 rw_rlock(lock); 2530 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2531 if (PV_PMAP(pv) == pmap) { 2532 rv = TRUE; 2533 break; 2534 } 2535 loops++; 2536 if (loops >= 16) 2537 break; 2538 } 2539 rw_runlock(lock); 2540 rw_runlock(&pvh_global_lock); 2541 return (rv); 2542 } 2543 2544 /* 2545 * pmap_page_wired_mappings: 2546 * 2547 * Return the number of managed mappings to the given physical page 2548 * that are wired. 
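* Each pmap that maps the page is locked while its l3 entry is examined, and the scan restarts if the page's pv list changes while a pmap lock is being acquired.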
2549 */ 2550 int 2551 pmap_page_wired_mappings(vm_page_t m) 2552 { 2553 struct rwlock *lock; 2554 pmap_t pmap; 2555 pt_entry_t *l3; 2556 pv_entry_t pv; 2557 int count, md_gen; 2558 2559 if ((m->oflags & VPO_UNMANAGED) != 0) 2560 return (0); 2561 rw_rlock(&pvh_global_lock); 2562 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2563 rw_rlock(lock); 2564 restart: 2565 count = 0; 2566 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2567 pmap = PV_PMAP(pv); 2568 if (!PMAP_TRYLOCK(pmap)) { 2569 md_gen = m->md.pv_gen; 2570 rw_runlock(lock); 2571 PMAP_LOCK(pmap); 2572 rw_rlock(lock); 2573 if (md_gen != m->md.pv_gen) { 2574 PMAP_UNLOCK(pmap); 2575 goto restart; 2576 } 2577 } 2578 l3 = pmap_l3(pmap, pv->pv_va); 2579 if (l3 != NULL && (pmap_load(l3) & ATTR_SW_WIRED) != 0) 2580 count++; 2581 PMAP_UNLOCK(pmap); 2582 } 2583 rw_runlock(lock); 2584 rw_runlock(&pvh_global_lock); 2585 return (count); 2586 } 2587 2588 /* 2589 * Destroy all managed, non-wired mappings in the given user-space 2590 * pmap. This pmap cannot be active on any processor besides the 2591 * caller. 2592 * 2593 * This function cannot be applied to the kernel pmap. Moreover, it 2594 * is not intended for general use. It is only to be used during 2595 * process termination. Consequently, it can be implemented in ways 2596 * that make it faster than pmap_remove(). First, it can more quickly 2597 * destroy mappings by iterating over the pmap's collection of PV 2598 * entries, rather than searching the page table. Second, it doesn't 2599 * have to test and clear the page table entries atomically, because 2600 * no processor is currently accessing the user address space. In 2601 * particular, a page table entry's dirty bit won't change state once 2602 * this function starts. 2603 */ 2604 void 2605 pmap_remove_pages(pmap_t pmap) 2606 { 2607 pd_entry_t ptepde, *l2; 2608 pt_entry_t *l3, tl3; 2609 struct spglist free; 2610 vm_page_t m; 2611 pv_entry_t pv; 2612 struct pv_chunk *pc, *npc; 2613 struct rwlock *lock; 2614 int64_t bit; 2615 uint64_t inuse, bitmask; 2616 int allfree, field, freed, idx; 2617 vm_paddr_t pa; 2618 2619 lock = NULL; 2620 2621 SLIST_INIT(&free); 2622 rw_rlock(&pvh_global_lock); 2623 PMAP_LOCK(pmap); 2624 TAILQ_FOREACH_SAFE(pc, &pmap->pm_pvchunk, pc_list, npc) { 2625 allfree = 1; 2626 freed = 0; 2627 for (field = 0; field < _NPCM; field++) { 2628 inuse = ~pc->pc_map[field] & pc_freemask[field]; 2629 while (inuse != 0) { 2630 bit = ffsl(inuse) - 1; 2631 bitmask = 1UL << bit; 2632 idx = field * 64 + bit; 2633 pv = &pc->pc_pventry[idx]; 2634 inuse &= ~bitmask; 2635 2636 l2 = pmap_l2(pmap, pv->pv_va); 2637 ptepde = pmap_load(l2); 2638 l3 = pmap_l2_to_l3(l2, pv->pv_va); 2639 tl3 = pmap_load(l3); 2640 2641 /* 2642 * We cannot remove wired pages from a process' mapping at this time 2643 */ 2644 if (tl3 & ATTR_SW_WIRED) { 2645 allfree = 0; 2646 continue; 2647 } 2648 2649 pa = tl3 & ~ATTR_MASK; 2650 2651 m = PHYS_TO_VM_PAGE(pa); 2652 KASSERT(m->phys_addr == pa, 2653 ("vm_page_t %p phys_addr mismatch %016jx %016jx", 2654 m, (uintmax_t)m->phys_addr, 2655 (uintmax_t)tl3)); 2656 2657 KASSERT((m->flags & PG_FICTITIOUS) != 0 || 2658 m < &vm_page_array[vm_page_array_size], 2659 ("pmap_remove_pages: bad l3 %#jx", 2660 (uintmax_t)tl3)); 2661 2662 if (pmap_is_current(pmap) && 2663 pmap_l3_valid_cacheable(pmap_load(l3))) 2664 cpu_dcache_wb_range(pv->pv_va, L3_SIZE); 2665 pmap_load_clear(l3); 2666 PTE_SYNC(l3); 2667 pmap_invalidate_page(pmap, pv->pv_va); 2668 2669 /* 2670 * Update the vm_page_t clean/reference bits. 
2671 */ 2672 if ((tl3 & ATTR_AP_RW_BIT) == 2673 ATTR_AP(ATTR_AP_RW)) 2674 vm_page_dirty(m); 2675 2676 CHANGE_PV_LIST_LOCK_TO_VM_PAGE(&lock, m); 2677 2678 /* Mark free */ 2679 pc->pc_map[field] |= bitmask; 2680 2681 pmap_resident_count_dec(pmap, 1); 2682 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2683 m->md.pv_gen++; 2684 2685 pmap_unuse_l3(pmap, pv->pv_va, ptepde, &free); 2686 freed++; 2687 } 2688 } 2689 PV_STAT(atomic_add_long(&pv_entry_frees, freed)); 2690 PV_STAT(atomic_add_int(&pv_entry_spare, freed)); 2691 PV_STAT(atomic_subtract_long(&pv_entry_count, freed)); 2692 if (allfree) { 2693 TAILQ_REMOVE(&pmap->pm_pvchunk, pc, pc_list); 2694 free_pv_chunk(pc); 2695 } 2696 } 2697 pmap_invalidate_all(pmap); 2698 if (lock != NULL) 2699 rw_wunlock(lock); 2700 rw_runlock(&pvh_global_lock); 2701 PMAP_UNLOCK(pmap); 2702 pmap_free_zero_pages(&free); 2703 } 2704 2705 /* 2706 * This is used to check if a page has been accessed or modified. As we 2707 * don't have a bit to see if it has been modified we have to assume it 2708 * has been if the page is read/write. 2709 */ 2710 static boolean_t 2711 pmap_page_test_mappings(vm_page_t m, boolean_t accessed, boolean_t modified) 2712 { 2713 struct rwlock *lock; 2714 pv_entry_t pv; 2715 pt_entry_t *l3, mask, value; 2716 pmap_t pmap; 2717 int md_gen; 2718 boolean_t rv; 2719 2720 rv = FALSE; 2721 rw_rlock(&pvh_global_lock); 2722 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2723 rw_rlock(lock); 2724 restart: 2725 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2726 pmap = PV_PMAP(pv); 2727 if (!PMAP_TRYLOCK(pmap)) { 2728 md_gen = m->md.pv_gen; 2729 rw_runlock(lock); 2730 PMAP_LOCK(pmap); 2731 rw_rlock(lock); 2732 if (md_gen != m->md.pv_gen) { 2733 PMAP_UNLOCK(pmap); 2734 goto restart; 2735 } 2736 } 2737 l3 = pmap_l3(pmap, pv->pv_va); 2738 mask = 0; 2739 value = 0; 2740 if (modified) { 2741 mask |= ATTR_AP_RW_BIT; 2742 value |= ATTR_AP(ATTR_AP_RW); 2743 } 2744 if (accessed) { 2745 mask |= ATTR_AF | ATTR_DESCR_MASK; 2746 value |= ATTR_AF | L3_PAGE; 2747 } 2748 rv = (pmap_load(l3) & mask) == value; 2749 PMAP_UNLOCK(pmap); 2750 if (rv) 2751 goto out; 2752 } 2753 out: 2754 rw_runlock(lock); 2755 rw_runlock(&pvh_global_lock); 2756 return (rv); 2757 } 2758 2759 /* 2760 * pmap_is_modified: 2761 * 2762 * Return whether or not the specified physical page was modified 2763 * in any physical maps. 2764 */ 2765 boolean_t 2766 pmap_is_modified(vm_page_t m) 2767 { 2768 2769 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2770 ("pmap_is_modified: page %p is not managed", m)); 2771 2772 /* 2773 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2774 * concurrently set while the object is locked. Thus, if PGA_WRITEABLE 2775 * is clear, no PTEs can have PG_M set. 2776 */ 2777 VM_OBJECT_ASSERT_WLOCKED(m->object); 2778 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2779 return (FALSE); 2780 return (pmap_page_test_mappings(m, FALSE, TRUE)); 2781 } 2782 2783 /* 2784 * pmap_is_prefaultable: 2785 * 2786 * Return whether or not the specified virtual address is eligible 2787 * for prefault. 2788 */ 2789 boolean_t 2790 pmap_is_prefaultable(pmap_t pmap, vm_offset_t addr) 2791 { 2792 pt_entry_t *l3; 2793 boolean_t rv; 2794 2795 rv = FALSE; 2796 PMAP_LOCK(pmap); 2797 l3 = pmap_l3(pmap, addr); 2798 if (l3 != NULL && pmap_load(l3) != 0) { 2799 rv = TRUE; 2800 } 2801 PMAP_UNLOCK(pmap); 2802 return (rv); 2803 } 2804 2805 /* 2806 * pmap_is_referenced: 2807 * 2808 * Return whether or not the specified physical page was referenced 2809 * in any physical maps. 
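* This is determined by checking ATTR_AF in each of the page's mappings via pmap_page_test_mappings().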
2810 */ 2811 boolean_t 2812 pmap_is_referenced(vm_page_t m) 2813 { 2814 2815 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2816 ("pmap_is_referenced: page %p is not managed", m)); 2817 return (pmap_page_test_mappings(m, TRUE, FALSE)); 2818 } 2819 2820 /* 2821 * Clear the write and modified bits in each of the given page's mappings. 2822 */ 2823 void 2824 pmap_remove_write(vm_page_t m) 2825 { 2826 pmap_t pmap; 2827 struct rwlock *lock; 2828 pv_entry_t pv; 2829 pt_entry_t *l3, oldl3; 2830 int md_gen; 2831 2832 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2833 ("pmap_remove_write: page %p is not managed", m)); 2834 2835 /* 2836 * If the page is not exclusive busied, then PGA_WRITEABLE cannot be 2837 * set by another thread while the object is locked. Thus, 2838 * if PGA_WRITEABLE is clear, no page table entries need updating. 2839 */ 2840 VM_OBJECT_ASSERT_WLOCKED(m->object); 2841 if (!vm_page_xbusied(m) && (m->aflags & PGA_WRITEABLE) == 0) 2842 return; 2843 rw_rlock(&pvh_global_lock); 2844 lock = VM_PAGE_TO_PV_LIST_LOCK(m); 2845 retry_pv_loop: 2846 rw_wlock(lock); 2847 TAILQ_FOREACH(pv, &m->md.pv_list, pv_next) { 2848 pmap = PV_PMAP(pv); 2849 if (!PMAP_TRYLOCK(pmap)) { 2850 md_gen = m->md.pv_gen; 2851 rw_wunlock(lock); 2852 PMAP_LOCK(pmap); 2853 rw_wlock(lock); 2854 if (md_gen != m->md.pv_gen) { 2855 PMAP_UNLOCK(pmap); 2856 rw_wunlock(lock); 2857 goto retry_pv_loop; 2858 } 2859 } 2860 l3 = pmap_l3(pmap, pv->pv_va); 2861 retry: 2862 oldl3 = pmap_load(l3); 2863 if ((oldl3 & ATTR_AP_RW_BIT) == ATTR_AP(ATTR_AP_RW)) { 2864 if (!atomic_cmpset_long(l3, oldl3, 2865 oldl3 | ATTR_AP(ATTR_AP_RO))) 2866 goto retry; 2867 if ((oldl3 & ATTR_AF) != 0) 2868 vm_page_dirty(m); 2869 pmap_invalidate_page(pmap, pv->pv_va); 2870 } 2871 PMAP_UNLOCK(pmap); 2872 } 2873 rw_wunlock(lock); 2874 vm_page_aflag_clear(m, PGA_WRITEABLE); 2875 rw_runlock(&pvh_global_lock); 2876 } 2877 2878 static __inline boolean_t 2879 safe_to_clear_referenced(pmap_t pmap, pt_entry_t pte) 2880 { 2881 2882 return (FALSE); 2883 } 2884 2885 #define PMAP_TS_REFERENCED_MAX 5 2886 2887 /* 2888 * pmap_ts_referenced: 2889 * 2890 * Return a count of reference bits for a page, clearing those bits. 2891 * It is not necessary for every reference bit to be cleared, but it 2892 * is necessary that 0 only be returned when there are truly no 2893 * reference bits set. 2894 * 2895 * XXX: The exact number of bits to check and clear is a matter that 2896 * should be tested and standardized at some point in the future for 2897 * optimal aging of shared pages. 
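* Because the access flag is not yet handled in the fault handler, referenced, unwired mappings are removed here rather than having ATTR_AF cleared.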
2898 */ 2899 int 2900 pmap_ts_referenced(vm_page_t m) 2901 { 2902 pv_entry_t pv, pvf; 2903 pmap_t pmap; 2904 struct rwlock *lock; 2905 pd_entry_t *l2p, l2; 2906 pt_entry_t *l3; 2907 vm_paddr_t pa; 2908 int cleared, md_gen, not_cleared; 2909 struct spglist free; 2910 2911 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 2912 ("pmap_ts_referenced: page %p is not managed", m)); 2913 SLIST_INIT(&free); 2914 cleared = 0; 2915 pa = VM_PAGE_TO_PHYS(m); 2916 lock = PHYS_TO_PV_LIST_LOCK(pa); 2917 rw_rlock(&pvh_global_lock); 2918 rw_wlock(lock); 2919 retry: 2920 not_cleared = 0; 2921 if ((pvf = TAILQ_FIRST(&m->md.pv_list)) == NULL) 2922 goto out; 2923 pv = pvf; 2924 do { 2925 if (pvf == NULL) 2926 pvf = pv; 2927 pmap = PV_PMAP(pv); 2928 if (!PMAP_TRYLOCK(pmap)) { 2929 md_gen = m->md.pv_gen; 2930 rw_wunlock(lock); 2931 PMAP_LOCK(pmap); 2932 rw_wlock(lock); 2933 if (md_gen != m->md.pv_gen) { 2934 PMAP_UNLOCK(pmap); 2935 goto retry; 2936 } 2937 } 2938 l2p = pmap_l2(pmap, pv->pv_va); 2939 KASSERT(l2p != NULL, ("pmap_ts_referenced: no l2 table found")); 2940 l2 = pmap_load(l2p); 2941 KASSERT((l2 & ATTR_DESCR_MASK) == L2_TABLE, 2942 ("pmap_ts_referenced: found an invalid l2 table")); 2943 l3 = pmap_l2_to_l3(l2p, pv->pv_va); 2944 if ((pmap_load(l3) & ATTR_AF) != 0) { 2945 if (safe_to_clear_referenced(pmap, pmap_load(l3))) { 2946 /* 2947 * TODO: We don't handle the access flag 2948 * at all. We need to be able to set it in 2949 * the exception handler. 2950 */ 2951 panic("ARM64TODO: safe_to_clear_referenced\n"); 2952 } else if ((pmap_load(l3) & ATTR_SW_WIRED) == 0) { 2953 /* 2954 * Wired pages cannot be paged out so 2955 * doing accessed bit emulation for 2956 * them is wasted effort. We do the 2957 * hard work for unwired pages only. 2958 */ 2959 pmap_remove_l3(pmap, l3, pv->pv_va, l2, 2960 &free, &lock); 2961 pmap_invalidate_page(pmap, pv->pv_va); 2962 cleared++; 2963 if (pvf == pv) 2964 pvf = NULL; 2965 pv = NULL; 2966 KASSERT(lock == VM_PAGE_TO_PV_LIST_LOCK(m), 2967 ("inconsistent pv lock %p %p for page %p", 2968 lock, VM_PAGE_TO_PV_LIST_LOCK(m), m)); 2969 } else 2970 not_cleared++; 2971 } 2972 PMAP_UNLOCK(pmap); 2973 /* Rotate the PV list if it has more than one entry. */ 2974 if (pv != NULL && TAILQ_NEXT(pv, pv_next) != NULL) { 2975 TAILQ_REMOVE(&m->md.pv_list, pv, pv_next); 2976 TAILQ_INSERT_TAIL(&m->md.pv_list, pv, pv_next); 2977 m->md.pv_gen++; 2978 } 2979 } while ((pv = TAILQ_FIRST(&m->md.pv_list)) != pvf && cleared + 2980 not_cleared < PMAP_TS_REFERENCED_MAX); 2981 out: 2982 rw_wunlock(lock); 2983 rw_runlock(&pvh_global_lock); 2984 pmap_free_zero_pages(&free); 2985 return (cleared + not_cleared); 2986 } 2987 2988 /* 2989 * Apply the given advice to the specified range of addresses within the 2990 * given pmap. Depending on the advice, clear the referenced and/or 2991 * modified flags in each mapping and set the mapped page's dirty field. 2992 */ 2993 void 2994 pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, int advice) 2995 { 2996 } 2997 2998 /* 2999 * Clear the modify bits on the specified physical page. 3000 */ 3001 void 3002 pmap_clear_modify(vm_page_t m) 3003 { 3004 3005 KASSERT((m->oflags & VPO_UNMANAGED) == 0, 3006 ("pmap_clear_modify: page %p is not managed", m)); 3007 VM_OBJECT_ASSERT_WLOCKED(m->object); 3008 KASSERT(!vm_page_xbusied(m), 3009 ("pmap_clear_modify: page %p is exclusive busied", m)); 3010 3011 /* 3012 * If the page is not PGA_WRITEABLE, then no PTEs can have PG_M set. 
3013 * If the object containing the page is locked and the page is not 3014 * exclusive busied, then PGA_WRITEABLE cannot be concurrently set. 3015 */ 3016 if ((m->aflags & PGA_WRITEABLE) == 0) 3017 return; 3018 3019 /* ARM64TODO: We lack support for tracking if a page is modified */ 3020 } 3021 3022 void * 3023 pmap_mapbios(vm_paddr_t pa, vm_size_t size) 3024 { 3025 3026 return ((void *)PHYS_TO_DMAP(pa)); 3027 } 3028 3029 void 3030 pmap_unmapbios(vm_paddr_t pa, vm_size_t size) 3031 { 3032 } 3033 3034 /* 3035 * Sets the memory attribute for the specified page. 3036 */ 3037 void 3038 pmap_page_set_memattr(vm_page_t m, vm_memattr_t ma) 3039 { 3040 3041 m->md.pv_memattr = ma; 3042 3043 /* 3044 * ARM64TODO: Implement the below (from the amd64 pmap) 3045 * If "m" is a normal page, update its direct mapping. This update 3046 * can be relied upon to perform any cache operations that are 3047 * required for data coherence. 3048 */ 3049 if ((m->flags & PG_FICTITIOUS) == 0 && 3050 PHYS_IN_DMAP(VM_PAGE_TO_PHYS(m))) 3051 panic("ARM64TODO: pmap_page_set_memattr"); 3052 } 3053 3054 /* 3055 * perform the pmap work for mincore 3056 */ 3057 int 3058 pmap_mincore(pmap_t pmap, vm_offset_t addr, vm_paddr_t *locked_pa) 3059 { 3060 pd_entry_t *l1p, l1; 3061 pd_entry_t *l2p, l2; 3062 pt_entry_t *l3p, l3; 3063 vm_paddr_t pa; 3064 bool managed; 3065 int val; 3066 3067 PMAP_LOCK(pmap); 3068 retry: 3069 pa = 0; 3070 val = 0; 3071 managed = false; 3072 3073 l1p = pmap_l1(pmap, addr); 3074 if (l1p == NULL) /* No l1 */ 3075 goto done; 3076 l1 = pmap_load(l1p); 3077 if ((l1 & ATTR_DESCR_MASK) == L1_BLOCK) { 3078 pa = (l1 & ~ATTR_MASK) | (addr & L1_OFFSET); 3079 managed = (l1 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 3080 val = MINCORE_SUPER | MINCORE_INCORE; 3081 if (pmap_page_dirty(l1)) 3082 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3083 if ((l1 & ATTR_AF) == ATTR_AF) 3084 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3085 goto done; 3086 } 3087 3088 l2p = pmap_l1_to_l2(l1p, addr); 3089 if (l2p == NULL) /* No l2 */ 3090 goto done; 3091 l2 = pmap_load(l2p); 3092 if ((l2 & ATTR_DESCR_MASK) == L2_BLOCK) { 3093 pa = (l2 & ~ATTR_MASK) | (addr & L2_OFFSET); 3094 managed = (l2 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 3095 val = MINCORE_SUPER | MINCORE_INCORE; 3096 if (pmap_page_dirty(l2)) 3097 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3098 if ((l2 & ATTR_AF) == ATTR_AF) 3099 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3100 goto done; 3101 } 3102 3103 l3p = pmap_l2_to_l3(l2p, addr); 3104 if (l3p == NULL) /* No l3 */ 3105 goto done; 3106 l3 = pmap_load(l3p); 3107 if ((l3 & ATTR_DESCR_MASK) == L3_PAGE) { 3108 pa = (l3 & ~ATTR_MASK) | (addr & L3_OFFSET); 3109 managed = (l3 & ATTR_SW_MANAGED) == ATTR_SW_MANAGED; 3110 val = MINCORE_INCORE; 3111 if (pmap_page_dirty(l3)) 3112 val |= MINCORE_MODIFIED | MINCORE_MODIFIED_OTHER; 3113 if ((l3 & ATTR_AF) == ATTR_AF) 3114 val |= MINCORE_REFERENCED | MINCORE_REFERENCED_OTHER; 3115 } 3116 3117 done: 3118 if ((val & (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER)) != 3119 (MINCORE_MODIFIED_OTHER | MINCORE_REFERENCED_OTHER) && managed) { 3120 /* Ensure that "PHYS_TO_VM_PAGE(pa)->object" doesn't change.
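* If taking the page lock forces the pmap lock to be dropped, the table walk is retried.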
*/ 3121 if (vm_page_pa_tryrelock(pmap, pa, locked_pa)) 3122 goto retry; 3123 } else 3124 PA_UNLOCK_COND(*locked_pa); 3125 PMAP_UNLOCK(pmap); 3126 3127 return (val); 3128 } 3129 3130 void 3131 pmap_activate(struct thread *td) 3132 { 3133 pmap_t pmap; 3134 3135 critical_enter(); 3136 pmap = vmspace_pmap(td->td_proc->p_vmspace); 3137 td->td_pcb->pcb_l1addr = vtophys(pmap->pm_l1); 3138 __asm __volatile("msr ttbr0_el1, %0" : : "r"(td->td_pcb->pcb_l1addr)); 3139 pmap_invalidate_all(pmap); 3140 critical_exit(); 3141 } 3142 3143 void 3144 pmap_sync_icache(pmap_t pmap, vm_offset_t va, vm_size_t sz) 3145 { 3146 3147 if (va >= VM_MIN_KERNEL_ADDRESS) { 3148 cpu_icache_sync_range(va, sz); 3149 } else { 3150 u_int len, offset; 3151 vm_paddr_t pa; 3152 3153 /* Find the length of data in this page to flush */ 3154 offset = va & PAGE_MASK; 3155 len = imin(PAGE_SIZE - offset, sz); 3156 3157 while (sz != 0) { 3158 /* Extract the physical address & find it in the DMAP */ 3159 pa = pmap_extract(pmap, va); 3160 if (pa != 0) 3161 cpu_icache_sync_range(PHYS_TO_DMAP(pa), len); 3162 3163 /* Move to the next page */ 3164 sz -= len; 3165 va += len; 3166 /* Set the length for the next iteration */ 3167 len = imin(PAGE_SIZE, sz); 3168 } 3169 } 3170 } 3171 3172 /* 3173 * Increase the starting virtual address of the given mapping if a 3174 * different alignment might result in more superpage mappings. 3175 */ 3176 void 3177 pmap_align_superpage(vm_object_t object, vm_ooffset_t offset, 3178 vm_offset_t *addr, vm_size_t size) 3179 { 3180 } 3181 3182 /** 3183 * Get the kernel virtual address of a set of physical pages. If there are 3184 * physical addresses not covered by the DMAP perform a transient mapping 3185 * that will be removed when calling pmap_unmap_io_transient. 3186 * 3187 * \param page The pages the caller wishes to obtain the virtual 3188 * address on the kernel memory map. 3189 * \param vaddr On return contains the kernel virtual memory address 3190 * of the pages passed in the page parameter. 3191 * \param count Number of pages passed in. 3192 * \param can_fault TRUE if the thread using the mapped pages can take 3193 * page faults, FALSE otherwise. 3194 * 3195 * \returns TRUE if the caller must call pmap_unmap_io_transient when 3196 * finished or FALSE otherwise. 3197 * 3198 */ 3199 boolean_t 3200 pmap_map_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3201 boolean_t can_fault) 3202 { 3203 vm_paddr_t paddr; 3204 boolean_t needs_mapping; 3205 int error, i; 3206 3207 /* 3208 * Allocate any KVA space that we need, this is done in a separate 3209 * loop to prevent calling vmem_alloc while pinned. 
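* Pages whose physical addresses are already covered by the DMAP need no allocation; their direct map addresses are used.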
3210 */ 3211 needs_mapping = FALSE; 3212 for (i = 0; i < count; i++) { 3213 paddr = VM_PAGE_TO_PHYS(page[i]); 3214 if (__predict_false(paddr >= DMAP_MAX_PHYSADDR)) { 3215 error = vmem_alloc(kernel_arena, PAGE_SIZE, 3216 M_BESTFIT | M_WAITOK, &vaddr[i]); 3217 KASSERT(error == 0, ("vmem_alloc failed: %d", error)); 3218 needs_mapping = TRUE; 3219 } else { 3220 vaddr[i] = PHYS_TO_DMAP(paddr); 3221 } 3222 } 3223 3224 /* Exit early if everything is covered by the DMAP */ 3225 if (!needs_mapping) 3226 return (FALSE); 3227 3228 if (!can_fault) 3229 sched_pin(); 3230 for (i = 0; i < count; i++) { 3231 paddr = VM_PAGE_TO_PHYS(page[i]); 3232 if (paddr >= DMAP_MAX_PHYSADDR) { 3233 panic( 3234 "pmap_map_io_transient: TODO: Map out of DMAP data"); 3235 } 3236 } 3237 3238 return (needs_mapping); 3239 } 3240 3241 void 3242 pmap_unmap_io_transient(vm_page_t page[], vm_offset_t vaddr[], int count, 3243 boolean_t can_fault) 3244 { 3245 vm_paddr_t paddr; 3246 int i; 3247 3248 if (!can_fault) 3249 sched_unpin(); 3250 for (i = 0; i < count; i++) { 3251 paddr = VM_PAGE_TO_PHYS(page[i]); 3252 if (paddr >= DMAP_MAX_PHYSADDR) { 3253 panic("ARM64TODO: pmap_unmap_io_transient: Unmap data"); 3254 } 3255 } 3256 } 3257